/* POLE - Portable C++ library to access OLE Storage Copyright (C) 2002-2005 Ariya Hidayat Performance optimization: Dmitry Fedorov Copyright 2009 Fix for more than 236 mbat block entries : Michel Boudinot Copyright 2010 Considerable rework to allow for creation and updating of structured storage : Stephen Baum Copyright 2013 Added GetAllStreams, reworked datatypes Copyright 2013 Felix Gorny from Bitplane More datatype changes to allow for 32 and 64 bit code, some fixes involving incremental updates, flushing Copyright 2013 Version: 0.5.2 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the authors nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include "pole.h" #include "../../../DesktopEditor/common/File.h" // enable to activate debugging output // #define POLE_DEBUG #define CACHEBUFSIZE 4096 //a presumably reasonable size for the read cache namespace POLE { class Header { public: unsigned char id[8]; // signature, or magic identifier uint64 b_shift; // bbat->blockSize = 1 << b_shift uint64 s_shift; // sbat->blockSize = 1 << s_shift uint64 num_bat; // blocks allocated for big bat uint64 dirent_start; // starting block for directory info uint64 threshold; // switch from small to big file (usually 4K) uint64 sbat_start; // starting block index to store small bat uint64 num_sbat; // blocks allocated for small bat uint64 mbat_start; // starting block to store meta bat uint64 num_mbat; // blocks allocated for meta bat uint64 bb_blocks[109]; bool dirty; // Needs to be written Header(); bool valid(); void load( const unsigned char* buffer ); void save( unsigned char* buffer ); void debug(); }; class AllocTable { public: AllocTable(); static const uint64 Eof; static const uint64 Avail; static const uint64 Bat; static const uint64 MetaBat; uint64 blockSize; void clear() { data.clear(); dirtyBlocks.clear(); } uint64 count(); uint64 unusedCount(); void resize( uint64 newsize ); void preserve( uint64 n ); void set( uint64 index, uint64 val ); unsigned unused(); void setChain( std::vector ); std::vector follow( uint64 start ); uint64 operator[](uint64 index ); void load( const unsigned char* buffer, uint64 len ); void save( unsigned char* buffer ); uint64 size(); void debug(); bool isDirty(); void markAsDirty(uint64 dataIndex, int64 bigBlockSize); void flush(std::vector blocks, StorageIO *const io, int64 bigBlockSize); private: std::vector data; std::map dirtyBlocks; bool bMaybeFragmented; AllocTable( const AllocTable& ); AllocTable& operator=( const AllocTable& ); }; class DirEntry { public: DirEntry(): valid(), name(), dir(), size(), start(), prev(), next(), child() {} bool valid; // false if invalid (should be skipped) wchar_t prefix; std::wstring name; // the name, not in unicode anymore bool dir; // true if directory uint64 size; // size (not valid if directory) uint64 start; // starting block uint64 prev; // previous sibling uint64 next; // next sibling uint64 child; // first child int compare(const DirEntry& de); int compare(const std::wstring& name2); }; class DirTree { public: static const uint64 End; DirTree(int64 bigBlockSize); void clear(int64 bigBlockSize); inline uint64 entryCount(); uint64 unusedEntryCount(); DirEntry* entry( uint64 index ); DirEntry* entry( const std::wstring& name, bool create = false, int64 bigBlockSize = 0, StorageIO *const io = 0, int64 streamSize = 0); int64 indexOf( DirEntry* e ); int64 parent( uint64 index ); std::wstring fullName( uint64 index ); std::vector children( uint64 index ); uint64 find_child( uint64 index, const std::wstring& name, uint64 &closest ); void load( unsigned char* buffer, uint64 len ); void save( unsigned char* buffer ); uint64 size(); void debug(); bool isDirty(); void markAsDirty(uint64 dataIndex, int64 bigBlockSize); void flush(std::vector blocks, StorageIO *const io, int64 bigBlockSize, uint64 sb_start, uint64 sb_size); uint64 unused(); void findParentAndSib(uint64 inIdx, const std::wstring& inFullName, uint64 &parentIdx, uint64 &sibIdx); uint64 findSib(uint64 inIdx, uint64 sibIdx); void deleteEntry(DirEntry *entry, const std::wstring& inFullName, int64 bigBlockSize); private: std::vector entries; std::map dirtyBlocks; DirTree( const DirTree& ); DirTree& operator=( const DirTree& ); }; class StorageIO { public: Storage* storage; // owner std::wstring filename; // filename std::fstream file; // associated with above name int64 result; // result of operation bool opened; // true if file is opened uint64 filesize; // size of the file bool writeable; // true if the file can be modified Header* header; // storage header DirTree* dirtree; // directory tree AllocTable* bbat; // allocation table for big blocks AllocTable* sbat; // allocation table for small blocks std::vector sb_blocks; // blocks for "small" files std::vector mbat_blocks; // blocks for doubly indirect indices to big blocks std::vector mbat_data; // the additional indices to big blocks bool mbatDirty; // If true, mbat_blocks need to be written std::list streams; StorageIO( Storage* storage, const wchar_t* filename ); ~StorageIO(); bool open(bool bWriteAccess = false, bool bCreate = false); void close(); void flush(); void load(bool bWriteAccess); void create(); void init(); bool deleteByName(const std::wstring& fullName); bool deleteNode(DirEntry *entry, const std::wstring& fullName); bool deleteLeaf(DirEntry *entry, const std::wstring& fullName); uint64 loadBigBlocks( std::vector blocks, unsigned char* buffer, uint64 maxlen ); uint64 loadBigBlock( uint64 block, unsigned char* buffer, uint64 maxlen ); uint64 saveBigBlocks( std::vector blocks, uint64 offset, unsigned char* buffer, uint64 len ); uint64 saveBigBlock( uint64 block, uint64 offset, unsigned char*buffer, uint64 len ); uint64 loadSmallBlocks( std::vector blocks, unsigned char* buffer, uint64 maxlen ); uint64 loadSmallBlock( uint64 block, unsigned char* buffer, uint64 maxlen ); uint64 saveSmallBlocks( std::vector blocks, uint64 offset, unsigned char* buffer, uint64 len, int64 startAtBlock = 0 ); uint64 saveSmallBlock( uint64 block, uint64 offset, unsigned char* buffer, uint64 len ); StreamIO* streamIO( const std::wstring& name, bool bCreate = false, int64 streamSize = 0 ); void flushbbat(); void flushsbat(); std::vector getbbatBlocks(bool bLoading); uint64 ExtendFile( std::vector *chain ); void addbbatBlock(); private: // no copy or assign StorageIO( const StorageIO& ); StorageIO& operator=( const StorageIO& ); }; class StreamIO { public: StorageIO* io; int64 entryIdx; //needed because a pointer to DirEntry will change whenever entries vector changes. std::wstring fullName; bool eof; bool fail; StreamIO( StorageIO* io, DirEntry* entry ); ~StreamIO(); uint64 size(); void setSize(uint64 newSize); void seek( uint64 pos ); uint64 tell(); int64 getch(); uint64 read( unsigned char* data, uint64 maxlen ); uint64 read( uint64 pos, unsigned char* data, uint64 maxlen ); uint64 write( unsigned char* data, uint64 len ); uint64 write( uint64 pos, unsigned char* data, uint64 len ); void flush(); private: std::vector blocks; // no copy or assign StreamIO( const StreamIO& ); StreamIO& operator=( const StreamIO& ); // pointer for read uint64 m_pos; // simple cache system to speed-up getch() unsigned char* cache_data; uint64 cache_size; uint64 cache_pos; void updateCache(); }; }; // namespace POLE using namespace POLE; #ifdef POLE_USE_UTF16_FILENAMES #else #include "../../../DesktopEditor/common/File.h" static std::string stringWToUtf8String (const std::wstring& aaSrc) { return U_TO_UTF8(aaSrc); } #endif //POLE_USE_UTF16_FILENAMES static void fileCheck(std::fstream &file) { bool bGood, bFail, bEof, bBad; bool bNOTOK; bGood = file.good(); bFail = file.fail(); bEof = file.eof(); bBad = file.bad(); if (bFail || bEof || bBad) bNOTOK = true; //this doesn't really do anything, but it is a good place to set a breakpoint! file.clear(); } static inline uint32 readU16( const unsigned char* ptr ) { return ptr[0]+(ptr[1]<<8); } static inline uint32 readU32( const unsigned char* ptr ) { return ptr[0]+(ptr[1]<<8)+(ptr[2]<<16)+(ptr[3]<<24); } static inline void writeU16( unsigned char* ptr, uint32 data ) { ptr[0] = (unsigned char)(data & 0xff); ptr[1] = (unsigned char)((data >> 8) & 0xff); } static inline void writeU32( unsigned char* ptr, uint32 data ) { ptr[0] = (unsigned char)(data & 0xff); ptr[1] = (unsigned char)((data >> 8) & 0xff); ptr[2] = (unsigned char)((data >> 16) & 0xff); ptr[3] = (unsigned char)((data >> 24) & 0xff); } static const unsigned char pole_magic[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; // =========== Header ========== Header::Header() : b_shift(9), // [1EH,02] size of sectors in power-of-two; typically 9 indicating 512-byte sectors s_shift(6), // [20H,02] size of mini-sectors in power-of-two; typically 6 indicating 64-byte mini-sectors num_bat(0), // [2CH,04] number of SECTs in the FAT chain dirent_start(0), // [30H,04] first SECT in the directory chain threshold(4096), // [38H,04] maximum size for a mini stream; typically 4096 bytes sbat_start(0), // [3CH,04] first SECT in the MiniFAT chain num_sbat(0), // [40H,04] number of SECTs in the MiniFAT chain mbat_start(AllocTable::Eof),// [44H,04] first SECT in the DIFAT chain num_mbat(0), // [48H,04] number of SECTs in the DIFAT chain dirty(true) { for( unsigned int i = 0; i < 8; i++ ) id[i] = pole_magic[i]; for( unsigned int i=0; i<109; i++ ) bb_blocks[i] = AllocTable::Avail; } bool Header::valid() { if( threshold != 4096 ) return false; if( num_bat == 0 ) return false; //if( (num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false; // dima: incorrect check, number may be arbitrary larger if( (num_bat < 109) && (num_mbat != 0) ) return false; if( s_shift > b_shift ) return false; if( b_shift <= 6 ) return false; if( b_shift >=31 ) return false; return true; } void Header::load( const unsigned char* buffer ) { b_shift = readU16( buffer + 0x1e ); // [1EH,02] size of sectors in power-of-two; typically 9 indicating 512-byte sectors and 12 for 4096 s_shift = readU16( buffer + 0x20 ); // [20H,02] size of mini-sectors in power-of-two; typically 6 indicating 64-byte mini-sectors num_bat = readU32( buffer + 0x2c ); // [2CH,04] number of SECTs in the FAT chain dirent_start = readU32( buffer + 0x30 ); // [30H,04] first SECT in the directory chain threshold = readU32( buffer + 0x38 ); // [38H,04] maximum size for a mini stream; typically 4096 bytes sbat_start = readU32( buffer + 0x3c ); // [3CH,04] first SECT in the MiniFAT chain num_sbat = readU32( buffer + 0x40 ); // [40H,04] number of SECTs in the MiniFAT chain mbat_start = readU32( buffer + 0x44 ); // [44H,04] first SECT in the DIFAT chain num_mbat = readU32( buffer + 0x48 ); // [48H,04] number of SECTs in the DIFAT chain if (threshold == 0) threshold = 4096; //поле не задано ... файл xls от полльзователя for( unsigned int i = 0; i < 8; i++ ) id[i] = buffer[i]; // [4CH,436] the SECTs of first 109 FAT sectors for( unsigned int i=0; i<109; i++ ) bb_blocks[i] = readU32( buffer + 0x4C+i*4 ); dirty = false; } void Header::save( unsigned char* buffer ) { memset( buffer, 0, 0x4c ); memcpy( buffer, pole_magic, 8 ); // ole signature writeU32( buffer + 8, 0 ); // reserved must be zero writeU32( buffer + 12, 0 ); // reserved must be zero writeU32( buffer + 16, 0 ); // reserved must be zero writeU16( buffer + 24, 0x003e ); // minor version of the format: 33 writeU16( buffer + 26, 3 ); // major version (512 clasters) writeU16( buffer + 28, 0xfffe ); // indicates Intel byte-ordering writeU16( buffer + 0x1e, (uint32) b_shift ); //size of sectors in power-of-two writeU16( buffer + 0x20, (uint32) s_shift ); writeU32( buffer + 0x2c, (uint32) num_bat ); writeU32( buffer + 0x30, (uint32) dirent_start ); writeU32( buffer + 0x38, (uint32) threshold ); writeU32( buffer + 0x3c, (uint32) sbat_start ); writeU32( buffer + 0x40, (uint32) num_sbat ); writeU32( buffer + 0x44, (uint32) mbat_start ); writeU32( buffer + 0x48, (uint32) num_mbat ); for( unsigned int i = 0; i < 109; i++ ) writeU32( buffer + 0x4C + i * 4, (uint32) bb_blocks[i] ); dirty = false; } void Header::debug() { std::cout << std::endl; std::cout << "b_shift " << b_shift << std::endl; std::cout << "s_shift " << s_shift << std::endl; std::cout << "num_bat " << num_bat << std::endl; std::cout << "dirent_start " << dirent_start << std::endl; std::cout << "threshold " << threshold << std::endl; std::cout << "sbat_start " << sbat_start << std::endl; std::cout << "num_sbat " << num_sbat << std::endl; std::cout << "mbat_start " << mbat_start << std::endl; std::cout << "num_mbat " << num_mbat << std::endl; uint64 s = (num_bat<=109) ? num_bat : 109; std::cout << "bat blocks: "; for( uint64 i = 0; i < s; i++ ) std::cout << bb_blocks[i] << " "; std::cout << std::endl; } // =========== AllocTable ========== const uint64 AllocTable::Avail = 0xffffffff; const uint64 AllocTable::Eof = 0xfffffffe; const uint64 AllocTable::Bat = 0xfffffffd; const uint64 AllocTable::MetaBat = 0xfffffffc; AllocTable::AllocTable() : blockSize(4096), bMaybeFragmented(true) { // initial size resize( 128 ); } uint64 AllocTable::count() { return static_cast(data.size()); } uint64 AllocTable::unusedCount() { uint64 maxIdx = count(); uint64 nFound = 0; for (uint64 idx = 0; idx < maxIdx; idx++) { if( data[idx] == Avail ) nFound++; } return nFound; } void AllocTable::resize( uint64 newsize ) { uint64 oldsize = static_cast(data.size()); data.resize( newsize ); if( newsize > oldsize ) for( uint64 i = oldsize; i pre; for( unsigned i=0; i < n; i++ ) pre.push_back( unused() ); } uint64 AllocTable::operator[]( uint64 index ) { uint64 result; result = data[index]; return result; } void AllocTable::set( uint64 index, uint64 value ) { if( index >= count() ) resize( index + 1); data[ index ] = value; if (value == Avail) bMaybeFragmented = true; } void AllocTable::setChain( std::vector chain ) { if( chain.size() ) { for( unsigned i=0; i AllocTable::follow( uint64 start ) { std::vector chain; if( start >= count() ) return chain; uint64 p = start; std::map used; used.insert(std::make_pair(start, (char)0)); while (p < count()) { if (p == (uint64)Eof) break; if (p == (uint64)Bat) break; if (p == (uint64)MetaBat) break; if (p >= count()) break; chain.push_back(p); if (data[p] >= count()) break; if (used.find(data[p]) != used.end()) { //cycle break; } p = data[p]; used.insert(std::make_pair(p, (char)0)); } return chain; } unsigned AllocTable::unused() { // find first available block unsigned int maxIdx = (unsigned int) data.size(); if (bMaybeFragmented) { for( unsigned i = 0; i < maxIdx; i++ ) if( data[i] == Avail ) return i; } // completely full, so enlarge the table unsigned int block = maxIdx; resize( maxIdx ); bMaybeFragmented = false; return block; } void AllocTable::load( const unsigned char* buffer, uint64 len ) { resize( len / 4 ); for( unsigned i = 0; i < count(); i++ ) set( i, readU32( buffer + i*4 ) ); } // return space required to save this dirtree uint64 AllocTable::size() { return count() * 4; } void AllocTable::save( unsigned char* buffer ) { for( uint64 i = 0; i < count(); i++ ) writeU32( buffer + i*4, (uint32) data[i] ); } bool AllocTable::isDirty() { return (false == dirtyBlocks.empty()); } void AllocTable::markAsDirty(uint64 dataIndex, int64 bigBlockSize) { uint64 dbidx = dataIndex / (bigBlockSize / sizeof(uint32)); std::map::iterator pFind = dirtyBlocks.find(dbidx); if (pFind == dirtyBlocks.end()) dirtyBlocks.insert(std::make_pair(dbidx, 0)); } void AllocTable::flush(std::vector blocks, StorageIO *const io, int64 bigBlockSize) { unsigned char *buffer = new unsigned char[bigBlockSize * blocks.size()]; memset(buffer, 0, bigBlockSize * blocks.size()); save(buffer); for (uint64 idx = 0; idx < static_cast(blocks.size()); idx++) { std::map::iterator pFind = dirtyBlocks.find(idx); if (pFind != dirtyBlocks.end()) io->saveBigBlock(blocks[idx], 0, &buffer[bigBlockSize * idx], bigBlockSize); } dirtyBlocks.clear(); delete[] buffer; } void AllocTable::debug() { std::cout << "block size " << data.size() << std::endl; for( unsigned i=0; i< data.size(); i++ ) { if( data[i] == Avail ) continue; std::cout << i << ": "; if( data[i] == Eof ) std::cout << "[eof]"; else if( data[i] == Bat ) std::cout << "[bat]"; else if( data[i] == MetaBat ) std::cout << "[metabat]"; else std::cout << data[i]; std::cout << std::endl; } } // =========== DirEntry ========== // "A node with a shorter name is less than a node with a inter name" // "For nodes with the same length names, compare the two names." // --Windows Compound Binary File Format Specification, Section 2.5 int DirEntry::compare(const DirEntry& de) { return compare(de.name); } int DirEntry::compare(const std::wstring& name2) { if (name.length() < name2.length()) return -1; else if (name.length() > name2.length()) return 1; else return name.compare(name2); } // =========== DirTree ========== const uint64 DirTree::End = 0xffffffff; DirTree::DirTree(int64 bigBlockSize) { clear(bigBlockSize); } void DirTree::clear(int64 bigBlockSize) { // leave only root entry entries.resize( 1 ); entries[0].valid = true; entries[0].name = L"Root Entry"; entries[0].dir = true; entries[0].size = 0; entries[0].start = End; entries[0].prev = End; entries[0].next = End; entries[0].child = End; markAsDirty(0, bigBlockSize); } inline uint64 DirTree::entryCount() { return entries.size(); } uint64 DirTree::unusedEntryCount() { uint64 nFound = 0; for (uint64 idx = 0; idx < entryCount(); idx++) { if (!entries[idx].valid) nFound++; } return nFound; } DirEntry* DirTree::entry( uint64 index ) { if( index >= entryCount() ) return (DirEntry*) 0; return &entries[ index ]; } int64 DirTree::indexOf( DirEntry* e ) { for( uint64 i = 0; i < entryCount(); i++ ) if( entry( i ) == e ) return i; return -1; } int64 DirTree::parent( uint64 index ) { // brute-force, basically we iterate for each entries, find its children // and check if one of the children is 'index' for( uint64 j=0; j chi = children( j ); for( size_t i=0; iname; result.insert( 0, L"/" ); uint64 p = parent( index ); DirEntry * _entry = 0; while( p > 0 ) { _entry = entry( p ); if (_entry->dir && _entry->valid) { result.insert( 0, _entry->name); result.insert( 0, L"/" ); } --p; index = p; if( index <= 0 ) break; } return result; } // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry // if not found and create is false, return 0 // if create is true, a new entry is returned DirEntry* DirTree::entry( const std::wstring& name, bool create, int64 bigBlockSize, StorageIO *const io, int64 streamSize) { if( name.empty() ) return (DirEntry*)0; // quick check for "/" (that's root) if( name == L"/" ) return entry( 0 ); // split the names, e.g "/ObjectPool/_1020961869" will become: // "ObjectPool" and "_1020961869" std::list names; std::wstring::size_type start = 0, end = 0; if( name[0] == '/' ) start++; int levelsLeft = 0; while( start < name.length() ) { end = name.find_first_of( L'/', start ); if( end == std::wstring::npos ) end = name.length(); names.push_back( name.substr( start, end-start ) ); levelsLeft++; start = end+1; } // start from root int64 index = 0; // trace one by one std::list::iterator it; for( it = names.begin(); it != names.end(); ++it ) { // find among the children of index levelsLeft--; uint64 child = 0; // dima: this block is really inefficient //std::vector chi = children( index ); //for( size_t i = 0; i < chi.size(); i++ ) //{ // DirEntry* ce = entry( chi[i] ); // if( ce ) // if( ce->valid && ( ce->name !="/" ) ) // if( ce->name == *it ) { // child = chi[i]; // break; // } //} // dima: performance optimisation of the previous uint64 closest = End; child = find_child( index, *it, closest ); // traverse to the child if( child > 0 ) { index = child; } else if( !create || !io->writeable) { std::vector chi = children( index ); for( size_t i = 0; i < chi.size(); i++ ) { DirEntry* ce = entry( chi[i] ); if( ce ) if( ce->valid && ( ce->name != L"/" )/*( ce->name.length()>1 )*/ ) if( ce->name == *it ) { child = chi[i]; break; } } } if( child > 0 ) { index = child; } else { // not found among children ..wrong header??? if( !create || !io->writeable) { return (DirEntry*)0; } // create a new entry uint64 parent2 = index; index = unused(); DirEntry* e = entry( index ); e->valid = true; e->name = *it; e->dir = (levelsLeft > 0); if (!e->dir) e->size = streamSize; else e->size = 0; e->start = AllocTable::Eof; e->child = End; if (closest == End) { e->prev = End; e->next = entry(parent2)->child; entry(parent2)->child = index; markAsDirty(parent2, bigBlockSize); } else { DirEntry* closeE = entry( closest ); if (closeE->compare(*e) < 0) { e->prev = closeE->next; e->next = End; closeE->next = index; } else { e->next = closeE->prev; e->prev = End; closeE->prev = index; } markAsDirty(closest, bigBlockSize); } markAsDirty(index, bigBlockSize); uint64 bbidx = index / (bigBlockSize / 128); std::vector blocks = io->bbat->follow(io->header->dirent_start); while (blocks.size() <= bbidx) { uint64 nblock = io->bbat->unused(); if (blocks.size() > 0) { io->bbat->set(blocks[static_cast(blocks.size())-1], nblock); io->bbat->markAsDirty(blocks[static_cast(blocks.size())-1], bigBlockSize); } io->bbat->set(nblock, AllocTable::Eof); io->bbat->markAsDirty(nblock, bigBlockSize); blocks.push_back(nblock); uint64 bbidx = nblock / (io->bbat->blockSize / sizeof(uint64)); while (bbidx >= io->header->num_bat) io->addbbatBlock(); } } } return entry( index ); } // helper function: recursively find siblings of index void dirtree_find_siblings( DirTree* dirtree, std::vector& result, uint64 index ) { DirEntry* e = dirtree->entry( index ); if (!e) return; if (e->prev != DirTree::End) dirtree_find_siblings(dirtree, result, e->prev); result.push_back(index); if (e->next != DirTree::End) dirtree_find_siblings(dirtree, result, e->next); } std::vector DirTree::children( uint64 index ) { std::vector result; DirEntry* e = entry( index ); if( e ) if( e->valid && e->child < entryCount() ) dirtree_find_siblings( this, result, e->child ); return result; } uint64 dirtree_find_sibling( DirTree* dirtree, uint64 index, const std::wstring& name, uint64& closest ) { uint64 count = dirtree->entryCount(); DirEntry* e = dirtree->entry( index ); if (!e || !e->valid) return 0; int cval = e->compare(name); if (cval == 0) return index; if (cval > 0) { if (e->prev > 0 && e->prev < count) return dirtree_find_sibling( dirtree, e->prev, name, closest ); } else { if (e->next > 0 && e->next < count) return dirtree_find_sibling( dirtree, e->next, name, closest ); } closest = index; return 0; } uint64 DirTree::find_child( uint64 index, const std::wstring& name, uint64& closest ) { uint64 count = entryCount(); DirEntry* p = entry( index ); if (p && p->valid && p->child < count ) return dirtree_find_sibling( this, p->child, name, closest ); return 0; } void DirTree::load( unsigned char* buffer, uint64 size ) { entries.clear(); for( uint64 i = 0; i < size/128; i++ ) { uint64 p = i * 128; // would be < 32 if first char in the name isn't printable unsigned char prefix = 32; // parse name of this entry, which stored as Unicode 16-bit int name_len = readU16( buffer + 0x40 + p ); if( name_len > 64 ) name_len = 64; //std::string name; //for( int j = 0; ( buffer[j + p]) && ( j < name_len); j+= 2 ) // name.append( 1, buffer[j + p] ); //name = std::string((char*)buffer + p, name_len); // first char isn't printable ? remove it... std::wstring name; if (name_len > 1) { if (sizeof(wchar_t) == 2) { name = std::wstring((wchar_t*)(buffer + p), name_len / 2 - 1); } else { name = convertUtf16ToWString((UTF16*)(buffer + p), name_len / 2 - 1); } if( name[0] < 32 ) { prefix = name[0]; name.erase( 0, 1 ); } } // 2 = file (aka stream), 1 = directory (aka storage), 5 = root unsigned type = buffer[ 0x42 + p]; DirEntry e; e.valid = ( type != 0 ); e.name = name; e.prefix = prefix; e.start = readU32( buffer + 0x74+p ); e.size = readU32( buffer + 0x78+p ); e.prev = readU32( buffer + 0x44+p ); e.next = readU32( buffer + 0x48+p ); e.child = readU32( buffer + 0x4C+p ); e.dir = ( type!=2 ); // sanity checks if( (type != 2) && (type != 1 ) && (type != 5 ) ) e.valid = false; if( name_len < 1 && false == entries.empty()) e.valid = false; //conv_WiwbV5sztXdFmZC6XhI__xlsx.xls entries.push_back( e ); } } // return space required to save this dirtree uint64 DirTree::size() { return entryCount() * 128; } void DirTree::save( unsigned char* buffer ) { memset( buffer, 0, size() ); // root is fixed as "Root Entry" DirEntry* root = entry( 0 ); std::wstring name = L"Root Entry"; if (sizeof(wchar_t) == 2) { memcpy( buffer, (unsigned char*)name.c_str(), name.length() * 2); } else { unsigned char* pData = NULL; int size = 0; convertWStringToUtf16(name, pData, size); if (pData) { memcpy( buffer, pData, size); delete []pData; } } writeU16( buffer + 0x40, static_cast(name.length() * 2 + 2) ); writeU32( buffer + 0x74, 0xffffffff ); writeU32( buffer + 0x78, 0 ); writeU32( buffer + 0x44, 0xffffffff ); writeU32( buffer + 0x48, 0xffffffff ); writeU32( buffer + 0x4c, (uint32) root->child ); buffer[ 0x42 ] = 5; //buffer[ 0x43 ] = 1; for( unsigned int i = 1; i < entryCount(); i++ ) { DirEntry* e = entry( i ); if( !e ) continue; if( e->dir ) { e->start = 0xffffffff; e->size = 0; } // max length for name is 32 chars std::wstring name = e->name; if( name.length() > 32 ) { name.erase( 32, name.length() ); } if (sizeof(wchar_t) == 2) { memcpy( buffer + i * 128, (unsigned char*)name.c_str(), name.length() * 2); } else { unsigned char* pData = NULL; int size = 0; convertWStringToUtf16(name, pData, size); if (pData) { memcpy( buffer + i * 128, pData, size); delete []pData; } } writeU16( buffer + i*128 + 0x40, static_cast(name.length()*2 + 2) ); writeU32( buffer + i*128 + 0x74, (uint32) e->start ); writeU32( buffer + i*128 + 0x78, (uint32) e->size ); writeU32( buffer + i*128 + 0x44, (uint32) e->prev ); writeU32( buffer + i*128 + 0x48, (uint32) e->next ); writeU32( buffer + i*128 + 0x4c, (uint32) e->child ); if (!e->valid) buffer[ i*128 + 0x42 ] = 0; //STGTY_INVALID else buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2; //STGTY_STREAM or STGTY_STORAGE buffer[ i*128 + 0x43 ] = 1; // always black } } bool DirTree::isDirty() { return (dirtyBlocks.empty() == false); } void DirTree::markAsDirty(uint64 dataIndex, int64 bigBlockSize) { uint64 dbidx = dataIndex / (bigBlockSize / 128); std::map::iterator pFind = dirtyBlocks.find(dbidx); if (pFind == dirtyBlocks.end()) dirtyBlocks.insert(std::make_pair(dbidx, 0)); } void DirTree::flush(std::vector blocks, StorageIO *const io, int64 bigBlockSize, uint64 sb_start, uint64 sb_size) { uint64 bufLen = size(); unsigned char *buffer = new unsigned char[bufLen]; memset(buffer, 0, bufLen); save(buffer); writeU32( buffer + 0x74, (uint32) sb_start ); writeU32( buffer + 0x78, (uint32) sb_size ); for (uint64 idx = 0; idx < static_cast(blocks.size()); idx++) { std::map::iterator pFind = dirtyBlocks.find(idx); if (pFind != dirtyBlocks.end()) { uint64 bytesToWrite = bigBlockSize; uint64 pos = bigBlockSize * idx; if ((bufLen - pos) < bytesToWrite) bytesToWrite = bufLen - pos; io->saveBigBlock(blocks[idx], 0, &buffer[pos], bytesToWrite); } } dirtyBlocks.clear(); delete[] buffer; } uint64 DirTree::unused() { for (uint64 idx = 0; idx < static_cast(entryCount()); idx++) { if (!entries[idx].valid) return idx; } entries.push_back(DirEntry()); return entryCount()-1; } // Utility function to get the index of the parent dirEntry, given that we already have a full name it is relatively fast. // Then look for a sibling dirEntry that points to inIdx. In some circumstances, the dirEntry at inIdx will be the direct child // of the parent, in which case sibIdx will be returned as 0. A failure is indicated if both parentIdx and sibIdx are returned as 0. void DirTree::findParentAndSib(uint64 inIdx, const std::wstring& inFullName, uint64& parentIdx, uint64& sibIdx) { sibIdx = 0; parentIdx = 0; if (inIdx == 0 || inIdx >= entryCount() || inFullName == L"/" || inFullName == L"") return; std::wstring localName = inFullName; if (localName[0] != L'/') localName = L'/' + localName; std::wstring parentName = localName; if (parentName[parentName.size()-1] == L'/') parentName = parentName.substr(0, parentName.size()-1); std::wstring::size_type lastSlash; lastSlash = parentName.find_last_of(L'/'); if (lastSlash == std::wstring::npos) return; if (lastSlash == 0) lastSlash = 1; //leave root parentName = parentName.substr(0, lastSlash); DirEntry *parent2 = entry(parentName); parentIdx = indexOf(parent2); if (parent2->child == inIdx) return; //successful return, no sibling points to inIdx sibIdx = findSib(inIdx, parent2->child); } // Utility function to get the index of the sibling dirEntry which points to inIdx. It is the responsibility of the original caller // to start with the root sibling - i.e., sibIdx should be pointed to by the parent node's child. uint64 DirTree::findSib(uint64 inIdx, uint64 sibIdx) { DirEntry *sib = entry(sibIdx); if (!sib || !sib->valid) return 0; if (sib->next == inIdx || sib->prev == inIdx) return sibIdx; DirEntry *targetSib = entry(inIdx); int cval = sib->compare(*targetSib); if (cval > 0) return findSib(inIdx, sib->prev); else return findSib(inIdx, sib->next); } void DirTree::deleteEntry(DirEntry *dirToDel, const std::wstring& inFullName, int64 bigBlockSize) { uint64 parentIdx; uint64 sibIdx; uint64 inIdx = indexOf(dirToDel); uint64 nEntries = entryCount(); findParentAndSib(inIdx, inFullName, parentIdx, sibIdx); uint64 replIdx; if (!dirToDel->next || dirToDel->next > nEntries) replIdx = dirToDel->prev; else { DirEntry *sibNext = entry(dirToDel->next); if (!sibNext->prev || sibNext->prev > nEntries) { replIdx = dirToDel->next; sibNext->prev = dirToDel->prev; markAsDirty(replIdx, bigBlockSize); } else { DirEntry *smlSib = sibNext; int64 smlIdx = dirToDel->next; DirEntry *smlrSib; int64 smlrIdx = -1; for ( ; ; ) { smlrIdx = smlSib->prev; smlrSib = entry(smlrIdx); if (!smlrSib->prev || smlrSib->prev > nEntries) break; smlSib = smlrSib; smlIdx = smlrIdx; } replIdx = smlSib->prev; smlSib->prev = smlrSib->next; smlrSib->prev = dirToDel->prev; smlrSib->next = dirToDel->next; markAsDirty(smlIdx, bigBlockSize); markAsDirty(smlrIdx, bigBlockSize); } } if (sibIdx) { DirEntry *sib = entry(sibIdx); if (sib->next == inIdx) sib->next = replIdx; else sib->prev = replIdx; markAsDirty(sibIdx, bigBlockSize); } else { DirEntry *parNode = entry(parentIdx); parNode->child = replIdx; markAsDirty(parentIdx, bigBlockSize); } dirToDel->valid = false; //indicating that this entry is not in use markAsDirty(inIdx, bigBlockSize); } void DirTree::debug() { for( unsigned i = 0; i < entryCount(); i++ ) { DirEntry* e = entry( i ); if( !e ) continue; std::cout << i << ": "; if( !e->valid ) std::cout << L"INVALID "; std::wcout << e->name << L" "; if( e->dir ) std::cout << L"(Dir) "; else std::cout << L"(File) "; std::cout << e->size << L" "; std::cout << L"s:" << e->start << L" "; std::cout << L"("; if( e->child == End ) std::cout << L"-"; else std::cout << e->child; std::cout << " "; if( e->prev == End ) std::cout << L"-"; else std::cout << e->prev; std::cout << L":"; if( e->next == End ) std::cout << L"-"; else std::cout << e->next; std::cout << L")"; std::cout << std::endl; } } // =========== StorageIO ========== StorageIO::StorageIO( Storage* st, const wchar_t* fname ) : storage(st), filename(fname), file(), result(Storage::Ok), opened(false), filesize(0), writeable(false), header(new Header()), dirtree(new DirTree(1 << header->b_shift)), bbat(new AllocTable()), sbat(new AllocTable()), sb_blocks(), mbat_blocks(), mbat_data(), mbatDirty(), streams() { bbat->blockSize = (uint64) 1 << header->b_shift; sbat->blockSize = (uint64) 1 << header->s_shift; } StorageIO::~StorageIO() { if( opened ) close(); delete sbat; delete bbat; delete dirtree; delete header; } bool StorageIO::open(bool bWriteAccess, bool bCreate) { // already opened ? close first if (opened) close(); if (bCreate) { create(); init(); writeable = true; } else { writeable = bWriteAccess; load(bWriteAccess); } return result == Storage::Ok; } void StorageIO::load(bool bWriteAccess) { unsigned char* buffer = 0; uint64 buflen = 0; std::vector blocks; // open the file, check for error result = Storage::OpenFailed; #if defined(POLE_USE_UTF16_FILENAMES) if (bWriteAccess) file.open(filename.c_str(), std::ios::binary | std::ios::in | std::ios::out); else file.open(filename.c_str(), std::ios::binary | std::ios::in); #else if (bWriteAccess) file.open(stringWToUtf8String(filename), std::ios::binary | std::ios::in | std::ios::out); else file.open(stringWToUtf8String(filename), std::ios::binary | std::ios::in); #endif //defined(POLE_USE_UTF16_FILENAMES) && defined(POLE_WIN) if( !file.good() ) return; // find size of input file file.seekg(0, std::ios::end ); filesize = static_cast(file.tellg()); // load header buffer = new unsigned char[512]; file.seekg( 0 ); file.read( (char*)buffer, 512 ); fileCheck(file); header->load( buffer ); delete[] buffer; // check OLE magic id result = Storage::NotOLE; for( unsigned i=0; i<8; i++ ) if( header->id[i] != pole_magic[i] ) return; // sanity checks result = Storage::BadOLE; if( !header->valid() ) return; if( header->threshold != 4096 ) return; // important block size bbat->blockSize = (uint64) 1 << header->b_shift; sbat->blockSize = (uint64) 1 << header->s_shift; blocks = getbbatBlocks(true); // load big bat buflen = static_cast(blocks.size())*bbat->blockSize; if( buflen > 0 ) { buffer = new unsigned char[ buflen ]; loadBigBlocks( blocks, buffer, buflen ); bbat->load( buffer, buflen ); delete[] buffer; } // load small bat blocks.clear(); blocks = bbat->follow( header->sbat_start ); buflen = static_cast(blocks.size())*bbat->blockSize; if( buflen > 0 ) { buffer = new unsigned char[ buflen ]; loadBigBlocks( blocks, buffer, buflen ); sbat->load( buffer, buflen ); delete[] buffer; } // load directory tree blocks.clear(); blocks = bbat->follow( header->dirent_start ); buflen = static_cast(blocks.size())*bbat->blockSize; if (buflen > 0) { buffer = new unsigned char[ buflen ]; loadBigBlocks( blocks, buffer, buflen ); dirtree->load( buffer, buflen ); unsigned sb_start = readU32( buffer + 0x74 ); delete[] buffer; // fetch block chain as data for small-files sb_blocks = bbat->follow( sb_start ); // small files // for troubleshooting, just enable this block #if 0 header->debug(); sbat->debug(); bbat->debug(); dirtree->debug(); #endif // so far so good result = Storage::Ok; opened = true; } } void StorageIO::create() { // std::cout << "Creating " << filename << std::endl; #if defined(POLE_USE_UTF16_FILENAMES) file.open(filename.c_str(), std::ios::binary | std::ios::in | std::ios::out | std::ios::trunc); #else file.open( stringWToUtf8String(filename).c_str(), std::ios::binary | std::ios::in | std::ios::out | std::ios::trunc); #endif if( !file.good() ) { //std::cerr << "Can't create " << filename << std::endl; result = Storage::OpenFailed; return; } // so far so good opened = true; result = Storage::Ok; } void StorageIO::init() { // Initialize parts of the header, directory entries, and big and small allocation tables header->bb_blocks[0] = 0; header->dirent_start = 1; header->sbat_start = 2; header->num_bat = 1; header->num_sbat = 1; header->dirty = true; bbat->set(0, AllocTable::Eof); bbat->markAsDirty(0, bbat->blockSize); bbat->set(1, AllocTable::Eof); bbat->markAsDirty(1, bbat->blockSize); bbat->set(2, AllocTable::Eof); bbat->markAsDirty(2, bbat->blockSize); bbat->set(3, AllocTable::Eof); bbat->markAsDirty(3, bbat->blockSize); sb_blocks = bbat->follow( 3 ); mbatDirty = false; } void StorageIO::flush() { if (header->dirty) { unsigned char *buffer = new unsigned char[512]; memset(buffer, 0, 512); header->save( buffer ); file.seekp( 0 ); file.write( (char*)buffer, 512 ); fileCheck(file); delete[] buffer; } if (bbat->isDirty()) flushbbat(); if (sbat->isDirty()) flushsbat(); if (dirtree->isDirty()) { std::vector blocks; blocks = bbat->follow(header->dirent_start); uint64 sb_start = 0xffffffff; if (sb_blocks.size() > 0) sb_start = sb_blocks[0]; dirtree->flush(blocks, this, bbat->blockSize, sb_start, static_cast(sb_blocks.size())*bbat->blockSize); } if (mbatDirty && mbat_blocks.size() > 0) { uint64 nBytes = bbat->blockSize * static_cast(mbat_blocks.size()); unsigned char *buffer = new unsigned char[nBytes]; memset(buffer, 0, nBytes); uint64 sIdx = 0; uint64 dcount = 0; uint64 blockCapacity = bbat->blockSize / sizeof(uint64) - 1; uint64 blockIdx = 0; for (size_t mdIdx = 0; mdIdx < mbat_data.size(); mdIdx++) { writeU32(buffer + sIdx, (uint32) mbat_data[mdIdx]); sIdx += 4; dcount++; if (dcount == blockCapacity) { blockIdx++; if (blockIdx == mbat_blocks.size()) writeU32(buffer + sIdx, AllocTable::Eof); else writeU32(buffer + sIdx, (uint32) mbat_blocks[blockIdx]); sIdx += 4; dcount = 0; } } saveBigBlocks(mbat_blocks, 0, buffer, nBytes); delete[] buffer; mbatDirty = false; } file.flush(); fileCheck(file); /* Note on Microsoft implementation: - directory entries are stored in the last block(s) - BATs are as second to the last - Meta BATs are third to the last */ } void StorageIO::close() { if( !opened ) return; if (writeable) { file.seekg(0, std::ios::end ); filesize = static_cast(file.tellg()); if (filesize % 512 != 0) { char padding[512] = {}; POLE::uint64 sz = (filesize / 512 + 1) * 512 - filesize; file.write(padding, sz); fileCheck(file); } } file.close(); opened = false; std::list::iterator it; for( it = streams.begin(); it != streams.end(); ++it ) delete *it; streams.clear(); } StreamIO* StorageIO::streamIO( const std::wstring& name, bool bCreate, int64 streamSize ) { // sanity check if( !name.length() ) return (StreamIO*)0; // search in the entries DirEntry* entry = dirtree->entry( name, bCreate, bbat->blockSize, this, streamSize ); //if( entry) std::cout << "FOUND\n"; if( !entry ) return (StreamIO*)0; //if( !entry->dir ) std::cout << " NOT DIR\n"; if( entry->dir ) return (StreamIO*)0; StreamIO* result2 = new StreamIO( this, entry ); result2->fullName = name; return result2; } bool StorageIO::deleteByName(const std::wstring& fullName) { if (!fullName.length()) return false; if (!writeable) return false; DirEntry* entry = dirtree->entry(fullName); if (!entry) return false; bool retVal; if (entry->dir) retVal = deleteNode(entry, fullName); else retVal = deleteLeaf(entry, fullName); if (retVal) flush(); return retVal; } bool StorageIO::deleteNode(DirEntry *entry, const std::wstring& fullName) { std::wstring lclName = fullName; if (lclName[lclName.size()-1] != '/') lclName += '/'; bool retVal = true; while (entry->child && entry->child < dirtree->entryCount()) { DirEntry* childEnt = dirtree->entry(entry->child); std::wstring childFullName = lclName + childEnt->name; if (childEnt->dir) retVal = deleteNode(childEnt, childFullName); else retVal = deleteLeaf(childEnt, childFullName); if (!retVal) return false; } dirtree->deleteEntry(entry, fullName, bbat->blockSize); return retVal; } bool StorageIO::deleteLeaf(DirEntry *entry, const std::wstring& fullName) { std::vector blocks; if (entry->size >= header->threshold) { blocks = bbat->follow(entry->start); for (unsigned idx = 0; idx < blocks.size(); idx++) { bbat->set(blocks[idx], AllocTable::Avail); bbat->markAsDirty(idx, bbat->blockSize); } } else { blocks = sbat->follow(entry->start); for (unsigned idx = 0; idx < blocks.size(); idx++) { sbat->set(blocks[idx], AllocTable::Avail); sbat->markAsDirty(idx, bbat->blockSize); } } dirtree->deleteEntry(entry, fullName, bbat->blockSize); return true; } uint64 StorageIO::loadBigBlocks( std::vector blocks, unsigned char* data, uint64 maxlen ) { // sentinel if( !data ) return 0; fileCheck(file); if( !file.good() ) return 0; if( blocks.size() < 1 ) return 0; if( maxlen == 0 ) return 0; // read block one by one, seems fast enough uint64 bytes = 0; for( unsigned int i=0; (i < blocks.size() ) & ( bytesblockSize * ( block+1 ); uint64 p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes; if( pos + p > filesize ) p = filesize - pos; file.seekg( pos ); file.read( (char*)data + bytes, p ); fileCheck(file); // should use gcount to see how many bytes were really returned - eof check... bytes += p; } return bytes; } uint64 StorageIO::loadBigBlock( uint64 block, unsigned char* data, uint64 maxlen ) { // sentinel if( !data ) return 0; fileCheck(file); if( !file.good() ) return 0; // wraps call for loadBigBlocks std::vector blocks; blocks.resize( 1 ); blocks[ 0 ] = block; return loadBigBlocks( blocks, data, maxlen ); } uint64 StorageIO::saveBigBlocks( std::vector blocks, uint64 offset, unsigned char* data, uint64 len ) { // sentinel if( !data ) return 0; fileCheck(file); if( !file.good() ) return 0; if( blocks.size() < 1 ) return 0; if( len == 0 ) return 0; // write block one by one, seems fast enough uint64 bytes = 0; for ( size_t i = 0; (i < blocks.size() ) & ( bytes < len ); i++ ) { uint64 block = blocks[i]; uint64 pos = (bbat->blockSize * ( block + 1 ) ) + offset; uint64 maxWrite = bbat->blockSize - offset; uint64 tobeWritten = len - bytes; if (tobeWritten > maxWrite) tobeWritten = maxWrite; file.seekp( pos ); file.write( (char*)data + bytes, tobeWritten ); fileCheck(file); bytes += tobeWritten; offset = 0; if (filesize < pos + tobeWritten) filesize = pos + tobeWritten; } return bytes; } uint64 StorageIO::saveBigBlock( uint64 block, uint64 offset, unsigned char* data, uint64 len ) { if ( !data ) return 0; fileCheck(file); if ( !file.good() ) return 0; //wrap call for saveBigBlocks std::vector blocks; blocks.resize( 1 ); blocks[ 0 ] = block; return saveBigBlocks(blocks, offset, data, len ); } // return number of bytes which has been read uint64 StorageIO::loadSmallBlocks( std::vector blocks, unsigned char* data, uint64 maxlen ) { // sentinel if( !data ) return 0; fileCheck(file); if( !file.good() ) return 0; if( blocks.size() < 1 ) return 0; if( maxlen == 0 ) return 0; // our own local buffer unsigned char* buf = new unsigned char[ bbat->blockSize ]; // read small block one by one uint64 bytes = 0; for( unsigned int i=0; ( iblockSize; uint64 bbindex = pos / bbat->blockSize; if( bbindex >= sb_blocks.size() ) break; loadBigBlock( sb_blocks[ bbindex ], buf, bbat->blockSize ); // copy the data uint64 offset = pos % bbat->blockSize; uint64 p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes : bbat->blockSize-offset; p = (sbat->blockSize

blockSize : p; memcpy( data + bytes, buf + offset, p ); bytes += p; } delete[] buf; return bytes; } uint64 StorageIO::loadSmallBlock( uint64 block, unsigned char* data, uint64 maxlen ) { // sentinel if( !data ) return 0; fileCheck(file); if( !file.good() ) return 0; // wraps call for loadSmallBlocks std::vector blocks; blocks.resize( 1 ); blocks.assign( 1, block ); return loadSmallBlocks( blocks, data, maxlen ); } uint64 StorageIO::saveSmallBlocks( std::vector blocks, uint64 offset, unsigned char* data, uint64 len, int64 startAtBlock ) { // sentinel if( !data ) return 0; fileCheck(file); if( !file.good() ) return 0; if( blocks.size() < 1 ) return 0; if( len == 0 ) return 0; // write block one by one, seems fast enough uint64 bytes = 0; for( uint64 i = startAtBlock; (i < blocks.size() ) & ( bytesblockSize; uint64 bbindex = pos / bbat->blockSize; if( bbindex >= sb_blocks.size() ) break; uint64 offset2 = pos % bbat->blockSize; uint64 maxWrite = sbat->blockSize - offset; uint64 tobeWritten = len - bytes; if (tobeWritten > maxWrite) tobeWritten = maxWrite; saveBigBlock( sb_blocks[ bbindex ], offset2 + offset, data + bytes, tobeWritten); bytes += tobeWritten; offset = 0; if (filesize < pos + tobeWritten) filesize = pos + tobeWritten; } return bytes; } uint64 StorageIO::saveSmallBlock( uint64 block, uint64 offset, unsigned char* data, uint64 len ) { if ( !data ) return 0; fileCheck(file); if ( !file.good() ) return 0; //wrap call for saveSmallBlocks std::vector blocks; blocks.resize( 1 ); blocks[ 0 ] = block; return saveSmallBlocks(blocks, offset, data, len ); } void StorageIO::flushbbat() { std::vector blocks; blocks = getbbatBlocks(false); bbat->flush(blocks, this, bbat->blockSize); } void StorageIO::flushsbat() { std::vector blocks; blocks = bbat->follow(header->sbat_start); sbat->flush(blocks, this, bbat->blockSize); } std::vector StorageIO::getbbatBlocks(bool bLoading) { std::vector blocks; // find blocks allocated to store big bat // the first 109 blocks are in header, the rest in meta bat blocks.clear(); blocks.resize( header->num_bat ); for( unsigned i = 0; i < 109; i++ ) { if( i >= header->num_bat ) break; else blocks[i] = header->bb_blocks[i]; } if (bLoading) { mbat_blocks.clear(); mbat_data.clear(); if( (header->num_bat > 109) && (header->num_mbat > 0) ) { unsigned char* buffer2 = new unsigned char[ bbat->blockSize ]; uint64 k = 109; uint64 sector; uint64 mdidx = 0; for( uint64 r = 0; r < header->num_mbat; r++ ) { if(r == 0) // 1st meta bat location is in file header. sector = header->mbat_start; else // next meta bat location is the last current block value. { sector = blocks[--k]; mdidx--; } mbat_blocks.push_back(sector); mbat_data.resize(mbat_blocks.size()*(bbat->blockSize/4)); loadBigBlock( sector, buffer2, bbat->blockSize ); for( uint64 s=0; s < bbat->blockSize; s+=4 ) { if( k >= header->num_bat ) break; else { blocks[k] = readU32( buffer2 + s ); mbat_data[mdidx++] = blocks[k]; k++; } } } if (mbat_data.size() != mdidx) mbat_data.resize(mdidx); delete[] buffer2; } } else { unsigned i = 109; for (unsigned int idx = 0; idx < mbat_data.size(); idx++) { blocks[i++] = mbat_data[idx]; if (i == header->num_bat) break; } } return blocks; } uint64 StorageIO::ExtendFile( std::vector *chain ) { uint64 newblockIdx = bbat->unused(); bbat->set(newblockIdx, AllocTable::Eof); uint64 bbidx = newblockIdx / (bbat->blockSize / sizeof(uint64)); while (bbidx >= header->num_bat) addbbatBlock(); bbat->markAsDirty(newblockIdx, bbat->blockSize); if (chain->size() > 0) { bbat->set((*chain)[chain->size()-1], newblockIdx); bbat->markAsDirty((*chain)[chain->size()-1], bbat->blockSize); } chain->push_back(newblockIdx); return newblockIdx; } void StorageIO::addbbatBlock() { uint64 newblockIdx = bbat->unused(); bbat->set(newblockIdx, AllocTable::MetaBat); if (header->num_bat < 109) header->bb_blocks[header->num_bat] = newblockIdx; else { mbatDirty = true; mbat_data.push_back(newblockIdx); uint64 metaIdx = header->num_bat - 109; uint64 idxPerBlock = bbat->blockSize / sizeof(uint64) - 1; //reserve room for index to next block uint64 idxBlock = metaIdx / idxPerBlock; if (idxBlock == mbat_blocks.size()) { uint64 newmetaIdx = bbat->unused(); bbat->set(newmetaIdx, AllocTable::MetaBat); mbat_blocks.push_back(newmetaIdx); if (header->num_mbat == 0) header->mbat_start = newmetaIdx; header->num_mbat++; } } header->num_bat++; header->dirty = true; } // =========== StreamIO ========== StreamIO::StreamIO( StorageIO* s, DirEntry* e) : io(s), entryIdx(io->dirtree->indexOf(e)), fullName(), blocks(), eof(false), fail(false), m_pos(0), cache_data(new unsigned char[CACHEBUFSIZE]), cache_size(0), // indicating an empty cache cache_pos(0) { if( e->size >= io->header->threshold ) blocks = io->bbat->follow( e->start ); else blocks = io->sbat->follow( e->start ); } // FIXME tell parent we're gone StreamIO::~StreamIO() { delete[] cache_data; } void StreamIO::setSize(uint64 newSize) { bool bThresholdCrossed = false; bool bOver = false; if(!io->writeable ) return; DirEntry *entry = io->dirtree->entry(entryIdx); if (newSize >= io->header->threshold && entry->size < io->header->threshold) { bThresholdCrossed = true; bOver = true; } else if (newSize < io->header->threshold && entry->size >= io->header->threshold) { bThresholdCrossed = true; bOver = false; } if (bThresholdCrossed) { // first, read what is already in the stream, limited by the requested new size. Note // that the read can work precisely because we have not yet reset the size. uint64 len = newSize; if (len > entry->size) len = entry->size; unsigned char *buffer = 0; uint64 savePos = tell(); if (len) { buffer = new unsigned char[len]; seek(0); read(buffer, len); } // Now get rid of the existing blocks if (bOver) { for (unsigned int idx = 0; idx < blocks.size(); idx++) { io->sbat->set(blocks[idx], AllocTable::Avail); io->sbat->markAsDirty(idx, io->bbat->blockSize); } } else { for (unsigned int idx = 0; idx < blocks.size(); idx++) { io->bbat->set(blocks[idx], AllocTable::Avail); io->bbat->markAsDirty(idx, io->bbat->blockSize); } } blocks.clear(); entry->start = DirTree::End; // Now change the size, and write the old data back into the stream, if any entry->size = newSize; io->dirtree->markAsDirty(io->dirtree->indexOf(entry), io->bbat->blockSize); if (len) { write(0, buffer, len); delete buffer; } if (savePos <= entry->size) seek(savePos); } else if (entry->size != newSize) //simple case - no threshold was crossed, so just change the size { entry->size = newSize; io->dirtree->markAsDirty(io->dirtree->indexOf(entry), io->bbat->blockSize); } } void StreamIO::seek( uint64 pos ) { m_pos = pos; } uint64 StreamIO::tell() { return m_pos; } int64 StreamIO::getch() { // past end-of-file ? DirEntry *entry = io->dirtree->entry(entryIdx); if( m_pos >= entry->size ) return -1; // need to update cache ? if( !cache_size || ( m_pos < cache_pos ) || ( m_pos >= cache_pos + cache_size ) ) updateCache(); // something bad if we don't get good cache if( !cache_size ) return -1; int64 data = cache_data[m_pos - cache_pos]; m_pos++; return data; } uint64 StreamIO::read( uint64 pos, unsigned char* data, uint64 maxlen ) { // sanity checks if( !data ) return 0; if( maxlen == 0 ) return 0; uint64 totalbytes = 0; DirEntry *entry = io->dirtree->entry(entryIdx); if (pos + maxlen > entry->size) maxlen = entry->size - pos; if ( entry->size < io->header->threshold ) { // small file uint64 index = pos / io->sbat->blockSize; if( index >= blocks.size() ) return 0; unsigned char* buf = new unsigned char[ io->sbat->blockSize ]; uint64 offset = pos % io->sbat->blockSize; while( totalbytes < maxlen ) { if( index >= blocks.size() ) break; io->loadSmallBlock( blocks[index], buf, io->bbat->blockSize ); uint64 count = io->sbat->blockSize - offset; if( count > maxlen-totalbytes ) count = maxlen-totalbytes; memcpy( data+totalbytes, buf + offset, count ); totalbytes += count; offset = 0; index++; } delete[] buf; } else { // big file uint64 index = pos / io->bbat->blockSize; if( index >= blocks.size() ) return 0; unsigned char* buf = new unsigned char[ io->bbat->blockSize ]; uint64 offset = pos % io->bbat->blockSize; while( totalbytes < maxlen ) { if( index >= blocks.size() ) break; io->loadBigBlock( blocks[index], buf, io->bbat->blockSize ); uint64 count = io->bbat->blockSize - offset; if( count > maxlen-totalbytes ) count = maxlen-totalbytes; memcpy( data+totalbytes, buf + offset, count ); totalbytes += count; index++; offset = 0; } delete [] buf; } return totalbytes; } uint64 StreamIO::read( unsigned char* data, uint64 maxlen ) { uint64 bytes = read( tell(), data, maxlen ); m_pos += bytes; return bytes; } uint64 StreamIO::write( unsigned char* data, uint64 len ) { return write( tell(), data, len ); } uint64 StreamIO::write( uint64 pos, unsigned char* data, uint64 len ) { // sanity checks if( !data ) return 0; if( len == 0 ) return 0; if( !io->writeable ) return 0; DirEntry *entry = io->dirtree->entry(entryIdx); if (pos + len > entry->size) setSize(pos + len); //reset size, possibly changing from small to large blocks uint64 totalbytes = 0; if ( entry->size < io->header->threshold ) { // small file uint64 index = (pos + len - 1) / io->sbat->blockSize; while (index >= blocks.size()) { uint64 nblock = io->sbat->unused(); if (blocks.size() > 0) { io->sbat->set(blocks[blocks.size()-1], nblock); io->sbat->markAsDirty(blocks[blocks.size()-1], io->bbat->blockSize); } io->sbat->set(nblock, AllocTable::Eof); io->sbat->markAsDirty(nblock, io->bbat->blockSize); blocks.resize(blocks.size()+1); blocks[blocks.size()-1] = nblock; uint64 bbidx = nblock / (io->bbat->blockSize / sizeof(unsigned int)); while (bbidx >= io->header->num_sbat) { std::vector sbat_blocks = io->bbat->follow(io->header->sbat_start); io->ExtendFile(&sbat_blocks); io->header->num_sbat++; io->header->dirty = true; //Header will have to be rewritten } uint64 sidx = nblock * io->sbat->blockSize / io->bbat->blockSize; while (sidx >= io->sb_blocks.size()) { io->ExtendFile(&io->sb_blocks); io->dirtree->markAsDirty(0, io->bbat->blockSize); //make sure to rewrite first directory block } } uint64 offset = pos % io->sbat->blockSize; index = pos / io->sbat->blockSize; //if (index == 0) totalbytes = io->saveSmallBlocks(blocks, offset, data, len, index); } else { uint64 index = (pos + len - 1) / io->bbat->blockSize; while (index >= blocks.size()) io->ExtendFile(&blocks); uint64 offset = pos % io->bbat->blockSize; uint64 remainder = len; index = pos / io->bbat->blockSize; while( remainder > 0 ) { if( index >= blocks.size() ) break; uint64 count = io->bbat->blockSize - offset; if ( remainder < count ) count = remainder; io->saveBigBlock( blocks[index], offset, data + totalbytes, count ); totalbytes += count; remainder -= count; index++; offset = 0; } } if (blocks.size() > 0 && entry->start != blocks[0]) { entry->start = blocks[0]; io->dirtree->markAsDirty(io->dirtree->indexOf(entry), io->bbat->blockSize); } m_pos += len; return totalbytes; } void StreamIO::flush() { io->flush(); } void StreamIO::updateCache() { // sanity check if( !cache_data ) return; DirEntry *entry = io->dirtree->entry(entryIdx); cache_pos = m_pos - (m_pos % CACHEBUFSIZE); uint64 bytes = CACHEBUFSIZE; if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos; cache_size = read( cache_pos, cache_data, bytes ); } // =========== Storage ========== Storage::Storage( const wchar_t* filename ) { io = new StorageIO( this, filename ); } Storage::~Storage() { delete io; } int Storage::result() { return (int) io->result; } bool Storage::open(bool bWriteAccess, bool bCreate) { return io->open(bWriteAccess, bCreate); } void Storage::close() { io->close(); } std::list Storage::entries( const std::wstring& path ) { std::list localResult; DirTree* dt = io->dirtree; DirEntry* e = dt->entry( path, false ); if( e && e->dir ) { uint64 parent = dt->indexOf( e ); std::vector children = dt->children( parent ); for( uint64 i = 0; i < children.size(); i++ ) { localResult.push_back( dt->entry( children[i] )->name ); } } return localResult; } std::list Storage::entries_with_prefix( const std::wstring& path ) { std::list localResult; DirTree* dt = io->dirtree; DirEntry* e = dt->entry( path, false ); if( e && e->dir ) { uint64 parent = dt->indexOf( e ); std::vector children = dt->children( parent ); for( uint64 i = 0; i < children.size(); i++ ) { std::wstring val; if (dt->entry( children[i] )->prefix != 32) { val = dt->entry( children[i] )->prefix; } val += dt->entry( children[i] )->name; localResult.push_back(val); } } return localResult; } bool Storage::isDirectory( const std::wstring& name ) { DirEntry* e = io->dirtree->entry( name, false ); return e ? e->dir : false; } bool Storage::exists( const std::wstring& name ) { DirEntry* e = io->dirtree->entry( name, false ); return (e != 0); } bool Storage::isWriteable() { return io->writeable; } bool Storage::deleteByName( const std::wstring& name ) { return io->deleteByName(name); } void Storage::GetStats(uint64 *pEntries, uint64 *pUnusedEntries, uint64 *pBigBlocks, uint64 *pUnusedBigBlocks, uint64 *pSmallBlocks, uint64 *pUnusedSmallBlocks) { *pEntries = io->dirtree->entryCount(); *pUnusedEntries = io->dirtree->unusedEntryCount(); *pBigBlocks = io->bbat->count(); *pUnusedBigBlocks = io->bbat->unusedCount(); *pSmallBlocks = io->sbat->count(); *pUnusedSmallBlocks = io->sbat->unusedCount(); } // recursively collect stream names void CollectStreams( std::list& result, DirTree* tree, DirEntry* parent, const std::wstring& path ) { DirEntry* c = tree->entry( parent->child ); std::queue queue; if ( c ) queue.push( c ); while ( !queue.empty() ) { DirEntry* e = queue.front(); queue.pop(); if ( e->dir ) CollectStreams( result, tree, e, path + e->name + L"/" ); else result.push_back( path + e->name ); DirEntry* p = tree->entry( e->prev ); if ( p ) queue.push( p ); DirEntry* n = tree->entry( e->next ); if ( n ) queue.push( n ); // not testing if p or n have already been processed; potential infinite loop in case of closed Entry chain // it seems not to happen, though } } std::list Storage::GetAllStreams( const std::wstring& storageName ) { std::list vresult; DirEntry* e = io->dirtree->entry( storageName, false ); if ( e && e->dir ) CollectStreams( vresult, io->dirtree, e, storageName ); return vresult; } // =========== Stream ========== Stream::Stream( Storage* storage, const std::wstring& name, bool bCreate, int64 streamSize ) : io(storage->io->streamIO( name, bCreate, (int) streamSize )) { } // FIXME tell parent we're gone Stream::~Stream() { delete io; } std::wstring Stream::fullName() { return io ? io->fullName : std::wstring(); } uint64 Stream::tell() { return io ? io->tell() : 0; } void Stream::seek( uint64 newpos ) { if( io ) io->seek(newpos); } uint64 Stream::size() { if (!io) return 0; DirEntry *entry = io->io->dirtree->entry(io->entryIdx); return entry->size; } void Stream::setSize(int64 newSize) { if (!io) return; if (newSize < 0) return; //if (newSize > std::numeric_limits::max()) // return; io->setSize(newSize); } int64 Stream::getch() { return io ? io->getch() : 0; } uint64 Stream::read( unsigned char* data, uint64 maxlen ) { return io ? io->read( data, maxlen ) : 0; } uint64 Stream::write( unsigned char* data, uint64 len ) { return io ? io->write( data, len ) : 0; } void Stream::flush() { if (io) io->flush(); } bool Stream::eof() { return io ? io->eof : false; } bool Stream::fail() { return io ? io->fail : true; }