Files
Yajbir Singh f1b860b25c
Some checks failed
check / markdownlint (push) Has been cancelled
check / spellchecker (push) Has been cancelled
updated
2025-12-11 19:03:17 +05:30

2503 lines
68 KiB
C++

/* POLE - Portable C++ library to access OLE Storage
Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
Performance optimization: Dmitry Fedorov
Copyright 2009 <www.bioimage.ucsb.edu> <www.dimin.net>
Fix for more than 236 mbat block entries : Michel Boudinot
Copyright 2010 <Michel.Boudinot@inaf.cnrs-gif.fr>
Considerable rework to allow for creation and updating of structured storage : Stephen Baum
Copyright 2013 <srbaum@gmail.com>
Added GetAllStreams, reworked datatypes
Copyright 2013 Felix Gorny from Bitplane
More datatype changes to allow for 32 and 64 bit code, some fixes involving incremental updates, flushing
Copyright 2013 <srbaum@gmail.com>
Version: 0.5.2
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <fstream>
#include <iostream>
#include <list>
#include <string>
#include <vector>
#include <queue>
#include <limits>
#include <map>
#include <cstring>
#include "pole.h"
#include "../../../DesktopEditor/common/File.h"
// enable to activate debugging output
// #define POLE_DEBUG
#define CACHEBUFSIZE 4096 //a presumably reasonable size for the read cache
namespace POLE
{
class Header
{
public:
unsigned char id[8]; // signature, or magic identifier
uint64 b_shift; // bbat->blockSize = 1 << b_shift
uint64 s_shift; // sbat->blockSize = 1 << s_shift
uint64 num_bat; // blocks allocated for big bat
uint64 dirent_start; // starting block for directory info
uint64 threshold; // switch from small to big file (usually 4K)
uint64 sbat_start; // starting block index to store small bat
uint64 num_sbat; // blocks allocated for small bat
uint64 mbat_start; // starting block to store meta bat
uint64 num_mbat; // blocks allocated for meta bat
uint64 bb_blocks[109];
bool dirty; // Needs to be written
Header();
bool valid();
void load( const unsigned char* buffer );
void save( unsigned char* buffer );
void debug();
};
class AllocTable
{
public:
AllocTable();
static const uint64 Eof;
static const uint64 Avail;
static const uint64 Bat;
static const uint64 MetaBat;
uint64 blockSize;
void clear()
{
data.clear();
dirtyBlocks.clear();
}
uint64 count();
uint64 unusedCount();
void resize( uint64 newsize );
void preserve( uint64 n );
void set( uint64 index, uint64 val );
unsigned unused();
void setChain( std::vector<uint64> );
std::vector<uint64> follow( uint64 start );
uint64 operator[](uint64 index );
void load( const unsigned char* buffer, uint64 len );
void save( unsigned char* buffer );
uint64 size();
void debug();
bool isDirty();
void markAsDirty(uint64 dataIndex, int64 bigBlockSize);
void flush(std::vector<uint64> blocks, StorageIO *const io, int64 bigBlockSize);
private:
std::vector<uint64> data;
std::map<uint64, char> dirtyBlocks;
bool bMaybeFragmented;
AllocTable( const AllocTable& );
AllocTable& operator=( const AllocTable& );
};
class DirEntry
{
public:
DirEntry(): valid(), name(), dir(), size(), start(), prev(), next(), child() {}
bool valid; // false if invalid (should be skipped)
wchar_t prefix;
std::wstring name; // the name, not in unicode anymore
bool dir; // true if directory
uint64 size; // size (not valid if directory)
uint64 start; // starting block
uint64 prev; // previous sibling
uint64 next; // next sibling
uint64 child; // first child
int compare(const DirEntry& de);
int compare(const std::wstring& name2);
};
class DirTree
{
public:
static const uint64 End;
DirTree(int64 bigBlockSize);
void clear(int64 bigBlockSize);
inline uint64 entryCount();
uint64 unusedEntryCount();
DirEntry* entry( uint64 index );
DirEntry* entry( const std::wstring& name, bool create = false, int64 bigBlockSize = 0, StorageIO *const io = 0, int64 streamSize = 0);
int64 indexOf( DirEntry* e );
int64 parent( uint64 index );
std::wstring fullName( uint64 index );
std::vector<uint64> children( uint64 index );
uint64 find_child( uint64 index, const std::wstring& name, uint64 &closest );
void load( unsigned char* buffer, uint64 len );
void save( unsigned char* buffer );
uint64 size();
void debug();
bool isDirty();
void markAsDirty(uint64 dataIndex, int64 bigBlockSize);
void flush(std::vector<uint64> blocks, StorageIO *const io, int64 bigBlockSize, uint64 sb_start, uint64 sb_size);
uint64 unused();
void findParentAndSib(uint64 inIdx, const std::wstring& inFullName, uint64 &parentIdx, uint64 &sibIdx);
uint64 findSib(uint64 inIdx, uint64 sibIdx);
void deleteEntry(DirEntry *entry, const std::wstring& inFullName, int64 bigBlockSize);
private:
std::vector<DirEntry> entries;
std::map<uint64, char> dirtyBlocks;
DirTree( const DirTree& );
DirTree& operator=( const DirTree& );
};
class StorageIO
{
public:
Storage* storage; // owner
std::wstring filename; // filename
std::fstream file; // associated with above name
int64 result; // result of operation
bool opened; // true if file is opened
uint64 filesize; // size of the file
bool writeable; // true if the file can be modified
Header* header; // storage header
DirTree* dirtree; // directory tree
AllocTable* bbat; // allocation table for big blocks
AllocTable* sbat; // allocation table for small blocks
std::vector<uint64> sb_blocks; // blocks for "small" files
std::vector<uint64> mbat_blocks; // blocks for doubly indirect indices to big blocks
std::vector<uint64> mbat_data; // the additional indices to big blocks
bool mbatDirty; // If true, mbat_blocks need to be written
std::list<Stream*> streams;
StorageIO( Storage* storage, const wchar_t* filename );
~StorageIO();
bool open(bool bWriteAccess = false, bool bCreate = false);
void close();
void flush();
void load(bool bWriteAccess);
void create();
void init();
bool deleteByName(const std::wstring& fullName);
bool deleteNode(DirEntry *entry, const std::wstring& fullName);
bool deleteLeaf(DirEntry *entry, const std::wstring& fullName);
uint64 loadBigBlocks( std::vector<uint64> blocks, unsigned char* buffer, uint64 maxlen );
uint64 loadBigBlock( uint64 block, unsigned char* buffer, uint64 maxlen );
uint64 saveBigBlocks( std::vector<uint64> blocks, uint64 offset, unsigned char* buffer, uint64 len );
uint64 saveBigBlock( uint64 block, uint64 offset, unsigned char*buffer, uint64 len );
uint64 loadSmallBlocks( std::vector<uint64> blocks, unsigned char* buffer, uint64 maxlen );
uint64 loadSmallBlock( uint64 block, unsigned char* buffer, uint64 maxlen );
uint64 saveSmallBlocks( std::vector<uint64> blocks, uint64 offset, unsigned char* buffer, uint64 len, int64 startAtBlock = 0 );
uint64 saveSmallBlock( uint64 block, uint64 offset, unsigned char* buffer, uint64 len );
StreamIO* streamIO( const std::wstring& name, bool bCreate = false, int64 streamSize = 0 );
void flushbbat();
void flushsbat();
std::vector<uint64> getbbatBlocks(bool bLoading);
uint64 ExtendFile( std::vector<uint64> *chain );
void addbbatBlock();
private:
// no copy or assign
StorageIO( const StorageIO& );
StorageIO& operator=( const StorageIO& );
};
class StreamIO
{
public:
StorageIO* io;
int64 entryIdx; //needed because a pointer to DirEntry will change whenever entries vector changes.
std::wstring fullName;
bool eof;
bool fail;
StreamIO( StorageIO* io, DirEntry* entry );
~StreamIO();
uint64 size();
void setSize(uint64 newSize);
void seek( uint64 pos );
uint64 tell();
int64 getch();
uint64 read( unsigned char* data, uint64 maxlen );
uint64 read( uint64 pos, unsigned char* data, uint64 maxlen );
uint64 write( unsigned char* data, uint64 len );
uint64 write( uint64 pos, unsigned char* data, uint64 len );
void flush();
private:
std::vector<uint64> blocks;
// no copy or assign
StreamIO( const StreamIO& );
StreamIO& operator=( const StreamIO& );
// pointer for read
uint64 m_pos;
// simple cache system to speed-up getch()
unsigned char* cache_data;
uint64 cache_size;
uint64 cache_pos;
void updateCache();
};
}; // namespace POLE
using namespace POLE;
#ifdef POLE_USE_UTF16_FILENAMES
#else
#include "../../../DesktopEditor/common/File.h"
static std::string stringWToUtf8String (const std::wstring& aaSrc)
{
return U_TO_UTF8(aaSrc);
}
#endif //POLE_USE_UTF16_FILENAMES
static void fileCheck(std::fstream &file)
{
bool bGood, bFail, bEof, bBad;
bool bNOTOK;
bGood = file.good();
bFail = file.fail();
bEof = file.eof();
bBad = file.bad();
if (bFail || bEof || bBad)
bNOTOK = true; //this doesn't really do anything, but it is a good place to set a breakpoint!
file.clear();
}
static inline uint32 readU16( const unsigned char* ptr )
{
return ptr[0]+(ptr[1]<<8);
}
static inline uint32 readU32( const unsigned char* ptr )
{
return ptr[0]+(ptr[1]<<8)+(ptr[2]<<16)+(ptr[3]<<24);
}
static inline void writeU16( unsigned char* ptr, uint32 data )
{
ptr[0] = (unsigned char)(data & 0xff);
ptr[1] = (unsigned char)((data >> 8) & 0xff);
}
static inline void writeU32( unsigned char* ptr, uint32 data )
{
ptr[0] = (unsigned char)(data & 0xff);
ptr[1] = (unsigned char)((data >> 8) & 0xff);
ptr[2] = (unsigned char)((data >> 16) & 0xff);
ptr[3] = (unsigned char)((data >> 24) & 0xff);
}
static const unsigned char pole_magic[] =
{ 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
// =========== Header ==========
Header::Header()
: b_shift(9), // [1EH,02] size of sectors in power-of-two; typically 9 indicating 512-byte sectors
s_shift(6), // [20H,02] size of mini-sectors in power-of-two; typically 6 indicating 64-byte mini-sectors
num_bat(0), // [2CH,04] number of SECTs in the FAT chain
dirent_start(0), // [30H,04] first SECT in the directory chain
threshold(4096), // [38H,04] maximum size for a mini stream; typically 4096 bytes
sbat_start(0), // [3CH,04] first SECT in the MiniFAT chain
num_sbat(0), // [40H,04] number of SECTs in the MiniFAT chain
mbat_start(AllocTable::Eof),// [44H,04] first SECT in the DIFAT chain
num_mbat(0), // [48H,04] number of SECTs in the DIFAT chain
dirty(true)
{
for( unsigned int i = 0; i < 8; i++ )
id[i] = pole_magic[i];
for( unsigned int i=0; i<109; i++ )
bb_blocks[i] = AllocTable::Avail;
}
bool Header::valid()
{
if( threshold != 4096 ) return false;
if( num_bat == 0 ) return false;
//if( (num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false; // dima: incorrect check, number may be arbitrary larger
if( (num_bat < 109) && (num_mbat != 0) ) return false;
if( s_shift > b_shift ) return false;
if( b_shift <= 6 ) return false;
if( b_shift >=31 ) return false;
return true;
}
void Header::load( const unsigned char* buffer ) {
b_shift = readU16( buffer + 0x1e ); // [1EH,02] size of sectors in power-of-two; typically 9 indicating 512-byte sectors and 12 for 4096
s_shift = readU16( buffer + 0x20 ); // [20H,02] size of mini-sectors in power-of-two; typically 6 indicating 64-byte mini-sectors
num_bat = readU32( buffer + 0x2c ); // [2CH,04] number of SECTs in the FAT chain
dirent_start = readU32( buffer + 0x30 ); // [30H,04] first SECT in the directory chain
threshold = readU32( buffer + 0x38 ); // [38H,04] maximum size for a mini stream; typically 4096 bytes
sbat_start = readU32( buffer + 0x3c ); // [3CH,04] first SECT in the MiniFAT chain
num_sbat = readU32( buffer + 0x40 ); // [40H,04] number of SECTs in the MiniFAT chain
mbat_start = readU32( buffer + 0x44 ); // [44H,04] first SECT in the DIFAT chain
num_mbat = readU32( buffer + 0x48 ); // [48H,04] number of SECTs in the DIFAT chain
if (threshold == 0)
threshold = 4096; //поле не задано ... файл xls от полльзователя
for( unsigned int i = 0; i < 8; i++ )
id[i] = buffer[i];
// [4CH,436] the SECTs of first 109 FAT sectors
for( unsigned int i=0; i<109; i++ )
bb_blocks[i] = readU32( buffer + 0x4C+i*4 );
dirty = false;
}
void Header::save( unsigned char* buffer )
{
memset( buffer, 0, 0x4c );
memcpy( buffer, pole_magic, 8 ); // ole signature
writeU32( buffer + 8, 0 ); // reserved must be zero
writeU32( buffer + 12, 0 ); // reserved must be zero
writeU32( buffer + 16, 0 ); // reserved must be zero
writeU16( buffer + 24, 0x003e ); // minor version of the format: 33
writeU16( buffer + 26, 3 ); // major version (512 clasters)
writeU16( buffer + 28, 0xfffe ); // indicates Intel byte-ordering
writeU16( buffer + 0x1e, (uint32) b_shift ); //size of sectors in power-of-two
writeU16( buffer + 0x20, (uint32) s_shift );
writeU32( buffer + 0x2c, (uint32) num_bat );
writeU32( buffer + 0x30, (uint32) dirent_start );
writeU32( buffer + 0x38, (uint32) threshold );
writeU32( buffer + 0x3c, (uint32) sbat_start );
writeU32( buffer + 0x40, (uint32) num_sbat );
writeU32( buffer + 0x44, (uint32) mbat_start );
writeU32( buffer + 0x48, (uint32) num_mbat );
for( unsigned int i = 0; i < 109; i++ )
writeU32( buffer + 0x4C + i * 4, (uint32) bb_blocks[i] );
dirty = false;
}
void Header::debug()
{
std::cout << std::endl;
std::cout << "b_shift " << b_shift << std::endl;
std::cout << "s_shift " << s_shift << std::endl;
std::cout << "num_bat " << num_bat << std::endl;
std::cout << "dirent_start " << dirent_start << std::endl;
std::cout << "threshold " << threshold << std::endl;
std::cout << "sbat_start " << sbat_start << std::endl;
std::cout << "num_sbat " << num_sbat << std::endl;
std::cout << "mbat_start " << mbat_start << std::endl;
std::cout << "num_mbat " << num_mbat << std::endl;
uint64 s = (num_bat<=109) ? num_bat : 109;
std::cout << "bat blocks: ";
for( uint64 i = 0; i < s; i++ )
std::cout << bb_blocks[i] << " ";
std::cout << std::endl;
}
// =========== AllocTable ==========
const uint64 AllocTable::Avail = 0xffffffff;
const uint64 AllocTable::Eof = 0xfffffffe;
const uint64 AllocTable::Bat = 0xfffffffd;
const uint64 AllocTable::MetaBat = 0xfffffffc;
AllocTable::AllocTable()
: blockSize(4096),
bMaybeFragmented(true)
{
// initial size
resize( 128 );
}
uint64 AllocTable::count()
{
return static_cast<uint64>(data.size());
}
uint64 AllocTable::unusedCount()
{
uint64 maxIdx = count();
uint64 nFound = 0;
for (uint64 idx = 0; idx < maxIdx; idx++)
{
if( data[idx] == Avail )
nFound++;
}
return nFound;
}
void AllocTable::resize( uint64 newsize )
{
uint64 oldsize = static_cast<uint64>(data.size());
data.resize( newsize );
if( newsize > oldsize )
for( uint64 i = oldsize; i<newsize; i++ )
data[i] = Avail;
}
// make sure there're still free blocks
void AllocTable::preserve( uint64 n )
{
std::vector<uint64> pre;
for( unsigned i=0; i < n; i++ )
pre.push_back( unused() );
}
uint64 AllocTable::operator[]( uint64 index )
{
uint64 result;
result = data[index];
return result;
}
void AllocTable::set( uint64 index, uint64 value )
{
if( index >= count() ) resize( index + 1);
data[ index ] = value;
if (value == Avail)
bMaybeFragmented = true;
}
void AllocTable::setChain( std::vector<uint64> chain )
{
if( chain.size() )
{
for( unsigned i=0; i<chain.size()-1; i++ )
set( chain[i], chain[i+1] );
set( chain[ chain.size()-1 ], AllocTable::Eof );
}
}
// follow
std::vector<uint64> AllocTable::follow( uint64 start )
{
std::vector<uint64> chain;
if( start >= count() ) return chain;
uint64 p = start;
std::map<uint64, char> used;
used.insert(std::make_pair(start, (char)0));
while (p < count())
{
if (p == (uint64)Eof) break;
if (p == (uint64)Bat) break;
if (p == (uint64)MetaBat) break;
if (p >= count()) break;
chain.push_back(p);
if (data[p] >= count()) break;
if (used.find(data[p]) != used.end())
{
//cycle
break;
}
p = data[p];
used.insert(std::make_pair(p, (char)0));
}
return chain;
}
unsigned AllocTable::unused()
{
// find first available block
unsigned int maxIdx = (unsigned int) data.size();
if (bMaybeFragmented)
{
for( unsigned i = 0; i < maxIdx; i++ )
if( data[i] == Avail )
return i;
}
// completely full, so enlarge the table
unsigned int block = maxIdx;
resize( maxIdx );
bMaybeFragmented = false;
return block;
}
void AllocTable::load( const unsigned char* buffer, uint64 len )
{
resize( len / 4 );
for( unsigned i = 0; i < count(); i++ )
set( i, readU32( buffer + i*4 ) );
}
// return space required to save this dirtree
uint64 AllocTable::size()
{
return count() * 4;
}
void AllocTable::save( unsigned char* buffer )
{
for( uint64 i = 0; i < count(); i++ )
writeU32( buffer + i*4, (uint32) data[i] );
}
bool AllocTable::isDirty()
{
return (false == dirtyBlocks.empty());
}
void AllocTable::markAsDirty(uint64 dataIndex, int64 bigBlockSize)
{
uint64 dbidx = dataIndex / (bigBlockSize / sizeof(uint32));
std::map<uint64, char>::iterator pFind = dirtyBlocks.find(dbidx);
if (pFind == dirtyBlocks.end())
dirtyBlocks.insert(std::make_pair(dbidx, 0));
}
void AllocTable::flush(std::vector<uint64> blocks, StorageIO *const io, int64 bigBlockSize)
{
unsigned char *buffer = new unsigned char[bigBlockSize * blocks.size()];
memset(buffer, 0, bigBlockSize * blocks.size());
save(buffer);
for (uint64 idx = 0; idx < static_cast<uint64>(blocks.size()); idx++)
{
std::map<uint64, char>::iterator pFind = dirtyBlocks.find(idx);
if (pFind != dirtyBlocks.end())
io->saveBigBlock(blocks[idx], 0, &buffer[bigBlockSize * idx], bigBlockSize);
}
dirtyBlocks.clear();
delete[] buffer;
}
void AllocTable::debug()
{
std::cout << "block size " << data.size() << std::endl;
for( unsigned i=0; i< data.size(); i++ )
{
if( data[i] == Avail ) continue;
std::cout << i << ": ";
if( data[i] == Eof ) std::cout << "[eof]";
else if( data[i] == Bat ) std::cout << "[bat]";
else if( data[i] == MetaBat ) std::cout << "[metabat]";
else std::cout << data[i];
std::cout << std::endl;
}
}
// =========== DirEntry ==========
// "A node with a shorter name is less than a node with a inter name"
// "For nodes with the same length names, compare the two names."
// --Windows Compound Binary File Format Specification, Section 2.5
int DirEntry::compare(const DirEntry& de)
{
return compare(de.name);
}
int DirEntry::compare(const std::wstring& name2)
{
if (name.length() < name2.length())
return -1;
else if (name.length() > name2.length())
return 1;
else
return name.compare(name2);
}
// =========== DirTree ==========
const uint64 DirTree::End = 0xffffffff;
DirTree::DirTree(int64 bigBlockSize)
{
clear(bigBlockSize);
}
void DirTree::clear(int64 bigBlockSize)
{
// leave only root entry
entries.resize( 1 );
entries[0].valid = true;
entries[0].name = L"Root Entry";
entries[0].dir = true;
entries[0].size = 0;
entries[0].start = End;
entries[0].prev = End;
entries[0].next = End;
entries[0].child = End;
markAsDirty(0, bigBlockSize);
}
inline uint64 DirTree::entryCount()
{
return entries.size();
}
uint64 DirTree::unusedEntryCount()
{
uint64 nFound = 0;
for (uint64 idx = 0; idx < entryCount(); idx++)
{
if (!entries[idx].valid)
nFound++;
}
return nFound;
}
DirEntry* DirTree::entry( uint64 index )
{
if( index >= entryCount() ) return (DirEntry*) 0;
return &entries[ index ];
}
int64 DirTree::indexOf( DirEntry* e )
{
for( uint64 i = 0; i < entryCount(); i++ )
if( entry( i ) == e ) return i;
return -1;
}
int64 DirTree::parent( uint64 index )
{
// brute-force, basically we iterate for each entries, find its children
// and check if one of the children is 'index'
for( uint64 j=0; j<entryCount(); j++ )
{
std::vector<uint64> chi = children( j );
for( size_t i=0; i<chi.size();i++ )
if( chi[i] == index )
return j;
}
return -1;
}
std::wstring DirTree::fullName( uint64 index )
{
// don't use root name ("Root Entry"), just give "/"
if( index == 0 ) return L"/";
std::wstring result = entry( index )->name;
result.insert( 0, L"/" );
uint64 p = parent( index );
DirEntry * _entry = 0;
while( p > 0 )
{
_entry = entry( p );
if (_entry->dir && _entry->valid)
{
result.insert( 0, _entry->name);
result.insert( 0, L"/" );
}
--p;
index = p;
if( index <= 0 ) break;
}
return result;
}
// given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
// if not found and create is false, return 0
// if create is true, a new entry is returned
DirEntry* DirTree::entry( const std::wstring& name, bool create, int64 bigBlockSize, StorageIO *const io, int64 streamSize)
{
if( name.empty() ) return (DirEntry*)0;
// quick check for "/" (that's root)
if( name == L"/" ) return entry( 0 );
// split the names, e.g "/ObjectPool/_1020961869" will become:
// "ObjectPool" and "_1020961869"
std::list<std::wstring> names;
std::wstring::size_type start = 0, end = 0;
if( name[0] == '/' ) start++;
int levelsLeft = 0;
while( start < name.length() )
{
end = name.find_first_of( L'/', start );
if( end == std::wstring::npos ) end = name.length();
names.push_back( name.substr( start, end-start ) );
levelsLeft++;
start = end+1;
}
// start from root
int64 index = 0;
// trace one by one
std::list<std::wstring>::iterator it;
for( it = names.begin(); it != names.end(); ++it )
{
// find among the children of index
levelsLeft--;
uint64 child = 0;
// dima: this block is really inefficient
//std::vector<uint64> chi = children( index );
//for( size_t i = 0; i < chi.size(); i++ )
//{
// DirEntry* ce = entry( chi[i] );
// if( ce )
// if( ce->valid && ( ce->name !="/" ) )
// if( ce->name == *it ) {
// child = chi[i];
// break;
// }
//}
// dima: performance optimisation of the previous
uint64 closest = End;
child = find_child( index, *it, closest );
// traverse to the child
if( child > 0 )
{
index = child;
}
else if( !create || !io->writeable)
{
std::vector<uint64> chi = children( index );
for( size_t i = 0; i < chi.size(); i++ )
{
DirEntry* ce = entry( chi[i] );
if( ce )
if( ce->valid && ( ce->name != L"/" )/*( ce->name.length()>1 )*/ )
if( ce->name == *it )
{
child = chi[i];
break;
}
}
}
if( child > 0 )
{
index = child;
}
else
{
// not found among children ..wrong header???
if( !create || !io->writeable)
{
return (DirEntry*)0;
}
// create a new entry
uint64 parent2 = index;
index = unused();
DirEntry* e = entry( index );
e->valid = true;
e->name = *it;
e->dir = (levelsLeft > 0);
if (!e->dir)
e->size = streamSize;
else
e->size = 0;
e->start = AllocTable::Eof;
e->child = End;
if (closest == End)
{
e->prev = End;
e->next = entry(parent2)->child;
entry(parent2)->child = index;
markAsDirty(parent2, bigBlockSize);
}
else
{
DirEntry* closeE = entry( closest );
if (closeE->compare(*e) < 0)
{
e->prev = closeE->next;
e->next = End;
closeE->next = index;
}
else
{
e->next = closeE->prev;
e->prev = End;
closeE->prev = index;
}
markAsDirty(closest, bigBlockSize);
}
markAsDirty(index, bigBlockSize);
uint64 bbidx = index / (bigBlockSize / 128);
std::vector <uint64> blocks = io->bbat->follow(io->header->dirent_start);
while (blocks.size() <= bbidx)
{
uint64 nblock = io->bbat->unused();
if (blocks.size() > 0)
{
io->bbat->set(blocks[static_cast<uint64>(blocks.size())-1], nblock);
io->bbat->markAsDirty(blocks[static_cast<uint64>(blocks.size())-1], bigBlockSize);
}
io->bbat->set(nblock, AllocTable::Eof);
io->bbat->markAsDirty(nblock, bigBlockSize);
blocks.push_back(nblock);
uint64 bbidx = nblock / (io->bbat->blockSize / sizeof(uint64));
while (bbidx >= io->header->num_bat)
io->addbbatBlock();
}
}
}
return entry( index );
}
// helper function: recursively find siblings of index
void dirtree_find_siblings( DirTree* dirtree, std::vector<uint64>& result,
uint64 index )
{
DirEntry* e = dirtree->entry( index );
if (!e) return;
if (e->prev != DirTree::End)
dirtree_find_siblings(dirtree, result, e->prev);
result.push_back(index);
if (e->next != DirTree::End)
dirtree_find_siblings(dirtree, result, e->next);
}
std::vector<uint64> DirTree::children( uint64 index )
{
std::vector<uint64> result;
DirEntry* e = entry( index );
if( e ) if( e->valid && e->child < entryCount() )
dirtree_find_siblings( this, result, e->child );
return result;
}
uint64 dirtree_find_sibling( DirTree* dirtree, uint64 index, const std::wstring& name, uint64& closest ) {
uint64 count = dirtree->entryCount();
DirEntry* e = dirtree->entry( index );
if (!e || !e->valid) return 0;
int cval = e->compare(name);
if (cval == 0)
return index;
if (cval > 0)
{
if (e->prev > 0 && e->prev < count)
return dirtree_find_sibling( dirtree, e->prev, name, closest );
}
else
{
if (e->next > 0 && e->next < count)
return dirtree_find_sibling( dirtree, e->next, name, closest );
}
closest = index;
return 0;
}
uint64 DirTree::find_child( uint64 index, const std::wstring& name, uint64& closest ) {
uint64 count = entryCount();
DirEntry* p = entry( index );
if (p && p->valid && p->child < count )
return dirtree_find_sibling( this, p->child, name, closest );
return 0;
}
void DirTree::load( unsigned char* buffer, uint64 size )
{
entries.clear();
for( uint64 i = 0; i < size/128; i++ )
{
uint64 p = i * 128;
// would be < 32 if first char in the name isn't printable
unsigned char prefix = 32;
// parse name of this entry, which stored as Unicode 16-bit
int name_len = readU16( buffer + 0x40 + p );
if( name_len > 64 ) name_len = 64;
//std::string name;
//for( int j = 0; ( buffer[j + p]) && ( j < name_len); j+= 2 )
// name.append( 1, buffer[j + p] );
//name = std::string((char*)buffer + p, name_len);
// first char isn't printable ? remove it...
std::wstring name;
if (name_len > 1)
{
if (sizeof(wchar_t) == 2)
{
name = std::wstring((wchar_t*)(buffer + p), name_len / 2 - 1);
}
else
{
name = convertUtf16ToWString((UTF16*)(buffer + p), name_len / 2 - 1);
}
if( name[0] < 32 )
{
prefix = name[0];
name.erase( 0, 1 );
}
}
// 2 = file (aka stream), 1 = directory (aka storage), 5 = root
unsigned type = buffer[ 0x42 + p];
DirEntry e;
e.valid = ( type != 0 );
e.name = name;
e.prefix = prefix;
e.start = readU32( buffer + 0x74+p );
e.size = readU32( buffer + 0x78+p );
e.prev = readU32( buffer + 0x44+p );
e.next = readU32( buffer + 0x48+p );
e.child = readU32( buffer + 0x4C+p );
e.dir = ( type!=2 );
// sanity checks
if( (type != 2) && (type != 1 ) && (type != 5 ) ) e.valid = false;
if( name_len < 1 && false == entries.empty()) e.valid = false; //conv_WiwbV5sztXdFmZC6XhI__xlsx.xls
entries.push_back( e );
}
}
// return space required to save this dirtree
uint64 DirTree::size()
{
return entryCount() * 128;
}
void DirTree::save( unsigned char* buffer )
{
memset( buffer, 0, size() );
// root is fixed as "Root Entry"
DirEntry* root = entry( 0 );
std::wstring name = L"Root Entry";
if (sizeof(wchar_t) == 2)
{
memcpy( buffer, (unsigned char*)name.c_str(), name.length() * 2);
}
else
{
unsigned char* pData = NULL;
int size = 0;
convertWStringToUtf16(name, pData, size);
if (pData)
{
memcpy( buffer, pData, size);
delete []pData;
}
}
writeU16( buffer + 0x40, static_cast<uint32>(name.length() * 2 + 2) );
writeU32( buffer + 0x74, 0xffffffff );
writeU32( buffer + 0x78, 0 );
writeU32( buffer + 0x44, 0xffffffff );
writeU32( buffer + 0x48, 0xffffffff );
writeU32( buffer + 0x4c, (uint32) root->child );
buffer[ 0x42 ] = 5;
//buffer[ 0x43 ] = 1;
for( unsigned int i = 1; i < entryCount(); i++ )
{
DirEntry* e = entry( i );
if( !e ) continue;
if( e->dir )
{
e->start = 0xffffffff;
e->size = 0;
}
// max length for name is 32 chars
std::wstring name = e->name;
if( name.length() > 32 )
{
name.erase( 32, name.length() );
}
if (sizeof(wchar_t) == 2)
{
memcpy( buffer + i * 128, (unsigned char*)name.c_str(), name.length() * 2);
}
else
{
unsigned char* pData = NULL;
int size = 0;
convertWStringToUtf16(name, pData, size);
if (pData)
{
memcpy( buffer + i * 128, pData, size);
delete []pData;
}
}
writeU16( buffer + i*128 + 0x40, static_cast<uint32>(name.length()*2 + 2) );
writeU32( buffer + i*128 + 0x74, (uint32) e->start );
writeU32( buffer + i*128 + 0x78, (uint32) e->size );
writeU32( buffer + i*128 + 0x44, (uint32) e->prev );
writeU32( buffer + i*128 + 0x48, (uint32) e->next );
writeU32( buffer + i*128 + 0x4c, (uint32) e->child );
if (!e->valid)
buffer[ i*128 + 0x42 ] = 0; //STGTY_INVALID
else
buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2; //STGTY_STREAM or STGTY_STORAGE
buffer[ i*128 + 0x43 ] = 1; // always black
}
}
bool DirTree::isDirty()
{
return (dirtyBlocks.empty() == false);
}
void DirTree::markAsDirty(uint64 dataIndex, int64 bigBlockSize)
{
uint64 dbidx = dataIndex / (bigBlockSize / 128);
std::map<uint64, char>::iterator pFind = dirtyBlocks.find(dbidx);
if (pFind == dirtyBlocks.end())
dirtyBlocks.insert(std::make_pair(dbidx, 0));
}
void DirTree::flush(std::vector<uint64> blocks, StorageIO *const io, int64 bigBlockSize, uint64 sb_start, uint64 sb_size)
{
uint64 bufLen = size();
unsigned char *buffer = new unsigned char[bufLen];
memset(buffer, 0, bufLen);
save(buffer);
writeU32( buffer + 0x74, (uint32) sb_start );
writeU32( buffer + 0x78, (uint32) sb_size );
for (uint64 idx = 0; idx < static_cast<uint64>(blocks.size()); idx++)
{
std::map<uint64, char>::iterator pFind = dirtyBlocks.find(idx);
if (pFind != dirtyBlocks.end())
{
uint64 bytesToWrite = bigBlockSize;
uint64 pos = bigBlockSize * idx;
if ((bufLen - pos) < bytesToWrite)
bytesToWrite = bufLen - pos;
io->saveBigBlock(blocks[idx], 0, &buffer[pos], bytesToWrite);
}
}
dirtyBlocks.clear();
delete[] buffer;
}
uint64 DirTree::unused()
{
for (uint64 idx = 0; idx < static_cast<uint64>(entryCount()); idx++)
{
if (!entries[idx].valid)
return idx;
}
entries.push_back(DirEntry());
return entryCount()-1;
}
// Utility function to get the index of the parent dirEntry, given that we already have a full name it is relatively fast.
// Then look for a sibling dirEntry that points to inIdx. In some circumstances, the dirEntry at inIdx will be the direct child
// of the parent, in which case sibIdx will be returned as 0. A failure is indicated if both parentIdx and sibIdx are returned as 0.
void DirTree::findParentAndSib(uint64 inIdx, const std::wstring& inFullName, uint64& parentIdx, uint64& sibIdx)
{
sibIdx = 0;
parentIdx = 0;
if (inIdx == 0 || inIdx >= entryCount() || inFullName == L"/" || inFullName == L"")
return;
std::wstring localName = inFullName;
if (localName[0] != L'/')
localName = L'/' + localName;
std::wstring parentName = localName;
if (parentName[parentName.size()-1] == L'/')
parentName = parentName.substr(0, parentName.size()-1);
std::wstring::size_type lastSlash;
lastSlash = parentName.find_last_of(L'/');
if (lastSlash == std::wstring::npos)
return;
if (lastSlash == 0)
lastSlash = 1; //leave root
parentName = parentName.substr(0, lastSlash);
DirEntry *parent2 = entry(parentName);
parentIdx = indexOf(parent2);
if (parent2->child == inIdx)
return; //successful return, no sibling points to inIdx
sibIdx = findSib(inIdx, parent2->child);
}
// Utility function to get the index of the sibling dirEntry which points to inIdx. It is the responsibility of the original caller
// to start with the root sibling - i.e., sibIdx should be pointed to by the parent node's child.
uint64 DirTree::findSib(uint64 inIdx, uint64 sibIdx)
{
DirEntry *sib = entry(sibIdx);
if (!sib || !sib->valid)
return 0;
if (sib->next == inIdx || sib->prev == inIdx)
return sibIdx;
DirEntry *targetSib = entry(inIdx);
int cval = sib->compare(*targetSib);
if (cval > 0)
return findSib(inIdx, sib->prev);
else
return findSib(inIdx, sib->next);
}
void DirTree::deleteEntry(DirEntry *dirToDel, const std::wstring& inFullName, int64 bigBlockSize)
{
uint64 parentIdx;
uint64 sibIdx;
uint64 inIdx = indexOf(dirToDel);
uint64 nEntries = entryCount();
findParentAndSib(inIdx, inFullName, parentIdx, sibIdx);
uint64 replIdx;
if (!dirToDel->next || dirToDel->next > nEntries)
replIdx = dirToDel->prev;
else
{
DirEntry *sibNext = entry(dirToDel->next);
if (!sibNext->prev || sibNext->prev > nEntries)
{
replIdx = dirToDel->next;
sibNext->prev = dirToDel->prev;
markAsDirty(replIdx, bigBlockSize);
}
else
{
DirEntry *smlSib = sibNext;
int64 smlIdx = dirToDel->next;
DirEntry *smlrSib;
int64 smlrIdx = -1;
for ( ; ; )
{
smlrIdx = smlSib->prev;
smlrSib = entry(smlrIdx);
if (!smlrSib->prev || smlrSib->prev > nEntries)
break;
smlSib = smlrSib;
smlIdx = smlrIdx;
}
replIdx = smlSib->prev;
smlSib->prev = smlrSib->next;
smlrSib->prev = dirToDel->prev;
smlrSib->next = dirToDel->next;
markAsDirty(smlIdx, bigBlockSize);
markAsDirty(smlrIdx, bigBlockSize);
}
}
if (sibIdx)
{
DirEntry *sib = entry(sibIdx);
if (sib->next == inIdx)
sib->next = replIdx;
else
sib->prev = replIdx;
markAsDirty(sibIdx, bigBlockSize);
}
else
{
DirEntry *parNode = entry(parentIdx);
parNode->child = replIdx;
markAsDirty(parentIdx, bigBlockSize);
}
dirToDel->valid = false; //indicating that this entry is not in use
markAsDirty(inIdx, bigBlockSize);
}
void DirTree::debug()
{
for( unsigned i = 0; i < entryCount(); i++ )
{
DirEntry* e = entry( i );
if( !e ) continue;
std::cout << i << ": ";
if( !e->valid ) std::cout << L"INVALID ";
std::wcout << e->name << L" ";
if( e->dir ) std::cout << L"(Dir) ";
else std::cout << L"(File) ";
std::cout << e->size << L" ";
std::cout << L"s:" << e->start << L" ";
std::cout << L"(";
if( e->child == End ) std::cout << L"-"; else std::cout << e->child;
std::cout << " ";
if( e->prev == End ) std::cout << L"-"; else std::cout << e->prev;
std::cout << L":";
if( e->next == End ) std::cout << L"-"; else std::cout << e->next;
std::cout << L")";
std::cout << std::endl;
}
}
// =========== StorageIO ==========
StorageIO::StorageIO( Storage* st, const wchar_t* fname )
: storage(st),
filename(fname),
file(),
result(Storage::Ok),
opened(false),
filesize(0),
writeable(false),
header(new Header()),
dirtree(new DirTree(1 << header->b_shift)),
bbat(new AllocTable()),
sbat(new AllocTable()),
sb_blocks(),
mbat_blocks(),
mbat_data(),
mbatDirty(),
streams()
{
bbat->blockSize = (uint64) 1 << header->b_shift;
sbat->blockSize = (uint64) 1 << header->s_shift;
}
StorageIO::~StorageIO()
{
if( opened ) close();
delete sbat;
delete bbat;
delete dirtree;
delete header;
}
bool StorageIO::open(bool bWriteAccess, bool bCreate)
{
// already opened ? close first
if (opened)
close();
if (bCreate)
{
create();
init();
writeable = true;
}
else
{
writeable = bWriteAccess;
load(bWriteAccess);
}
return result == Storage::Ok;
}
void StorageIO::load(bool bWriteAccess)
{
unsigned char* buffer = 0;
uint64 buflen = 0;
std::vector<uint64> blocks;
// open the file, check for error
result = Storage::OpenFailed;
#if defined(POLE_USE_UTF16_FILENAMES)
if (bWriteAccess)
file.open(filename.c_str(), std::ios::binary | std::ios::in | std::ios::out);
else
file.open(filename.c_str(), std::ios::binary | std::ios::in);
#else
if (bWriteAccess)
file.open(stringWToUtf8String(filename), std::ios::binary | std::ios::in | std::ios::out);
else
file.open(stringWToUtf8String(filename), std::ios::binary | std::ios::in);
#endif //defined(POLE_USE_UTF16_FILENAMES) && defined(POLE_WIN)
if( !file.good() ) return;
// find size of input file
file.seekg(0, std::ios::end );
filesize = static_cast<uint64>(file.tellg());
// load header
buffer = new unsigned char[512];
file.seekg( 0 );
file.read( (char*)buffer, 512 );
fileCheck(file);
header->load( buffer );
delete[] buffer;
// check OLE magic id
result = Storage::NotOLE;
for( unsigned i=0; i<8; i++ )
if( header->id[i] != pole_magic[i] )
return;
// sanity checks
result = Storage::BadOLE;
if( !header->valid() ) return;
if( header->threshold != 4096 ) return;
// important block size
bbat->blockSize = (uint64) 1 << header->b_shift;
sbat->blockSize = (uint64) 1 << header->s_shift;
blocks = getbbatBlocks(true);
// load big bat
buflen = static_cast<uint64>(blocks.size())*bbat->blockSize;
if( buflen > 0 )
{
buffer = new unsigned char[ buflen ];
loadBigBlocks( blocks, buffer, buflen );
bbat->load( buffer, buflen );
delete[] buffer;
}
// load small bat
blocks.clear();
blocks = bbat->follow( header->sbat_start );
buflen = static_cast<uint64>(blocks.size())*bbat->blockSize;
if( buflen > 0 )
{
buffer = new unsigned char[ buflen ];
loadBigBlocks( blocks, buffer, buflen );
sbat->load( buffer, buflen );
delete[] buffer;
}
// load directory tree
blocks.clear();
blocks = bbat->follow( header->dirent_start );
buflen = static_cast<uint64>(blocks.size())*bbat->blockSize;
if (buflen > 0)
{
buffer = new unsigned char[ buflen ];
loadBigBlocks( blocks, buffer, buflen );
dirtree->load( buffer, buflen );
unsigned sb_start = readU32( buffer + 0x74 );
delete[] buffer;
// fetch block chain as data for small-files
sb_blocks = bbat->follow( sb_start ); // small files
// for troubleshooting, just enable this block
#if 0
header->debug();
sbat->debug();
bbat->debug();
dirtree->debug();
#endif
// so far so good
result = Storage::Ok;
opened = true;
}
}
void StorageIO::create()
{
// std::cout << "Creating " << filename << std::endl;
#if defined(POLE_USE_UTF16_FILENAMES)
file.open(filename.c_str(), std::ios::binary | std::ios::in | std::ios::out | std::ios::trunc);
#else
file.open( stringWToUtf8String(filename).c_str(), std::ios::binary | std::ios::in | std::ios::out | std::ios::trunc);
#endif
if( !file.good() )
{
//std::cerr << "Can't create " << filename << std::endl;
result = Storage::OpenFailed;
return;
}
// so far so good
opened = true;
result = Storage::Ok;
}
void StorageIO::init()
{
// Initialize parts of the header, directory entries, and big and small allocation tables
header->bb_blocks[0] = 0;
header->dirent_start = 1;
header->sbat_start = 2;
header->num_bat = 1;
header->num_sbat = 1;
header->dirty = true;
bbat->set(0, AllocTable::Eof);
bbat->markAsDirty(0, bbat->blockSize);
bbat->set(1, AllocTable::Eof);
bbat->markAsDirty(1, bbat->blockSize);
bbat->set(2, AllocTable::Eof);
bbat->markAsDirty(2, bbat->blockSize);
bbat->set(3, AllocTable::Eof);
bbat->markAsDirty(3, bbat->blockSize);
sb_blocks = bbat->follow( 3 );
mbatDirty = false;
}
void StorageIO::flush()
{
if (header->dirty)
{
unsigned char *buffer = new unsigned char[512];
memset(buffer, 0, 512);
header->save( buffer );
file.seekp( 0 );
file.write( (char*)buffer, 512 );
fileCheck(file);
delete[] buffer;
}
if (bbat->isDirty())
flushbbat();
if (sbat->isDirty())
flushsbat();
if (dirtree->isDirty())
{
std::vector<uint64> blocks;
blocks = bbat->follow(header->dirent_start);
uint64 sb_start = 0xffffffff;
if (sb_blocks.size() > 0)
sb_start = sb_blocks[0];
dirtree->flush(blocks, this, bbat->blockSize, sb_start, static_cast<uint64>(sb_blocks.size())*bbat->blockSize);
}
if (mbatDirty && mbat_blocks.size() > 0)
{
uint64 nBytes = bbat->blockSize * static_cast<uint64>(mbat_blocks.size());
unsigned char *buffer = new unsigned char[nBytes];
memset(buffer, 0, nBytes);
uint64 sIdx = 0;
uint64 dcount = 0;
uint64 blockCapacity = bbat->blockSize / sizeof(uint64) - 1;
uint64 blockIdx = 0;
for (size_t mdIdx = 0; mdIdx < mbat_data.size(); mdIdx++)
{
writeU32(buffer + sIdx, (uint32) mbat_data[mdIdx]);
sIdx += 4;
dcount++;
if (dcount == blockCapacity)
{
blockIdx++;
if (blockIdx == mbat_blocks.size())
writeU32(buffer + sIdx, AllocTable::Eof);
else
writeU32(buffer + sIdx, (uint32) mbat_blocks[blockIdx]);
sIdx += 4;
dcount = 0;
}
}
saveBigBlocks(mbat_blocks, 0, buffer, nBytes);
delete[] buffer;
mbatDirty = false;
}
file.flush();
fileCheck(file);
/* Note on Microsoft implementation:
- directory entries are stored in the last block(s)
- BATs are as second to the last
- Meta BATs are third to the last
*/
}
void StorageIO::close()
{
if( !opened ) return;
if (writeable)
{
file.seekg(0, std::ios::end );
filesize = static_cast<uint64>(file.tellg());
if (filesize % 512 != 0)
{
char padding[512] = {};
POLE::uint64 sz = (filesize / 512 + 1) * 512 - filesize;
file.write(padding, sz);
fileCheck(file);
}
}
file.close();
opened = false;
std::list<Stream*>::iterator it;
for( it = streams.begin(); it != streams.end(); ++it )
delete *it;
streams.clear();
}
StreamIO* StorageIO::streamIO( const std::wstring& name, bool bCreate, int64 streamSize )
{
// sanity check
if( !name.length() ) return (StreamIO*)0;
// search in the entries
DirEntry* entry = dirtree->entry( name, bCreate, bbat->blockSize, this, streamSize );
//if( entry) std::cout << "FOUND\n";
if( !entry ) return (StreamIO*)0;
//if( !entry->dir ) std::cout << " NOT DIR\n";
if( entry->dir ) return (StreamIO*)0;
StreamIO* result2 = new StreamIO( this, entry );
result2->fullName = name;
return result2;
}
bool StorageIO::deleteByName(const std::wstring& fullName)
{
if (!fullName.length())
return false;
if (!writeable)
return false;
DirEntry* entry = dirtree->entry(fullName);
if (!entry)
return false;
bool retVal;
if (entry->dir)
retVal = deleteNode(entry, fullName);
else
retVal = deleteLeaf(entry, fullName);
if (retVal)
flush();
return retVal;
}
bool StorageIO::deleteNode(DirEntry *entry, const std::wstring& fullName)
{
std::wstring lclName = fullName;
if (lclName[lclName.size()-1] != '/')
lclName += '/';
bool retVal = true;
while (entry->child && entry->child < dirtree->entryCount())
{
DirEntry* childEnt = dirtree->entry(entry->child);
std::wstring childFullName = lclName + childEnt->name;
if (childEnt->dir)
retVal = deleteNode(childEnt, childFullName);
else
retVal = deleteLeaf(childEnt, childFullName);
if (!retVal)
return false;
}
dirtree->deleteEntry(entry, fullName, bbat->blockSize);
return retVal;
}
bool StorageIO::deleteLeaf(DirEntry *entry, const std::wstring& fullName)
{
std::vector<uint64> blocks;
if (entry->size >= header->threshold)
{
blocks = bbat->follow(entry->start);
for (unsigned idx = 0; idx < blocks.size(); idx++)
{
bbat->set(blocks[idx], AllocTable::Avail);
bbat->markAsDirty(idx, bbat->blockSize);
}
}
else
{
blocks = sbat->follow(entry->start);
for (unsigned idx = 0; idx < blocks.size(); idx++)
{
sbat->set(blocks[idx], AllocTable::Avail);
sbat->markAsDirty(idx, bbat->blockSize);
}
}
dirtree->deleteEntry(entry, fullName, bbat->blockSize);
return true;
}
uint64 StorageIO::loadBigBlocks( std::vector<uint64> blocks, unsigned char* data, uint64 maxlen )
{
// sentinel
if( !data ) return 0;
fileCheck(file);
if( !file.good() ) return 0;
if( blocks.size() < 1 ) return 0;
if( maxlen == 0 ) return 0;
// read block one by one, seems fast enough
uint64 bytes = 0;
for( unsigned int i=0; (i < blocks.size() ) & ( bytes<maxlen ); i++ )
{
uint64 block = blocks[i];
uint64 pos = bbat->blockSize * ( block+1 );
uint64 p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes;
if( pos + p > filesize )
p = filesize - pos;
file.seekg( pos );
file.read( (char*)data + bytes, p );
fileCheck(file);
// should use gcount to see how many bytes were really returned - eof check...
bytes += p;
}
return bytes;
}
uint64 StorageIO::loadBigBlock( uint64 block, unsigned char* data, uint64 maxlen )
{
// sentinel
if( !data ) return 0;
fileCheck(file);
if( !file.good() ) return 0;
// wraps call for loadBigBlocks
std::vector<uint64> blocks;
blocks.resize( 1 );
blocks[ 0 ] = block;
return loadBigBlocks( blocks, data, maxlen );
}
uint64 StorageIO::saveBigBlocks( std::vector<uint64> blocks, uint64 offset, unsigned char* data, uint64 len )
{
// sentinel
if( !data ) return 0;
fileCheck(file);
if( !file.good() ) return 0;
if( blocks.size() < 1 ) return 0;
if( len == 0 ) return 0;
// write block one by one, seems fast enough
uint64 bytes = 0;
for ( size_t i = 0; (i < blocks.size() ) & ( bytes < len ); i++ )
{
uint64 block = blocks[i];
uint64 pos = (bbat->blockSize * ( block + 1 ) ) + offset;
uint64 maxWrite = bbat->blockSize - offset;
uint64 tobeWritten = len - bytes;
if (tobeWritten > maxWrite)
tobeWritten = maxWrite;
file.seekp( pos );
file.write( (char*)data + bytes, tobeWritten );
fileCheck(file);
bytes += tobeWritten;
offset = 0;
if (filesize < pos + tobeWritten)
filesize = pos + tobeWritten;
}
return bytes;
}
uint64 StorageIO::saveBigBlock( uint64 block, uint64 offset, unsigned char* data, uint64 len )
{
if ( !data ) return 0;
fileCheck(file);
if ( !file.good() ) return 0;
//wrap call for saveBigBlocks
std::vector<uint64> blocks;
blocks.resize( 1 );
blocks[ 0 ] = block;
return saveBigBlocks(blocks, offset, data, len );
}
// return number of bytes which has been read
uint64 StorageIO::loadSmallBlocks( std::vector<uint64> blocks, unsigned char* data, uint64 maxlen )
{
// sentinel
if( !data ) return 0;
fileCheck(file);
if( !file.good() ) return 0;
if( blocks.size() < 1 ) return 0;
if( maxlen == 0 ) return 0;
// our own local buffer
unsigned char* buf = new unsigned char[ bbat->blockSize ];
// read small block one by one
uint64 bytes = 0;
for( unsigned int i=0; ( i<blocks.size() ) & ( bytes<maxlen ); i++ )
{
uint64 block = blocks[i];
// find where the small-block exactly is
uint64 pos = block * sbat->blockSize;
uint64 bbindex = pos / bbat->blockSize;
if( bbindex >= sb_blocks.size() ) break;
loadBigBlock( sb_blocks[ bbindex ], buf, bbat->blockSize );
// copy the data
uint64 offset = pos % bbat->blockSize;
uint64 p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes : bbat->blockSize-offset;
p = (sbat->blockSize<p ) ? sbat->blockSize : p;
memcpy( data + bytes, buf + offset, p );
bytes += p;
}
delete[] buf;
return bytes;
}
uint64 StorageIO::loadSmallBlock( uint64 block, unsigned char* data, uint64 maxlen )
{
// sentinel
if( !data ) return 0;
fileCheck(file);
if( !file.good() ) return 0;
// wraps call for loadSmallBlocks
std::vector<uint64> blocks;
blocks.resize( 1 );
blocks.assign( 1, block );
return loadSmallBlocks( blocks, data, maxlen );
}
uint64 StorageIO::saveSmallBlocks( std::vector<uint64> blocks, uint64 offset,
unsigned char* data, uint64 len, int64 startAtBlock )
{
// sentinel
if( !data ) return 0;
fileCheck(file);
if( !file.good() ) return 0;
if( blocks.size() < 1 ) return 0;
if( len == 0 ) return 0;
// write block one by one, seems fast enough
uint64 bytes = 0;
for( uint64 i = startAtBlock; (i < blocks.size() ) & ( bytes<len ); i++ )
{
uint64 block = blocks[i];
// find where the small-block exactly is
uint64 pos = block * sbat->blockSize;
uint64 bbindex = pos / bbat->blockSize;
if( bbindex >= sb_blocks.size() ) break;
uint64 offset2 = pos % bbat->blockSize;
uint64 maxWrite = sbat->blockSize - offset;
uint64 tobeWritten = len - bytes;
if (tobeWritten > maxWrite)
tobeWritten = maxWrite;
saveBigBlock( sb_blocks[ bbindex ], offset2 + offset, data + bytes, tobeWritten);
bytes += tobeWritten;
offset = 0;
if (filesize < pos + tobeWritten)
filesize = pos + tobeWritten;
}
return bytes;
}
uint64 StorageIO::saveSmallBlock( uint64 block, uint64 offset, unsigned char* data, uint64 len )
{
if ( !data ) return 0;
fileCheck(file);
if ( !file.good() ) return 0;
//wrap call for saveSmallBlocks
std::vector<uint64> blocks;
blocks.resize( 1 );
blocks[ 0 ] = block;
return saveSmallBlocks(blocks, offset, data, len );
}
void StorageIO::flushbbat()
{
std::vector<uint64> blocks;
blocks = getbbatBlocks(false);
bbat->flush(blocks, this, bbat->blockSize);
}
void StorageIO::flushsbat()
{
std::vector<uint64> blocks;
blocks = bbat->follow(header->sbat_start);
sbat->flush(blocks, this, bbat->blockSize);
}
std::vector<uint64> StorageIO::getbbatBlocks(bool bLoading)
{
std::vector<uint64> blocks;
// find blocks allocated to store big bat
// the first 109 blocks are in header, the rest in meta bat
blocks.clear();
blocks.resize( header->num_bat );
for( unsigned i = 0; i < 109; i++ )
{
if( i >= header->num_bat )
break;
else
blocks[i] = header->bb_blocks[i];
}
if (bLoading)
{
mbat_blocks.clear();
mbat_data.clear();
if( (header->num_bat > 109) && (header->num_mbat > 0) )
{
unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
uint64 k = 109;
uint64 sector;
uint64 mdidx = 0;
for( uint64 r = 0; r < header->num_mbat; r++ )
{
if(r == 0) // 1st meta bat location is in file header.
sector = header->mbat_start;
else // next meta bat location is the last current block value.
{
sector = blocks[--k];
mdidx--;
}
mbat_blocks.push_back(sector);
mbat_data.resize(mbat_blocks.size()*(bbat->blockSize/4));
loadBigBlock( sector, buffer2, bbat->blockSize );
for( uint64 s=0; s < bbat->blockSize; s+=4 )
{
if( k >= header->num_bat )
break;
else
{
blocks[k] = readU32( buffer2 + s );
mbat_data[mdidx++] = blocks[k];
k++;
}
}
}
if (mbat_data.size() != mdidx) mbat_data.resize(mdidx);
delete[] buffer2;
}
}
else
{
unsigned i = 109;
for (unsigned int idx = 0; idx < mbat_data.size(); idx++)
{
blocks[i++] = mbat_data[idx];
if (i == header->num_bat)
break;
}
}
return blocks;
}
uint64 StorageIO::ExtendFile( std::vector<uint64> *chain )
{
uint64 newblockIdx = bbat->unused();
bbat->set(newblockIdx, AllocTable::Eof);
uint64 bbidx = newblockIdx / (bbat->blockSize / sizeof(uint64));
while (bbidx >= header->num_bat)
addbbatBlock();
bbat->markAsDirty(newblockIdx, bbat->blockSize);
if (chain->size() > 0)
{
bbat->set((*chain)[chain->size()-1], newblockIdx);
bbat->markAsDirty((*chain)[chain->size()-1], bbat->blockSize);
}
chain->push_back(newblockIdx);
return newblockIdx;
}
void StorageIO::addbbatBlock()
{
uint64 newblockIdx = bbat->unused();
bbat->set(newblockIdx, AllocTable::MetaBat);
if (header->num_bat < 109)
header->bb_blocks[header->num_bat] = newblockIdx;
else
{
mbatDirty = true;
mbat_data.push_back(newblockIdx);
uint64 metaIdx = header->num_bat - 109;
uint64 idxPerBlock = bbat->blockSize / sizeof(uint64) - 1; //reserve room for index to next block
uint64 idxBlock = metaIdx / idxPerBlock;
if (idxBlock == mbat_blocks.size())
{
uint64 newmetaIdx = bbat->unused();
bbat->set(newmetaIdx, AllocTable::MetaBat);
mbat_blocks.push_back(newmetaIdx);
if (header->num_mbat == 0)
header->mbat_start = newmetaIdx;
header->num_mbat++;
}
}
header->num_bat++;
header->dirty = true;
}
// =========== StreamIO ==========
StreamIO::StreamIO( StorageIO* s, DirEntry* e)
: io(s),
entryIdx(io->dirtree->indexOf(e)),
fullName(),
blocks(),
eof(false),
fail(false),
m_pos(0),
cache_data(new unsigned char[CACHEBUFSIZE]),
cache_size(0), // indicating an empty cache
cache_pos(0)
{
if( e->size >= io->header->threshold )
blocks = io->bbat->follow( e->start );
else
blocks = io->sbat->follow( e->start );
}
// FIXME tell parent we're gone
StreamIO::~StreamIO()
{
delete[] cache_data;
}
void StreamIO::setSize(uint64 newSize)
{
bool bThresholdCrossed = false;
bool bOver = false;
if(!io->writeable )
return;
DirEntry *entry = io->dirtree->entry(entryIdx);
if (newSize >= io->header->threshold && entry->size < io->header->threshold)
{
bThresholdCrossed = true;
bOver = true;
}
else if (newSize < io->header->threshold && entry->size >= io->header->threshold)
{
bThresholdCrossed = true;
bOver = false;
}
if (bThresholdCrossed)
{
// first, read what is already in the stream, limited by the requested new size. Note
// that the read can work precisely because we have not yet reset the size.
uint64 len = newSize;
if (len > entry->size)
len = entry->size;
unsigned char *buffer = 0;
uint64 savePos = tell();
if (len)
{
buffer = new unsigned char[len];
seek(0);
read(buffer, len);
}
// Now get rid of the existing blocks
if (bOver)
{
for (unsigned int idx = 0; idx < blocks.size(); idx++)
{
io->sbat->set(blocks[idx], AllocTable::Avail);
io->sbat->markAsDirty(idx, io->bbat->blockSize);
}
}
else
{
for (unsigned int idx = 0; idx < blocks.size(); idx++)
{
io->bbat->set(blocks[idx], AllocTable::Avail);
io->bbat->markAsDirty(idx, io->bbat->blockSize);
}
}
blocks.clear();
entry->start = DirTree::End;
// Now change the size, and write the old data back into the stream, if any
entry->size = newSize;
io->dirtree->markAsDirty(io->dirtree->indexOf(entry), io->bbat->blockSize);
if (len)
{
write(0, buffer, len);
delete buffer;
}
if (savePos <= entry->size)
seek(savePos);
}
else if (entry->size != newSize) //simple case - no threshold was crossed, so just change the size
{
entry->size = newSize;
io->dirtree->markAsDirty(io->dirtree->indexOf(entry), io->bbat->blockSize);
}
}
void StreamIO::seek( uint64 pos )
{
m_pos = pos;
}
uint64 StreamIO::tell()
{
return m_pos;
}
int64 StreamIO::getch()
{
// past end-of-file ?
DirEntry *entry = io->dirtree->entry(entryIdx);
if( m_pos >= entry->size ) return -1;
// need to update cache ?
if( !cache_size || ( m_pos < cache_pos ) ||
( m_pos >= cache_pos + cache_size ) )
updateCache();
// something bad if we don't get good cache
if( !cache_size ) return -1;
int64 data = cache_data[m_pos - cache_pos];
m_pos++;
return data;
}
uint64 StreamIO::read( uint64 pos, unsigned char* data, uint64 maxlen )
{
// sanity checks
if( !data ) return 0;
if( maxlen == 0 ) return 0;
uint64 totalbytes = 0;
DirEntry *entry = io->dirtree->entry(entryIdx);
if (pos + maxlen > entry->size)
maxlen = entry->size - pos;
if ( entry->size < io->header->threshold )
{
// small file
uint64 index = pos / io->sbat->blockSize;
if( index >= blocks.size() ) return 0;
unsigned char* buf = new unsigned char[ io->sbat->blockSize ];
uint64 offset = pos % io->sbat->blockSize;
while( totalbytes < maxlen )
{
if( index >= blocks.size() ) break;
io->loadSmallBlock( blocks[index], buf, io->bbat->blockSize );
uint64 count = io->sbat->blockSize - offset;
if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
memcpy( data+totalbytes, buf + offset, count );
totalbytes += count;
offset = 0;
index++;
}
delete[] buf;
}
else
{
// big file
uint64 index = pos / io->bbat->blockSize;
if( index >= blocks.size() ) return 0;
unsigned char* buf = new unsigned char[ io->bbat->blockSize ];
uint64 offset = pos % io->bbat->blockSize;
while( totalbytes < maxlen )
{
if( index >= blocks.size() ) break;
io->loadBigBlock( blocks[index], buf, io->bbat->blockSize );
uint64 count = io->bbat->blockSize - offset;
if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
memcpy( data+totalbytes, buf + offset, count );
totalbytes += count;
index++;
offset = 0;
}
delete [] buf;
}
return totalbytes;
}
uint64 StreamIO::read( unsigned char* data, uint64 maxlen )
{
uint64 bytes = read( tell(), data, maxlen );
m_pos += bytes;
return bytes;
}
uint64 StreamIO::write( unsigned char* data, uint64 len )
{
return write( tell(), data, len );
}
uint64 StreamIO::write( uint64 pos, unsigned char* data, uint64 len )
{
// sanity checks
if( !data ) return 0;
if( len == 0 ) return 0;
if( !io->writeable ) return 0;
DirEntry *entry = io->dirtree->entry(entryIdx);
if (pos + len > entry->size)
setSize(pos + len); //reset size, possibly changing from small to large blocks
uint64 totalbytes = 0;
if ( entry->size < io->header->threshold )
{
// small file
uint64 index = (pos + len - 1) / io->sbat->blockSize;
while (index >= blocks.size())
{
uint64 nblock = io->sbat->unused();
if (blocks.size() > 0)
{
io->sbat->set(blocks[blocks.size()-1], nblock);
io->sbat->markAsDirty(blocks[blocks.size()-1], io->bbat->blockSize);
}
io->sbat->set(nblock, AllocTable::Eof);
io->sbat->markAsDirty(nblock, io->bbat->blockSize);
blocks.resize(blocks.size()+1);
blocks[blocks.size()-1] = nblock;
uint64 bbidx = nblock / (io->bbat->blockSize / sizeof(unsigned int));
while (bbidx >= io->header->num_sbat)
{
std::vector<uint64> sbat_blocks = io->bbat->follow(io->header->sbat_start);
io->ExtendFile(&sbat_blocks);
io->header->num_sbat++;
io->header->dirty = true; //Header will have to be rewritten
}
uint64 sidx = nblock * io->sbat->blockSize / io->bbat->blockSize;
while (sidx >= io->sb_blocks.size())
{
io->ExtendFile(&io->sb_blocks);
io->dirtree->markAsDirty(0, io->bbat->blockSize); //make sure to rewrite first directory block
}
}
uint64 offset = pos % io->sbat->blockSize;
index = pos / io->sbat->blockSize;
//if (index == 0)
totalbytes = io->saveSmallBlocks(blocks, offset, data, len, index);
}
else
{
uint64 index = (pos + len - 1) / io->bbat->blockSize;
while (index >= blocks.size())
io->ExtendFile(&blocks);
uint64 offset = pos % io->bbat->blockSize;
uint64 remainder = len;
index = pos / io->bbat->blockSize;
while( remainder > 0 )
{
if( index >= blocks.size() ) break;
uint64 count = io->bbat->blockSize - offset;
if ( remainder < count )
count = remainder;
io->saveBigBlock( blocks[index], offset, data + totalbytes, count );
totalbytes += count;
remainder -= count;
index++;
offset = 0;
}
}
if (blocks.size() > 0 && entry->start != blocks[0])
{
entry->start = blocks[0];
io->dirtree->markAsDirty(io->dirtree->indexOf(entry), io->bbat->blockSize);
}
m_pos += len;
return totalbytes;
}
void StreamIO::flush()
{
io->flush();
}
void StreamIO::updateCache()
{
// sanity check
if( !cache_data ) return;
DirEntry *entry = io->dirtree->entry(entryIdx);
cache_pos = m_pos - (m_pos % CACHEBUFSIZE);
uint64 bytes = CACHEBUFSIZE;
if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos;
cache_size = read( cache_pos, cache_data, bytes );
}
// =========== Storage ==========
Storage::Storage( const wchar_t* filename )
{
io = new StorageIO( this, filename );
}
Storage::~Storage()
{
delete io;
}
int Storage::result()
{
return (int) io->result;
}
bool Storage::open(bool bWriteAccess, bool bCreate)
{
return io->open(bWriteAccess, bCreate);
}
void Storage::close()
{
io->close();
}
std::list<std::wstring> Storage::entries( const std::wstring& path )
{
std::list<std::wstring> localResult;
DirTree* dt = io->dirtree;
DirEntry* e = dt->entry( path, false );
if( e && e->dir )
{
uint64 parent = dt->indexOf( e );
std::vector<uint64> children = dt->children( parent );
for( uint64 i = 0; i < children.size(); i++ )
{
localResult.push_back( dt->entry( children[i] )->name );
}
}
return localResult;
}
std::list<std::wstring> Storage::entries_with_prefix( const std::wstring& path )
{
std::list<std::wstring> localResult;
DirTree* dt = io->dirtree;
DirEntry* e = dt->entry( path, false );
if( e && e->dir )
{
uint64 parent = dt->indexOf( e );
std::vector<uint64> children = dt->children( parent );
for( uint64 i = 0; i < children.size(); i++ )
{
std::wstring val;
if (dt->entry( children[i] )->prefix != 32)
{
val = dt->entry( children[i] )->prefix;
}
val += dt->entry( children[i] )->name;
localResult.push_back(val);
}
}
return localResult;
}
bool Storage::isDirectory( const std::wstring& name )
{
DirEntry* e = io->dirtree->entry( name, false );
return e ? e->dir : false;
}
bool Storage::exists( const std::wstring& name )
{
DirEntry* e = io->dirtree->entry( name, false );
return (e != 0);
}
bool Storage::isWriteable()
{
return io->writeable;
}
bool Storage::deleteByName( const std::wstring& name )
{
return io->deleteByName(name);
}
void Storage::GetStats(uint64 *pEntries, uint64 *pUnusedEntries,
uint64 *pBigBlocks, uint64 *pUnusedBigBlocks,
uint64 *pSmallBlocks, uint64 *pUnusedSmallBlocks)
{
*pEntries = io->dirtree->entryCount();
*pUnusedEntries = io->dirtree->unusedEntryCount();
*pBigBlocks = io->bbat->count();
*pUnusedBigBlocks = io->bbat->unusedCount();
*pSmallBlocks = io->sbat->count();
*pUnusedSmallBlocks = io->sbat->unusedCount();
}
// recursively collect stream names
void CollectStreams( std::list<std::wstring>& result, DirTree* tree, DirEntry* parent, const std::wstring& path )
{
DirEntry* c = tree->entry( parent->child );
std::queue<DirEntry*> queue;
if ( c ) queue.push( c );
while ( !queue.empty() )
{
DirEntry* e = queue.front();
queue.pop();
if ( e->dir )
CollectStreams( result, tree, e, path + e->name + L"/" );
else
result.push_back( path + e->name );
DirEntry* p = tree->entry( e->prev );
if ( p ) queue.push( p );
DirEntry* n = tree->entry( e->next );
if ( n ) queue.push( n );
// not testing if p or n have already been processed; potential infinite loop in case of closed Entry chain
// it seems not to happen, though
}
}
std::list<std::wstring> Storage::GetAllStreams( const std::wstring& storageName )
{
std::list<std::wstring> vresult;
DirEntry* e = io->dirtree->entry( storageName, false );
if ( e && e->dir ) CollectStreams( vresult, io->dirtree, e, storageName );
return vresult;
}
// =========== Stream ==========
Stream::Stream( Storage* storage, const std::wstring& name, bool bCreate, int64 streamSize )
: io(storage->io->streamIO( name, bCreate, (int) streamSize ))
{
}
// FIXME tell parent we're gone
Stream::~Stream()
{
delete io;
}
std::wstring Stream::fullName()
{
return io ? io->fullName : std::wstring();
}
uint64 Stream::tell()
{
return io ? io->tell() : 0;
}
void Stream::seek( uint64 newpos )
{
if( io )
io->seek(newpos);
}
uint64 Stream::size()
{
if (!io)
return 0;
DirEntry *entry = io->io->dirtree->entry(io->entryIdx);
return entry->size;
}
void Stream::setSize(int64 newSize)
{
if (!io)
return;
if (newSize < 0)
return;
//if (newSize > std::numeric_limits<int64>::max())
// return;
io->setSize(newSize);
}
int64 Stream::getch()
{
return io ? io->getch() : 0;
}
uint64 Stream::read( unsigned char* data, uint64 maxlen )
{
return io ? io->read( data, maxlen ) : 0;
}
uint64 Stream::write( unsigned char* data, uint64 len )
{
return io ? io->write( data, len ) : 0;
}
void Stream::flush()
{
if (io)
io->flush();
}
bool Stream::eof()
{
return io ? io->eof : false;
}
bool Stream::fail()
{
return io ? io->fail : true;
}