Files
DocumentServer-v-9.2.0/core/Common/OfficeFileFormatChecker2.cpp
Yajbir Singh f1b860b25c
Some checks failed
check / markdownlint (push) Has been cancelled
check / spellchecker (push) Has been cancelled
updated
2025-12-11 19:03:17 +05:30

2162 lines
61 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* (c) Copyright Ascensio System SIA 2010-2023
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish
* street, Riga, Latvia, EU, LV-1050.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "OfficeFileFormatChecker.h"
#include "../DesktopEditor/common/Directory.h"
#include "../DesktopEditor/xml/include/xmlutils.h"
#include "../OOXML/Base/Base.h"
#include "../OfficeUtils/src/OfficeUtils.h"
//#define FILE_FORMAT_CHECKER_WITH_MACRO
#if defined FILE_FORMAT_CHECKER_WITH_MACRO
#include "../MsBinaryFile/PptFile/Main/PPTFormatLib.h"
#endif
#include "3dParty/pole/pole.h"
#include <algorithm>
#include <limits>
#include <vector>
#include "OfficeFileFormatDefines.h"
#define MIN_SIZE_BUFFER 4096
#define MAX_SIZE_BUFFER 102400
// Reads a length-prefixed string from an OLE stream.
//
// The stream is expected to hold a 4-byte count followed by that many bytes.
// A count larger than max_size is treated as corrupt: up to max_size bytes are
// scanned for the first zero byte, the stream is repositioned relative to it
// and an empty string is returned.
// Whenever the length prefix could be read, the stream is finally advanced by
// two more bytes.
//
// stream   - source stream; NULL yields an empty string
// max_size - upper bound, in bytes, accepted for the string payload
// Returns the bytes read, or an empty string on failure.
std::string ReadStringFromOle(POLE::Stream *stream, unsigned int max_size)
{
	std::string result;
	if (!stream)
		return result;

	_UINT32 cch = 0;
	if (4 != stream->read((BYTE *)&cch, 4))
		return result;

	if (cch > max_size)
	{
		// error ... skip to the first zero byte
		const unsigned int pos_original = (unsigned int)stream->tell();
		std::vector<unsigned char> stringBytes(max_size);

		unsigned int bytesRead = 0;
		if (max_size > 0)
			bytesRead = (unsigned int)stream->read(&stringBytes[0], max_size);
		if (bytesRead > max_size)
			bytesRead = max_size;

		// Only inspect bytes that were really delivered by the stream.
		unsigned int pos = 0;
		while (pos < bytesRead && stringBytes[pos] != 0)
			pos++;

		stream->seek(pos_original + pos - 1);
	}
	else if (cch > 0)
	{
		// dont read the terminating zero
		std::vector<unsigned char> stringBytes(cch);
		cch = (_UINT32)stream->read(&stringBytes[0], cch);
		result = std::string((char *)&stringBytes[0], cch);
	}

	// skip the terminating zero of the Unicode string
	stream->seek(stream->tell() + 2);
	return result;
}
// Returns true when the buffer starts with the five bytes "{\rtf".
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isRtfFormatFile(unsigned char *pBuffer, int dwBytes)
{
	static const unsigned char kSignature[] = {'{', '\\', 'r', 't', 'f'};
	const int kLength = (int)sizeof(kSignature);

	if (NULL == pBuffer || dwBytes < kLength)
		return false;

	return std::equal(kSignature, kSignature + kLength, pBuffer);
}
// Returns true when the buffer contains both a "multipart/related" and a
// "text/html" Content-Type header line.
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isMultiPartsHtmlFormatFile(unsigned char *pBuffer, int dwBytes)
{
	// A non-positive size cannot contain either header; rejecting it here also
	// keeps a negative value from being turned into a huge string length.
	if (pBuffer == NULL || dwBytes <= 0)
		return false;

	const char *contentTypeFormatLine1 = "Content-Type: multipart/related";
	const char *contentTypeFormatLine2 = "Content-Type: text/html"; // may also lie outside the given buffer (TODO)

	const std::string xml_string((char *)pBuffer, dwBytes);
	return std::string::npos != xml_string.find(contentTypeFormatLine1) &&
		   std::string::npos != xml_string.find(contentTypeFormatLine2);
}
// Opens fileName with the XML reader and classifies it by the name of the
// first node: "svg" -> SVG, "html" -> HTML, anything else -> generic XML.
// Returns false (leaving nFileType untouched) when the file cannot be opened
// by the reader or has no first node.
bool COfficeFileFormatChecker::iXmlFile(const std::wstring &fileName)
{
	XmlUtils::CXmlLiteReader oReader;
	const bool bHasFirstNode = oReader.FromFile(fileName) && oReader.ReadNextNode();
	if (!bHasFirstNode)
		return false;

	const std::wstring rootName = oReader.GetNameNoNS();
	if (L"svg" == rootName)
		nFileType = AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_SVG;
	else if (L"html" == rootName)
		nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML;
	else
		nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_XML;

	return true;
}
// Heuristic HTML detection on a raw buffer.
//
// testCloseTag == true (and more than 5 bytes available): looks for a closing
//   "</html" (letters in either case) or a lower-case "</body>".
// otherwise: scans at most the first 100 positions for the letters "html"
//   (either case) appearing after a '<' that has not been closed by '>'.
//
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isHtmlFormatFile(unsigned char *pBuffer, int dwBytes, bool testCloseTag)
{
	if (NULL == pBuffer || dwBytes < 4)
		return false;

	if (testCloseTag && dwBytes > 5)
	{
		for (int i = 0; i < dwBytes - 6; i++)
		{
			const unsigned char *p = pBuffer + i;
			if ('<' != p[0] || '/' != p[1])
				continue;

			const bool bCloseHtml = ('H' == p[2] || 'h' == p[2]) && ('T' == p[3] || 't' == p[3]) &&
									('M' == p[4] || 'm' == p[4]) && ('L' == p[5] || 'l' == p[5]);
			if (bCloseHtml)
				return true;

			//</body>
			const bool bCloseBody = 'b' == p[2] && 'o' == p[3] && 'd' == p[4] && 'y' == p[5] && '>' == p[6];
			if (bCloseBody)
				return true;
		}
		return false;
	}

	// dwBytes is at least 4 here, so the opening-tag scan always applies.
	bool bInsideTag = false;
	const int nLimit = std::min(dwBytes - 4, 100);
	for (int i = 0; i < nLimit; i++)
	{
		const unsigned char c = pBuffer[i];
		if ('<' == c)
		{
			bInsideTag = true;
		}
		else if ('>' == c)
		{
			bInsideTag = false;
		}
		else if (bInsideTag && ('H' == c || 'h' == c) && ('T' == pBuffer[i + 1] || 't' == pBuffer[i + 1]) &&
				 ('M' == pBuffer[i + 2] || 'm' == pBuffer[i + 2]) && ('L' == pBuffer[i + 3] || 'l' == pBuffer[i + 3]))
		{
			return true;
		}
	}
	return false;
}
// Returns true when the buffer starts with the four bytes "DOCY".
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isBinaryDoctFormatFile(unsigned char *pBuffer, int dwBytes)
{
	static const unsigned char kSignature[] = {'D', 'O', 'C', 'Y'};
	const int kLength = (int)sizeof(kSignature);

	if (NULL == pBuffer || dwBytes < kLength)
		return false;

	return std::equal(kSignature, kSignature + kLength, pBuffer);
}
// Returns true when the buffer starts with the four bytes "XLSY".
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isBinaryXlstFormatFile(unsigned char *pBuffer, int dwBytes)
{
	static const unsigned char kSignature[] = {'X', 'L', 'S', 'Y'};
	const int kLength = (int)sizeof(kSignature);

	if (NULL == pBuffer || dwBytes < kLength)
		return false;

	return std::equal(kSignature, kSignature + kLength, pBuffer);
}
// Returns true when the buffer starts with the four bytes "PPTY".
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isBinaryPpttFormatFile(unsigned char *pBuffer, int dwBytes)
{
	static const unsigned char kSignature[] = {'P', 'P', 'T', 'Y'};
	const int kLength = (int)sizeof(kSignature);

	if (NULL == pBuffer || dwBytes < kLength)
		return false;

	return std::equal(kSignature, kSignature + kLength, pBuffer);
}
// Returns true when the buffer starts with the four bytes "VSDY".
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isBinaryVsdtFormatFile(unsigned char* pBuffer, int dwBytes)
{
	static const unsigned char kSignature[] = {'V', 'S', 'D', 'Y'};
	const int kLength = (int)sizeof(kSignature);

	if (NULL == pBuffer || dwBytes < kLength)
		return false;

	return std::equal(kSignature, kSignature + kLength, pBuffer);
}
// Looks for the "%PDF-" marker in the buffer and, when found, extracts the
// optional "%DocumentID <id> " value into documentID.
//
// NOTE: the last byte of the buffer is overwritten with '\0' so that the
// C string functions used below stay inside the buffer.
//
// pBuffer    - data to inspect (may be NULL); modified as described above
// dwBytes    - number of valid bytes
// documentID - cleared on entry; receives the document id if one is present
// Returns true when the marker was found. Buffers shorter than 5 bytes or
// starting with "PK" are rejected.
bool COfficeFileFormatChecker::isPdfFormatFile(unsigned char *pBuffer, int dwBytes, std::wstring &documentID)
{
	if (NULL == pBuffer)
		return false;

	documentID.clear();

	if (dwBytes < 5 || ('P' == pBuffer[0] && 'K' == pBuffer[1]))
		return false;

	char *pText = (char *)pBuffer;
	pText[dwBytes - 1] = '\0';

	// 1) plain search up to the first zero byte
	const char *pHeader = strstr(pText, "%PDF-");

	// 2) position-by-position search, which also looks past embedded zero bytes
	for (int i = 0; NULL == pHeader && i < dwBytes - 5; ++i)
	{
		if (0 == strncmp(pText + i, "%PDF-", 5))
			pHeader = pText + i;
	}

	// 3) skip special: treat the first two bytes as a little-endian offset
	if (NULL == pHeader)
	{
		_UINT16 sz = pBuffer[0] + (pBuffer[1] << 8);
		if (sz < dwBytes - 8)
			pHeader = strstr((char *)(pBuffer + sz), "%PDF-");
	}

	if (NULL == pHeader)
		return false;

	char *pId = strstr(pText, "%DocumentID ");
	if (NULL != pId)
	{
		pId += 12; // length of "%DocumentID "
		char *pIdEnd = strstr(pId, " ");
		if (NULL != pIdEnd)
			documentID = NSFile::CUtf8Converter::GetUnicodeStringFromUTF8((BYTE *)pId, (LONG)(pIdEnd - pId));
	}
	return true;
}
// Checks whether a PDF buffer carries the oform meta tag
// (g_format_oform_pdf_meta_tag) inside its first object.
//
// Expected layout, as checked below: a 6-byte binary comment marker, then
// "1 0 obj\n<<\n", then the meta tag located before the first "stream\r\n",
// followed by two space-terminated tokens.
//
// NOTE: the last byte of the buffer is overwritten with 0 so that the C string
// functions used below stay inside the buffer.
//
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isPdfOformFormatFile(unsigned char *pBuffer, int dwBytes)
{
	// Same guard as the sibling checkers; also protects the write below.
	if (pBuffer == NULL || dwBytes < 1)
		return false;

	pBuffer[dwBytes - 1] = 0;

	char* pFirst = strstr((char*)pBuffer, "%\315\312\322\251\015");
	if (!pFirst || pFirst - (char*)pBuffer + 6 >= dwBytes)
		return false;
	pFirst += 6;

	if (strncmp(pFirst, "1 0 obj\012<<\012", 11) != 0 || pFirst - (char*)pBuffer + 11 >= dwBytes)
		return false;
	pFirst += 11;

	char* pStream = strstr(pFirst, "stream\015\012");
	char* pMeta = strstr(pFirst, g_format_oform_pdf_meta_tag);
	if (!pStream || !pMeta || pStream < pMeta)
		return false;

	// Step over the tag plus three more bytes, then require two tokens that
	// are each terminated by a space.
	pMeta += strlen(g_format_oform_pdf_meta_tag) + 3;
	char* pMetaLast = strstr(pMeta, " ");
	if (!pMetaLast)
		return false;

	pMeta = pMetaLast + 1;
	pMetaLast = strstr(pMeta, " ");
	if (!pMetaLast)
		return false;

	return true;
}
// Recognises an embedded OLE object storage (one that has an "Ole" stream).
//
// With a "CompObj" stream of at least 28 bytes, up to three strings are read
// from it (user type, clipboard format, program) and used to pick which
// container check to run; nFileType is set when that check succeeds, or to
// PACKAGE_IN_OLE when a "Package" stream exists. The function then returns
// true even if nFileType was left unchanged.
// Without a usable "CompObj" stream, the presence of a "LinkInfo" stream is
// enough to return true.
//
// storage - opened compound file (may be NULL)
bool COfficeFileFormatChecker::isOleObjectFile(POLE::Storage *storage)
{
	if (storage == NULL)
		return false;

	POLE::Stream streamOle(storage, L"Ole");
	if (streamOle.fail())
		return false;

	std::string UserType, ClipboardFormat, Program;

	POLE::Stream streamCompObject(storage, L"CompObj");
	if (false == streamCompObject.fail() && streamCompObject.size() >= 28)
	{
		streamCompObject.seek(28); // skip Header

		unsigned int sz_obj = (unsigned int)(streamCompObject.size() - streamCompObject.tell());
		if (sz_obj > 4)
		{
			UserType = ReadStringFromOle(&streamCompObject, sz_obj);

			sz_obj = (unsigned int)(streamCompObject.size() - streamCompObject.tell());
			if (sz_obj > 4)
				ClipboardFormat = ReadStringFromOle(&streamCompObject, sz_obj);

			sz_obj = (unsigned int)(streamCompObject.size() - streamCompObject.tell());
			if (sz_obj > 4)
				Program = ReadStringFromOle(&streamCompObject, sz_obj);
		}

		POLE::Stream streamPackage(storage, L"Package");
		if (false == streamPackage.fail())
		{
			nFileType = AVS_OFFICESTUDIO_FILE_OTHER_PACKAGE_IN_OLE;
		}
		else if (std::string::npos != Program.find("Excel") || std::string::npos != UserType.find("Excel"))
		{
			if (isXlsFormatFile(storage))
				nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLS;
		}
		else if (std::string::npos != Program.find("Word") || std::string::npos != UserType.find("Word"))
		{
			// nFileType is set inside isDocFormatFile
			isDocFormatFile(storage);
		}
		else if (std::string::npos != Program.find("PowerPoint") || std::string::npos != UserType.find("PowerPoint"))
		{
			if (isPptFormatFile(storage))
				nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_PPT;
		}
		return true;
	}

	POLE::Stream streamLinkInfo(storage, L"LinkInfo");
	if (streamLinkInfo.fail())
		return false;

	// The length comes from the file as a signed 16-bit value; only a positive
	// length may be used as an allocation / read size.
	short cch = 0;
	if (2 == streamLinkInfo.read((BYTE *)&cch, 2) && cch > 0)
	{
		std::vector<unsigned char> str(cch);
		cch = (short)streamLinkInfo.read(&str[0], cch);
		if (cch > 0)
			ClipboardFormat.assign((char *)&str[0], cch);
		streamLinkInfo.seek(streamLinkInfo.tell() + 6);
		// skip ...
	}
	return true;
}
// Checks a compound file for a readable "WordDocument" stream.
//
// Sets nFileType to DOC when the stream starts with a known Word signature,
// or to HTML_IN_CONTAINER when its first 64 bytes look like HTML. Sets
// bMacroEnabled when a "Macros" directory exists (not evaluated for the HTML
// case). Returns false when the stream cannot be read or when the storage
// has "DataSpaces" entries.
//
// storage - opened compound file (may be NULL)
bool COfficeFileFormatChecker::isDocFormatFile(POLE::Storage *storage)
{
	if (NULL == storage)
		return false;

	unsigned char header[64];
	memset(header, 0, sizeof(header));

	POLE::Stream stream(storage, L"WordDocument");
	if (!(stream.read(header, 64) > 0))
		return false;

	// ms office 2007 encrypted contains stream WordDocument !!
	const std::list<std::wstring> dataSpaces = storage->entries(L"DataSpaces");
	if (!dataSpaces.empty())
		return false;

	const bool bWordSignature = (0xA5 == header[1]) && (0xEC == header[0] || // word 1997-2003
														0xDC == header[0] || // word 1995
														0xDB == header[0]);  // word 2.0
	if (bWordSignature)
	{
		nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC;
	}
	else if (isHtmlFormatFile(header, 64, false))
	{
		nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML_IN_CONTAINER;
		return true;
	}

	if (storage->isDirectory(L"Macros"))
		bMacroEnabled = true;

	return true;
}
// Returns true when the storage has a readable "PROJECT" stream, a "VBA"
// directory and a readable "VBA/dir" stream.
//
// storage - opened compound file (may be NULL)
bool COfficeFileFormatChecker::isVbaProjectFile(POLE::Storage *storage)
{
	if (NULL == storage)
		return false;

	unsigned char probe[10];

	POLE::Stream projectStream(storage, L"PROJECT");
	if (projectStream.read(probe, 10) < 1)
		return false;

	if (!storage->isDirectory(L"VBA"))
		return false;

	POLE::Stream dirStream(storage, L"VBA/dir");
	return !(dirStream.read(probe, 10) < 1);
}
// Returns true when the "FileHeader" stream of the storage starts with the
// 17 bytes "HWP Document File".
//
// storage - opened compound file (may be NULL)
bool COfficeFileFormatChecker::isHwpFile(POLE::Storage* storage)
{
	if (storage == NULL)
		return false;

	static constexpr const char* hwpFormatLine = "HWP Document File";
	static constexpr int hwpFormatLineSize = 17;

	unsigned char buffer[hwpFormatLineSize];
	POLE::Stream stream(storage, L"FileHeader");
	if (hwpFormatLineSize != stream.read(buffer, hwpFormatLineSize))
		return false;

	// The buffer is exactly as long as the signature and is not
	// zero-terminated, so it is compared byte-wise instead of with strstr.
	return std::equal(buffer, buffer + hwpFormatLineSize, hwpFormatLine);
}
// Returns true when the storage has a readable workbook stream under one of
// the names "Workbook", "Book", "WORKBOOK", "BOOK" or "book" (tried in that
// order). Sets bMacroEnabled when a "_VBA_PROJECT_CUR" directory exists.
//
// storage - opened compound file (may be NULL)
bool COfficeFileFormatChecker::isXlsFormatFile(POLE::Storage *storage)
{
	if (NULL == storage)
		return false;

	static const wchar_t *const kStreamNames[] = {L"Workbook", L"Book", L"WORKBOOK", L"BOOK", L"book"};
	const size_t kStreamNamesCount = sizeof(kStreamNames) / sizeof(kStreamNames[0]);

	unsigned char probe[10];
	bool bReadable = false;
	for (size_t i = 0; i < kStreamNamesCount && !bReadable; ++i)
	{
		POLE::Stream stream(storage, kStreamNames[i]);
		bReadable = !(stream.read(probe, 10) < 1);
	}
	if (!bReadable)
		return false;

	if (storage->isDirectory(L"_VBA_PROJECT_CUR"))
		bMacroEnabled = true;

	return true;
}
// Returns true when the buffer starts with one of the two-byte signatures
// EC A5, DC A5 or DB A5 (the same ones isDocFormatFile accepts for the
// "WordDocument" stream).
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isDocFlatFormatFile(unsigned char *pBuffer, int dwBytes)
{
	if (NULL == pBuffer || dwBytes < 2)
		return false;

	if (0xA5 != pBuffer[1])
		return false;

	const unsigned char first = pBuffer[0];
	return 0xEC == first || 0xDC == first || 0xDB == first;
}
// Returns true when the buffer starts with one of the byte sequences accepted
// here as the beginning of a BOF record: 09 08, 09 04, 09 02 or 09 00 04 00.
// pBuffer - data to inspect (may be NULL); dwBytes - number of valid bytes.
bool COfficeFileFormatChecker::isXlsFlatFormatFile(unsigned char *pBuffer, int dwBytes)
{
	if (pBuffer == NULL || dwBytes < 2)
		return false;

	// BOF started: every accepted form begins with 0x09
	if (pBuffer[0] != 0x09)
		return false;

	if (pBuffer[1] == 0x08 || pBuffer[1] == 0x04 || pBuffer[1] == 0x02)
		return true;

	// The four-byte form may only be examined when four bytes are available.
	return dwBytes >= 4 && pBuffer[1] == 0x00 && pBuffer[2] == 0x04 && pBuffer[3] == 0x00;
}
// Returns true when the storage has a readable "PowerPoint Document" stream.
//
// storage - opened compound file (may be NULL)
bool COfficeFileFormatChecker::isPptFormatFile(POLE::Storage *storage)
{
	if (NULL == storage)
		return false;

	unsigned char probe[10];
	POLE::Stream stream(storage, L"PowerPoint Document");
	return !(stream.read(probe, 10) < 1);
}
// Extracts the document id of a file and stores it in sDocumentID.
//
// Compound files are checked for the two encrypted-package layouts; any other
// file is passed to isOpenOfficeFormatFile and, if that fails, its first
// MIN_SIZE_BUFFER bytes are checked as PDF. nFileType is updated when one of
// the encrypted or PDF checks succeeds.
//
// Returns the id (possibly empty). If the file cannot be opened for the PDF
// check, the current id is returned without updating sDocumentID.
std::wstring COfficeFileFormatChecker::getDocumentID(const std::wstring &_fileName)
{
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(_WIN64)
	std::wstring fileName = CorrectPathW(_fileName);
#else
	std::wstring fileName = _fileName;
#endif
	std::wstring documentID;

	POLE::Storage storage(fileName.c_str());
	if (storage.open())
	{
		if (isMS_OFFICECRYPTOFormatFile(&storage, documentID))
		{
			nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_OFFCRYPTO;
		}
		else if (isMS_MITCRYPTOFormatFile(&storage, documentID))
		{
			nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_MITCRYPTO;
		}
	}
	else if (false == isOpenOfficeFormatFile(fileName, documentID))
	{
		NSFile::CFileBinary file;
		if (!file.OpenFile(fileName))
			return documentID;

		// Owned by the vector, so it is released on every path.
		std::vector<unsigned char> buffer(MIN_SIZE_BUFFER);

		DWORD dwReadBytes = 0;
		file.ReadFile(&buffer[0], MIN_SIZE_BUFFER, dwReadBytes);
		file.CloseFile();

		if (isPdfFormatFile(&buffer[0], (int)dwReadBytes, documentID))
		{
			nFileType = AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_PDF;
		}
	}

	sDocumentID = documentID;
	return documentID;
}
// Opens _fileName as a compound file and checks it for the two encrypted
// package layouts, in this order: isMS_OFFICECRYPTOFormatFile, then
// isMS_MITCRYPTOFormatFile. Sets nFileType for the first one that matches.
//
// documentID - passed through to the two checkers
// Returns true when either check succeeds.
bool COfficeFileFormatChecker::isMS_OFFCRYPTOFormatFile(const std::wstring &_fileName, std::wstring &documentID)
{
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(_WIN64)
	std::wstring fileName = CorrectPathW(_fileName);
#else
	std::wstring fileName = _fileName;
#endif
	POLE::Storage storage(fileName.c_str());
	if (!storage.open())
		return false;

	if (isMS_OFFICECRYPTOFormatFile(&storage, documentID))
	{
		nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_OFFCRYPTO;
		return true;
	}

	if (isMS_MITCRYPTOFormatFile(&storage, documentID))
	{
		nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_MITCRYPTO;
		return true;
	}

	return false;
}
// Returns true when the storage contains an "EncryptionInfo" entry.
// In that case the content of the "DocumentID" stream, if any could be read,
// is converted from UTF-8 and stored in documentID.
//
// storage    - opened compound file (may be NULL)
// documentID - cleared on entry
bool COfficeFileFormatChecker::isMS_OFFICECRYPTOFormatFile(POLE::Storage *storage, std::wstring &documentID)
{
	if (storage == NULL)
		return false;

	documentID.clear();

	if (!storage->exists(L"EncryptionInfo"))
		return false;

	POLE::Stream stream(storage, L"DocumentID");
	std::string sData;
	sData.resize(stream.size());

	// Read into the string's own writable storage; an empty string has
	// nothing to receive.
	if (!sData.empty() && stream.read((BYTE *)&sData[0], stream.size()) > 0)
	{
		documentID = UTF8_TO_U(sData);
	}
	return true;
}
// Returns true when the storage has "DataSpaces" entries and an
// "EncryptedPackage" entry but no "EncryptionInfo" entry.
//
// storage    - opened compound file (may be NULL)
// documentID - cleared; this check never fills it
bool COfficeFileFormatChecker::isMS_MITCRYPTOFormatFile(POLE::Storage *storage, std::wstring &documentID)
{
	if (NULL == storage)
		return false;

	documentID.clear();

	const std::list<std::wstring> dataSpaces = storage->entries(L"DataSpaces");
	return !dataSpaces.empty() && !storage->exists(L"EncryptionInfo") && storage->exists(L"EncryptedPackage");
}
// Opens _fileName as a compound file and runs the storage-based
// isVbaProjectFile check on it; sets nFileType to MS_VBAPROJECT on success.
bool COfficeFileFormatChecker::isVbaProjectFile(const std::wstring &_fileName)
{
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(_WIN64)
	std::wstring fileName = CorrectPathW(_fileName);
#else
	std::wstring fileName = _fileName;
#endif
	POLE::Storage storage(fileName.c_str());
	if (!storage.open() || !isVbaProjectFile(&storage))
		return false;

	nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_VBAPROJECT;
	return true;
}
bool COfficeFileFormatChecker::isOfficeFile(const std::wstring &_fileName)
{
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(_WIN64)
std::wstring fileName = CorrectPathW(_fileName);
#else
std::wstring fileName = _fileName;
#endif
// приоритет как оказывается важен
// Metamorphic Manual for windows 28415.doc
POLE::Storage storage(fileName.c_str());
if (storage.open())
{
if (isOleObjectFile(&storage))
{
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
return false;
}
else if (isDocFormatFile(&storage))
{
// nFileType внутри
return true;
}
else if (isXlsFormatFile(&storage))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLS;
return true;
}
else if (isPptFormatFile(&storage))
{
// #if defined FILE_FORMAT_CHECKER_WITH_MACRO
// COfficePPTFile pptFile;
//
// bMacroEnabled = true;
// long nResult = pptFile.OpenFile(fileName, L"", bMacroEnabled);
// if (nResult != S_OK)
// {
// return false;
// }
// pptFile.CloseFile();
// #endif
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_PPT;
return true;
}
else if (isMS_OFFICECRYPTOFormatFile(&storage, sDocumentID))
{
nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_OFFCRYPTO;
return true;
}
else if (isMS_MITCRYPTOFormatFile(&storage, sDocumentID))
{
nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_MITCRYPTO;
return true;
}
else if (isVbaProjectFile(&storage))
{
nFileType = AVS_OFFICESTUDIO_FILE_OTHER_MS_VBAPROJECT;
return true;
}
else if (isHwpFile(&storage))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP;
return true;
}
}
NSFile::CFileBinary file;
if (!file.OpenFile(fileName))
return false;
unsigned char* bufferDetect = new unsigned char[4096]; // enaf !!
if (!bufferDetect)
{
file.CloseFile();
return false;
}
DWORD dwDetectdBytes = 0;
file.ReadFile(bufferDetect, MIN_SIZE_BUFFER, dwDetectdBytes);
COfficeUtils OfficeUtils(NULL);
if (OfficeUtils.IsArchive(fileName) == S_OK && (false == isPdfFormatFile(bufferDetect, dwDetectdBytes, sDocumentID)))
{
if (isOOXFormatFile(fileName))
{
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
return true;
}
else if (isOpenOfficeFormatFile(fileName, sDocumentID))
{
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
return true;
}
else if (isOnlyOfficeFormatFile(fileName))
{
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
return true;
}
else if (isXpsFile(fileName))
{
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
return true;
}
else if (isOFDFile(fileName))
{
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
return true;
}
else if (isMacFormatFile(fileName))
{
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
return true;
}
else if (isHwpxFile(fileName))
{
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
return true;
}
}
//-----------------------------------------------------------------------------------------------
// others
bool bEmptyFile = false;
{
int sizeRead = (int)dwDetectdBytes;
bEmptyFile = (dwDetectdBytes < 1);
if (isBinaryDoctFormatFile(bufferDetect, sizeRead)) // min size - 4
{
nFileType = AVS_OFFICESTUDIO_FILE_CANVAS_WORD;
}
else if (isBinaryXlstFormatFile(bufferDetect, sizeRead)) // min size - 4
{
nFileType = AVS_OFFICESTUDIO_FILE_CANVAS_SPREADSHEET;
}
else if (isBinaryPpttFormatFile(bufferDetect, sizeRead)) // min size - 4
{
nFileType = AVS_OFFICESTUDIO_FILE_CANVAS_PRESENTATION;
}
else if (isBinaryVsdtFormatFile(bufferDetect, sizeRead)) // min size - 4
{
nFileType = AVS_OFFICESTUDIO_FILE_CANVAS_DRAW;
}
else if (isOOXFlatFormatFile(bufferDetect, sizeRead))
{
// nFileType;
}
else if (isRtfFormatFile(bufferDetect, sizeRead)) // min size - 5
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_RTF;
}
else if (isPdfFormatFile(bufferDetect, sizeRead, sDocumentID)) // min size - 5
{
nFileType = AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_PDF;
if (isPdfOformFormatFile(bufferDetect, sizeRead))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_OFORM_PDF;
}
else if (isDjvuFormatFile(bufferDetect, sizeRead)) // min size - 8
{
nFileType = AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_DJVU;
}
else if (isHtmlFormatFile(bufferDetect, sizeRead, false)) // min size - 4
{
long fileSize = file.GetFileSize();
if (fileSize > MIN_SIZE_BUFFER)
{
file.SeekFile(fileSize - MIN_SIZE_BUFFER);
file.ReadFile(bufferDetect, MIN_SIZE_BUFFER, dwDetectdBytes);
}
if (isHtmlFormatFile(bufferDetect, sizeRead, true)) // min size - 6
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML;
}
}
else if (isFB2FormatFile(bufferDetect, sizeRead)) // min size - 11
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_FB2;
}
else if (isOpenOfficeFlatFormatFile(bufferDetect, sizeRead)) // min size - 78
{
// nFileType
}
else if (isDocFlatFormatFile(bufferDetect, sizeRead)) // min size - 2
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC_FLAT; // without compaund container
}
else if (isXlsFlatFormatFile(bufferDetect, sizeRead)) // min size - 2
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLS; // without compaund container
}
else if (isMultiPartsHtmlFormatFile(bufferDetect, sizeRead))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_MHT;
}
else if (isHwpmlFile(bufferDetect, sizeRead))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
}
//------------------------------------------------------------------------------------------------
file.CloseFile();
}
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
//------------------------------------------------------------------------------------------------
if (iXmlFile(fileName))
{
// nFileType внутри
return true;
}
//// by Extension
std::wstring::size_type nExtPos = fileName.rfind(L'.');
std::wstring sExt = L"unknown";
if (nExtPos != std::wstring::npos)
sExt = fileName.substr(nExtPos);
std::transform(sExt.begin(), sExt.end(), sExt.begin(), tolower);
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
if (bEmptyFile)
{
if (0 == sExt.compare(L".xlsx"))
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX;
else if (0 == sExt.compare(L".docx"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX;
else if (0 == sExt.compare(L".pptx"))
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX;
else if (0 == sExt.compare(L".vsxd"))
nFileType = AVS_OFFICESTUDIO_FILE_DRAW_VSDX;
else if (0 == sExt.compare(L".ods"))
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_ODS;
else if (0 == sExt.compare(L".odt"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT;
else if (0 == sExt.compare(L".odp"))
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP;
}
else if (0 == sExt.compare(L".mht") || 0 == sExt.compare(L".mhtml"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_MHT;
else if (0 == sExt.compare(L".md"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_MD;
else if (0 == sExt.compare(L".csv") || 0 == sExt.compare(L".xls") || 0 == sExt.compare(L".xlsx") || 0 == sExt.compare(L".xlsb"))
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_CSV;
else if (0 == sExt.compare(L".html") || 0 == sExt.compare(L".htm"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML;
else if (0 == sExt.compare(L".bin")) // base64 string
nFileType = AVS_OFFICESTUDIO_FILE_CANVAS_PDF;
else if (0 == sExt.compare(L".doct")) // случай архива с html viewer
nFileType = AVS_OFFICESTUDIO_FILE_TEAMLAB_DOCY;
else if (0 == sExt.compare(L".txt") || 0 == sExt.compare(L".xml") || 0 == sExt.compare(L".rtf") || 0 == sExt.compare(L".doc") || 0 == sExt.compare(L".docx"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_TXT;
else if (0 == sExt.compare(L".pages"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_PAGES;
else if (0 == sExt.compare(L".numbers"))
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_NUMBERS;
else if (0 == sExt.compare(L".key"))
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_KEY;
else if (0 == sExt.compare(L".hwp"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP;
else if (0 == sExt.compare(L".hwpx"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX;
else if (0 == sExt.compare(L".hml"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
return false;
}
bool COfficeFileFormatChecker::isOOXFormatFile(const std::wstring &fileName, bool unpacked)
{
COfficeUtils OfficeUtils(NULL);
ULONG nBufferSize = 0;
BYTE *pBuffer = NULL;
HRESULT hresult = S_FALSE;
if (unpacked)
{
if (NSFile::CFileBinary::ReadAllBytes(fileName + FILE_SEPARATOR_STR + L"[Content_Types].xml", &pBuffer, nBufferSize))
hresult = S_OK;
}
else
{
hresult = OfficeUtils.LoadFileFromArchive(fileName, L"[Content_Types].xml", &pBuffer, nBufferSize);
}
if (hresult == S_OK && pBuffer != NULL)
{
const char *docxFormatLine = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
const char *dotxFormatLine = "application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml";
const char *docmFormatLine = "application/vnd.ms-word.document.macroEnabled.main+xml";
const char *dotmFormatLine = "application/vnd.ms-word.template.macroEnabledTemplate.main+xml";
const char *oformFormatLine = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.oform";
const char *docxfFormatLine = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.docxf";
const char *xlsxFormatLine = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml";
const char *xltxFormatLine = "application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml";
const char *xlsmFormatLine = "application/vnd.ms-excel.sheet.macroEnabled.main+xml";
const char *xltmFormatLine = "application/vnd.ms-excel.template.macroEnabled.main+xml";
const char *xlsbFormatLine = "application/vnd.ms-excel.sheet.binary.macroEnabled.main";
const char *pptxFormatLine = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml";
const char *ppsxFormatLine = "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml";
const char *potxFormatLine = "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml";
const char *pptmFormatLine = "application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml";
const char *ppsmFormatLine = "application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml";
const char *potmFormatLine = "application/vnd.ms-powerpoint.template.macroEnabled.main+xml";
const char *vsdxFormatLine = "application/vnd.ms-visio.drawing.main+xml";
const char *vssxFormatLine = "application/vnd.ms-visio.stencil.main+xml";
const char *vstxFormatLine = "application/vnd.ms-visio.template.main+xml";
const char *vsdmFormatLine = "application/vnd.ms-visio.drawing.macroEnabled.main+xml";
const char *vssmFormatLine = "application/vnd.ms-visio.stencil.macroEnabled.main+xml";
const char *vstmFormatLine = "application/vnd.ms-visio.template.macroEnabled.main+xml";
std::string strContentTypes((char*)pBuffer, nBufferSize);
if (std::string::npos != strContentTypes.find(oformFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_OFORM;
}
else if (std::string::npos != strContentTypes.find(docxfFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCXF;
}
else if (std::string::npos != strContentTypes.find(docxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX;
}
else if (std::string::npos != strContentTypes.find(docmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCM;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(dotxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOTX;
}
else if (std::string::npos != strContentTypes.find(dotmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOTM;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(xlsxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX;
}
else if (std::string::npos != strContentTypes.find(xlsmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSM;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(xltxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLTX;
}
else if (std::string::npos != strContentTypes.find(xltmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLTM;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(xlsbFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSB;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(pptxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX;
}
else if (std::string::npos != strContentTypes.find(pptmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTM;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(ppsmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_PPSM;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(ppsxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_PPSX;
}
else if (std::string::npos != strContentTypes.find(potxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_POTX;
}
else if (std::string::npos != strContentTypes.find(potmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_POTM;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(vsdxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DRAW_VSDX;
}
else if (std::string::npos != strContentTypes.find(vssxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DRAW_VSSX;
}
else if (std::string::npos != strContentTypes.find(vstxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DRAW_VSTX;
}
else if (std::string::npos != strContentTypes.find(vsdmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DRAW_VSDM;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(vssmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DRAW_VSSM;
bMacroEnabled = true;
}
else if (std::string::npos != strContentTypes.find(vstmFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DRAW_VSTM;
bMacroEnabled = true;
}
delete []pBuffer;
pBuffer = NULL;
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
}
return false;
}
// Opens fileName as a compound file and runs the storage-based
// isDocFormatFile check on it (which sets nFileType itself).
bool COfficeFileFormatChecker::isDocFormatFile(const std::wstring &fileName)
{
	POLE::Storage storage(fileName.c_str());
	if (!storage.open())
		return false;

	// nFileType is set inside
	return isDocFormatFile(&storage);
}
// Opens fileName as a compound file and runs the storage-based
// isXlsFormatFile check on it; sets nFileType to XLS on success.
bool COfficeFileFormatChecker::isXlsFormatFile(const std::wstring &fileName)
{
	POLE::Storage storage(fileName.c_str());
	if (!storage.open() || !isXlsFormatFile(&storage))
		return false;

	nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLS;
	return true;
}
bool COfficeFileFormatChecker::isOnlyOfficeFormatFile(const std::wstring &fileName)
{
COfficeUtils OfficeUtils(NULL);
ULONG nBufferSize = 128; // ограничим считывание из бинарника
BYTE *pBuffer = NULL;
HRESULT hresult = OfficeUtils.LoadFileFromArchive(fileName, L"Editor.bin", &pBuffer, nBufferSize);
if (hresult == S_OK && pBuffer != NULL)
{
if (isBinaryDoctFormatFile(pBuffer, nBufferSize))
{
nFileType = AVS_OFFICESTUDIO_FILE_TEAMLAB_DOCY;
}
else if (isBinaryXlstFormatFile(pBuffer, nBufferSize))
{
nFileType = AVS_OFFICESTUDIO_FILE_TEAMLAB_XLSY;
}
else if (isBinaryPpttFormatFile(pBuffer, nBufferSize))
{
nFileType = AVS_OFFICESTUDIO_FILE_TEAMLAB_PPTY;
}
else if (isBinaryVsdtFormatFile(pBuffer, nBufferSize))
{
nFileType = AVS_OFFICESTUDIO_FILE_TEAMLAB_VSDY;
}
delete[] pBuffer;
pBuffer = NULL;
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
}
return false;
}
// One field decoded by ReadIWAField from an IWA buffer.
// All offsets are byte positions inside the buffer handed to ReadIWAField.
struct TIWAField
{
// Offset just past the one-byte tag; for wire type 2 it is moved forward to
// the first payload byte (just past the length prefix).
size_t m_unStart;
// Offset one past the last byte consumed for this field.
size_t m_unEnd;
// Field number: the tag byte shifted right by 3.
unsigned m_uIndex;
// Wire type: the low 3 bits of the tag byte.
unsigned m_unWireType;
// Decoded value; ReadIWAField writes it only for wire type 0 and leaves it
// untouched for every other wire type.
uint64_t m_oValue;
};
// Decodes one little-endian base-128 varint (7 payload bits per byte, high
// bit = "more bytes follow") from pBuffer.
//
// pBuffer  - buffer to read from
// unEndPos - offset one past the last readable byte
// unPos    - in: offset of the first varint byte; out: offset just past the
//            bytes that were consumed (never beyond unEndPos)
// unValue  - out: decoded value on success, 0 on failure
//
// Returns false when the buffer is NULL, when there is nothing to read
// (unPos >= unEndPos), when the varint is cut off by unEndPos, or when the
// value does not fit into 64 bits.
bool ReadUVar(BYTE* pBuffer, size_t unEndPos, size_t& unPos, uint64_t& unValue)
{
	unValue = 0;

	// A start position at or past the end can never yield a value.
	if (NULL == pBuffer || unPos >= unEndPos)
		return false;

	uint64_t unResult = 0;
	unsigned unShift = 0;
	bool bOverflow = false;
	bool bNext = true;

	while (unPos < unEndPos && bNext)
	{
		const unsigned char c = pBuffer[unPos++];
		const uint64_t unBits = (uint64_t)(c & 0x7F);
		bNext = 0 != (c & 0x80);

		// Zero groups never overflow, so redundant trailing 0x80 bytes are fine.
		if (0 != unBits)
		{
			if (unShift >= 64 || ((unBits << unShift) >> unShift) != unBits)
				bOverflow = true;
			else
				unResult |= unBits << unShift;
		}

		// Stop growing the shift once it is out of range to avoid wrap-around.
		if (unShift < 64)
			unShift += 7;
	}

	// The last byte read still had its continuation bit set: truncated input.
	if (bNext)
		return false;

	if (bOverflow)
		return false;

	unValue = unResult;
	return true;
}
// Reads one protobuf-style field (tag byte + payload) from pBuffer.
//
// pBuffer   - buffer to read from
// unEndPos  - offset one past the last byte that belongs to the message
// unPos     - in: offset of the tag byte; out: offset just past the field
// oIWAField - out: wire type, field number and the [start, end) byte range;
//             m_oValue is filled in for wire type 0 (varint) only
//
// Returns false when the buffer is NULL, fewer than two bytes remain, the
// wire type is unsupported, or the field would extend past unEndPos.
//
// NOTE: the tag is read as a single byte, so only tags below 0x80 (field
// numbers 0..15) are decoded correctly.
bool ReadIWAField(BYTE* pBuffer, size_t unEndPos, size_t& unPos, TIWAField& oIWAField)
{
	// Written as a subtraction so a huge unPos cannot wrap the comparison.
	if (NULL == pBuffer || unEndPos < 2 || unPos > unEndPos - 2)
		return false;

	const unsigned uSpec = (unsigned)pBuffer[unPos++];

	oIWAField.m_unWireType = uSpec & 0x7;
	oIWAField.m_unStart = unPos;

	switch (oIWAField.m_unWireType)
	{
	case 0: // varint
	{
		if (!ReadUVar(pBuffer, unEndPos, unPos, oIWAField.m_oValue))
			return false;
		break;
	}
	case 1: // fixed 64-bit value: 8 bytes
	{
		if (unEndPos - unPos < 8)
			return false;
		unPos += 8;
		break;
	}
	case 2: // length-delimited: varint length followed by that many bytes
	{
		uint64_t unLen = 0;
		// Compare against the remaining room instead of adding, so a huge
		// length cannot overflow the position.
		if (!ReadUVar(pBuffer, unEndPos, unPos, unLen) || unLen > unEndPos - unPos)
			return false;

		oIWAField.m_unStart = unPos;
		unPos += (size_t)unLen;
		break;
	}
	case 5: // fixed 32-bit value: 4 bytes
	{
		if (unEndPos - unPos < 4)
			return false;
		unPos += 4;
		break;
	}
	default:
		return false;
	}

	oIWAField.m_unEnd = unPos;
	oIWAField.m_uIndex = uSpec >> 3;

	return true;
}
// Tries to tell Pages / Numbers / Keynote apart by peeking at the first
// message header of "Index/Document.iwa" inside the archive fileName.
//
// On success nType receives AVS_OFFICESTUDIO_FILE_DOCUMENT_PAGES,
// AVS_OFFICESTUDIO_FILE_SPREADSHEET_NUMBERS or
// AVS_OFFICESTUDIO_FILE_PRESENTATION_KEY and true is returned.
// On failure nType is left untouched and false is returned.
//
// All lengths come from the file and are therefore untrusted; every range
// handed to ReadUVar / ReadIWAField is kept inside the loaded buffer.
bool DetectIWorkFormat(const std::wstring& fileName, int &nType)
{
	COfficeUtils OfficeUtils(NULL);

	ULONG unSize = 0;
	BYTE* pBuffer = NULL;

	HRESULT hresult = OfficeUtils.LoadFileFromArchive(fileName, L"Index/Document.iwa", &pBuffer, unSize);

	if (hresult != S_OK || NULL == pBuffer)
		return false;

#define CLEAR_BUFFER_AND_RETURN(return_value)\
	do{\
		delete[] pBuffer;\
		return return_value;\
	}while(false)

	if (unSize < 13)
		CLEAR_BUFFER_AND_RETURN(false);

	const size_t uBufferSize = (size_t)unSize;

	// Search bytes 6..11 for the pair 08 01; the byte just before that pair
	// is taken as the start of the header-length varint.
	size_t uPos = 6;
	for (; uPos < 12; ++uPos)
	{
		if (0x08 == pBuffer[uPos] && 0x01 == pBuffer[uPos + 1])
		{
			--uPos;
			break;
		}
	}

	if (12 == uPos)
		CLEAR_BUFFER_AND_RETURN(false);

	uint64_t unHeaderLen;
	if (!ReadUVar(pBuffer, uBufferSize, uPos, unHeaderLen))
		CLEAR_BUFFER_AND_RETURN(false);

	const size_t uStartPos = uPos;

	// uStartPos never exceeds uBufferSize, so the subtraction cannot wrap,
	// unlike the sum unHeaderLen + uStartPos for a hostile length.
	if (unHeaderLen < 8 || unHeaderLen > uBufferSize - uStartPos)
		CLEAR_BUFFER_AND_RETURN(false);

	const size_t uHeaderEnd = uStartPos + (size_t)unHeaderLen;

	// Skip the 08 01 pair located above.
	uPos += 2;

	// The next header field must be length-delimited field 2 ...
	TIWAField oMessageField;
	if (!ReadIWAField(pBuffer, uHeaderEnd, uPos, oMessageField) || 2 != oMessageField.m_unWireType ||
		2 != oMessageField.m_uIndex)
		CLEAR_BUFFER_AND_RETURN(false);

	// ... whose first sub-field must be varint field 1; its value selects the format.
	size_t uSubPos = oMessageField.m_unStart;
	TIWAField oField;
	if (!ReadIWAField(pBuffer, oMessageField.m_unEnd, uSubPos, oField) || 0 != oField.m_unWireType ||
		1 != oField.m_uIndex)
		CLEAR_BUFFER_AND_RETURN(false);

	switch (oField.m_oValue)
	{
	case 1:
	{
		// Data length: taken from the sub-field after the next one, provided
		// it is varint field 3; otherwise it stays 0.
		uint32_t unDataLen = 0;

		TIWAField oTempField;
		if (ReadIWAField(pBuffer, oMessageField.m_unEnd, uSubPos, oTempField) &&
			ReadIWAField(pBuffer, oMessageField.m_unEnd, uSubPos, oTempField) && 0 == oTempField.m_unWireType &&
			3 == oTempField.m_uIndex)
			unDataLen += (uint32_t)oTempField.m_oValue;

		size_t unTempPos = uHeaderEnd;

		// Clamp the end of the data section to the buffer: unDataLen is read
		// from the file and may point far beyond the bytes actually loaded.
		// NOTE(review): the end is measured from uStartPos (not from the end of
		// the header) exactly as before; confirm that this is intended.
		const size_t uAvailable = uBufferSize - uStartPos;
		const size_t uDataEnd = uStartPos + ((size_t)unDataLen < uAvailable ? (size_t)unDataLen : uAvailable);

		// keynote: presentation ref in 2
		// number: sheet ref in 1
		if (ReadIWAField(pBuffer, uDataEnd, unTempPos, oTempField) &&
			(2 != oTempField.m_unWireType || 1 != oTempField.m_uIndex || oTempField.m_unEnd - oTempField.m_unStart < 2))
		{
			nType = AVS_OFFICESTUDIO_FILE_PRESENTATION_KEY;
			CLEAR_BUFFER_AND_RETURN(true);
		}
		else if (ReadIWAField(pBuffer, uDataEnd, unTempPos, oTempField) &&
				 (2 != oTempField.m_unWireType || 2 != oTempField.m_uIndex || oTempField.m_unEnd - oTempField.m_unStart < 2))
		{
			nType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_NUMBERS;
			CLEAR_BUFFER_AND_RETURN(true);
		}

		break;
	}
	case 10000:
	{
		nType = AVS_OFFICESTUDIO_FILE_DOCUMENT_PAGES;
		CLEAR_BUFFER_AND_RETURN(true);
	}
	}

	CLEAR_BUFFER_AND_RETURN(false);
}
// Detects Apple iWork files. The content-based check (DetectIWorkFormat) is
// tried first; if it fails, the decision falls back to the file extension
// (.pages / .numbers / .key, compared case-insensitively).
// Returns true and sets nFileType on a match, false otherwise.
bool COfficeFileFormatChecker::isMacFormatFile(const std::wstring& fileName)
{
	if (DetectIWorkFormat(fileName, nFileType))
		return true;

	const std::wstring::size_type nExtPos = fileName.rfind(L'.');
	if (std::wstring::npos == nExtPos)
		return false; // no extension, nothing to fall back on

	std::wstring sExt = fileName.substr(nExtPos);

	// Lower-case ASCII letters only. The narrow tolower() must not be fed
	// arbitrary wchar_t values and is locale-dependent; the extensions
	// compared below are plain ASCII.
	for (std::wstring::iterator it = sExt.begin(); it != sExt.end(); ++it)
	{
		if (*it >= L'A' && *it <= L'Z')
			*it = (wchar_t)(*it - L'A' + L'a');
	}

	if (0 == sExt.compare(L".pages"))
		nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_PAGES;
	else if (0 == sExt.compare(L".numbers"))
		nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_NUMBERS;
	else if (0 == sExt.compare(L".key"))
		nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_KEY;
	else
		return false;

	return true;
}
// Checks whether the archive fileName carries a "mimetype" entry that
// contains "application/hwp+zip". On a match nFileType becomes
// AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX and true is returned.
bool COfficeFileFormatChecker::isHwpxFile(const std::wstring &fileName)
{
	COfficeUtils oOfficeUtils;

	ULONG unSize = 0;
	BYTE* pBuffer = NULL;

	HRESULT hresult = oOfficeUtils.LoadFileFromArchive(fileName, L"mimetype", &pBuffer, unSize);

	if (hresult != S_OK || NULL == pBuffer)
		return false;

	static constexpr const char* hwpxFormatLine = "application/hwp+zip";

	// The buffer is described by (pointer, size); nothing visible here
	// guarantees a terminating NUL, so search the exact byte range instead
	// of using strstr.
	const char* pBegin = (const char*)pBuffer;
	const char* pEnd = pBegin + unSize;
	const bool bResult = pEnd != std::search(pBegin, pEnd, hwpxFormatLine, hwpxFormatLine + strlen(hwpxFormatLine));

	if (bResult)
		nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX;

	delete[] pBuffer;

	return bResult;
}
bool COfficeFileFormatChecker::isOpenOfficeFormatFile(const std::wstring &fileName, std::wstring &documentID)
{
documentID.clear();
const char *odtFormatLine = "application/vnd.oasis.opendocument.text";
const char *odsFormatLine = "application/vnd.oasis.opendocument.spreadsheet";
const char *odpFormatLine = "application/vnd.oasis.opendocument.presentation";
const char* odgFormatLine = "application/vnd.oasis.opendocument.graphics";
const char *ottFormatLine = "application/vnd.oasis.opendocument.text-template";
const char *otsFormatLine = "application/vnd.oasis.opendocument.spreadsheet-template";
const char *otpFormatLine = "application/vnd.oasis.opendocument.presentation-template";
const char *epubFormatLine = "application/epub+zip";
const char *sxwFormatLine = "application/vnd.sun.xml.writer";
const char *sxcFormatLine = "application/vnd.sun.xml.calc";
const char *sxiFormatLine = "application/vnd.sun.xml.impress";
COfficeUtils OfficeUtils(NULL);
ULONG nBufferSize = 0;
BYTE *pBuffer = NULL;
HRESULT hresult = OfficeUtils.LoadFileFromArchive(fileName, L"documentID", &pBuffer, nBufferSize);
if (hresult == S_OK && pBuffer != NULL)
{
documentID = NSFile::CUtf8Converter::GetUnicodeStringFromUTF8(pBuffer, nBufferSize);
delete[] pBuffer;
pBuffer = NULL;
}
nBufferSize = 0;
hresult = OfficeUtils.LoadFileFromArchive(fileName, L"mimetype", &pBuffer, nBufferSize);
if (hresult == S_OK && pBuffer != NULL)
{
if (48 <= nBufferSize && NULL != strstr((char *)pBuffer, ottFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_OTT;
}
else if (55 <= nBufferSize && NULL != strstr((char *)pBuffer, otsFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_OTS;
}
else if (56 <= nBufferSize && NULL != strstr((char *)pBuffer, otpFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_OTP;
}
else if ((39 <= nBufferSize && NULL != strstr((char *)pBuffer, odtFormatLine)) ||
(30 <= nBufferSize && NULL != strstr((char *)pBuffer, sxwFormatLine)))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT;
}
else if ((46 <= nBufferSize && NULL != strstr((char *)pBuffer, odsFormatLine)) ||
(28 <= nBufferSize && NULL != strstr((char *)pBuffer, sxcFormatLine)))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_ODS;
}
else if ((47 <= nBufferSize && NULL != strstr((char *)pBuffer, odpFormatLine)) ||
(31 <= nBufferSize && NULL != strstr((char *)pBuffer, sxiFormatLine)))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP;
}
else if (43 <= nBufferSize && NULL != strstr((char*)pBuffer, odgFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_ODG;
}
else if (20 <= nBufferSize && NULL != strstr((char *)pBuffer, epubFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_EPUB;
}
delete[] pBuffer;
pBuffer = NULL;
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
}
else
{
// если не записан тип смотрим манифест
nBufferSize = 0;
HRESULT hresult = OfficeUtils.LoadFileFromArchive(fileName, L"META-INF/manifest.xml", &pBuffer, nBufferSize);
if (hresult == S_OK && pBuffer != NULL)
{
std::string xml_string((char *)pBuffer, nBufferSize);
std::string::size_type nFindMem1 = xml_string.find(odtFormatLine);
std::string::size_type nFindMem2 = xml_string.find(odsFormatLine);
std::string::size_type nFindMem3 = xml_string.find(odpFormatLine);
if (nFindMem1 != std::string::npos && nFindMem2 != std::string::npos)
{
if (nFindMem1 < nFindMem2)
nFindMem2 = std::string::npos;
else
nFindMem1 = std::string::npos;
}
if (nFindMem1 != std::string::npos && nFindMem3 != std::string::npos)
{
if (nFindMem1 < nFindMem3)
nFindMem3 = std::string::npos;
else
nFindMem1 = std::string::npos;
}
if (nFindMem2 != std::string::npos && nFindMem3 != std::string::npos)
{
if (nFindMem2 < nFindMem3)
nFindMem3 = std::string::npos;
else
nFindMem2 = std::string::npos;
}
if (nFindMem1 != std::string::npos)
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT;
else if (nFindMem2 != std::string::npos)
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_ODS;
else if (nFindMem3 != std::string::npos)
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP;
delete[] pBuffer;
pBuffer = NULL;
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
}
}
return false;
}
// Detects "flat" OpenDocument XML (single-file .fodt/.fods/.fodp) in the
// first dwBytes bytes at pBuffer.
// The text must contain both "office:document" and the OpenDocument office
// namespace declaration; the media type found in it then selects the type.
// Returns true when nFileType is not AVS_OFFICESTUDIO_FILE_UNKNOWN afterwards.
bool COfficeFileFormatChecker::isOpenOfficeFlatFormatFile(unsigned char *pBuffer, int dwBytes)
{
	if (dwBytes < 78)
		return false;

	const std::string sXml((char *)pBuffer, dwBytes);

	const bool bHasRoot = std::string::npos != sXml.find("office:document");
	const bool bHasNamespace =
		std::string::npos != sXml.find("xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\"");

	if (!bHasRoot || !bHasNamespace)
		return false;

	// Checked in this order: text, spreadsheet, presentation.
	if (std::string::npos != sXml.find("application/vnd.oasis.opendocument.text"))
		nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT_FLAT;
	else if (std::string::npos != sXml.find("application/vnd.oasis.opendocument.spreadsheet"))
		nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_ODS_FLAT;
	else if (std::string::npos != sXml.find("application/vnd.oasis.opendocument.presentation"))
		nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP_FLAT;

	return nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN;
}
// Scans the buffer for an HWPML marker: either "<!DOCTYPE HWPML" or "<HWPML".
// Only start positions below dwBytes - 8 are examined.
// Does not touch nFileType; returns true when a marker is found.
bool COfficeFileFormatChecker::isHwpmlFile(unsigned char *pBuffer, int dwBytes)
{
	if (NULL == pBuffer || dwBytes < 8)
		return false;

	const unsigned int unSize = (unsigned int)dwBytes;
	const unsigned int unScanEnd = unSize - 8;

	unsigned int unPos = 0;
	while (unPos < unScanEnd)
	{
		const unsigned char* pCur = pBuffer + unPos;
		const unsigned int unLeft = unSize - unPos;
		++unPos;

		// Both markers open with '<', so every other byte is skipped cheaply.
		if ('<' != *pCur)
			continue;

		if (unLeft >= 15 && 0 == memcmp(pCur, "<!DOCTYPE HWPML", 15))
			return true;

		if (unLeft >= 6 && 0 == memcmp(pCur, "<HWPML", 6))
			return true;
	}

	return false;
}
bool COfficeFileFormatChecker::isOOXFlatFormatFile(unsigned char *pBuffer, int dwBytes)
{
if (dwBytes < 8)
return false;
std::string xml_string;
if (pBuffer[0] == 0xff && pBuffer[1] == 0xfe)
{ // utf-16- little
std::wstring xml_wstring = NSFile::CUtf8Converter::GetWStringFromUTF16((unsigned short *)pBuffer, dwBytes / 2);
xml_string = NSFile::CUtf8Converter::GetUtf8StringFromUnicode(xml_wstring);
}
else if (pBuffer[0] == 0xfe && pBuffer[1] == 0xff)
{ // utf-16- big
// swap bytes
DWORD file_size_round = (dwBytes / 2) * 2;
for (DWORD i = 0; i < file_size_round; i += 2)
{
char v = pBuffer[i];
pBuffer[i] = pBuffer[i + 1];
pBuffer[i + 1] = v;
}
std::wstring xml_wstring = NSFile::CUtf8Converter::GetWStringFromUTF16((unsigned short *)pBuffer, dwBytes / 2);
xml_string = NSFile::CUtf8Converter::GetUtf8StringFromUnicode(xml_wstring);
}
else
xml_string = std::string((char *)pBuffer, dwBytes);
const char *docxFormatLine = "xmlns:w=\"http://schemas.microsoft.com/office/word/2003/wordml\"";
const char *xlsxFormatLine = "xmlns:ss=\"urn:schemas-microsoft-com:office:spreadsheet\"";
const char *docxPackage = "progid=\"Word.Document\"";
const char *xlsxPackage = "progid=\"Excel.Sheet\"";
const char *pptxPackage = "progid=\"PowerPoint.Show\"";
const char *packageFormatLine = "xmlns:pkg=\"http://schemas.microsoft.com/office/2006/xmlPackage\"";
const char* workbookFormatLine = "<Workbook";
const char* htmlFormatLine = "<html";
if (std::string::npos != xml_string.find(docxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX_FLAT;
}
else if (std::string::npos != xml_string.find(xlsxFormatLine) && ( std::string::npos != xml_string.find(workbookFormatLine) ||
std::string::npos == xml_string.find(htmlFormatLine)))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX_FLAT;
}
else if (std::string::npos != xml_string.find(packageFormatLine))
{
if (std::string::npos != xml_string.find(docxPackage))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX_PACKAGE;
}
else if (std::string::npos != xml_string.find(xlsxPackage))
{
nFileType = AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX_PACKAGE;
}
else if (std::string::npos != xml_string.find(pptxPackage))
{
nFileType = AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX_PACKAGE;
}
}
return nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN;
}
// Maps a file-type constant to its canonical extension (leading dot
// included). Several types share one extension, e.g. both PDF types give
// ".pdf" and all canvas types give ".bin".
// Returns an empty string for a type that is not listed.
std::wstring COfficeFileFormatChecker::GetExtensionByType(int type)
{
	struct TTypeExtension
	{
		int m_nType;
		const wchar_t* m_wsExt;
	};

	static const TTypeExtension c_arExtensions[] =
	{
		// documents
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_OFORM, L".oform" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCXF, L".docxf" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX, L".docx" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCM, L".docm" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_DOTX, L".dotx" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_DOTM, L".dotm" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC, L".doc" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC_FLAT, L".doc" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT, L".odt" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_RTF, L".rtf" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_TXT, L".txt" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML, L".html" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_MHT, L".mht" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_EPUB, L".epub" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_FB2, L".fb2" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_MOBI, L".mobi" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT_FLAT, L".fodt" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_OTT, L".ott" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_PAGES, L".pages" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP, L".hwp" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX, L".hwpx" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML, L".hml" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_MD, L".md" },

		// presentations
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX, L".pptx" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTM, L".pptm" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_PPT, L".ppt" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP, L".odp" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_PPSX, L".ppsx" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_PPSM, L".ppsm" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_POTX, L".potx" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_POTM, L".potm" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP_FLAT, L".fodp" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_OTP, L".otp" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_ODG, L".odg" },
		{ AVS_OFFICESTUDIO_FILE_PRESENTATION_KEY, L".key" },

		// spreadsheets
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX, L".xlsx" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSM, L".xlsm" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLTX, L".xltx" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLTM, L".xltm" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSB, L".xlsb" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLS, L".xls" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_ODS, L".ods" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_CSV, L".csv" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_ODS_FLAT, L".fods" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_OTS, L".ots" },
		{ AVS_OFFICESTUDIO_FILE_SPREADSHEET_NUMBERS, L".numbers" },

		// cross-platform and other
		{ AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_PDF, L".pdf" },
		{ AVS_OFFICESTUDIO_FILE_DOCUMENT_OFORM_PDF, L".pdf" },
		{ AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_SWF, L".swf" },
		{ AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_DJVU, L".djvu" },
		{ AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_XPS, L".xps" },
		{ AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_OFD, L".ofd" },
		{ AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_SVG, L".svg" },
		{ AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_HTMLR, L".htmlr" },
		{ AVS_OFFICESTUDIO_FILE_OTHER_HTMLZIP, L".zip" },
		{ AVS_OFFICESTUDIO_FILE_OTHER_JSON, L".json" },

		// images
		{ AVS_OFFICESTUDIO_FILE_IMAGE, L".jpg" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_JPG, L".jpg" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_TIFF, L".tiff" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_TGA, L".tga" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_GIF, L".gif" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_PNG, L".png" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_EMF, L".emf" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_WMF, L".wmf" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_BMP, L".bmp" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_CR2, L".cr2" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_PCX, L".pcx" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_RAS, L".ras" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_PSD, L".psd" },
		{ AVS_OFFICESTUDIO_FILE_IMAGE_ICO, L".ico" },

		// editor canvas and internal binary formats
		{ AVS_OFFICESTUDIO_FILE_CANVAS_WORD, L".bin" },
		{ AVS_OFFICESTUDIO_FILE_CANVAS_SPREADSHEET, L".bin" },
		{ AVS_OFFICESTUDIO_FILE_CANVAS_PRESENTATION, L".bin" },
		{ AVS_OFFICESTUDIO_FILE_CANVAS_DRAW, L".bin" },
		{ AVS_OFFICESTUDIO_FILE_OTHER_OLD_DOCUMENT, L".doct" },
		{ AVS_OFFICESTUDIO_FILE_TEAMLAB_DOCY, L".doct" },
		{ AVS_OFFICESTUDIO_FILE_TEAMLAB_XLSY, L".xlst" },
		{ AVS_OFFICESTUDIO_FILE_TEAMLAB_VSDY, L".vsdt" },
		{ AVS_OFFICESTUDIO_FILE_OTHER_OLD_PRESENTATION, L".pptt" },
		{ AVS_OFFICESTUDIO_FILE_OTHER_OLD_DRAWING, L".pptt" },
		{ AVS_OFFICESTUDIO_FILE_TEAMLAB_PPTY, L".pptt" },

		// drawings
		{ AVS_OFFICESTUDIO_FILE_DRAW_VSDX, L".vsdx" },
		{ AVS_OFFICESTUDIO_FILE_DRAW_VSSX, L".vssx" },
		{ AVS_OFFICESTUDIO_FILE_DRAW_VSTX, L".vstx" },
		{ AVS_OFFICESTUDIO_FILE_DRAW_VSDM, L".vsdm" },
		{ AVS_OFFICESTUDIO_FILE_DRAW_VSSM, L".vssm" },
		{ AVS_OFFICESTUDIO_FILE_DRAW_VSTM, L".vstm" },
	};

	for (const TTypeExtension& oEntry : c_arExtensions)
	{
		if (oEntry.m_nType == type)
			return oEntry.m_wsExt;
	}

	return L"";
}
// Maps a file extension (leading dot included, any letter case) to its
// file-type constant. Returns 0 when the extension is not known.
//
// sExt - extension such as L".docx"; only ASCII letters are case-folded
int COfficeFileFormatChecker::GetFormatByExtension(const std::wstring &sExt)
{
	struct TExtensionFormat
	{
		const wchar_t* m_wsExt;
		int m_nFormat;
	};

	static const TExtensionFormat c_arFormats[] =
	{
		// documents
		{ L".docx", AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCX },
		{ L".oform", AVS_OFFICESTUDIO_FILE_DOCUMENT_OFORM },
		{ L".docxf", AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCXF },
		{ L".docm", AVS_OFFICESTUDIO_FILE_DOCUMENT_DOCM },
		{ L".dotx", AVS_OFFICESTUDIO_FILE_DOCUMENT_DOTX },
		{ L".dotm", AVS_OFFICESTUDIO_FILE_DOCUMENT_DOTM },
		{ L".doc", AVS_OFFICESTUDIO_FILE_DOCUMENT_DOC },
		{ L".odt", AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT },
		{ L".rtf", AVS_OFFICESTUDIO_FILE_DOCUMENT_RTF },
		{ L".txt", AVS_OFFICESTUDIO_FILE_DOCUMENT_TXT },
		{ L".html", AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML },
		{ L".htm", AVS_OFFICESTUDIO_FILE_DOCUMENT_HTML },
		{ L".mht", AVS_OFFICESTUDIO_FILE_DOCUMENT_MHT },
		{ L".mhtml", AVS_OFFICESTUDIO_FILE_DOCUMENT_MHT },
		{ L".epub", AVS_OFFICESTUDIO_FILE_DOCUMENT_EPUB },
		{ L".fb2", AVS_OFFICESTUDIO_FILE_DOCUMENT_FB2 },
		{ L".mobi", AVS_OFFICESTUDIO_FILE_DOCUMENT_MOBI },
		{ L".fodt", AVS_OFFICESTUDIO_FILE_DOCUMENT_ODT_FLAT },
		{ L".ott", AVS_OFFICESTUDIO_FILE_DOCUMENT_OTT },
		{ L".pages", AVS_OFFICESTUDIO_FILE_DOCUMENT_PAGES },
		{ L".hwp", AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP },
		{ L".hwpx", AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX },
		{ L".hml", AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML },
		{ L".md", AVS_OFFICESTUDIO_FILE_DOCUMENT_MD },

		// presentations
		{ L".pptx", AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTX },
		{ L".pptm", AVS_OFFICESTUDIO_FILE_PRESENTATION_PPTM },
		{ L".ppsm", AVS_OFFICESTUDIO_FILE_PRESENTATION_PPSM },
		{ L".ppt", AVS_OFFICESTUDIO_FILE_PRESENTATION_PPT },
		{ L".odp", AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP },
		{ L".ppsx", AVS_OFFICESTUDIO_FILE_PRESENTATION_PPSX },
		{ L".potx", AVS_OFFICESTUDIO_FILE_PRESENTATION_POTX },
		{ L".potm", AVS_OFFICESTUDIO_FILE_PRESENTATION_POTM },
		{ L".fodp", AVS_OFFICESTUDIO_FILE_PRESENTATION_ODP_FLAT },
		{ L".otp", AVS_OFFICESTUDIO_FILE_PRESENTATION_OTP },
		{ L".odg", AVS_OFFICESTUDIO_FILE_PRESENTATION_ODG },
		{ L".key", AVS_OFFICESTUDIO_FILE_PRESENTATION_KEY },

		// spreadsheets
		{ L".xlsx", AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSX },
		{ L".xlsm", AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSM },
		{ L".xltx", AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLTX },
		{ L".xltm", AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLTM },
		{ L".xlsb", AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLSB },
		{ L".xls", AVS_OFFICESTUDIO_FILE_SPREADSHEET_XLS },
		{ L".csv", AVS_OFFICESTUDIO_FILE_SPREADSHEET_CSV },
		{ L".fods", AVS_OFFICESTUDIO_FILE_SPREADSHEET_ODS_FLAT },
		{ L".ots", AVS_OFFICESTUDIO_FILE_SPREADSHEET_OTS },
		{ L".ods", AVS_OFFICESTUDIO_FILE_SPREADSHEET_ODS },
		{ L".numbers", AVS_OFFICESTUDIO_FILE_SPREADSHEET_NUMBERS },

		// containers and cross-platform formats
		{ L".ooxml", AVS_OFFICESTUDIO_FILE_OTHER_OOXML },
		{ L".odf", AVS_OFFICESTUDIO_FILE_OTHER_ODF },
		{ L".pdf", AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_PDF },
		{ L".djvu", AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_DJVU },
		{ L".xps", AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_XPS },
		{ L".oxps", AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_XPS },
		{ L".ofd", AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_OFD },
		// The dot-less spelling was the only one accepted before; it is kept
		// so existing callers that pass it keep working.
		{ L"ofd", AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_OFD },

		// images
		{ L".jpg", AVS_OFFICESTUDIO_FILE_IMAGE_JPG },
		{ L".jpeg", AVS_OFFICESTUDIO_FILE_IMAGE_JPG },
		{ L".jpe", AVS_OFFICESTUDIO_FILE_IMAGE_JPG },
		{ L".jfif", AVS_OFFICESTUDIO_FILE_IMAGE_JPG },
		{ L".tiff", AVS_OFFICESTUDIO_FILE_IMAGE_TIFF },
		{ L".tif", AVS_OFFICESTUDIO_FILE_IMAGE_TIFF },
		{ L".tga", AVS_OFFICESTUDIO_FILE_IMAGE_TGA },
		{ L".gif", AVS_OFFICESTUDIO_FILE_IMAGE_GIF },
		{ L".png", AVS_OFFICESTUDIO_FILE_IMAGE_PNG },
		{ L".emf", AVS_OFFICESTUDIO_FILE_IMAGE_EMF },
		{ L".wmf", AVS_OFFICESTUDIO_FILE_IMAGE_WMF },
		{ L".bmp", AVS_OFFICESTUDIO_FILE_IMAGE_BMP },
		{ L".cr2", AVS_OFFICESTUDIO_FILE_IMAGE_CR2 },
		{ L".pcx", AVS_OFFICESTUDIO_FILE_IMAGE_PCX },
		{ L".ras", AVS_OFFICESTUDIO_FILE_IMAGE_RAS },
		{ L".psd", AVS_OFFICESTUDIO_FILE_IMAGE_PSD },
		{ L".ico", AVS_OFFICESTUDIO_FILE_IMAGE_ICO },

		// internal binary formats
		{ L".doct", AVS_OFFICESTUDIO_FILE_TEAMLAB_DOCY },
		{ L".xlst", AVS_OFFICESTUDIO_FILE_TEAMLAB_XLSY },
		{ L".pptt", AVS_OFFICESTUDIO_FILE_TEAMLAB_PPTY },
		{ L".vsdt", AVS_OFFICESTUDIO_FILE_TEAMLAB_VSDY },

		// drawings
		{ L".vsdx", AVS_OFFICESTUDIO_FILE_DRAW_VSDX },
		{ L".vssx", AVS_OFFICESTUDIO_FILE_DRAW_VSSX },
		{ L".vstx", AVS_OFFICESTUDIO_FILE_DRAW_VSTX },
		{ L".vsdm", AVS_OFFICESTUDIO_FILE_DRAW_VSDM },
		{ L".vssm", AVS_OFFICESTUDIO_FILE_DRAW_VSSM },
		{ L".vstm", AVS_OFFICESTUDIO_FILE_DRAW_VSTM },
	};

	// Lower-case ASCII letters only. The narrow tolower() must not be fed
	// arbitrary wchar_t values and is locale-dependent; every extension in
	// the table is plain ASCII.
	std::wstring ext(sExt);
	for (std::wstring::iterator it = ext.begin(); it != ext.end(); ++it)
	{
		if (*it >= L'A' && *it <= L'Z')
			*it = (wchar_t)(*it - L'A' + L'a');
	}

	for (const TExtensionFormat& oEntry : c_arFormats)
	{
		if (ext == oEntry.m_wsExt)
			return oEntry.m_nFormat;
	}

	return 0;
}
// Runs isOfficeFile on fileName and, when it succeeds, returns the extension
// that belongs to the resulting nFileType; otherwise an empty string.
std::wstring COfficeFileFormatChecker::GetFormatExtension(const std::wstring &fileName)
{
	// The condition is evaluated first, so nFileType is read only after
	// isOfficeFile has run.
	return isOfficeFile(fileName) ? GetExtensionByType(nFileType) : std::wstring();
}
// Returns true when the buffer starts with the 8 signature bytes
// 41 54 26 54 46 4F 52 4D ("AT&TFORM" in ASCII). Does not touch nFileType.
bool COfficeFileFormatChecker::isDjvuFormatFile(unsigned char *pBuffer, int dwBytes)
{
	static const unsigned char c_arSignature[8] = {0x41, 0x54, 0x26, 0x54, 0x46, 0x4f, 0x52, 0x4d};

	if (NULL == pBuffer || dwBytes < 8)
		return false;

	return 0 == memcmp(pBuffer, c_arSignature, sizeof(c_arSignature));
}
// Returns true when bytes 60..67 of the buffer read "BOOKMOBI" or
// "TEXtREAd". Does not touch nFileType.
bool COfficeFileFormatChecker::isMobiFormatFile(unsigned char *pBuffer, int dwBytes)
{
	if (NULL == pBuffer || dwBytes < 68)
		return false;

	const unsigned char* pMarker = pBuffer + 60;

	return 0 == memcmp(pMarker, "BOOKMOBI", 8) || 0 == memcmp(pMarker, "TEXtREAd", 8);
}
// FB2 is an XML file whose root element is FictionBook. This looks for the
// bytes of "FictionBook" after a '<' that has not been closed by '>' yet,
// examining at most the first 100 start positions (and only those below
// dwBytes - 11). Does not touch nFileType.
bool COfficeFileFormatChecker::isFB2FormatFile(unsigned char *pBuffer, int dwBytes)
{
	if (NULL == pBuffer || dwBytes < 11)
		return false;

	// "FictionBook" spelled out as raw byte values.
	static const unsigned char c_arRootName[11] = {0x46, 0x69, 0x63, 0x74, 0x69, 0x6F, 0x6E, 0x42, 0x6F, 0x6F, 0x6B};

	int nScanEnd = dwBytes - 11;
	if (nScanEnd > 100)
		nScanEnd = 100;

	bool bInsideTag = false;
	for (int nPos = 0; nPos < nScanEnd; ++nPos)
	{
		switch (pBuffer[nPos])
		{
		case 0x3C: // '<'
			bInsideTag = true;
			break;
		case 0x3E: // '>'
			bInsideTag = false;
			break;
		default:
			if (bInsideTag && 0 == memcmp(pBuffer + nPos, c_arRootName, sizeof(c_arRootName)))
				return true;
			break;
		}
	}

	return false;
}
bool COfficeFileFormatChecker::isXpsFile(const std::wstring &fileName)
{
COfficeUtils OfficeUtils(NULL);
ULONG nBufferSize = 0;
BYTE *pBuffer = NULL;
HRESULT hresult = OfficeUtils.LoadFileFromArchive(fileName, L"_rels/.rels", &pBuffer, nBufferSize);
if (hresult == S_OK && pBuffer != NULL)
{
// http://schemas.microsoft.com/xps/2005/06/fixedrepresentation
// http://schemas.openxps.org/oxps/v1.0/fixedrepresentation
if ((19 <= nBufferSize && NULL != strstr((char *)pBuffer, "fixedrepresentation") && (NULL != strstr((char *)pBuffer, "/xps/")) ||
(6 <= nBufferSize && NULL != strstr((char *)pBuffer, "/oxps/"))))
{
nFileType = AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_XPS;
}
delete[] pBuffer;
pBuffer = NULL;
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
}
else
{
HRESULT hresult = OfficeUtils.LoadFileFromArchive(fileName, L"_rels/.rels/[0].piece", &pBuffer, nBufferSize);
if (hresult == S_OK && pBuffer != NULL)
{
nFileType = AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_XPS;
return true;
}
}
return false;
}
// Detects OFD packages: the archive must have an "OFD.xml" entry that
// contains the text "ofd:OFD". On a match nFileType becomes
// AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_OFD.
// Returns true when nFileType is not AVS_OFFICESTUDIO_FILE_UNKNOWN afterwards.
bool COfficeFileFormatChecker::isOFDFile(const std::wstring& fileName)
{
	COfficeUtils OfficeUtils(NULL);

	ULONG nBufferSize = 0;
	BYTE *pBuffer = NULL;

	HRESULT hresult = OfficeUtils.LoadFileFromArchive(fileName, L"OFD.xml", &pBuffer, nBufferSize);
	if (hresult == S_OK && pBuffer != NULL)
	{
		static const char c_szRootTag[] = "ofd:OFD";

		// Search the exact (pointer, size) range: nothing visible here
		// guarantees the buffer is NUL-terminated, so strstr could run past
		// it. A buffer shorter than the marker cannot match, so the former
		// "19 <=" size threshold (unrelated to the marker length) is gone.
		const char* pBegin = (const char*)pBuffer;
		const char* pEnd = pBegin + nBufferSize;
		if (pEnd != std::search(pBegin, pEnd, c_szRootTag, c_szRootTag + sizeof(c_szRootTag) - 1))
			nFileType = AVS_OFFICESTUDIO_FILE_CROSSPLATFORM_OFD;

		delete[] pBuffer;
		pBuffer = NULL;

		if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
			return true;
	}
	return false;
}