Files
DocumentServer-v-9.2.0/core/MsBinaryFile/Common/Base/FormatUtils.cpp
Yajbir Singh f1b860b25c
Some checks failed
check / markdownlint (push) Has been cancelled
check / spellchecker (push) Has been cancelled
updated
2025-12-11 19:03:17 +05:30

398 lines
10 KiB
C++

/*
* (c) Copyright Ascensio System SIA 2010-2023
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish
* street, Riga, Latvia, EU, LV-1050.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "FormatUtils.h"
namespace DocFileFormat
{
#define WriteWCharPadding(STLCollection, padding) \
for (int i = 0; i < padding; ++i)\
{\
STLCollection->push_back((wchar_t)0);\
}
static void WriteUtf16ToWChar(int code, std::vector<wchar_t>* STLCollection, int padding)
{
int used = 0;
if (code < 0x10000)
{
STLCollection->push_back((wchar_t)code);
used = 1;
}
else
{
code -= 0x10000;
STLCollection->push_back((wchar_t)(0xD800 | ((code >> 10) & 0x03FF)));
STLCollection->push_back((wchar_t)(0xDC00 | (code & 0x03FF)));
used = 2;
}
WriteWCharPadding(STLCollection, padding - used);
}
static bool GetSTLCollectionFromUtf8(std::vector<wchar_t>* STLCollection, unsigned char* pBuffer, int lCount)
{
if ((STLCollection == NULL) || (pBuffer == NULL))
{
return false;
}
int lIndex = 0;
if (sizeof(wchar_t) == 2)//utf8 -> utf16
{
while (lIndex < lCount)
{
BYTE byteMain = pBuffer[lIndex];
if (0x00 == (byteMain & 0x80))
{
// 1 byte
STLCollection->push_back((WCHAR)byteMain);
++lIndex;
}
else if (0x00 == (byteMain & 0x20))
{
// 2 byte
int val = 0;
if ((lIndex + 1) < lCount)
{
val = (int)(((byteMain & 0x1F) << 6) |
(pBuffer[lIndex + 1] & 0x3F));
}
STLCollection->push_back((wchar_t)val);
STLCollection->push_back((wchar_t)0);
lIndex += 2;
}
else if (0x00 == (byteMain & 0x10))
{
// 3 byte
int val = 0;
if ((lIndex + 2) < lCount)
{
val = (int)(((byteMain & 0x0F) << 12) |
((pBuffer[lIndex + 1] & 0x3F) << 6) |
(pBuffer[lIndex + 2] & 0x3F));
}
WriteUtf16ToWChar(val, STLCollection, 3);
lIndex += 3;
}
else if (0x00 == (byteMain & 0x0F))
{
// 4 byte
int val = 0;
if ((lIndex + 3) < lCount)
{
val = (int)(((byteMain & 0x07) << 18) |
((pBuffer[lIndex + 1] & 0x3F) << 12) |
((pBuffer[lIndex + 2] & 0x3F) << 6) |
(pBuffer[lIndex + 3] & 0x3F));
}
WriteUtf16ToWChar(val, STLCollection, 4);
lIndex += 4;
}
else if (0x00 == (byteMain & 0x08))
{
// 4 byte
int val = 0;
if ((lIndex + 3) < lCount)
{
val = (int)(((byteMain & 0x07) << 18) |
((pBuffer[lIndex + 1] & 0x3F) << 12) |
((pBuffer[lIndex + 2] & 0x3F) << 6) |
(pBuffer[lIndex + 3] & 0x3F));
}
WriteUtf16ToWChar(val, STLCollection, 4);
lIndex += 4;
}
else if (0x00 == (byteMain & 0x04))
{
// 5 byte
int val = 0;
if ((lIndex + 4) < lCount)
{
val = (int)(((byteMain & 0x03) << 24) |
((pBuffer[lIndex + 1] & 0x3F) << 18) |
((pBuffer[lIndex + 2] & 0x3F) << 12) |
((pBuffer[lIndex + 3] & 0x3F) << 6) |
(pBuffer[lIndex + 4] & 0x3F));
}
WriteUtf16ToWChar(val, STLCollection, 5);
lIndex += 5;
}
else
{
// 6 byte
int val = 0;
if ((lIndex + 5) < lCount)
{
val = (int)(((byteMain & 0x01) << 30) |
((pBuffer[lIndex + 1] & 0x3F) << 24) |
((pBuffer[lIndex + 2] & 0x3F) << 18) |
((pBuffer[lIndex + 3] & 0x3F) << 12) |
((pBuffer[lIndex + 4] & 0x3F) << 6) |
(pBuffer[lIndex + 5] & 0x3F));
}
WriteUtf16ToWChar(val, STLCollection, 6);
lIndex += 6;
}
}
}
else //utf8 -> utf32
{
while (lIndex < lCount)
{
BYTE byteMain = pBuffer[lIndex];
if (0x00 == (byteMain & 0x80))
{
// 1 byte
STLCollection->push_back((WCHAR)byteMain);
++lIndex;
}
else if (0x00 == (byteMain & 0x20))
{
// 2 byte
int val = 0;
if ((lIndex + 1) < lCount)
{
val = (int)(((byteMain & 0x1F) << 6) |
(pBuffer[lIndex + 1] & 0x3F));
}
STLCollection->push_back((WCHAR)val);
STLCollection->push_back((WCHAR)0);
lIndex += 2;
}
else if (0x00 == (byteMain & 0x10))
{
// 3 byte
int val = 0;
if ((lIndex + 2) < lCount)
{
val = (int)(((byteMain & 0x0F) << 12) |
((pBuffer[lIndex + 1] & 0x3F) << 6) |
(pBuffer[lIndex + 2] & 0x3F));
}
STLCollection->push_back((WCHAR)val);
WriteWCharPadding(STLCollection, 2);
lIndex += 3;
}
else if (0x00 == (byteMain & 0x0F))
{
// 4 byte
int val = 0;
if ((lIndex + 3) < lCount)
{
val = (int)(((byteMain & 0x07) << 18) |
((pBuffer[lIndex + 1] & 0x3F) << 12) |
((pBuffer[lIndex + 2] & 0x3F) << 6) |
(pBuffer[lIndex + 3] & 0x3F));
}
STLCollection->push_back((WCHAR)val);
WriteWCharPadding(STLCollection, 3);
lIndex += 4;
}
else if (0x00 == (byteMain & 0x08))
{
// 4 byte
int val = 0;
if ((lIndex + 3) < lCount)
{
val = (int)(((byteMain & 0x07) << 18) |
((pBuffer[lIndex + 1] & 0x3F) << 12) |
((pBuffer[lIndex + 2] & 0x3F) << 6) |
(pBuffer[lIndex + 3] & 0x3F));
}
STLCollection->push_back((WCHAR)val);
WriteWCharPadding(STLCollection, 3);
lIndex += 4;
}
else if (0x00 == (byteMain & 0x04))
{
// 5 byte
int val = 0;
if ((lIndex + 4) < lCount)
{
val = (int)(((byteMain & 0x03) << 24) |
((pBuffer[lIndex + 1] & 0x3F) << 18) |
((pBuffer[lIndex + 2] & 0x3F) << 12) |
((pBuffer[lIndex + 3] & 0x3F) << 6) |
(pBuffer[lIndex + 4] & 0x3F));
}
STLCollection->push_back((WCHAR)val);
WriteWCharPadding(STLCollection, 4);
lIndex += 5;
}
else
{
// 6 byte
int val = 0;
if ((lIndex + 5) < lCount)
{
val = (int)(((byteMain & 0x01) << 30) |
((pBuffer[lIndex + 1] & 0x3F) << 24) |
((pBuffer[lIndex + 2] & 0x3F) << 18) |
((pBuffer[lIndex + 3] & 0x3F) << 12) |
((pBuffer[lIndex + 4] & 0x3F) << 6) |
(pBuffer[lIndex + 5] & 0x3F));
}
STLCollection->push_back((WCHAR)val);
WriteWCharPadding(STLCollection, 5);
lIndex += 6;
}
}
}
return true;
}
}
bool DocFileFormat::FormatUtils::GetWStringFromBytes(std::wstring& string, unsigned char* bytes, int size, int code_page)
{
if (bytes == NULL)
{
return false;
}
if (code_page == ENCODING_UTF8)
{
string = NSFile::CUtf8Converter::GetUnicodeStringFromUTF8(bytes, size);
return true;
}
else if (code_page == ENCODING_UTF16)
{
string = NSFile::CUtf8Converter::GetWStringFromUTF16((unsigned short*)bytes, size / 2);
}
else if (code_page == ENCODING_WINDOWS_1250)
{
wchar_t wch = 0;
int i = 0;
while (i < size)
{
wch = MapByteToWChar(bytes[i++]);
string += (wch);
}
}
else
{
std::string sCodePage;
std::map<int, std::string>::const_iterator pFind = NSUnicodeConverter::mapEncodingsICU.find(code_page);
if (pFind != NSUnicodeConverter::mapEncodingsICU.end())
{
sCodePage = pFind->second;
}
if (sCodePage.empty())
sCodePage = "CP1250"/* + std::to_string(code_page)*/;
NSUnicodeConverter::CUnicodeConverter oConverter;
string = oConverter.toUnicode((char*)bytes, (unsigned int)size, sCodePage.c_str());
}
return true;
}
bool DocFileFormat::FormatUtils::GetSTLCollectionFromBytes(std::vector<wchar_t>* STLCollection, unsigned char* bytes, int size, int code_page)
{
if (bytes == NULL)
{
return false;
}
if (code_page == ENCODING_UTF8)
{
return GetSTLCollectionFromUtf8(STLCollection, bytes, size);
}
else if (code_page == ENCODING_UTF16)
{
int i = 0;
#if !defined(_WIN32) && !defined(_WIN64)
int nCount = size / 2;
unsigned short* pShort = (unsigned short*)bytes;
int nCurrent = 0;
while (nCurrent < nCount)
{
if (*pShort < 0xD800 || *pShort > 0xDBFF)
{
STLCollection->push_back((wchar_t)(*pShort));
++pShort;
++nCurrent;
}
else
{
STLCollection->push_back((wchar_t)(((((pShort[0] - 0xD800) & 0x03FF) << 10) | ((pShort[1] - 0xDC00) & 0x03FF)) + 0x10000));
STLCollection->push_back((wchar_t)0);
pShort += 2;
nCurrent += 2;
}
}
#else
while (i < size)
{
STLCollection->push_back(FormatUtils::BytesToUInt16(bytes, i, size));
i += 2;
}
#endif
}
else if (code_page == ENCODING_WINDOWS_1250)
{
wchar_t wch = 0;
int i = 0;
while (i < size)
{
wch = MapByteToWChar(bytes[i++]);
STLCollection->push_back(wch);
}
}
else
{
std::string sCodePage;
std::map<int, std::string>::const_iterator pFind = NSUnicodeConverter::mapEncodingsICU.find(code_page);
if (pFind != NSUnicodeConverter::mapEncodingsICU.end())
{
sCodePage = pFind->second;
}
if (sCodePage.empty())
sCodePage = "CP1250"/* + std::to_string(code_page)*/;
NSUnicodeConverter::CUnicodeConverter oConverter;
std::wstring unicode_string = oConverter.toUnicode((char*)bytes, (unsigned int)size, sCodePage.c_str());
for (size_t i = 0; i < unicode_string.size(); i++)
{
STLCollection->push_back(unicode_string[i]);
}
}
return true;
}