Files
Yajbir Singh f1b860b25c
Some checks failed
check / markdownlint (push) Has been cancelled
check / spellchecker (push) Has been cancelled
updated
2025-12-11 19:03:17 +05:30

505 lines
17 KiB
C++

/*
* (c) Copyright Ascensio System SIA 2010-2023
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish
* street, Riga, Latvia, EU, LV-1050.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "CPEncodings/CodePage.h"
#include "StringExt.h"
namespace NSStringExt
{
// Expands to a loop that converts lCount single-byte characters from pData
// into wide characters. The loop writes into a buffer named `pUnicode`, which
// must already exist in the scope where the macro is expanded.
// Bytes outside [MSCP_FIRST_CHAR, MSCP_LAST_CHAR] are copied through unchanged;
// bytes inside that range are looked up in UnicodeMapCP at index
// (byte - MSCP_FIRST_CHAR).
// No comments are placed inside the body: every line ends with a continuation
// backslash, so a `//` comment there would swallow the following line.
#define NSSTRING_COMMON_CP(UnicodeMapCP, lCount, pData) \
for (long i = 0; i < lCount; ++i)\
{\
unsigned char unChar = (unsigned char)pData[i];\
if (unChar < MSCP_FIRST_CHAR || unChar > MSCP_LAST_CHAR)\
pUnicode[i] = (wchar_t)unChar;\
else\
pUnicode[i] = (wchar_t)(UnicodeMapCP[unChar - MSCP_FIRST_CHAR]);\
}
// end define
void NSSTRING_WITHLEADBYTE_CP(wchar_t** ppUnicode, unsigned short LEAD_CHAR, const unsigned short* UnicodeMapCP, const TCodePagePair* UnicodeMapWithLeadByte, long lCount, const unsigned char* pData)
{
int nLeadByte = -1;
int nUnicodePos = 0;
for (long i = 0; i < lCount; ++i)
{
unsigned char unCode = (unsigned char)pData[i];
unsigned short ushUnicode = UnicodeMapCP[unCode];
if (-1 == nLeadByte)
{
if (LEAD_CHAR != ushUnicode)
{
(*ppUnicode)[nUnicodePos++] = ushUnicode;
nLeadByte = -1;
}
else
{
nLeadByte = unCode;
}
}
else
{
unsigned short ushCode = (nLeadByte << 8) | unCode;
TCodePagePair *pPair = (TCodePagePair*)UnicodeMapWithLeadByte;
while (0xFFFF != pPair->ushCode)
{
if (ushCode == pPair->ushCode)
{
(*ppUnicode)[nUnicodePos++] = pPair->ushUnicode;
break;
}
pPair++;
}
nLeadByte = -1;
}
}
(*ppUnicode)[nUnicodePos++] = 0x0000;
}
// Converts lCount bytes at pData to a wide string according to eType.
//
// The result is built in a temporary buffer of lCount + 1 elements and is then
// constructed from that buffer as a null-terminated string, so the returned
// string ends at the first zero element that the conversion produced.
// Unknown eType values are handled like SINGLE_BYTE_ENCODING_DEFAULT.
std::wstring CConverter::GetUnicodeFromSingleByteString(const unsigned char* pData, long lCount, ESingleByteEncoding eType)
{
	// The code-page macro below writes into a variable named exactly `pUnicode`.
	wchar_t* pUnicode = new wchar_t[lCount + 1];
	if (!pUnicode)
		return std::wstring();

	switch (eType)
	{
	case SINGLE_BYTE_ENCODING_DEFAULT:
	default:
	{
		// Every byte is widened as-is.
		long lPos = 0;
		while (lPos < lCount)
		{
			pUnicode[lPos] = (wchar_t)(unsigned char)pData[lPos];
			++lPos;
		}
		break;
	}
	case SINGLE_BYTE_ENCODING_SYMBOL:
	{
		// Add 0xF000 to the code of every character.
		long lPos = 0;
		while (lPos < lCount)
		{
			const unsigned char unByte = (unsigned char)pData[lPos];
			pUnicode[lPos] = (wchar_t)(0xF000 | unByte);
			++lPos;
		}
		break;
	}

	// Single-byte code pages: table lookup through NSSTRING_COMMON_CP.
	case SINGLE_BYTE_ENCODING_CP866:  NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP866, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP874:  NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP874, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1250: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1250, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1251: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1251, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1252: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1252, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1253: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1253, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1254: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1254, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1255: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1255, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1256: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1256, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1257: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1257, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1258: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1258, lCount, pData); break;

	// Code pages with lead bytes: the helper writes its own terminator after the
	// characters it produced, which may be fewer than lCount.
	case SINGLE_BYTE_ENCODING_CP932:  NSSTRING_WITHLEADBYTE_CP(&pUnicode, MSCP932_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP932, c_aoUnicodeMapCP932WithLeadByte, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP936:  NSSTRING_WITHLEADBYTE_CP(&pUnicode, MSCP936_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP936, NSStringExt::c_aoUnicodeMapCP936WithLeadByte, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP949:  NSSTRING_WITHLEADBYTE_CP(&pUnicode, MSCP949_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP949, NSStringExt::c_aoUnicodeMapCP949WithLeadByte, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP950:  NSSTRING_WITHLEADBYTE_CP(&pUnicode, MSCP950_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP950, NSStringExt::c_aoUnicodeMapCP950WithLeadByte, lCount, pData); break;
	case SINGLE_BYTE_ENCODING_CP1361: NSSTRING_WITHLEADBYTE_CP(&pUnicode, MSCP1361_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP1361, NSStringExt::c_aoUnicodeMapCP1361WithLeadByte, lCount, pData); break;
	}

	pUnicode[lCount] = 0;
	std::wstring wsResult(pUnicode);
	delete[] pUnicode;
	return wsResult;
}
// Builds a wide string from lCount UTF-16 code units.
//
// When wchar_t is 2 bytes wide the units are copied unchanged. Otherwise each
// surrogate pair is combined into a single code point.
//
// Returns an empty string when pData is null, lCount is not positive, or no
// character could be decoded.
//
// Malformed input is handled as before: any unit in 0xD800..0xDFFF is treated as
// the first half of a pair; if the unit after it is not in 0xDC00..0xDFFF both
// units are dropped, and a surrogate that is the last unit is dropped.
//
// The result is accumulated directly in a std::wstring, so no temporary buffer
// can leak on the "nothing decoded" path.
std::wstring CConverter::GetUnicodeFromUTF16(const unsigned short* pData, long lCount)
{
	if (!pData || lCount <= 0)
		return L"";

	if (2 == sizeof(wchar_t))
		return std::wstring((wchar_t*)pData, lCount);

	std::wstring sRet;
	sRet.reserve((size_t)lCount); // the output never has more elements than the input

	long lPos = 0;
	while (lPos < lCount)
	{
		const unsigned int unLeading = pData[lPos++];
		if (unLeading < 0xD800 || unLeading > 0xDFFF)
		{
			sRet.push_back((wchar_t)unLeading);
			continue;
		}

		// A surrogate with nothing after it cannot be completed.
		if (lPos >= lCount)
			break;

		const unsigned int unTrailing = pData[lPos++];
		if (unTrailing >= 0xDC00 && unTrailing <= 0xDFFF)
		{
			const unsigned int unCode = 0x10000 + (((unLeading & 0x03FF) << 10) | (unTrailing & 0x03FF));
			sRet.push_back((wchar_t)unCode);
		}
	}

	return sRet;
}
// Builds a wide string from lCount UTF-32 values.
//
// When wchar_t is 4 bytes wide the values are copied unchanged. Otherwise values
// below 0x10000 are stored as one element and all other values are split into a
// high/low surrogate pair.
//
// Returns an empty string when pData is null or lCount is not positive.
//
// NOTE(review): values above 0x10FFFF are not rejected; they go through the same
// surrogate arithmetic as before and do not produce a valid pair.
std::wstring CConverter::GetUnicodeFromUTF32(const unsigned int* pData, long lCount)
{
	if (!pData || lCount <= 0)
		return L"";

	if (4 == sizeof(wchar_t))
		return std::wstring((wchar_t*)pData, lCount);

	std::wstring sRet;
	sRet.reserve(2 * (size_t)lCount); // worst case: every value becomes a surrogate pair

	for (long lIndex = 0; lIndex < lCount; ++lIndex)
	{
		unsigned int unUnicode = pData[lIndex];
		if (unUnicode < 0x10000)
		{
			sRet.push_back((wchar_t)unUnicode);
		}
		else
		{
			unUnicode -= 0x10000;
			sRet.push_back((wchar_t)(0xD800 | (unUnicode >> 10)));
			sRet.push_back((wchar_t)(0xDC00 | (unUnicode & 0x3FF)));
		}
	}

	return sRet;
}
// Encodes lCount UTF-32 values as UTF-8 into a newly allocated buffer.
//
// pUnicodes    - input values; a null pointer is treated as an empty input
// lCount       - number of input values; negative counts are treated as 0
// pOutputData  - receives a buffer allocated with new[]; the caller releases it
//                with delete[]
// lOutputCount - receives the number of encoded bytes, not counting the zero
//                byte written after them
//
// The legacy 5- and 6-byte forms are kept, so values up to 0x7FFFFFFF are
// encoded. Values of 0x80000000 and above produce no output.
void CConverter::GetUtf8FromUTF32(const unsigned int* pUnicodes, long lCount, unsigned char*& pOutputData, long& lOutputCount)
{
	if (!pUnicodes || lCount < 0)
		lCount = 0;

	// At most 6 bytes per value; the extra bytes keep the original sizing and
	// leave room for the terminator.
	pOutputData = new unsigned char[6 * lCount + 3 + 1];
	unsigned char* pCodesCur = pOutputData;

	for (long i = 0; i < lCount; ++i)
	{
		const unsigned int code = pUnicodes[i];
		if (code < 0x80)
		{
			*pCodesCur++ = (unsigned char)code;
		}
		else if (code < 0x0800)
		{
			*pCodesCur++ = (unsigned char)(0xC0 | (code >> 6));
			*pCodesCur++ = (unsigned char)(0x80 | (code & 0x3F));
		}
		else if (code < 0x10000)
		{
			*pCodesCur++ = (unsigned char)(0xE0 | (code >> 12));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 6) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | (code & 0x3F));
		}
		else if (code < 0x200000) // the 4-byte form holds 21 bits, up to and including 0x1FFFFF
		{
			*pCodesCur++ = (unsigned char)(0xF0 | (code >> 18));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 12) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 6) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | (code & 0x3F));
		}
		else if (code < 0x4000000) // the 5-byte form holds 26 bits, up to and including 0x3FFFFFF
		{
			*pCodesCur++ = (unsigned char)(0xF8 | (code >> 24));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 18) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 12) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 6) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | (code & 0x3F));
		}
		else if (code < 0x80000000) // the 6-byte form holds 31 bits, up to and including 0x7FFFFFFF
		{
			*pCodesCur++ = (unsigned char)(0xFC | (code >> 30));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 24) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 18) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 12) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | ((code >> 6) & 0x3F));
			*pCodesCur++ = (unsigned char)(0x80 | (code & 0x3F));
		}
	}

	lOutputCount = (long)(pCodesCur - pOutputData);
	*pCodesCur = 0;
}
// Convenience overload: encodes lCount UTF-32 values through the buffer-based
// GetUtf8FromUTF32 and returns the encoded bytes as a std::string. The
// temporary buffer produced by that call is released here.
std::string CConverter::GetUtf8FromUTF32(const unsigned int* pUnicodes, long lCount)
{
	long lUtf8Size = 0;
	unsigned char* pUtf8 = NULL;
	GetUtf8FromUTF32(pUnicodes, lCount, pUtf8, lUtf8Size);

	std::string sUtf8(reinterpret_cast<const char*>(pUtf8), lUtf8Size);
	delete[] pUtf8;
	return sUtf8;
}
// Converts a wide string to an array of UTF-32 values.
//
// wsUnicodeText - input text
// unLen         - receives the number of values written; it is set to 0 on
//                 every path, including the empty-input one
//
// Returns a buffer allocated with new[] (the caller releases it with delete[]),
// or NULL when the input is empty.
//
// When wchar_t is 2 bytes wide, surrogate pairs are combined into one value.
// A trailing surrogate with no leading one is skipped. A leading surrogate that
// is not followed by a trailing one is skipped together with the unit after it.
// Otherwise every element is copied as-is.
unsigned int* CConverter::GetUtf32FromUnicode(const std::wstring& wsUnicodeText, unsigned int& unLen)
{
	unLen = 0;
	if (wsUnicodeText.empty())
		return NULL;

	unsigned int* pUnicodes = new unsigned int[wsUnicodeText.size()];
	if (!pUnicodes)
		return NULL;

	if (2 == sizeof(wchar_t))
	{
		const wchar_t* wsInput = wsUnicodeText.c_str();
		const wchar_t* wsEnd = wsInput + wsUnicodeText.size();
		while (wsInput < wsEnd)
		{
			const wchar_t wLeading = *wsInput++;
			if (wLeading < 0xD800 || wLeading > 0xDFFF)
			{
				pUnicodes[unLen++] = (unsigned int)wLeading;
				continue;
			}

			if (wLeading >= 0xDC00)
			{
				// This should not happen: a trailing surrogate without a leading one.
				continue;
			}

			// If the leading surrogate was the last character, this reads the
			// terminating null that c_str() provides, and the range check below
			// rejects it.
			const wchar_t wTrailing = *wsInput++;
			if (wTrailing < 0xDC00 || wTrailing > 0xDFFF)
			{
				// This should not happen: a leading surrogate without a trailing one.
				continue;
			}

			// Join the two 10-bit halves first and only then add 0x10000. '+' binds
			// tighter than '|', so without the parentheses the offset would be OR-ed
			// into the high half and lost whenever that half already has bit 16 set.
			const unsigned int unHigh = ((unsigned int)wLeading & 0x3FF) << 10;
			const unsigned int unLow = (unsigned int)wTrailing & 0x3FF;
			pUnicodes[unLen++] = 0x10000 + (unHigh | unLow);
		}
	}
	else
	{
		unLen = (unsigned int)wsUnicodeText.size();
		for (unsigned int unIndex = 0; unIndex < unLen; unIndex++)
		{
			pUnicodes[unIndex] = (unsigned int)wsUnicodeText.at(unIndex);
		}
	}

	return pUnicodes;
}
// Converts a wide string to an array of UTF-16 code units.
//
// wsUnicodeText - input text
// unLen         - receives the number of code units written; it is set to 0 on
//                 every path that returns NULL, including the empty-input one
// bIsLE         - when false, the two bytes of every code unit are swapped
//                 before returning
//
// Returns a buffer allocated with new[] (the caller releases it with delete[]),
// or NULL when the input is empty.
//
// When wchar_t is 2 bytes wide the elements are copied as-is. Otherwise values
// of 0x10000 and above are split into a surrogate pair, and the buffer is
// zero-filled first.
//
// NOTE(review): the byte swap is unconditional for !bIsLE, so it yields
// big-endian output only on a little-endian host - confirm this is intended.
unsigned short* CConverter::GetUtf16FromUnicode(const std::wstring& wsUnicodeText, unsigned int& unLen, const bool& bIsLE)
{
	unLen = 0;

	const unsigned int unTextLen = (unsigned int)wsUnicodeText.size();
	if (0 == unTextLen)
		return NULL;

	unsigned short* pUtf16 = NULL;
	if (2 == sizeof(wchar_t))
	{
		pUtf16 = new unsigned short[unTextLen];
		if (!pUtf16)
			return NULL;

		for (unsigned int unIndex = 0; unIndex < unTextLen; unIndex++)
		{
			pUtf16[unIndex] = (unsigned short)wsUnicodeText.at(unIndex);
		}
		unLen = unTextLen;
	}
	else
	{
		// Worst case: every element becomes a surrogate pair.
		pUtf16 = new unsigned short[2 * unTextLen + 1];
		if (!pUtf16)
			return NULL;

		memset(pUtf16, 0x00, sizeof(unsigned short) * (2 * unTextLen + 1));

		unsigned short* pCur = pUtf16;
		for (unsigned int unIndex = 0; unIndex < unTextLen; unIndex++)
		{
			unsigned int unUnicode = (unsigned int)wsUnicodeText.at(unIndex);
			if (unUnicode < 0x10000)
			{
				*pCur++ = (unsigned short)unUnicode;
			}
			else
			{
				unUnicode -= 0x10000;
				*pCur++ = (unsigned short)(0xD800 | (unUnicode >> 10));
				*pCur++ = (unsigned short)(0xDC00 | (unUnicode & 0x3FF));
			}
		}

		unLen = (unsigned int)(pCur - pUtf16);
		if (!unLen)
		{
			delete[] pUtf16;
			return NULL;
		}
	}

	if (!bIsLE)
	{
		// Swap the two bytes of each code unit in place.
		unsigned char* pBytes = (unsigned char*)pUtf16;
		const unsigned int unByteCount = unLen << 1;
		for (unsigned int i = 0; i < unByteCount; i += 2)
		{
			const unsigned char tmp = pBytes[i];
			pBytes[i] = pBytes[i + 1];
			pBytes[i + 1] = tmp;
		}
	}

	return pUtf16;
}
// Private state and helpers for CStringUnicodeIterator.
// Stores a pointer to the characters of the string passed to the constructor
// (no copy is made), the string length and the current position.
class CStringUnicodeIterator_private
{
public:
	const wchar_t* m_str;   // characters of the iterated string (not owned)
	size_t m_str_len;       // number of wchar_t elements in m_str
	size_t m_index;         // current position, in wchar_t elements

public:
	CStringUnicodeIterator_private(const std::wstring& str)
		: m_str(str.c_str())
		, m_str_len(str.length())
		, m_index(0)
	{
	}

	// Returns true for any value in the surrogate range 0xD800..0xDFFF
	// (despite the name, trailing surrogates are included).
	inline bool IsLeadingSurrogateChar(const unsigned int& nCharCode)
	{
		if (nCharCode < 0xD800)
			return false;
		return nCharCode <= 0xDFFF;
	}

	// Combines a leading and a trailing value into one code point.
	// Returns 0 when nLeadingChar is 0xDC00 or above, or when nTrailingChar is
	// outside 0xDC00..0xDFFF.
	inline unsigned int DecodeSurrogateChar(const unsigned int& nLeadingChar, const unsigned int& nTrailingChar)
	{
		const bool bLeadingOk = (nLeadingChar < 0xDC00);
		const bool bTrailingOk = (nTrailingChar >= 0xDC00) && (nTrailingChar <= 0xDFFF);
		if (!bLeadingOk || !bTrailingOk)
			return 0;

		const unsigned int nHighPart = (nLeadingChar & 0x3FF) << 10;
		const unsigned int nLowPart = nTrailingChar & 0x3FF;
		return (0x10000 + nHighPart) | nLowPart;
	}
};
// Creates the iterator's private state for `string`. The state keeps a pointer
// to the string's characters rather than a copy, so `string` has to stay alive
// and unmodified while the iterator is in use.
CStringUnicodeIterator::CStringUnicodeIterator(const std::wstring& string)
	: m_internal(new CStringUnicodeIterator_private(string))
{
}
// Releases the private state object allocated in the constructor.
CStringUnicodeIterator::~CStringUnicodeIterator()
{
delete m_internal;
}
// Returns true while the current position is still inside the string.
bool CStringUnicodeIterator::Check()
{
	const size_t nPos = m_internal->m_index;
	const size_t nLen = m_internal->m_str_len;
	return nPos < nLen;
}
// Advances to the next character; does nothing once the end has been reached.
// When wchar_t is 2 bytes wide, an element in the surrogate range advances the
// position by two elements, and every other element advances it by one.
void CStringUnicodeIterator::Next()
{
	const size_t nPos = m_internal->m_index;
	if (nPos >= m_internal->m_str_len)
		return;

	size_t nStep = 1;
	if (2 == sizeof(wchar_t))
	{
		const unsigned int nCharCode = (unsigned int)m_internal->m_str[nPos];
		if (m_internal->IsLeadingSurrogateChar(nCharCode))
			nStep = 2;
	}

	m_internal->m_index = nPos + nStep;
}
// Returns the code point at the current position, or 0 past the end.
// When wchar_t is not 2 bytes wide the element is returned as-is. When it is
// 2 bytes wide, a surrogate is combined with the element after it; a surrogate
// in the last position is returned unchanged, and a pair rejected by
// DecodeSurrogateChar yields 0.
unsigned int CStringUnicodeIterator::Value()
{
	const size_t nPos = m_internal->m_index;
	const size_t nLen = m_internal->m_str_len;
	if (nPos >= nLen)
		return 0;

	const unsigned int nCharCode = (unsigned int)m_internal->m_str[nPos];
	if (2 != sizeof(wchar_t))
		return nCharCode;

	// Nothing to combine: not a surrogate, or there is no following element.
	const bool bIsSurrogate = m_internal->IsLeadingSurrogateChar(nCharCode);
	const bool bIsLast = (nPos + 1 == nLen);
	if (!bIsSurrogate || bIsLast)
		return nCharCode;

	const unsigned int nTrailingChar = (unsigned int)m_internal->m_str[nPos + 1];
	return m_internal->DecodeSurrogateChar(nCharCode, nTrailingChar);
}
}