Files
Yajbir Singh f1b860b25c
Some checks failed
check / markdownlint (push) Has been cancelled
check / spellchecker (push) Has been cancelled
updated
2025-12-11 19:03:17 +05:30

425 lines
12 KiB
C++

/*
* (c) Copyright Ascensio System SIA 2010-2023
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish
* street, Riga, Latvia, EU, LV-1050.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "RtfReader.h"
#include "../OOXml/Writer/OOXWriter.h"
#include "DestinationCommand.h"
RtfReader::ReaderState::ReaderState()
{
m_bControlPresent = false;
m_nUnicodeClean = 1;
m_oCharProp.SetDefaultRtf();
m_oParagraphProp.SetDefaultRtf();
m_oRowProperty.SetDefaultRtf();
m_oCellProperty.SetDefaultRtf();
m_oCurOldList.SetDefault();
}
RtfReader::~RtfReader()
{
}
long RtfReader::GetProgress()
{
return (long)( g_cdMaxPercent * m_oLex.GetProgress());
}
void RtfReader::Stop()
{
m_oLex.CloseSource();
}
RtfReader::RtfReader(RtfDocument& oDocument, std::wstring sFilename ) : m_oDocument(oDocument), m_sFilename(sFilename)
{
m_oState = ReaderStatePtr(new ReaderState());
m_nFootnote = PROP_DEF;
m_nDefFont = PROP_DEF;
m_nDefLang = PROP_DEF;
m_nDefLangAsian = PROP_DEF;
m_convertationManager = NULL;
}
bool RtfReader::Load()
{
if (false == m_oLex.SetSource(m_sFilename)) return false;
RtfNormalReader oNormalReader( m_oDocument, (*this) );
oNormalReader.Parse( m_oDocument, (*this) );
m_oLex.CloseSource();
return true;
}
void RtfReader::PushState()
{
ReaderStatePtr psaveNew = ReaderStatePtr(new ReaderState());
psaveNew -> m_nUnicodeClean = m_oState->m_nUnicodeClean;
psaveNew -> m_oCharProp = m_oState->m_oCharProp;
psaveNew -> m_oParagraphProp = m_oState->m_oParagraphProp;
psaveNew -> m_oRowProperty = m_oState->m_oRowProperty;
psaveNew -> m_oCellProperty = m_oState->m_oCellProperty;
psaveNew -> m_oCurOldList = m_oState->m_oCurOldList;
//psaveNew -> m_oSectionProp = m_oState->m_oSectionProp;
psaveNew -> m_pSaveState = m_oState;
m_oState = psaveNew;
if( PROP_DEF == m_oState->m_oCharProp.m_nFont )
m_oState->m_oCharProp.m_nFont = m_nDefFont;
if (PROP_DEF == m_oState->m_oCharProp.m_nLanguage)
m_oState->m_oCharProp.m_nLanguage = m_nDefLang;
if (PROP_DEF == m_oState->m_oCharProp.m_nLanguageAsian)
m_oState->m_oCharProp.m_nLanguageAsian = m_nDefLangAsian;
}
void RtfReader::PopState()
{
if( 0 != m_oState->m_pSaveState )
m_oState = m_oState->m_pSaveState;
}
//---------------------------------------------------------------------------------------------------------------------------------
RtfAbstractReader::RtfAbstractReader()
{
m_bCanStartNewReader = false;
m_bSkip = false;
m_nSkipChars = 0;
m_nCurGroups = 1;
m_oFileWriter = NULL;
m_bStopReader = false;
m_bUseGlobalCodepage = false;
}
void RtfAbstractReader::PushState(RtfReader& oReader)
{
oReader.PushState();
m_nCurGroups++;
m_bCanStartNewReader = true;
}
void RtfAbstractReader::PopState(RtfDocument& oDocument, RtfReader& oReader)
{
if( m_nCurGroups > 0 )
m_nCurGroups--;
else
;//ASSERT(false);
if( m_nCurGroups == 0 )
{
m_bStopReader = true;
ExitReader( oDocument, oReader );
}
oReader.PopState();
if( m_nCurGroups == 0 )
ExitReader2( oDocument, oReader );
}
bool RtfAbstractReader::StartSubReader( RtfAbstractReader& poNewReader, RtfDocument& oDocument, RtfReader& oReader )
{
if( true == m_bCanStartNewReader )
{
m_bCanStartNewReader = false;
m_nCurGroups--;
poNewReader.m_bSkip = m_bSkip;
return poNewReader.Parse(oDocument, oReader);
}
return false;
}
void RtfAbstractReader::Skip( RtfDocument& oDocument, RtfReader& oReader )
{
int cGroup = 1;
while( cGroup >= 1 )
{
m_oTok = oReader.m_oLex.NextToken();
if(m_oTok.Type == RtfToken::GroupStart)
cGroup++;
else if(m_oTok.Type == RtfToken::GroupEnd)
cGroup--;
else if(m_oTok.Type == RtfToken::Eof)
break;
}
PopState( oDocument, oReader );
}
bool RtfAbstractReader::ExecuteCommand( RtfDocument& oDocument, RtfReader& oReader, std::string sKey, bool bHasPar, int nPar )
{
return true;
}
void RtfAbstractReader::ExecuteText( RtfDocument& oDocument, RtfReader& oReader, std::wstring oText )
{
}
void RtfAbstractReader::ExitReader( RtfDocument& oDocument, RtfReader& oReader )
{
}
void RtfAbstractReader::ExitReader2( RtfDocument& oDocument, RtfReader& oReader )
{
}
bool RtfAbstractReader::RtfAbstractReader::Parse(RtfDocument& oDocument, RtfReader& oReader)
{
NFileWriter::CBufferedFileWriter* poOldWriter = oReader.m_oLex.m_oFileWriter;
oReader.m_oLex.m_oFileWriter = m_oFileWriter;
int res = 0;
m_oTok = oReader.m_oLex.NextCurToken();
if (m_oTok.Type == m_oTok.None)
m_oTok = oReader.m_oLex.NextToken();
while (m_oTok.Type != RtfToken::Eof && false == m_bStopReader)
{
switch (m_oTok.Type)
{
case RtfToken::GroupStart:
{
ExecuteTextInternal2(oDocument, oReader, m_oTok.Key, m_nSkipChars);
PushState(oReader);
}break;
case RtfToken::GroupEnd:
{
ExecuteTextInternal2(oDocument, oReader, m_oTok.Key, m_nSkipChars);
PopState(oDocument, oReader);
}break;
case RtfToken::Keyword:
{
ExecuteTextInternal2(oDocument, oReader, m_oTok.Key, m_nSkipChars);
if (m_oTok.Key == "u")
{
ExecuteText(oDocument, oReader, ExecuteTextInternal(oDocument, oReader, m_oTok.Key, m_oTok.HasParameter, m_oTok.Parameter, m_nSkipChars));
break;
}
else
{
if (true == m_bSkip)
{
if (false == ExecuteCommand(oDocument, oReader, m_oTok.Key, m_oTok.HasParameter, m_oTok.Parameter))
Skip(oDocument, oReader);
m_bSkip = false;
}
else
ExecuteCommand(oDocument, oReader, m_oTok.Key, m_oTok.HasParameter, m_oTok.Parameter);
}
if (true == m_bCanStartNewReader)
m_bCanStartNewReader = false;
}break;
case RtfToken::Control:
{
if (m_oTok.Key == "42")
m_bSkip = true;
if (m_oTok.Key == "39" && true == m_oTok.HasParameter)
{
oReader.m_oState->m_sCurText += m_oTok.Parameter;
oReader.m_oState->m_bControlPresent = true;
}
if (m_oTok.Key == "32" && false == m_oTok.HasParameter)
{
oReader.m_oState->m_sCurText += " ";
oReader.m_oState->m_bControlPresent = true;
}
if (m_oTok.Key == "par" && false == m_oTok.HasParameter)
{
oReader.m_oState->m_sCurText += "\n";
oReader.m_oState->m_bControlPresent = true;
}
}break;
case RtfToken::Text:
{
oReader.m_oState->m_sCurText += m_oTok.Key;
}break;
}
if (false == m_bStopReader)
m_oTok = oReader.m_oLex.NextToken();
}
oReader.m_oLex.m_oFileWriter = poOldWriter;
return true;
}
std::wstring RtfAbstractReader::ExecuteTextInternal(RtfDocument& oDocument, RtfReader& oReader, std::string & sKey, bool bHasPar, int nPar, int& nSkipChars)
{
std::wstring sResult;
if ("u" == sKey)
{
if (true == bHasPar)
{
if (m_bUseGlobalCodepage && sizeof(wchar_t) != 2)
{
nPar = nPar & 0x0FFF;
}
sResult += wchar_t(nPar);
}
}
else
{
std::string sCharString;
if ("39" == sKey)
{
if (true == bHasPar)
sCharString += (char)nPar;
}
else
sCharString = sKey;
sResult = ExecuteTextInternalCodePage(sCharString, oDocument, oReader);
}
ExecuteTextInternalSkipChars(sResult, oReader, sKey, nSkipChars);
return sResult;
}
void RtfAbstractReader::ExecuteTextInternal2(RtfDocument& oDocument, RtfReader& oReader, std::string & sKey, int& nSkipChars)
{
if (false == oReader.m_oState->m_sCurText.empty())
{
std::string str;
ExecuteTextInternalSkipChars(oReader.m_oState->m_sCurText, oReader, str, nSkipChars);
std::wstring sResult = ExecuteTextInternalCodePage(oReader.m_oState->m_sCurText, oDocument, oReader);
oReader.m_oState->m_sCurText.erase();
oReader.m_oState->m_bControlPresent = false;
if (false == sResult.empty())
{
ExecuteText(oDocument, oReader, sResult);
}
}
}
void RtfAbstractReader::ExecuteTextInternalSkipChars(std::string & sResult, RtfReader& oReader, std::string & sKey, int& nSkipChars)
{
//удаляем символы вслед за юникодом
if (nSkipChars > 0)
{
if (nSkipChars >= (int)sResult.length())
{
//nSkipChars -= nLength;//vedomost.rtf
sResult.clear();
}
else
{
sResult = sResult.substr(nSkipChars);
}
nSkipChars = 0;
}
if ("u" == sKey)
{
//надо правильно установить m_nSkipChars по значению \ucN
nSkipChars = oReader.m_oState->m_nUnicodeClean;
}
}
void RtfAbstractReader::ExecuteTextInternalSkipChars(std::wstring & sResult, RtfReader& oReader, std::string & sKey, int& nSkipChars)
{
//удаляем символы вслед за юникодом
if (nSkipChars > 0)
{
if (nSkipChars >= (int)sResult.length())
{
//nSkipChars -= nLength;//vedomost.rtf
sResult.clear();
}
else
{
sResult = sResult.substr(nSkipChars);
}
nSkipChars = 0;
}
if ("u" == sKey)
{
//надо правильно установить m_nSkipChars по значению \ucN
nSkipChars = oReader.m_oState->m_nUnicodeClean;
}
}
std::wstring RtfAbstractReader::ExecuteTextInternalCodePage( std::string& sCharString, RtfDocument& oDocument, RtfReader& oReader)
{
if (sCharString.empty()) return L"";
if (sCharString == "*") return L"*";
std::wstring sResult;
int nCodepage = -1;
//применяем параметры codepage от текущего шрифта todo associated fonts.
RtfFont oFont;
if ((!m_bUseGlobalCodepage) && (true == oDocument.m_oFontTable.GetFont(oReader.m_oState->m_oCharProp.m_nFont, oFont)))
{
if (PROP_DEF != oFont.m_nCodePage)
{
nCodepage = oFont.m_nCodePage;
}
else if ((PROP_DEF != oFont.m_nCharset && oFont.m_nCharset > 2)
&& (PROP_DEF == oDocument.m_oProperty.m_nAnsiCodePage || 0 == oDocument.m_oProperty.m_nAnsiCodePage || 1252 == oDocument.m_oProperty.m_nAnsiCodePage))
{
nCodepage = RtfUtility::CharsetToCodepage(oFont.m_nCharset);
}
}
//от настроек документа
if (-1 == nCodepage && RtfDocumentProperty::cp_none != oDocument.m_oProperty.m_eCodePage)
{
switch (oDocument.m_oProperty.m_eCodePage)
{
case RtfDocumentProperty::cp_ansi:
{
if (PROP_DEF != oDocument.m_oProperty.m_nAnsiCodePage)
{
nCodepage = oDocument.m_oProperty.m_nAnsiCodePage;
}
else
nCodepage = CP_ACP;
break;
}
case RtfDocumentProperty::cp_mac: nCodepage = CP_MACCP; break; //?? todooo
case RtfDocumentProperty::cp_pc: nCodepage = 437; break; //ms dos latin us
case RtfDocumentProperty::cp_pca: nCodepage = 850; break; //ms dos latin eu
}
}
//если ничего нет ставим ANSI или default from user
if (-1 == nCodepage)
{
nCodepage = CP_ACP;
}
if ((nCodepage == CP_ACP || nCodepage == 1252)&& oDocument.m_nUserLCID > 0)
{
nCodepage = oDocument.m_lcidConverter.get_codepage(oDocument.m_nUserLCID);
}
if (m_bUseGlobalCodepage && nCodepage == 0 && PROP_DEF != oDocument.m_oProperty.m_nDefLang )
{
nCodepage = oDocument.m_lcidConverter.get_codepage(oDocument.m_oProperty.m_nDefLang);
}
if (m_bUseGlobalCodepage && nCodepage == 0)
{
sResult = std::wstring(sCharString.begin(), sCharString.end());
}
else
{
sResult = RtfUtility::convert_string_icu(sCharString.begin(), sCharString.end(), nCodepage);
}
//if (!sCharString.empty() && sResult.empty())
//{
// //code page not support in icu !!!
// sResult = RtfUtility::convert_string(sCharString.begin(), sCharString.end(), nCodepage); .. to UnicodeConverter
//}
return sResult;
}