Files
DocumentServer-v-9.2.0/core/RtfFile/Format/RtfLex.cpp
Yajbir Singh f1b860b25c
Some checks failed
check / markdownlint (push) Has been cancelled
check / spellchecker (push) Has been cancelled
updated
2025-12-11 19:03:17 +05:30

434 lines
9.7 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* (c) Copyright Ascensio System SIA 2010-2023
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish
* street, Riga, Latvia, EU, LV-1050.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "RtfLex.h"
// Creates an empty stream that owns no buffer yet.
StringStream::StringStream()
    : m_aBuffer(NULL) // must be null before Clear() runs, because Clear() releases it
{
    // Clear() puts the size and position counters into their initial state.
    Clear();
}
// Releases the owned buffer (if any) by delegating to Clear().
StringStream::~StringStream()
{
    this->Clear();
}
// Resets the stream to the "no data" state.
// The position is set to -1, so the first getc() after loading data reads index 0.
void StringStream::Clear()
{
    m_nPosAbs  = -1;
    m_nSizeAbs = 0;
    // The buffer is released through the project's RELEASEARRAYOBJECTS macro.
    RELEASEARRAYOBJECTS( m_aBuffer );
}
bool StringStream::SetSource( std::wstring sPath )
{
Clear();
NSFile::CFileBinary srcFile;
if (false == srcFile.OpenFile(sPath.c_str())) return false;
__int64 totalFileSize = srcFile.GetFileSize();
if (totalFileSize < 5)
{
srcFile.CloseFile();
return false;
}
m_nSizeAbs = (long)totalFileSize;
m_aBuffer = new unsigned char[m_nSizeAbs];
DWORD dwBytesRead = 0;
srcFile.ReadFile(m_aBuffer, (DWORD)m_nSizeAbs);
dwBytesRead = (DWORD)srcFile.GetPosition();
srcFile.CloseFile();
return true;
}
// Copies the next nCount bytes (those following the current position) into a
// newly allocated array and advances the position past them.
//
// nCount  - number of bytes requested; negative values are rejected.
// pbData  - out-parameter receiving the new BYTE[] array; the caller owns it.
//           It is left untouched when the request cannot be satisfied
//           (null pbData, negative nCount, or not enough bytes remaining).
void StringStream::getBytes( int nCount, BYTE** pbData )
{
    // A negative count would pass the bounds check below and then reach
    // new[] / memcpy with a bogus size, so reject it up front.
    if( NULL == pbData || nCount < 0 )
        return;

    if( m_nPosAbs + nCount < m_nSizeAbs )
    {
        (*pbData) = new BYTE[nCount];
        if( nCount > 0 )
        {
            // m_nPosAbs is the index of the last consumed byte, so data starts one past it.
            memcpy( (*pbData), (m_aBuffer + m_nPosAbs + 1), nCount );
            m_nPosAbs += nCount;
        }
    }
}
// Advances to the next byte and returns it.
// Returns EOF without moving the position when no bytes remain.
int StringStream::getc()
{
    if( m_nPosAbs + 1 >= m_nSizeAbs )
        return EOF;

    ++m_nPosAbs;
    return m_aBuffer[ m_nPosAbs ];
}
// Steps the position back by one byte, so the next getc() returns the same byte again.
//
// The position never goes below -1 (the "nothing read yet" value set by Clear()).
// Without that limit, calling ungetc() on a fresh or cleared stream would move
// the position to -2, and the following getc() would read m_aBuffer[-1].
//
// An upper-bound check (m_nPosAbs + 2 < m_nSizeAbs) used to exist here and was
// disabled by the original authors; their note mentions an endless loop when
// a plain .txt file is renamed to .rtf.
void StringStream::ungetc()
{
    if( m_nPosAbs != -1 )
    {
        m_nPosAbs--;
    }
}
// Inserts sText into the buffer immediately after the current position, so the
// next getc() returns the first byte of sText. The position itself is unchanged.
//
// The enlarged buffer is allocated and filled before the old one is released.
// This removes the intermediate full copy and keeps the stream intact if the
// allocation throws.
void StringStream::putString( std::string sText )
{
    const size_t nExtBufSize = sText.length();
    if( 0 == nExtBufSize )
        return; // nothing to insert, the buffer content would not change

    const size_t nOldSize   = (size_t)m_nSizeAbs;
    // Number of bytes already consumed, i.e. the index where the insertion happens.
    const size_t nDelimiter = (size_t)( m_nPosAbs + 1 );

    unsigned char* aNewBuf = new unsigned char[ nOldSize + nExtBufSize ];

    // consumed part | inserted text | not yet consumed part
    if( nDelimiter > 0 )
        memcpy( aNewBuf, m_aBuffer, nDelimiter );
    memcpy( aNewBuf + nDelimiter, sText.c_str(), nExtBufSize );
    if( nOldSize > nDelimiter )
        memcpy( aNewBuf + nDelimiter + nExtBufSize, m_aBuffer + nDelimiter, nOldSize - nDelimiter );

    RELEASEARRAYOBJECTS( m_aBuffer );
    m_aBuffer = aNewBuf;
    m_nSizeAbs += nExtBufSize;
}
// Returns the current position: the index of the last byte handed out by
// getc(), or -1 when nothing has been read since the last Clear().
LONG64 StringStream::getCurPosition()
{
    const LONG64 nPosition = m_nPosAbs;
    return nPosition;
}
// Returns the number of bytes currently held in the buffer (0 after Clear()).
LONG64 StringStream::getSize()
{
    const LONG64 nTotal = m_nSizeAbs;
    return nTotal;
}
// Creates a lexer with no file writer attached and a 5 MB text read buffer.
RtfLex::RtfLex()
{
    const int nDefaultReadBufSize = 5 * 1024 * 1024; // 5 MB

    m_nReadBufSize = nDefaultReadBufSize;
    m_caReadBuffer = new char[m_nReadBufSize];
    m_oFileWriter  = NULL;
}
// Frees the text read buffer and releases the file writer.
RtfLex::~RtfLex()
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete [] m_caReadBuffer;
    m_caReadBuffer = NULL;

    RELEASEOBJECT( m_oFileWriter );
}
double RtfLex::GetProgress()
{
return 1.0 * m_oStream.getCurPosition() / m_oStream.getSize();
}
bool RtfLex::SetSource( std::wstring sPath )
{
if (false == m_oStream.SetSource(sPath)) return false;
if (m_oStream.getSize() > m_nReadBufSize)
{
m_nReadBufSize = (int)m_oStream.getSize() ;
if (m_caReadBuffer) delete []m_caReadBuffer;
m_caReadBuffer = new char[m_nReadBufSize];
}
return true;
}
void RtfLex::CloseSource()
{
m_oStream.Clear();
}
// Returns a copy of the token stored by the most recent NextToken() call,
// without reading anything from the stream.
RtfToken RtfLex::NextCurToken()
{
    const RtfToken& oLastToken = m_oCurToken;
    return oLastToken;
}
void RtfLex::ReadBytes( int nCount, BYTE** pbData )
{
m_oStream.getBytes(nCount, pbData);
}
// Reads the next token from the stream, stores it as the current token and
// returns a copy of it.
//
// Control bytes 0..8 and 10..0x1f are skipped before a token starts (tab, 9,
// is not skipped). '{' and '}' give group tokens, '\\' starts a keyword or
// control symbol, and anything else starts a text run. At end of input the
// stream is cleared and an Eof token is produced.
RtfToken RtfLex::NextToken()
{
    m_oCurToken = RtfToken();

    int nChar = m_oStream.getc();
    for (;;)
    {
        const bool bLowControl  = (nChar >= 0 && nChar <= 8);
        const bool bHighControl = (nChar >= 10 && nChar <= 0x1f);
        if (!bLowControl && !bHighControl)
            break;
        nChar = m_oStream.getc();
    }

    if (EOF == nChar)
    {
        m_oStream.Clear();
        m_oCurToken.Type = RtfToken::Eof;
        return m_oCurToken;
    }

    if ('{' == nChar)
    {
        m_oCurToken.Type = RtfToken::GroupStart;
    }
    else if ('}' == nChar)
    {
        m_oCurToken.Type = RtfToken::GroupEnd;
    }
    else if ('\\' == nChar)
    {
        parseKeyword(m_oCurToken);
    }
    else
    {
        m_oCurToken.Type = RtfToken::Text;
        // With a file writer attached the text run goes to the writer
        // instead of being collected in the token.
        if (NULL != m_oFileWriter)
            parseTextFile(nChar, m_oCurToken);
        else
            parseText(nChar, m_oCurToken);
    }
    return m_oCurToken;
}
void RtfLex::putString( std::string sText )
{
m_oStream.putString( sText );
}
// Parses what follows a backslash: either a control symbol (a single
// non-letter character) or a control word (letters, an optional signed
// numeric parameter and an optional single trailing space).
//
// token - receives Type, Key and, where present, HasParameter / Parameter.
//
// Look-ahead only pushes a byte back when one was actually read. getc() does
// not advance at EOF, so an unconditional ungetc() would rewind the stream and
// make the last byte of the input be read twice. parseText() applies the same
// guard.
void RtfLex::parseKeyword(RtfToken& token)
{
    // Returns the next byte without consuming it, or EOF.
    auto peek = [this]() -> int
    {
        const int ch = m_oStream.getc();
        if (ch != EOF)
            m_oStream.ungetc();
        return ch;
    };

    int c = peek();

    if ( !RtfUtility::IsAlpha( c ) )
    {
        if (c == EOF)
        {
            // A lone backslash at the very end of the input: emit an empty
            // text token; the next NextToken() call reports Eof.
            token.Type = RtfToken::Text;
            token.Key = std::string("");
            return;
        }

        m_oStream.getc(); // consume the control symbol character

        if (c == '\\' || c == '{' || c == '}')
        {
            // Escaped literal character.
            token.Type = RtfToken::Text;
            token.Key = (char)c;
        }
        else if ( c > 0 && c <= 31 )
        {
            if ( c == '\t' )
            {
                token.Type = RtfToken::Keyword;
                token.Key = std::string("tab");
            }
            else if ( c == '\n' || c == '\r' )
            {
                token.Type = RtfToken::Keyword;
                token.Key = std::string("par");
            }
            else
            {
                token.Type = RtfToken::Text;
                token.Key = std::string("");
            }
        }
        else
        {
            // The key of a control symbol is the decimal character code as text.
            token.Type = RtfToken::Control;
            token.Key = std::to_string( c );
            if (c == '\'')
            {
                // \'hh - two hex digits forming one character code.
                token.HasParameter = true;
                int nCharCode = RtfUtility::ToByte( m_oStream.getc() ) << 4;
                nCharCode |= RtfUtility::ToByte( m_oStream.getc() );
                // Codes 0..30 are shifted up by one on purpose so that \'00 is
                // not lost (per the original note, the low codes are service
                // characters).
                if ( nCharCode >= 0 && nCharCode <= 30 )
                    nCharCode++;
                token.Parameter = nCharCode;
            }
            else if ( c == '|' || c == '~' || c == '-' || c == '_' || c == ':' )
            {
                token.Type = RtfToken::Keyword;
                token.Key.erase();
                token.Key += (char)c;
            }
        }
        return;
    }

    // Control word: collect the run of letters.
    std::string palabraClave;
    while (RtfUtility::IsAlpha(c))
    {
        m_oStream.getc();
        palabraClave += (char)c;
        c = peek();
    }
    token.Type = RtfToken::Keyword;
    token.Key = palabraClave;

    // Optional numeric parameter, possibly negative.
    if (RtfUtility::IsDigit(c) || c == '-')
    {
        token.HasParameter = true;

        bool negativo = false;
        if (c == '-')
        {
            negativo = true;
            m_oStream.getc();
        }

        std::wstring parametroStr;
        c = peek();
        while (RtfUtility::IsDigit(c))
        {
            m_oStream.getc();
            parametroStr += (wchar_t)c;
            c = peek();
        }

        int parametroInt = 0;
        try
        {
            parametroInt = XmlUtils::GetInteger(parametroStr);
        }
        catch (...)
        {
            // Fall back to the 64-bit conversion; if that fails too, the
            // parameter deliberately stays 0 (best-effort parsing).
            try
            {
                parametroInt = (int)XmlUtils::GetInteger64(parametroStr);
            }
            catch (...)
            {
            }
        }

        if (negativo)
            parametroInt = -parametroInt;

        token.Parameter = parametroInt;
    }

    // A single space after a control word is consumed as its delimiter.
    if (c == ' ')
    {
        m_oStream.getc();
    }
}
// Collects a run of plain text starting with the already-read character car
// and appends it to token.Key.
//
// The run ends at '\\', '{', '}' or end of input; the terminating character is
// pushed back so the next token starts with it. Carriage returns and line
// feeds inside the run are skipped.
//
// Text is staged in m_caReadBuffer and flushed to token.Key as a C string.
// One slot is always reserved for the terminating '\0'. The previous code
// wrote the terminator at index m_nReadBufSize when the buffer was exactly
// full, which is one byte past the end of the allocation.
void RtfLex::parseText(int car, RtfToken& token)
{
    int nTempBufPos = 0;
    int c = car;

    while (c != '\\' && c != '}' && c != '{' && c != EOF)
    {
        // Flush while there is still room for the terminator.
        if ( nTempBufPos >= m_nReadBufSize - 1 )
        {
            m_caReadBuffer[nTempBufPos] = '\0';
            token.Key += m_caReadBuffer;
            nTempBufPos = 0;
        }
        m_caReadBuffer[nTempBufPos++] = (char)c;

        c = m_oStream.getc();
        // Line breaks carry no meaning inside RTF text and are ignored.
        while (c == '\r' || c == '\n')
            c = m_oStream.getc();
    }

    // getc() does not advance at EOF, so there is nothing to push back then.
    if (c != EOF)
    {
        m_oStream.ungetc();
    }

    if ( nTempBufPos > 0 )
    {
        m_caReadBuffer[nTempBufPos] = '\0';
        token.Key += m_caReadBuffer;
    }
}
bool RtfLex::GetNextChar( int& nChar )
{
int c = m_oStream.getc();
m_oStream.ungetc();
//Se ignoran los retornos de carro, tabuladores y caracteres nulos
while (c == '\r' || c == '\n')
{
m_oStream.getc();
c = m_oStream.getc();
m_oStream.ungetc();
}
if( c != '\\' && c != '}' && c != '{' && c != EOF )
{
m_oStream.getc();
nChar = c;
return true;
}
else
return false;
}
// Decodes a text run as pairs of digit characters and writes one byte per pair
// to the attached file writer. Does nothing when no writer is attached.
//
// car   - first character of the run, already consumed by the caller.
// token - not modified here; the decoded data goes to the writer.
//
// Each pair is combined as high nibble * 16 + low nibble. parseKeyword()
// combines the same RtfUtility::ToByte digits as hex nibbles (<< 4) for \'hh.
// The previous radix of 10 could not produce every byte value from two hex
// digits.
// NOTE(review): assumes the run is hex-encoded binary data and that
// Strings::ToDigit, like RtfUtility::ToByte, maps a hex digit to 0..15 -
// confirm against their definitions.
//
// A trailing unpaired character is consumed and dropped. Exceptions are
// swallowed on purpose, so the function stays best-effort.
void RtfLex::parseTextFile(int car, RtfToken& token)
{
    if (NULL == m_oFileWriter) return;

    try
    {
        int nFirst = car;
        int nSecond = 0;

        if ( true == GetNextChar( nSecond ) )
        {
            BYTE byteByte = (BYTE)( 16 * RtfUtility::ToByte( nFirst ) + RtfUtility::ToByte( nSecond ) );
            m_oFileWriter->Write( &byteByte, 1 );

            // Keep going while a complete pair of characters is available.
            while ( GetNextChar( nFirst ) && GetNextChar( nSecond ) )
            {
                byteByte = (BYTE)( 16 * Strings::ToDigit( nFirst ) + Strings::ToDigit( nSecond ) );
                m_oFileWriter->Write( &byteByte, 1 );
            }
        }
    }
    catch (...)
    {
        // Deliberately ignored: a failed write must not abort lexing.
    }
}