DocumentServer-v-9.2.0/core/PdfFile/lib/xpdf/Parser.cc

//========================================================================
//
// Parser.cc
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================

#include <aconf.h>

#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif

#include <stddef.h>
#include <string.h>
#include "gmempp.h"
#include "Object.h"
#include "Array.h"
#include "Dict.h"
#include "Decrypt.h"
#include "Parser.h"
#include "XRef.h"
#include "Error.h"

// Max number of nested objects.  This is used to catch infinite loops
// in the object structure.
#define recursionLimit 500

Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
  xref = xrefA;
  lexer = lexerA;
  inlineImg = 0;
  allowStreams = allowStreamsA;
  lexer->getObj(&buf1);
  lexer->getObj(&buf2);
}

Parser::~Parser() {
  buf1.free();
  buf2.free();
  delete lexer;
}

Object *Parser::getObj(Object *obj, GBool simpleOnly,
		       Guchar *fileKey,
		       CryptAlgorithm encAlgorithm, int keyLength,
		       int objNum, int objGen, int recursion) {
  char *key;
  Stream *str;
  Object obj2;
  int num;
  DecryptStream *decrypt;
  GString *s, *s2;
  int c;

  // refill buffer after inline image data
  if (inlineImg == 2) {
    buf1.free();
    buf2.free();
    lexer->getObj(&buf1);
    lexer->getObj(&buf2);
    inlineImg = 0;
  }

  // array
  if (!simpleOnly && recursion < recursionLimit && buf1.isCmd("[")) {
    shift();
    obj->initArray(xref);
    while (!buf1.isCmd("]") && !buf1.isEOF())
      obj->arrayAdd(getObj(&obj2, gFalse, fileKey, encAlgorithm, keyLength,
			   objNum, objGen, recursion + 1));
    if (buf1.isEOF())
      error(errSyntaxError, getPos(), "End of file inside array");
    shift();

  // dictionary or stream
  } else if (!simpleOnly && recursion < recursionLimit && buf1.isCmd("<<")) {
    shift();
    obj->initDict(xref);
    while (!buf1.isCmd(">>") && !buf1.isEOF()) {
      if (!buf1.isName()) {
	error(errSyntaxError, getPos(),
	      "Dictionary key must be a name object");
	shift();
      } else {
	key = copyString(buf1.getName());
	shift();
	if (buf1.isEOF() || buf1.isError()) {
	  gfree(key);
	  break;
	}
	obj->dictAdd(key, getObj(&obj2, gFalse,
				 fileKey, encAlgorithm, keyLength,
				 objNum, objGen, recursion + 1));
      }
    }
    if (buf1.isEOF())
      error(errSyntaxError, getPos(), "End of file inside dictionary");
    // stream objects are not allowed inside content streams or
    // object streams
    if (allowStreams && buf2.isCmd("stream")) {
      if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
			    objNum, objGen, recursion + 1))) {
	obj->initStream(str);
      } else {
	obj->free();
	obj->initError();
      }
    } else {
      shift();
    }

  // indirect reference or integer
  } else if (buf1.isInt()) {
    num = buf1.getInt();
    shift();
    if (buf1.isInt() && buf2.isCmd("R")) {
      obj->initRef(num, buf1.getInt());
      shift();
      shift();
    } else {
      obj->initInt(num);
    }

  // string
  } else if (buf1.isString() && fileKey) {
    s = buf1.getString();
    s2 = new GString();
    obj2.initNull();
    decrypt = new DecryptStream(new MemStream(s->getCString(), 0,
					      s->getLength(), &obj2),
				fileKey, encAlgorithm, keyLength,
				objNum, objGen);
    decrypt->reset();
    while ((c = decrypt->getChar()) != EOF) {
      s2->append((char)c);
    }
    delete decrypt;
    obj->initString(s2);
    shift();

  // simple object
  } else {
    buf1.copy(obj);
    shift();
  }

  return obj;
}

Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
			   CryptAlgorithm encAlgorithm, int keyLength,
			   int objNum, int objGen, int recursion) {
  Object obj;
  BaseStream *baseStr;
  Stream *str, *str2;
  GFileOffset pos, endPos, length;
  char endstreamBuf[8];
  GBool foundEndstream;
  int c, i;

  // get stream start position
  lexer->skipToNextLine();
  if (!(str = lexer->getStream())) {
    return NULL;
  }
  pos = str->getPos();

  // check for length in damaged file
  if (xref && xref->getStreamEnd(pos, &endPos)) {
    length = endPos - pos;

  // get length from the stream object
  } else {
    dict->dictLookup("Length", &obj, recursion);
    if (obj.isInt()) {
      length = (GFileOffset)(Guint)obj.getInt();
      obj.free();
    } else {
      error(errSyntaxError, getPos(), "Bad 'Length' attribute in stream");
      obj.free();
      return NULL;
    }
  }

  // in badly damaged PDF files, we can run off the end of the input
  // stream immediately after the "stream" token
  if (!lexer->getStream()) {
    return NULL;
  }
  // copy the base stream (Lexer will free stream objects when it gets
  // to end of stream -- which can happen in the shift() calls below)
  baseStr = (BaseStream *)lexer->getStream()->getBaseStream()->copy();

  // make new base stream
  str = baseStr->makeSubStream(pos, gTrue, length, dict);

  // skip over stream data
  lexer->setPos(pos + length);

  // check for 'endstream'
  // NB: we never reuse the Parser object to parse objects after a
  // stream, and we could (if the PDF file is damaged) be in the
  // middle of binary data at this point, so we check the stream data
  // directly for 'endstream', rather than calling shift() to parse
  // objects
  foundEndstream = gFalse;
  if ((str2 = lexer->getStream())) {
    // skip up to 100 whitespace chars
    for (i = 0; i < 100; ++i) {
      c = str2->getChar();
      if (!Lexer::isSpace(c)) {
	break;
      }
    }
    if (c == 'e') {
      if (str2->getBlock(endstreamBuf, 8) == 8 ||
	  !memcmp(endstreamBuf, "ndstream", 8)) {
	foundEndstream = gTrue;
      }
    }
  }
  if (!foundEndstream) {
    error(errSyntaxError, getPos(), "Missing 'endstream'");
    // kludge for broken PDF files: just add 5k to the length, and
    // hope it's enough
    // (dict is now owned by str, so we need to copy it before deleting str)
    dict->copy(&obj);
    delete str;
    length += 5000;
    str = baseStr->makeSubStream(pos, gTrue, length, &obj);
  }

  // free the copied base stream
  delete baseStr;

  // handle decryption
  if (fileKey) {
    str = new DecryptStream(str, fileKey, encAlgorithm, keyLength,
			    objNum, objGen);
  }

  // get filters
  str = str->addFilters(dict, recursion);

  return str;
}

void Parser::shift() {
  if (inlineImg > 0) {
    if (inlineImg < 2) {
      ++inlineImg;
    } else {
      // in a damaged content stream, if 'ID' shows up in the middle
      // of a dictionary, we need to reset
      inlineImg = 0;
    }
  } else if (buf2.isCmd("ID")) {
    lexer->skipChar();		// skip char after 'ID' command
    inlineImg = 1;
  }
  buf1.free();
  buf1 = buf2;
  if (inlineImg > 0)		// don't buffer inline image data
    buf2.initNull();
  else
    lexer->getObj(&buf2);
}