DocumentServer-v-9.2.0/core/PdfFile/lib/xpdf/TextString.cc

//========================================================================
//
// TextString.cc
//
// Copyright 2011-2013 Glyph & Cog, LLC
//
//========================================================================

#include <aconf.h>

#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif

#include <string.h>
#include "gmem.h"
#include "gmempp.h"
#include "GString.h"
#include "PDFDocEncoding.h"
#include "UTF8.h"
#include "TextString.h"

//------------------------------------------------------------------------

TextString::TextString() {
  u = NULL;
  len = size = 0;
  bPDFDocEncoding = true;
}

TextString::TextString(GString *s) {
  u = NULL;
  len = size = 0;
  bPDFDocEncoding = true;
  append(s);
}

TextString::TextString(TextString *s) {
  len = size = s->len;
  bPDFDocEncoding = s->bPDFDocEncoding;
  if (len) {
    u = (Unicode *)gmallocn(size, sizeof(Unicode));
    memcpy(u, s->u, len * sizeof(Unicode));
  } else {
    u = NULL;
  }
}

TextString::~TextString() {
  gfree(u);
}

TextString *TextString::append(Unicode c) {
  expand(1);
  u[len] = c;
  ++len;
  return this;
}

TextString *TextString::append(GString *s) {
  return insert(len, s);
}

TextString *TextString::insert(int idx, Unicode c) {
  if (idx >= 0 && idx <= len) {
    expand(1);
    if (idx < len) {
      memmove(u + idx + 1, u + idx, (len - idx) * sizeof(Unicode));
    }
    u[idx] = c;
    ++len;
  }
  return this;
}

TextString *TextString::insert(int idx, Unicode *u2, int n) {
  if (idx >= 0 && idx <= len) {
    expand(n);
    if (idx < len) {
      memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode));
    }
    memcpy(u + idx, u2, n * sizeof(Unicode));
    len += n;
  }
  return this;
}

TextString *TextString::insert(int idx, GString *s) {
  Unicode uBuf[100];
  int n, i;

  if (idx >= 0 && idx <= len) {
    // look for a UTF-16BE BOM
    if ((s->getChar(0) & 0xff) == 0xfe &&
	(s->getChar(1) & 0xff) == 0xff) {
	  bPDFDocEncoding = false;
      i = 2;
      n = 0;
      while (getUTF16BE(s, &i, uBuf + n)) {
	++n;
	if (n == sizeof(uBuf) / sizeof(Unicode)) {
	  insert(idx, uBuf, n);
	  idx += n;
	  n = 0;
	}
      }
      if (n > 0) {
	insert(idx, uBuf, n);
      }

    // look for a UTF-16LE BOM
    // (technically, this isn't allowed by the PDF spec, but some
    // PDF files use it)
    } else if ((s->getChar(0) & 0xff) == 0xff &&
	       (s->getChar(1) & 0xff) == 0xfe) {
	  bPDFDocEncoding = false;
      i = 2;
      n = 0;
      while (getUTF16LE(s, &i, uBuf + n)) {
	++n;
	if (n == sizeof(uBuf) / sizeof(Unicode)) {
	  insert(idx, uBuf, n);
	  idx += n;
	  n = 0;
	}
      }
      if (n > 0) {
	insert(idx, uBuf, n);
      }

    // look for a UTF-8 BOM
    } else if ((s->getChar(0) & 0xff) == 0xef &&
	       (s->getChar(1) & 0xff) == 0xbb &&
	       (s->getChar(2) & 0xff) == 0xbf) {
	  bPDFDocEncoding = false;
      i = 3;
      n = 0;
      while (getUTF8(s, &i, uBuf + n)) {
	++n;
	if (n == sizeof(uBuf) / sizeof(Unicode)) {
	  insert(idx, uBuf, n);
	  idx += n;
	  n = 0;
	}
      }
      if (n > 0) {
	insert(idx, uBuf, n);
      }

    // otherwise, use PDFDocEncoding
    } else {
	  bPDFDocEncoding = true;
      n = s->getLength();
      expand(n);
      if (idx < len) {
	memmove(u + idx + n, u + idx, (len - idx) * sizeof(Unicode));
      }
      for (i = 0; i < n; ++i) {
	u[idx + i] = pdfDocEncoding[s->getChar(i) & 0xff];
      }
      len += n;
    }
  }
  return this;
}

void TextString::expand(int delta) {
  int newLen;

  newLen = len + delta;
  if (delta > INT_MAX - len) {
    // trigger an out-of-memory error
    size = -1;
  } else if (newLen <= size) {
    return;
  } else if (size > 0 && size <= INT_MAX / 2 && size*2 >= newLen) {
    size *= 2;
  } else {
    size = newLen;
  }
  u = (Unicode *)greallocn(u, size, sizeof(Unicode));
}

GString *TextString::toPDFTextString() {
  GString *s;
  GBool useUnicode;
  int i;

  useUnicode = gFalse;
  for (i = 0; i < len; ++i) {
    if (u[i] >= 0x80) {
      useUnicode = gTrue;
      break;
    }
  }
  s = new GString();
  if (useUnicode) {
    s->append((char)0xfe);
    s->append((char)0xff);
    for (i = 0; i < len; ++i) {
      s->append((char)(u[i] >> 8));
      s->append((char)u[i]);
    }
  } else {
    for (i = 0; i < len; ++i) {
      s->append((char)u[i]);
    }
  }
  return s;
}

GString *TextString::toUTF8() {
  GString *s = new GString();
  for (int i = 0; i < len; ++i) {
    char buf[8];
    int n = mapUTF8(u[i], buf, sizeof(buf));
    s->append(buf, n);
  }
  return s;
}