Files
DocumentServer-v-9.2.0/core/PdfFile/lib/xpdf/UTF8.cc
Yajbir Singh f1b860b25c
Some checks failed
check / markdownlint (push) Has been cancelled
check / spellchecker (push) Has been cancelled
updated
2025-12-11 19:03:17 +05:30

184 lines
4.5 KiB
C++

//========================================================================
//
// UTF8.cc
//
// Copyright 2001-2017 Glyph & Cog, LLC
//
//========================================================================
#include <aconf.h>
#include "UTF8.h"
// Encode the code point [u] as UTF-8 into [buf], which has room for
// [bufSize] bytes.  Returns the number of bytes stored (1 to 4).
// Returns 0, leaving [buf] untouched, if [u] is above 0x10ffff or the
// encoding would not fit in [bufSize] bytes.  No terminating NUL is
// written.
int mapUTF8(Unicode u, char *buf, int bufSize) {
  // Marker bits for the lead byte, indexed by total encoded length.
  static const unsigned char leadMark[5] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0 };
  int nBytes, k;

  // The magnitude of the code point fixes the encoded length.
  if (u <= 0x0000007f) {
    nBytes = 1;
  } else if (u <= 0x000007ff) {
    nBytes = 2;
  } else if (u <= 0x0000ffff) {
    nBytes = 3;
  } else if (u <= 0x0010ffff) {
    nBytes = 4;
  } else {
    return 0;
  }
  if (bufSize < nBytes) {
    return 0;
  }

  // Fill the continuation bytes from last to first, peeling six payload
  // bits off the low end each time; what remains belongs in the lead byte.
  for (k = nBytes - 1; k > 0; --k) {
    buf[k] = (char)(0x80 | (u & 0x3f));
    u >>= 6;
  }
  buf[0] = (char)(leadMark[nBytes] | u);
  return nBytes;
}
// Store the code point [u] in [buf] as two bytes, most significant byte
// first.  Returns 2 on success.  Returns 0, leaving [buf] untouched, if
// [u] is above 0xffff or [bufSize] is less than 2.
int mapUCS2(Unicode u, char *buf, int bufSize) {
  // A value above 0xffff is rejected before the buffer size is considered;
  // either failure produces the same result.
  if (u > 0xffff || bufSize < 2) {
    return 0;
  }
  buf[0] = (char)((u >> 8) & 0xff);
  buf[1] = (char)(u & 0xff);
  return 2;
}
// Decode one UTF-8 sequence from [s], starting at byte index [*i].
//
// Returns gFalse, changing nothing, if [*i] is at or past the end of
// [s].  Otherwise stores a value in [*u], advances [*i] past the bytes
// that were consumed, and returns gTrue.
//
// Lead bytes 0xc0..0xfd introduce sequences of two to six bytes (the
// original five- and six-byte forms are still accepted).  If the
// sequence is truncated, or any of its trailing bytes is not of the form
// 10xxxxxx, only the lead byte is consumed and its raw value is returned.
// The same one-byte pass-through applies to 0xfe, 0xff, and to a
// continuation byte (0x80..0xbf) found where a lead byte is expected; the
// latter is never treated as the start of a two-byte sequence.
//
// Overlong encodings and encoded surrogates are not rejected.
GBool getUTF8(GString *s, int *i, Unicode *u) {
  Guchar lead, trail;
  Unicode code;
  int len, nTrail, k;

  len = s->getLength();
  if (*i >= len) {
    return gFalse;
  }
  lead = (Guchar)s->getChar((*i)++);

  // Classify the lead byte: how many continuation bytes follow, and which
  // of its low bits carry payload.
  if (lead < 0x80) {
    // plain ASCII
    nTrail = 0;
    code = (Unicode)lead;
  } else if (lead < 0xc0) {
    // stray continuation byte: not a valid lead, pass it through
    nTrail = 0;
    code = (Unicode)lead;
  } else if (lead < 0xe0) {
    nTrail = 1;
    code = (Unicode)(lead & 0x1f);
  } else if (lead < 0xf0) {
    nTrail = 2;
    code = (Unicode)(lead & 0x0f);
  } else if (lead < 0xf8) {
    nTrail = 3;
    code = (Unicode)(lead & 0x07);
  } else if (lead < 0xfc) {
    nTrail = 4;
    code = (Unicode)(lead & 0x03);
  } else if (lead < 0xfe) {
    nTrail = 5;
    code = (Unicode)(lead & 0x01);
  } else {
    // 0xfe and 0xff never appear in UTF-8
    nTrail = 0;
    code = (Unicode)lead;
  }

  // Truncated sequence: fall back to the raw lead byte.
  if (nTrail > len - *i) {
    *u = (Unicode)lead;
    return gTrue;
  }

  // Append six payload bits per continuation byte.  Nothing beyond the
  // lead byte is consumed unless every trailing byte checks out.
  for (k = 0; k < nTrail; ++k) {
    trail = (Guchar)s->getChar(*i + k);
    if ((trail & 0xc0) != 0x80) {
      *u = (Unicode)lead;
      return gTrue;
    }
    code = (code << 6) | (Unicode)(trail & 0x3f);
  }
  *i += nTrail;
  *u = code;
  return gTrue;
}
// Decode one code point from big-endian UTF-16 data in [s], starting at
// byte index [*i].
//
// Returns gFalse, changing nothing, if fewer than two bytes remain.
// Otherwise stores a value in [*u], advances [*i] past the bytes that
// were consumed, and returns gTrue.
//
// A high surrogate (0xd800..0xdbff) immediately followed by a low
// surrogate (0xdc00..0xdfff) is combined into a single value in the
// range 0x10000..0x10ffff and four bytes are consumed.  An unpaired
// surrogate -- a low surrogate on its own, or a high surrogate that is
// followed by something else or by the end of the data -- is returned
// unchanged and only its own two bytes are consumed, so the following
// unit is decoded by the next call.
GBool getUTF16BE(GString *s, int *i, Unicode *u) {
  int w0, w1;

  if (*i >= s->getLength() - 1) {
    return gFalse;
  }
  // Mask each byte to 8 bits before combining.
  w0 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
  *i += 2;
  *u = (Unicode)w0;

  // Only a high surrogate can start a pair, and only if another full
  // 16-bit unit is available.
  if (w0 >= 0xd800 && w0 < 0xdc00 && *i < s->getLength() - 1) {
    w1 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
    if (w1 >= 0xdc00 && w1 < 0xe000) {
      *i += 2;
      *u = (Unicode)(0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00));
    }
  }
  return gTrue;
}
// Decode one code point from little-endian UTF-16 data in [s], starting
// at byte index [*i].
//
// Returns gFalse, changing nothing, if fewer than two bytes remain.
// Otherwise stores a value in [*u], advances [*i] past the bytes that
// were consumed, and returns gTrue.
//
// A high surrogate (0xd800..0xdbff) immediately followed by a low
// surrogate (0xdc00..0xdfff) is combined into a single value in the
// range 0x10000..0x10ffff and four bytes are consumed.  An unpaired
// surrogate -- a low surrogate on its own, or a high surrogate that is
// followed by something else or by the end of the data -- is returned
// unchanged and only its own two bytes are consumed, so the following
// unit is decoded by the next call.
GBool getUTF16LE(GString *s, int *i, Unicode *u) {
  int w0, w1;

  if (*i >= s->getLength() - 1) {
    return gFalse;
  }
  // Low byte comes first; mask each byte to 8 bits before combining.
  w0 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
  *i += 2;
  *u = (Unicode)w0;

  // Only a high surrogate can start a pair, and only if another full
  // 16-bit unit is available.
  if (w0 >= 0xd800 && w0 < 0xdc00 && *i < s->getLength() - 1) {
    w1 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
    if (w1 >= 0xdc00 && w1 < 0xe000) {
      *i += 2;
      *u = (Unicode)(0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00));
    }
  }
  return gTrue;
}