Files
Yajbir Singh f1b860b25c
Some checks failed
check / markdownlint (push) Has been cancelled
check / spellchecker (push) Has been cancelled
updated
2025-12-11 19:03:17 +05:30

261 lines
6.8 KiB
C++

//========================================================================
//
// pdftohtml.cc
//
// Copyright 2005 Glyph & Cog, LLC
//
//========================================================================
#include <aconf.h>
#include <stdio.h>
#include <stdlib.h>
#include "gmem.h"
#include "gmempp.h"
#include "parseargs.h"
#include "gfile.h"
#include "GString.h"
#include "GlobalParams.h"
#include "PDFDoc.h"
#include "HTMLGen.h"
#include "Error.h"
#include "ErrorCodes.h"
#include "config.h"
//------------------------------------------------------------------------
static GBool createIndex(char *htmlDir);
//------------------------------------------------------------------------
// Command-line option values.  Each variable below is bound to an option
// in argDesc[], which main() hands to parseArgs(); the initializers are
// the defaults used when the option is absent.

// -f: first page to convert; main() raises any value below 1 to 1.
static int firstPage = 1;
// -l: last page to convert; main() replaces a value below 1 (the default)
// or past the end of the document with the document's page count.
static int lastPage = 0;
// -z: zoom factor passed to HTMLGen::setZoom().
static double zoom = 1;
// -r: resolution in DPI, passed to the HTMLGen constructor.
static int resolution = 150;
// -nofonts: main() passes the negation to HTMLGen::setExtractFontFiles().
static GBool noFonts = gFalse;
// -skipinvisible: main() passes the negation to
// HTMLGen::setDrawInvisibleText().
static GBool skipInvisible = gFalse;
// -allinvisible: passed to HTMLGen::setAllTextInvisible().
static GBool allInvisible = gFalse;
// -opw / -upw: passwords for encrypted files.  The "\001" initializer is a
// "not supplied" sentinel: main() only builds a password GString when the
// first byte differs from '\001'.
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";
// -q: when set, main() calls globalParams->setErrQuiet().
static GBool quiet = gFalse;
// -cfg: configuration file name handed to the GlobalParams constructor
// (empty string by default).
static char cfgFileName[256] = "";
// -v: print version/copyright banner only.
static GBool printVersion = gFalse;
// -h / -help / --help / -?: print banner plus usage.
static GBool printHelp = gFalse;
// Command-line option table passed to parseArgs() and printUsage() in
// main().  Each entry lists, in order: the option name, the kind of value
// it takes (argInt / argFP / argFlag / argString), the variable that
// receives the value, a size field (sizeof the destination buffer for
// string options, 0 otherwise), and the help text.
// NOTE(review): the exact meaning of each field is defined by ArgDesc in
// parseargs.h, which is not visible here -- confirm against that header.
static ArgDesc argDesc[] = {
{"-f", argInt, &firstPage, 0,
"first page to convert"},
{"-l", argInt, &lastPage, 0,
"last page to convert"},
{"-z", argFP, &zoom, 0,
"initial zoom level (1.0 means 72dpi)"},
{"-r", argInt, &resolution, 0,
"resolution, in DPI (default is 150)"},
{"-nofonts", argFlag, &noFonts, 0,
"do not extract embedded fonts"},
{"-skipinvisible", argFlag, &skipInvisible, 0,
"do not draw invisible text"},
{"-allinvisible", argFlag, &allInvisible, 0,
"treat all text as invisible"},
{"-opw", argString, ownerPassword, sizeof(ownerPassword),
"owner password (for encrypted files)"},
{"-upw", argString, userPassword, sizeof(userPassword),
"user password (for encrypted files)"},
{"-q", argFlag, &quiet, 0,
"don't print any messages or errors"},
{"-cfg", argString, cfgFileName, sizeof(cfgFileName),
"configuration file to use in place of .xpdfrc"},
{"-v", argFlag, &printVersion, 0,
"print copyright and version info"},
// The four spellings below all set the same printHelp flag.
{"-h", argFlag, &printHelp, 0,
"print usage information"},
{"-help", argFlag, &printHelp, 0,
"print usage information"},
{"--help", argFlag, &printHelp, 0,
"print usage information"},
{"-?", argFlag, &printHelp, 0,
"print usage information"},
// End-of-table marker.
{NULL}
};
//------------------------------------------------------------------------
// Output callback handed to HTMLGen::convertPage(): appends `size` bytes
// from `data` to the stdio stream passed (type-erased) in `file`.
// Returns the number of bytes fwrite() reports as written.
static int writeToFile(void *file, const char *data, int size) {
  FILE *stream = (FILE *)file;
  size_t nWritten = fwrite(data, 1, size, stream);
  return (int)nWritten;
}
// Program entry point.  Converts pages firstPage..lastPage of a PDF file
// into page<N>.html / page<N>.png files inside an output directory, then
// writes an index.html linking to them.
//
// Usage: pdftohtml [options] <PDF-file> <html-dir>
//
// Exit codes:
//    0 - success
//    1 - the PDF document could not be opened
//    2 - the output directory or an output file could not be created, or
//        a page / the index could not be written
//    3 - the document does not permit copying
//   99 - anything else (bad arguments, -v / -h, HTMLGen setup failure)
int main(int argc, char *argv[]) {
  PDFDoc *doc;
  char *fileName;
  char *htmlDir;
  GString *ownerPW, *userPW;
  HTMLGen *htmlGen;
  GString *htmlFileName, *pngFileName, *pngURL;
  FILE *htmlFile, *pngFile;
  int pg, err, exitCode;
  GBool ok;

  exitCode = 99;

  // parse args
  fixCommandLine(&argc, &argv);
  ok = parseArgs(argDesc, &argc, argv);
  if (!ok || argc != 3 || printVersion || printHelp) {
    fprintf(stderr, "pdftohtml version %s [www.xpdfreader.com]\n", xpdfVersion);
    fprintf(stderr, "%s\n", xpdfCopyright);
    if (!printVersion) {
      printUsage("pdftohtml", "<PDF-file> <html-dir>", argDesc);
    }
    goto err0;
  }
  fileName = argv[1];
  htmlDir = argv[2];

  // read config file
  globalParams = new GlobalParams(cfgFileName);
  if (quiet) {
    globalParams->setErrQuiet(quiet);
  }
  globalParams->setupBaseFonts(NULL);
  globalParams->setTextEncoding("UTF-8");

  // open PDF file; a password buffer still holding the '\001' sentinel
  // means the corresponding option was not given
  if (ownerPassword[0] != '\001') {
    ownerPW = new GString(ownerPassword);
  } else {
    ownerPW = NULL;
  }
  if (userPassword[0] != '\001') {
    userPW = new GString(userPassword);
  } else {
    userPW = NULL;
  }
  doc = new PDFDoc(fileName, ownerPW, userPW);
  if (userPW) {
    delete userPW;
  }
  if (ownerPW) {
    delete ownerPW;
  }
  if (!doc->isOk()) {
    exitCode = 1;
    goto err1;
  }

  // check for copy permission
  if (!doc->okToCopy()) {
    error(errNotAllowed, -1,
	  "Copying of text from this document is not allowed.");
    exitCode = 3;
    goto err1;
  }

  // get page range
  if (firstPage < 1) {
    firstPage = 1;
  }
  if (lastPage < 1 || lastPage > doc->getNumPages()) {
    lastPage = doc->getNumPages();
  }

  // create HTML directory
  if (makeDir(htmlDir, 0755)) {
    error(errIO, -1, "Couldn't create HTML output directory '{0:s}'",
	  htmlDir);
    exitCode = 2;
    goto err1;
  }

  // set up the HTMLGen object
  htmlGen = new HTMLGen(resolution);
  if (!htmlGen->isOk()) {
    exitCode = 99;
    // the object was allocated even though setup failed, so leave via
    // err2, which deletes it (err1 would leak it)
    goto err2;
  }
  htmlGen->setZoom(zoom);
  htmlGen->setDrawInvisibleText(!skipInvisible);
  htmlGen->setAllTextInvisible(allInvisible);
  htmlGen->setExtractFontFiles(!noFonts);
  htmlGen->startDoc(doc);

  // convert the pages
  for (pg = firstPage; pg <= lastPage; ++pg) {
    htmlFileName = GString::format("{0:s}/page{1:d}.html", htmlDir, pg);
    pngFileName = GString::format("{0:s}/page{1:d}.png", htmlDir, pg);
    if (!(htmlFile = openFile(htmlFileName->getCString(), "wb"))) {
      error(errIO, -1, "Couldn't open HTML file '{0:t}'", htmlFileName);
      delete htmlFileName;
      delete pngFileName;
      // failing to open an output file is an output error, not "other"
      exitCode = 2;
      goto err2;
    }
    if (!(pngFile = openFile(pngFileName->getCString(), "wb"))) {
      error(errIO, -1, "Couldn't open PNG file '{0:t}'", pngFileName);
      fclose(htmlFile);
      delete htmlFileName;
      delete pngFileName;
      exitCode = 2;
      goto err2;
    }
    // the HTML refers to the PNG by a name relative to the output dir
    pngURL = GString::format("page{0:d}.png", pg);
    err = htmlGen->convertPage(pg, pngURL->getCString(), htmlDir,
			       &writeToFile, htmlFile,
			       &writeToFile, pngFile);
    delete pngURL;
    fclose(htmlFile);
    fclose(pngFile);
    delete htmlFileName;
    delete pngFileName;
    if (err != errNone) {
      error(errIO, -1, "Error converting page {0:d}", pg);
      exitCode = 2;
      goto err2;
    }
  }

  // create the master index
  if (!createIndex(htmlDir)) {
    exitCode = 2;
    goto err2;
  }

  exitCode = 0;

  // clean up
 err2:
  delete htmlGen;
 err1:
  delete doc;
  delete globalParams;
 err0:

  // check for memory leaks
  Object::memCheck(stderr);
  gMemReport(stderr);

  return exitCode;
}
// Writes "<htmlDir>/index.html": a minimal HTML page containing one link
// per converted page, covering the file-level range firstPage..lastPage.
//
// Returns gTrue on success.  If the file cannot be opened, or writing or
// closing it fails, an errIO error is reported and gFalse is returned.
static GBool createIndex(char *htmlDir) {
  GString *htmlFileName;
  FILE *html;
  int pg;
  GBool ok;

  htmlFileName = GString::format("{0:s}/index.html", htmlDir);
  html = openFile(htmlFileName->getCString(), "w");
  if (!html) {
    // the name is still needed by error(), so it is only freed afterwards
    error(errIO, -1, "Couldn't open HTML file '{0:t}'", htmlFileName);
    delete htmlFileName;
    return gFalse;
  }

  fprintf(html, "<html>\n");
  fprintf(html, "<body>\n");
  for (pg = firstPage; pg <= lastPage; ++pg) {
    fprintf(html, "<a href=\"page%d.html\">page %d</a><br>\n", pg, pg);
  }
  fprintf(html, "</body>\n");
  fprintf(html, "</html>\n");

  // a failed fprintf leaves the stream's error flag set; fclose() can
  // also fail while flushing buffered output
  ok = ferror(html) ? gFalse : gTrue;
  if (fclose(html) != 0) {
    ok = gFalse;
  }
  if (!ok) {
    error(errIO, -1, "Couldn't write HTML file '{0:t}'", htmlFileName);
  }

  delete htmlFileName;
  return ok;
}