init repo
This commit is contained in:
31
ActiveX/ASCOfficeUtils/GOCR/headers/amiga.h
Normal file
31
ActiveX/ASCOfficeUtils/GOCR/headers/amiga.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
this file was suggested by Uffe Holst Jun05,2000
|
||||
to compile gocr using SAS/C under AmigaOS
|
||||
|
||||
uhc@post6.tele.dk
|
||||
|
||||
SAS/C probably does not support ANSI C++, hence these changes
|
||||
|
||||
I am a little bit confused about using declaration and
|
||||
macro definition of abs(). I think that should not be necessary.
|
||||
Tell me, if you have an Amiga and you can give answer
|
||||
to the following questions.
|
||||
|
||||
Joerg Schulenburg, see README for EMAIL-address
|
||||
|
||||
*/
|
||||
|
||||
#ifdef _AMIGA
|
||||
#ifdef __SASC
|
||||
#if 0
|
||||
#include <string.h> /* may be this can be removed ??? */
|
||||
#include <stdlib.h> /* may be this can be removed ??? */
|
||||
extern int abs(int); /* may be this can be removed ??? */
|
||||
#endif
|
||||
#ifndef abs
|
||||
/* Fallback absolute value, used only when abs is not already a macro.
 * The argument is evaluated twice, so avoid arguments with side effects. */
#define abs(v) (0 > (v) ? -(v) : (v))
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
11
ActiveX/ASCOfficeUtils/GOCR/headers/barcode.h
Normal file
11
ActiveX/ASCOfficeUtils/GOCR/headers/barcode.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#ifndef _BARCODE_H
#define _BARCODE_H
#include "pnm.h"
#include "gocr.h" /* job_t is typedef'd in gocr.h, not in pnm.h */

/*
 * detect barcode and add a string to the box (obj-pointer)
 *
 * job: the OCR job to examine.
 * NOTE(review): the meaning of the int return value is not visible from
 * this header - confirm against the implementation.
 */
int detect_barcode(job_t *job);

#endif
|
||||
295
ActiveX/ASCOfficeUtils/GOCR/headers/gocr.h
Normal file
295
ActiveX/ASCOfficeUtils/GOCR/headers/gocr.h
Normal file
@@ -0,0 +1,295 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2006 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
sometimes I have written comments in german language, sorry for that
|
||||
|
||||
- look for ??? for preliminary code
|
||||
*/
|
||||
|
||||
/* General headerfile with gocr-definitions */
|
||||
|
||||
#ifndef __GOCR_H__
|
||||
#define __GOCR_H__
|
||||
|
||||
#include "pnm.h"
|
||||
#include "unicode.h"
|
||||
#include "list.h"
|
||||
#include <stddef.h>
|
||||
#ifdef HAVE_GETTIMEOFDAY
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* wchar_t should always exist (ANSI), but WCHAR.H is sometimes missing
|
||||
* USE_UNICODE should be removed or replaced by HAVE_WCHAR_H in future
|
||||
*/
|
||||
#ifdef HAVE_WCHAR_H
|
||||
#define USE_UNICODE 1
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
/* ------------------------ feature extraction ----------------- */
|
||||
#define AT 7 /* mark */
|
||||
#define M1 1 /* mark */
|
||||
enum direction {
|
||||
UP=1, DO, RI, LE
|
||||
};
|
||||
typedef enum direction DIRECTION;
|
||||
#define ST 7 /* stop */
|
||||
/* ------------------------------------------------------------- */
|
||||
/* detect maxima of line overlaps (returned in %) and line coordinates */
|
||||
#define HOR 1 /* horizontal */
|
||||
#define VER 2 /* vertikal */
|
||||
#define RIS 3 /* rising=steigend */
|
||||
#define FAL 4 /* falling=fallend */
|
||||
|
||||
#define MAXlines 1024
|
||||
|
||||
/* ToDo: if we have a tree instead of a list, a line could be a node object */
|
||||
struct tlines {
|
||||
int num;
|
||||
int dx, dy; /* direction of text lines (straight/skew) */
|
||||
int m1[MAXlines], /* start of line = upper bound of 'A' */
|
||||
m2[MAXlines], /* upper bound of 'e' */
|
||||
m3[MAXlines], /* lower bound of 'e' = baseline */
|
||||
m4[MAXlines]; /* stop of line = lower bound of 'q' */
|
||||
/* ToDo: add sureness per m1,m2 etc? */
|
||||
int x0[MAXlines],
|
||||
x1[MAXlines]; /* left and right border */
|
||||
int wt[MAXlines]; /* weight, how sure thats correct in percent, v0.41 */
|
||||
int pitch[MAXlines]; /* word pitch (later per box?), v0.41 */
|
||||
int mono[MAXlines]; /* spacing type, 0=proportional, 1=monospaced */
|
||||
};
|
||||
|
||||
#define NumAlt 10 /* maximal number of alternative chars (table length) */
|
||||
#define MaxNumFrames 8 /* maximum number of frames per char/box */
|
||||
#define MaxFrameVectors 128 /* maximum vectors per frame (*8=1KB/box) */
|
||||
/* ToDo: use only malloc_box(),free_box(),copybox() for creation, destroy etc.
|
||||
* adding reference_counter to avoid pointer pointing to freed box
|
||||
*/
|
||||
struct box { /* this structure should contain all pixel infos of a letter */
|
||||
int x0,x1,y0,y1,x,y,dots; /* xmin,xmax,ymin,ymax,reference-pixel,i-dots */
|
||||
int num_boxes, /* 1 "abc", 2 "!i?", 3 "ä" (composed objects) 0.41 */
|
||||
num_subboxes; /* 1 for "abdegopqADOPQR", 2 for "B" (holes) 0.41 */
|
||||
wchar_t c; /* detected char (same as tac[0], obsolete?) */
|
||||
wchar_t modifier; /* default=0, see compose() in unicode.c */
|
||||
int num; /* same number = same char */
|
||||
int line; /* line number (points to struct tlines lines) */
|
||||
int m1,m2,m3,m4; /* m2 = upper boundary, m3 = baseline */
|
||||
/* planed: sizeof hole_1, hole_2, certainty (run1=100%,run2=90%,etc.) */
|
||||
pix *p; /* pointer to pixmap (v0.2.5) */
|
||||
/* tac, wac is used together with setac() to manage very similar chars */
|
||||
int num_ac; /* length of table (alternative chars), default=0 */
|
||||
wchar_t tac[NumAlt]; /* alternative chars, only used by setac(),getac() */
|
||||
int wac[NumAlt]; /* weight of alternative chars */
|
||||
char *tas[NumAlt]; /* alternative UTF8-strings or XML codes if tac[]=0 */
|
||||
/* replacing old obj */
|
||||
/* ToDo: (*obj)[NumAlt] + olen[NumAlt] ??? */
|
||||
/* ToDo: bitmap for possible Picture|Object|Char ??? */
|
||||
/* char *obj; */ /* pointer to text-object ... -> replaced by tas[] */
|
||||
/* ... (melted chars, barcode, picture coords, ...) */
|
||||
/* must be freed before box is freed! */
|
||||
/* do _not_ copy only the pointer to object */
|
||||
/* --------------------------------------------------------
|
||||
* extension since v0.41 js05, Store frame vectors,
|
||||
* which is a table of vectors sourrounding the char and its
|
||||
* inner white holes. The advantage is the independence from
|
||||
* resolution, handling of holes, overlap and rotation.
|
||||
* --------------------------------------------------------- */
|
||||
int num_frames; /* number of frames: 1 for cfhklmnrstuvwxyz */
|
||||
/* 2 for abdegijopq */
|
||||
int frame_vol[MaxNumFrames]; /* volume inside frame +/- (black/white) */
|
||||
int frame_per[MaxNumFrames]; /* periphery, summed length of vectors */
|
||||
int num_frame_vectors[MaxNumFrames]; /* index to next frame */
|
||||
/* biggest frame should be stored first (outer frame) */
|
||||
/* biggest has the maximum pair distance */
|
||||
/* num vector loops */
|
||||
int frame_vector[MaxFrameVectors][2]; /* may be 16*int=fixpoint_number */
|
||||
|
||||
};
|
||||
typedef struct box Box;
|
||||
|
||||
/* true if the coordination pair (a,b) is outside the image p */
|
||||
/* Every argument is parenthesized so that expression arguments such as
 * `x | 1` or `c ? a : b` are compared as a whole. a and b are each
 * evaluated up to twice: do not pass expressions with side effects. */
#define outbounds(p, a, b) ((a) < 0 || (b) < 0 || (a) >= (p)->x || (b) >= (p)->y)
|
||||
|
||||
/* ToDo: this structure seems to be obsolete, remove it */
|
||||
typedef struct path {
|
||||
int start; /* color at the beginning of the path, (0=white, 1=black) */
|
||||
int *x; /* x coordinates of transitions */
|
||||
int *y; /* y coordinates of transitions */
|
||||
int num; /* current number of entries in x or y */
|
||||
int max; /* maximum number of entries in x or y */
|
||||
/* (if more values need to be stored, the arrays are enlarged) */
|
||||
} path_t;
|
||||
|
||||
/* job_t contains all information needed for an OCR task */
|
||||
typedef struct job_s {
|
||||
struct { /* source data */
|
||||
char *fname; /* input filename; default value: "-" */
|
||||
pix p; /* source pixel data, pixelmap 8bit gray */
|
||||
} src;
|
||||
struct { /* temporary stuff, e.g. buffers */
|
||||
#ifdef HAVE_GETTIMEOFDAY
|
||||
struct timeval init_time; /* starting time of this job */
|
||||
#endif
|
||||
pix ppo; /* pixmap for visual debugging output, obsolete */
|
||||
|
||||
/* sometimes recognition function is called again and again, if result was 0
|
||||
n_run tells the pixel function to return alternative results */
|
||||
int n_run; /* num of run, if run_2 critical pattern get other results */
|
||||
/* used for 2nd try, pixel uses slower filter function etc. */
|
||||
List dblist; /* list of boxes loaded from the character database */
|
||||
} tmp;
|
||||
struct { /* results */
|
||||
List boxlist; /* store every object in a box, which contains */
|
||||
/* the characteristics of the object (see struct box) */
|
||||
List linelist; /* recognized text lines after recognition */
|
||||
|
||||
struct tlines lines; /* used to access to line-data (statistics) */
|
||||
/* here the positions (frames) of lines are */
|
||||
/* stored for further use */
|
||||
int avX,avY; /* average X,Y (avX=sumX/numC) */
|
||||
int sumX,sumY,numC; /* sum of all X,Y; num chars */
|
||||
} res;
|
||||
struct { /* configuration */
|
||||
int cs; /* critical grey value (pixel<cs => black pixel) */
|
||||
/* range: 0..255, 0 means autodetection */
|
||||
int spc; /* spacewidth/dots (0 = autodetect); default value: 0 */
|
||||
int mode; /* operation modes; default value: 0 */
|
||||
/* operation mode (see --help) */
|
||||
int dust_size; /* dust size; default value: 10 */
|
||||
int only_numbers; /* numbers only; default value: 0 */
|
||||
int verbose; /* verbose mode; default value: 0 */
|
||||
/* verbose option (see --help) */
|
||||
FORMAT out_format; /* output format; default value: ISO8859_1*/
|
||||
char *lc; /* debuglist of chars (_ = not recognized chars) */
|
||||
/* default value: "_" */
|
||||
char *db_path; /* pathname for database; default value: NULL */
|
||||
char *cfilter; /* char filter; default value: NULL, ex: "A-Za-z" */
|
||||
/* limit of certainty where chars are accepted as identified */
|
||||
int certainty; /* in units of 100 (percent); 0..100; default 95 */
|
||||
char *unrec_marker; /* output this string for every unrecognized char */
|
||||
} cfg;
|
||||
} job_t;
|
||||
|
||||
/* initialize job structure */
|
||||
void job_init(job_t *job);
|
||||
|
||||
/* free job structure */
|
||||
void job_free(job_t *job);
|
||||
|
||||
/*FIXME jb: remove JOB; */
|
||||
extern job_t *JOB;
|
||||
|
||||
/* calculate the overlap of the line (0-1) with black points
|
||||
* by recursive bisection
|
||||
* (possibly add error tolerance by searching pixels in the neighbourhood dx,dy) (switchable) ???
|
||||
* MidPoint Line Algorithm (Bresenham) Foley: ComputerGraphics better?
|
||||
* will be replaced by vector functions
|
||||
*/
|
||||
|
||||
/* straight line y=dy/dx*x+b, implicit form d=F(x,y)=dy*x-dx*y+b*dx=0
|
||||
* incrementally: y(i+1)=m*(x(i)+1)+b, F(x+1,y+1)=f(F(x,y)) */
|
||||
int get_line(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
|
||||
int get_line2(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
|
||||
|
||||
/* look for white 0x02 or black 0x01 dots (0x03 = white+black) */
|
||||
char get_bw(int x0, int x1, int y0, int y1,
|
||||
pix *p, int cs,int mask);
|
||||
|
||||
/* look for black crossing a line x0,y0,x1,y1
|
||||
* follow line and count crossings ([white]-black-transitions)
|
||||
*/
|
||||
int num_cross(int x0, int x1, int y0, int y1,
|
||||
pix *p, int cs);
|
||||
|
||||
/* memory allocation with error checking */
|
||||
void *xrealloc(void *ptr, size_t size);
|
||||
|
||||
/* follow a line x0,y0,x1,y1 recording locations of transitions,
|
||||
* return count of transitions
|
||||
*/
|
||||
int follow_path(int x0, int x1, int y0, int y1, pix *p, int cs, path_t *path);
|
||||
|
||||
/* -------------------------------------------------------------
|
||||
* mark edge-points
|
||||
* - first move forward until b/w-edge
|
||||
* - more than 2 pixel?
|
||||
* - loop around
|
||||
* - if forward pixel : go up, rotate right
|
||||
* - if forward no pixel : rotate left
|
||||
* - stop if found first 2 pixel in same order
|
||||
* using a walk-along-the-right-hand-wall strategy
|
||||
* --------------------------------------------------------------
|
||||
* turmite game: inp: start-x,y, rule r_black=UP,r_white=RIght until border
|
||||
* out: last-position
|
||||
* while walking, count steps, dead ends, xmax, ymax, and upper-right, lower-right, upper-left, lower-left corners
|
||||
* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
*
|
||||
* is this the right place for declaration?
|
||||
*/
|
||||
void turmite(pix *p, int *x, int *y,
|
||||
int x0, int x1, int y0, int y1, int cs, int rw, int rb);
|
||||
|
||||
/* test if points are connected via t-pixel (recursive!) */
|
||||
int joined(pix *p, int x0, int y0, int x1, int y1, int cs);
|
||||
|
||||
/* move from x,y to direction r until pixel or l steps
|
||||
* return number of steps
|
||||
*/
|
||||
int loop(pix *p, int x, int y, int l, int cs, int col, DIRECTION r);
|
||||
|
||||
#define MAX_HOLES 3
|
||||
typedef struct list_holes {
|
||||
int num; /* numbers of holes, initialize with 0 */
|
||||
struct hole_s {
|
||||
int size,x,y,x0,y0,x1,y1; /* size, start point, outer rectangle */
|
||||
} hole[MAX_HOLES];
|
||||
} holes_t;
|
||||
|
||||
/* look for white holes surrounded by black points
|
||||
* at moment white point with black in all four directions
|
||||
*/
|
||||
int num_hole(int x0, int x1, int y0, int y1, pix *p, int cs, holes_t *holes);
|
||||
|
||||
/* count for black nonconnected objects --- used for i,auml,ouml,etc. */
|
||||
int num_obj(int x0, int x1, int y0, int y1, pix *p, int cs);
|
||||
|
||||
int distance( pix *p1, struct box *box1, /* box-frame */
|
||||
pix *p2, struct box *box2, int cs);
|
||||
|
||||
/* call the OCR engine ;) */
|
||||
/* char whatletter(struct box *box1,int cs); */
|
||||
|
||||
/* declared in pixel.c */
|
||||
/* getpixel() was pixel() but it may collide with netpnm pixel declaration */
|
||||
int getpixel(pix *p, int x, int y);
|
||||
int marked(pix *p, int x, int y);
|
||||
void put(pix * p, int x, int y, int ia, int io);
|
||||
|
||||
/* Buffer-based entry point taking (buf, size) plus an output-format string
 * and four numeric options (gray level, dust size, space width in dots,
 * certainty).
 * NOTE(review): presumably buf/size hold PNM image data and the options map
 * onto job_t.cfg (cs, dust_size, spc, certainty) - confirm against the
 * implementation, including who owns/frees the returned char pointer. */
char* PNMToText(char* buf, long size, char *outputformat, long graylevel, long dustsize, long spacewidthdots, long certainty);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
#endif
|
||||
|
||||
#endif /* __GOCR_H__ */
|
||||
90
ActiveX/ASCOfficeUtils/GOCR/headers/list.h
Normal file
90
ActiveX/ASCOfficeUtils/GOCR/headers/list.h
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
*/
|
||||
|
||||
#ifndef GOCR_LIST_H
|
||||
#define GOCR_LIST_H
|
||||
|
||||
#ifdef DEBUG
|
||||
#define g_debug(a) a
|
||||
#else
|
||||
#define g_debug(a)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Structures
|
||||
*/
|
||||
|
||||
struct element {
|
||||
struct element *next, *previous;
|
||||
void *data;
|
||||
};
|
||||
typedef struct element Element;
|
||||
|
||||
struct list {
|
||||
Element start; /* simplifies for(each_element) { ... */
|
||||
Element stop; /* ... list_del() ... } v0.41 */
|
||||
Element **current; /* for(each_element) */
|
||||
int n; /* number of elements */
|
||||
int level; /* level of nested fors */
|
||||
};
|
||||
typedef struct list List;
|
||||
|
||||
/*
|
||||
* Functions
|
||||
*/
|
||||
|
||||
void list_init ( List *l );
|
||||
int list_app ( List *l, void *data );
|
||||
int list_ins ( List *l, void *data_after, void *data);
|
||||
Element*list_element_from_data ( List *l, void *data );
|
||||
int list_del ( List *l, void *data );
|
||||
void list_free ( List *l );
|
||||
int list_and_data_free ( List *l, void (*free_data)(void *data));
|
||||
int list_higher_level ( List *l );
|
||||
void list_lower_level ( List *l );
|
||||
void * list_next ( List *l, void *data );
|
||||
void * list_prev ( List *l, void *data );
|
||||
void list_sort ( List *l, int (*compare)(const void *, const void *) );
|
||||
|
||||
/* 1 if the start element links directly to the stop element (no data
 * elements in between), otherwise 0 */
#define list_empty(l) (&(l)->stop == (l)->start.next)
|
||||
#define list_get_header(l) ((l)->start.next->data)
|
||||
#define list_get_tail(l) ((l)->stop.previous->data)
|
||||
#define list_get_current(l) ((l)->current[(l)->level]->data)
|
||||
#define list_get_cur_prev(l) ((l)->current[(l)->level]->previous == NULL ? \
|
||||
NULL : (l)->current[(l)->level]->previous->data )
|
||||
#define list_get_cur_next(l) ((l)->current[(l)->level]->next == NULL ? \
|
||||
NULL : (l)->current[(l)->level]->next->data )
|
||||
#define list_total(l) ((l)->n)
|
||||
|
||||
#define for_each_data(l) \
|
||||
if (list_higher_level(l) == 0) { \
|
||||
for ( ; (l)->current[(l)->level] \
|
||||
&& (l)->current[(l)->level]!=&(l)->stop; (l)->current[(l)->level] = \
|
||||
(l)->current[(l)->level]->next ) {
|
||||
|
||||
|
||||
#define end_for_each(l) \
|
||||
} \
|
||||
list_lower_level(l); \
|
||||
}
|
||||
|
||||
#endif
|
||||
63
ActiveX/ASCOfficeUtils/GOCR/headers/ocr0.h
Normal file
63
ActiveX/ASCOfficeUtils/GOCR/headers/ocr0.h
Normal file
@@ -0,0 +1,63 @@
|
||||
#ifndef _OCR0_H
|
||||
#define _OCR0_H
|
||||
#include "pgm2asc.h"
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
- functions with thousands of lines make the compilation very slow
|
||||
therefore the ocr0-function is split into subfunctions
|
||||
- shared data used often in ocr0-subroutines are stored
|
||||
in ocr0_shared structure.
|
||||
* ------------------------------------------------------------ */
|
||||
|
||||
typedef struct ocr0_shared { /* shared variables and properties */
|
||||
|
||||
struct box *box1; /* box in whole image */
|
||||
pix *bp; /* extracted temporarly box, cleaned */
|
||||
int cs; /* global threshold value (gray level) */
|
||||
|
||||
/* ToDo: or MACROS: X0 = box1->x0 */
|
||||
int x0, x1, y0, y1; /* box coordinates related to box1 */
|
||||
int dx, dy; /* size of box */
|
||||
int hchar, gchar; /* relation to m1..m4 */
|
||||
int aa[4][4]; /* corner points, see xX (x,y,dist^2,vector_idx) v0.41 */
|
||||
holes_t holes; /* list of holes (max MAX_HOLES) */
|
||||
|
||||
} ocr0_shared_t;
|
||||
|
||||
/* tests for umlaut */
|
||||
int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier);
|
||||
/* detect chars */
|
||||
wchar_t ocr0(struct box *box1, pix *b, int cs);
|
||||
/* detect numbers */
|
||||
wchar_t ocr0n(ocr0_shared_t *sdata);
|
||||
|
||||
/* Return x squared, computed in plain int arithmetic. */
static int sq(int x)
{
  int result = x;
  result *= x;
  return result;
}
|
||||
|
||||
/*
|
||||
* go from vector j1 to vector j2 and measure maximum deviation of
|
||||
* the steps from the line connecting j1 and j2
|
||||
* return the squared maximum distance
|
||||
* in units of the box size times 1024
|
||||
*/
|
||||
int line_deviation( struct box *box1, int j1, int j2 );
|
||||
|
||||
/*
|
||||
* search vectors between j1 and j2 for nearest point a to point r
|
||||
* example:
|
||||
*
|
||||
* r-> $$...$$ $ - mark vectors
|
||||
* @@$..@@ @ - black pixels
|
||||
* @@$..@@ . - white pixels
|
||||
* @@@@.$@
|
||||
* a-> @@$@$@@
|
||||
* @$.@@@@
|
||||
* @@..$@@
|
||||
* @@..$@@
|
||||
* j1 --> $$...$$ <-- j2
|
||||
*
|
||||
* ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} instead of rx,ry?
|
||||
* j1 and j2 must be in the same frame
|
||||
* return aa?
|
||||
*/
|
||||
int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry);
|
||||
#endif
|
||||
3
ActiveX/ASCOfficeUtils/GOCR/headers/ocr1.h
Normal file
3
ActiveX/ASCOfficeUtils/GOCR/headers/ocr1.h
Normal file
@@ -0,0 +1,3 @@
|
||||
/* #include "pgm2asc.h" */
|
||||
#include "pnm.h"
|
||||
/* wchar_t ocr1(struct box *box1, pix *b, int cs); */
|
||||
23
ActiveX/ASCOfficeUtils/GOCR/headers/otsu.h
Normal file
23
ActiveX/ASCOfficeUtils/GOCR/headers/otsu.h
Normal file
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/*======================================================================*/
|
||||
/* OTSU global thresholding routine */
|
||||
/* takes a 2D unsigned char array pointer, number of rows, and */
|
||||
/* number of cols in the array. returns the value of the threshold */
|
||||
/*======================================================================*/
|
||||
int
|
||||
otsu (unsigned char *image, int rows, int cols, int x0, int y0, int dx, int dy, int vvv);
|
||||
|
||||
|
||||
/*======================================================================*/
|
||||
/* thresholding the image (set threshold to 128+32=160=0xA0) */
|
||||
/* now we have a fixed thresholdValue good to recognize on gray image */
|
||||
/* - so lower bits can used for other things (bad design?) */
|
||||
/*======================================================================*/
|
||||
int
|
||||
thresholding (unsigned char *image, int rows, int cols, int x0, int y0, int dx, int dy, int thresholdValue);
|
||||
37
ActiveX/ASCOfficeUtils/GOCR/headers/output.h
Normal file
37
ActiveX/ASCOfficeUtils/GOCR/headers/output.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address */
|
||||
|
||||
#ifndef OUTPUT_H
|
||||
#define OUTPUT_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "pnm.h"
|
||||
#include "gocr.h"
|
||||
#include "list.h"
|
||||
|
||||
void out_b(struct box *px, pix *b, int x0, int y0, int dx, int dy, int cs );
|
||||
void out_x(struct box *px);
|
||||
void out_x2(struct box *box1,struct box *box2);
|
||||
int output_list(job_t *job);
|
||||
int debug_img(char *fname, struct job_s *job, int opt);
|
||||
|
||||
|
||||
#endif
|
||||
9
ActiveX/ASCOfficeUtils/GOCR/headers/pcx.h
Normal file
9
ActiveX/ASCOfficeUtils/GOCR/headers/pcx.h
Normal file
@@ -0,0 +1,9 @@
|
||||
|
||||
#include "pnm.h"
|
||||
|
||||
void readpcx(char *name,pix *p,int vvv);
|
||||
|
||||
/* write 8bit palette no RLE, ToDo: obsolete? */
|
||||
void writebmp(char *name,pix p,int vvv);
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
110
ActiveX/ASCOfficeUtils/GOCR/headers/pgm2asc.h
Normal file
110
ActiveX/ASCOfficeUtils/GOCR/headers/pgm2asc.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2006 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
*/
|
||||
|
||||
#ifndef PGM2ASC_H
|
||||
#define PGM2ASC_H 1
|
||||
|
||||
#include "pnm.h"
|
||||
#include "output.h"
|
||||
#include "list.h"
|
||||
#include "unicode.h"
|
||||
|
||||
/* Element (xx,yy) of pixmap struct `pic` (passed by value/name): row yy of
 * width pic.x, column xx. Expands to an lvalue; pic is evaluated twice. */
#define pixel_at(pic, xx, yy) ((pic).p[((yy) * (pic).x) + (xx)])
|
||||
/* Same as pixel_at, but `pic` is a pointer to the pixmap struct.
 * Expands to an lvalue; pic is evaluated twice. */
#define pixel_atp(pic, xx, yy) ((pic)->p[((yy) * (pic)->x) + (xx)])
|
||||
|
||||
#ifndef HAVE_WCHAR_H
|
||||
wchar_t *wcschr (const wchar_t *wcs, wchar_t wc);
|
||||
wchar_t *wcscpy (wchar_t *dest, const wchar_t *src);
|
||||
size_t wcslen (const wchar_t *s);
|
||||
#endif
|
||||
#ifndef HAVE_WCSDUP
|
||||
wchar_t * wcsdup (const wchar_t *WS); /* its a gnu extension */
|
||||
#endif
|
||||
|
||||
/* declared in pgm2asc.c */
|
||||
/* set alternate chars and its weight, called from the engine
|
||||
if a char is recognized to (weight) percent */
|
||||
int setas(struct box *b, char *as, int weight); /* string + xml */
|
||||
int setac(struct box *b, wchar_t ac, int weight); /* wchar */
|
||||
|
||||
/* for qsort() call */
|
||||
int intcompare (const void *vr, const void *vs);
|
||||
|
||||
/* declared in box.c */
|
||||
int box_gt(struct box *box1, struct box *box2);
|
||||
int reset_box_ac(struct box *box); /* reset and free char table */
|
||||
struct box *malloc_box( struct box *inibox ); /* alloc memory for a box */
|
||||
int free_box( struct box *box ); /* free memory of a box */
|
||||
int copybox( pix *p, int x0, int y0, int dx, int dy, pix *b, int len);
|
||||
int reduce_vectors ( struct box *box1, int mode );
|
||||
int merge_boxes( struct box *box1, struct box *box2 );
|
||||
int cut_box( struct box *box1);
|
||||
|
||||
|
||||
/* declared in database.c */
|
||||
int load_db(void);
|
||||
wchar_t ocr_db(struct box *box1);
|
||||
|
||||
/* declared in detect.c */
|
||||
int detect_lines1(pix * p, int x0, int y0, int dx, int dy);
|
||||
int detect_lines2(pix *p,int x0,int y0,int dx,int dy,int r);
|
||||
int detect_rotation_angle(job_t *job);
|
||||
int detect_text_lines(pix * pp, int mo);
|
||||
int adjust_text_lines(pix * pp, int mo);
|
||||
int detect_pictures(job_t *job);
|
||||
|
||||
/* declared in lines.c */
|
||||
void store_boxtree_lines( int mo );
|
||||
/* free memory for internal stored textlines.
|
||||
* Needs to be called _after_ having retrieved the text.
|
||||
* After freeing, no call to getTextLine is possible any
|
||||
* more
|
||||
*/
|
||||
void free_textlines( void );
|
||||
|
||||
/* get result of ocr for a given line number.
|
||||
* If the line is out of range, the function returns 0,
|
||||
* otherwise a pointer to a complete line.
|
||||
*/
|
||||
const char *getTextLine( int );
|
||||
|
||||
/* append a string (s1) to the string buffer (buffer) of length (len)
|
||||
* if buffer is too small or len==0 realloc buffer, len+=512
|
||||
*/
|
||||
char *append_to_line(char *buffer, const char *s1, int *len);
|
||||
|
||||
/* declared in remove.c */
|
||||
int remove_dust( job_t *job );
|
||||
int remove_pictures( job_t *job);
|
||||
int remove_melted_serifs( pix *pp );
|
||||
/* Takes no arguments. Declared with (void) so this is a real prototype:
 * an empty parameter list in C leaves the arguments unchecked. */
int remove_rest_of_dust(void);
|
||||
int smooth_borders( job_t *job );
|
||||
|
||||
/* declared in pixel.c */
|
||||
int marked(pix * p, int x, int y);
|
||||
/* NOTE(review): gocr.h states that getpixel() "was pixel()" (renamed to
 * avoid a collision with the netpnm pixel declaration) and declares
 * getpixel() with this same signature; presumably this prototype is a
 * leftover - confirm against pixel.c before relying on it. */
int pixel(pix *p, int x, int y);
|
||||
void put(pix * p, int x, int y, int ia, int io);
|
||||
|
||||
/* start ocr on an image in job.src.p */
|
||||
int pgm2asc(job_t *job);
|
||||
|
||||
#endif
|
||||
37
ActiveX/ASCOfficeUtils/GOCR/headers/pnm.h
Normal file
37
ActiveX/ASCOfficeUtils/GOCR/headers/pnm.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/* Handle PNM-files Dez98 JS
|
||||
* 0,0 = left up
|
||||
* PAM-formats
|
||||
* PAM any P7
|
||||
* PNM-formats
|
||||
* PGM gray ASCII=P2 RAW=P5 dx dy col gray
|
||||
* PPM RGB ASCII=P3 RAW=P6 dx dy col RGB
|
||||
* PBM B/W ASCII=P1 RAW=P4 dx dy bitmap
|
||||
*/
|
||||
|
||||
#ifndef GOCR_PNM_H
|
||||
#define GOCR_PNM_H 1
|
||||
|
||||
#include "config.h"
|
||||
|
||||
struct pixmap {
|
||||
unsigned char *p; /* pointer of image buffer (pixmap) */
|
||||
int x; /* xsize */
|
||||
int y; /* ysize */
|
||||
int bpp; /* bytes per pixel: 1=gray 3=rgb */
|
||||
};
|
||||
typedef struct pixmap pix;
|
||||
|
||||
/* return 1 on multiple images (holding file open), 0 else */
|
||||
int readpgm(char *name, pix *p, int vvv);
|
||||
/* return 1 on multiple images (holding file open), 0 else */
|
||||
int readpgmFromBuffer(char* buffer, long size, pix *p);
|
||||
|
||||
/* write pgm-map to pnm-file */
|
||||
int writepgm(char *nam, pix *p);
|
||||
int writepbm(char *nam, pix *p);
|
||||
int writeppm(char *nam, pix *p); /* use lowest 3 bits for farbcoding */
|
||||
|
||||
/* ----- count colors ------ create histogram ------- */
|
||||
void makehisto(pix p, unsigned col[256], int vvv);
|
||||
|
||||
#endif
|
||||
42
ActiveX/ASCOfficeUtils/GOCR/headers/progress.h
Normal file
42
ActiveX/ASCOfficeUtils/GOCR/headers/progress.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
---------------------- progress output ----------------------
|
||||
output progress for GUIs to a pipe
|
||||
format: "counter_name" counter maxcounter time estimated_time \r|\n
|
||||
*/
|
||||
#ifndef GOCR_PROGRESS_H
|
||||
#define GOCR_PROGRESS_H "Oct06"
|
||||
#include <time.h>
|
||||
|
||||
/* initialization of progress output, fname="<fileID>","<filename>","-" */
|
||||
int ini_progress(char *fname);
|
||||
|
||||
/* ToDo: add by open_* and close_* */
|
||||
/* place to store values for progress calculation, called often, but
|
||||
* don't call systime so often
|
||||
*/
|
||||
typedef struct progress_counter {
|
||||
const char *name; /* name of counter */
|
||||
int lastprintcount; /* last counter printed for extrapolation */
|
||||
int maxcount; /* max counter */
|
||||
int numskip; /* num of counts to skip before timecall 0..maxcount */
|
||||
time_t starttime; /* start time of this counter */
|
||||
time_t lastprinttime; /* last time printed in seconds */
|
||||
|
||||
} progress_counter_t;
|
||||
|
||||
/* progress output p1=main_progress_0..100% p2=sub_progress_0..100% */
|
||||
/* ToDo: improved_progress: counter, maxcount(ini), counter_name(ini),
|
||||
* printinterval=10 # time before printing out progressmeter
|
||||
* *numskip=1 # if (counter-lastprintcounter<numskip) return; gettime() ...
|
||||
* *startutime, *lastprintutime, *lastprintcounter # numskip*=2 or /=2
|
||||
* only 1output/10s, + estimated endtime (test on pixelfields)
|
||||
* to stderr by default? remove subprogress, ini_progress? rm_progress?
|
||||
* test on tcl
|
||||
*/
|
||||
progress_counter_t *open_progress(int maxcount, const char *name);
|
||||
/* free counter */
|
||||
int close_progress(progress_counter_t *counter);
|
||||
/* output progress for pc */
|
||||
int progress(int counter, progress_counter_t *pc);
|
||||
/* --------------------- end of progress output ---------------------- */
|
||||
#endif
|
||||
6
ActiveX/ASCOfficeUtils/GOCR/headers/tga.h
Normal file
6
ActiveX/ASCOfficeUtils/GOCR/headers/tga.h
Normal file
@@ -0,0 +1,6 @@
|
||||
|
||||
#include "pnm.h"
|
||||
|
||||
void readtga(char *name,pix *p,int mode); /* mode: 0=gray 1=RGB */
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
1264
ActiveX/ASCOfficeUtils/GOCR/headers/unicode.h
Normal file
1264
ActiveX/ASCOfficeUtils/GOCR/headers/unicode.h
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user