init repo
This commit is contained in:
333
ActiveX/ASCOfficeUtils/GOCR/GOCR.vcproj
Normal file
333
ActiveX/ASCOfficeUtils/GOCR/GOCR.vcproj
Normal file
@@ -0,0 +1,333 @@
|
||||
<?xml version="1.0" encoding="windows-1251"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="9,00"
|
||||
Name="GOCR"
|
||||
ProjectGUID="{DD328E05-26BE-4C81-A13E-489D15321212}"
|
||||
Keyword="AtlProj"
|
||||
TargetFrameworkVersion="196613"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"
|
||||
/>
|
||||
</Platforms>
|
||||
<ToolFiles>
|
||||
</ToolFiles>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory="Debug"
|
||||
IntermediateDirectory="Debug"
|
||||
ConfigurationType="4"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC70.vsprops"
|
||||
UseOfATL="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="false"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="headers; include"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
EnableFunctionLevelLinking="false"
|
||||
RuntimeTypeInfo="false"
|
||||
UsePrecompiledHeader="0"
|
||||
PrecompiledHeaderFile=".\Debug/gocr.pch"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="true"
|
||||
DebugInformationFormat="4"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="_DEBUG"
|
||||
Culture="1049"
|
||||
AdditionalIncludeDirectories="$(IntDir)"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
OutputFile="$(OutDir)\$(ProjectName).lib"
|
||||
SuppressStartupBanner="true"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="Release"
|
||||
IntermediateDirectory="Release"
|
||||
ConfigurationType="4"
|
||||
UseOfATL="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="false"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalIncludeDirectories="headers; include"
|
||||
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
PrecompiledHeaderFile=".\Release/gocr.pch"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="false"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="NDEBUG"
|
||||
Culture="1049"
|
||||
AdditionalIncludeDirectories="$(IntDir)"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Source Files"
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\src\barcode.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\box.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\database.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\detect.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\gocr.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\jconv.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\job.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\lines.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\list.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\ocr0.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\ocr0n.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\ocr1.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\otsu.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\output.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\pcx.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\pgm2asc.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\pixel.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\pnm.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\progress.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\remove.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\tga.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\unicode.c"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
Filter="h;hpp;hxx;hm;inl;inc;xsd"
|
||||
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\headers\amiga.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\barcode.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\include\config.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\gocr.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\list.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\ocr0.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\ocr1.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\otsu.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\output.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\pcx.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\pgm2asc.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\pnm.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\progress.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Resource.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\tga.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\unicode.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\include\version.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
||||
464
ActiveX/ASCOfficeUtils/GOCR/GOCR2005.vcproj
Normal file
464
ActiveX/ASCOfficeUtils/GOCR/GOCR2005.vcproj
Normal file
@@ -0,0 +1,464 @@
|
||||
<?xml version="1.0" encoding="windows-1251"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="8,00"
|
||||
Name="GOCR"
|
||||
ProjectGUID="{DD328E05-26BE-4C81-A13E-489D15321212}"
|
||||
Keyword="AtlProj"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"
|
||||
/>
|
||||
</Platforms>
|
||||
<ToolFiles>
|
||||
</ToolFiles>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory="Debug"
|
||||
IntermediateDirectory="Debug"
|
||||
ConfigurationType="4"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC70.vsprops"
|
||||
UseOfATL="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="false"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="headers; include"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
EnableFunctionLevelLinking="false"
|
||||
RuntimeTypeInfo="false"
|
||||
UsePrecompiledHeader="0"
|
||||
PrecompiledHeaderFile=".\Debug/gocr.pch"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="true"
|
||||
DebugInformationFormat="4"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="_DEBUG"
|
||||
Culture="1049"
|
||||
AdditionalIncludeDirectories="$(IntDir)"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
OutputFile="$(OutDir)\$(ProjectName).lib"
|
||||
SuppressStartupBanner="true"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="Release"
|
||||
IntermediateDirectory="Release"
|
||||
ConfigurationType="4"
|
||||
UseOfATL="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="false"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalIncludeDirectories="headers; include"
|
||||
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
PrecompiledHeaderFile=".\Release/gocr.pch"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="false"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="NDEBUG"
|
||||
Culture="1049"
|
||||
AdditionalIncludeDirectories="$(IntDir)"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="ReleaseASC|Win32"
|
||||
OutputDirectory="ReleaseASC"
|
||||
IntermediateDirectory="ReleaseASC"
|
||||
ConfigurationType="4"
|
||||
UseOfATL="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="false"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<!-- ReleaseASC compiler settings. The precompiled-header path follows the configuration name so it can never land in the Release configuration's directory. -->
<Tool
	Name="VCCLCompilerTool"
	AdditionalIncludeDirectories="headers; include"
	PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES"
	RuntimeLibrary="2"
	UsePrecompiledHeader="0"
	PrecompiledHeaderFile=".\$(ConfigurationName)/gocr.pch"
	WarningLevel="3"
	Detect64BitPortabilityProblems="false"
	DebugInformationFormat="3"
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="NDEBUG; ASCBUILD"
|
||||
Culture="1049"
|
||||
AdditionalIncludeDirectories="$(IntDir)"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="ReleaseOpenSource|Win32"
|
||||
OutputDirectory="$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
UseOfATL="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="false"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalIncludeDirectories="headers; include"
|
||||
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES;BUILD_CONFIG_OPENSOURCE_VERSION"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
PrecompiledHeaderFile=".\$(ConfigurationName)/gocr.pch"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="false"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="NDEBUG"
|
||||
Culture="1049"
|
||||
AdditionalIncludeDirectories="$(IntDir)"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Source Files"
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\src\barcode.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\box.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\database.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\detect.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\gocr.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\jconv.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\job.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\lines.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\list.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\ocr0.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\ocr0n.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\ocr1.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\otsu.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\output.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\pcx.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\pgm2asc.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\pixel.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\pnm.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\progress.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\remove.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\tga.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\unicode.c"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
Filter="h;hpp;hxx;hm;inl;inc;xsd"
|
||||
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\headers\amiga.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\barcode.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\include\config.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\gocr.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\list.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\ocr0.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\ocr1.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\otsu.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\output.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\pcx.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\pgm2asc.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\pnm.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\progress.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Resource.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\tga.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\headers\unicode.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\include\version.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
||||
31
ActiveX/ASCOfficeUtils/GOCR/headers/amiga.h
Normal file
31
ActiveX/ASCOfficeUtils/GOCR/headers/amiga.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
this file was suggested by Uffe Holst Jun05,2000
|
||||
to compile gocr using SAS/C under AmigaOS
|
||||
|
||||
uhc@post6.tele.dk
|
||||
|
||||
SAS/C probably does not support ANSI C++, hence these changes
|
||||
|
||||
I am a little bit confused about using declaration and
|
||||
macro definition of abs(). I think that should not be necessary.
|
||||
Tell me, if you have an Amiga and you can give answer
|
||||
to the following questions.
|
||||
|
||||
Joerg Schulenburg, see README for EMAIL-address
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Fallback abs() for SAS/C on AmigaOS: the macro is defined only when both
 * _AMIGA and __SASC are defined and no abs macro exists yet.
 * NOTE: the macro expands its argument twice, so arguments with side
 * effects (e.g. abs(i++)) must be avoided.
 */
#ifdef _AMIGA
|
||||
#ifdef __SASC
|
||||
#if 0 /* disabled block, never compiled */
|
||||
#include <string.h> /* maybe this can be removed ??? */
|
||||
#include <stdlib.h> /* maybe this can be removed ??? */
|
||||
extern int abs(int); /* maybe this can be removed ??? */
|
||||
#endif /* 0 */
|
||||
#ifndef abs
|
||||
#define abs(i) ((i) < 0 ? -(i) : (i))
|
||||
#endif /* abs */
|
||||
#endif /* __SASC */
|
||||
#endif /* _AMIGA */
|
||||
|
||||
|
||||
11
ActiveX/ASCOfficeUtils/GOCR/headers/barcode.h
Normal file
11
ActiveX/ASCOfficeUtils/GOCR/headers/barcode.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#ifndef _BARCODE_H
|
||||
#define _BARCODE_H
|
||||
#include "pnm.h"
|
||||
|
||||
/*
|
||||
detect barcode and add a string to the box (obj-pointer)
|
||||
|
||||
NOTE(review): job_t is declared in gocr.h, while this header includes only
pnm.h; presumably every includer pulls in gocr.h first -- confirm.
The meaning of the int return value is not visible from this header.
|
||||
*/
|
||||
|
||||
int detect_barcode(job_t *job);
|
||||
|
||||
#endif /* _BARCODE_H */
|
||||
295
ActiveX/ASCOfficeUtils/GOCR/headers/gocr.h
Normal file
295
ActiveX/ASCOfficeUtils/GOCR/headers/gocr.h
Normal file
@@ -0,0 +1,295 @@
|
||||
/*
|
||||
This is an Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2006 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
sometimes I have written comments in german language, sorry for that
|
||||
|
||||
- look for ??? for preliminary code
|
||||
*/
|
||||
|
||||
/* General headerfile with gocr-definitions */
|
||||
|
||||
#ifndef __GOCR_H__
|
||||
#define __GOCR_H__
|
||||
|
||||
#include "pnm.h"
|
||||
#include "unicode.h"
|
||||
#include "list.h"
|
||||
#include <stddef.h>
|
||||
#ifdef HAVE_GETTIMEOFDAY
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* wchar_t should always exist (ANSI), but WCHAR.H is sometimes missing
|
||||
* USE_UNICODE should be removed or replaced by HAVE_WCHAR_H in future
|
||||
*/
|
||||
#ifdef HAVE_WCHAR_H
|
||||
#define USE_UNICODE 1
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
/* ------------------------ feature extraction ----------------- */
|
||||
#define AT 7 /* mark (same numeric value as ST below) */
|
||||
#define M1 1 /* mark */
|
||||
/*
 * Direction codes, numbered 1..4 in declaration order (UP=1, DO=2, RI=3,
 * LE=4); 0 is not a member.  Presumably up, down, right and left -- the
 * turmite() comment in this header spells RI as "RIght"; confirm the others.
 */
enum direction {
|
||||
UP=1, DO, RI, LE
|
||||
};
|
||||
typedef enum direction DIRECTION;
|
||||
#define ST 7 /* stop (same numeric value as AT above) */
|
||||
/* ------------------------------------------------------------- */
|
||||
/* detect maxima of line overlaps (returned in %) and line coordinates */
|
||||
#define HOR 1 /* horizontal */
|
||||
#define VER 2 /* vertical */
|
||||
#define RIS 3 /* rising */
|
||||
#define FAL 4 /* falling */
|
||||
|
||||
/* number of entries in each per-line array of struct tlines */
#define MAXlines 1024
|
||||
|
||||
/* ToDo: if we have a tree instead of a list, a line could be a node object */
|
||||
/*
 * Per-text-line data stored as parallel arrays: index i in every array
 * describes the same line.  Each array has MAXlines entries.
 */
struct tlines {
|
||||
int num; /* presumably the number of lines in use -- TODO confirm */
|
||||
int dx, dy; /* direction of text lines (straight/skew) */
|
||||
int m1[MAXlines], /* start of line = upper bound of 'A' */
|
||||
m2[MAXlines], /* upper bound of 'e' */
|
||||
m3[MAXlines], /* lower bound of 'e' = baseline */
|
||||
m4[MAXlines]; /* stop of line = lower bound of 'q' */
|
||||
/* ToDo: add sureness per m1,m2 etc? */
|
||||
int x0[MAXlines],
|
||||
x1[MAXlines]; /* left and right border */
|
||||
int wt[MAXlines]; /* weight: how sure the line is correct, in percent, v0.41 */
|
||||
int pitch[MAXlines]; /* word pitch (later per box?), v0.41 */
|
||||
int mono[MAXlines]; /* spacing type, 0=proportional, 1=monospaced */
|
||||
};
|
||||
|
||||
#define NumAlt 10 /* maximal number of alternative chars (table length) */
|
||||
#define MaxNumFrames 8 /* maximum number of frames per char/box */
|
||||
/* NOTE(review): struct box holds a single frame_vector table of this many
 * entries, so the limit looks shared by all frames of a box -- confirm. */
#define MaxFrameVectors 128 /* maximum vectors per frame (*8=1KB/box) */
|
||||
/* ToDo: use only malloc_box(),free_box(),copybox() for creation, destroy etc.
|
||||
* adding reference_counter to avoid pointer pointing to freed box
|
||||
*/
|
||||
/*
 * One detected object (normally a letter): bounding rectangle, recognition
 * result with alternatives, and the frame vectors describing its outline.
 */
struct box { /* this structure should contain all pixel infos of a letter */
|
||||
int x0,x1,y0,y1,x,y,dots; /* xmin,xmax,ymin,ymax,reference-pixel,i-dots */
|
||||
int num_boxes, /* 1 "abc", 2 "!i?", 3 "ä" (composed objects) 0.41 */
|
||||
num_subboxes; /* 1 for "abdegopqADOPQR", 2 for "B" (holes) 0.41 */
|
||||
wchar_t c; /* detected char (same as tac[0], obsolete?) */
|
||||
wchar_t modifier; /* default=0, see compose() in unicode.c */
|
||||
int num; /* same number = same char */
|
||||
int line; /* line number (points to struct tlines lines) */
|
||||
int m1,m2,m3,m4; /* m2 = upper boundary, m3 = baseline */
|
||||
/* planned: sizeof hole_1, hole_2, certainty (run1=100%,run2=90%,etc.) */
|
||||
pix *p; /* pointer to pixmap (v0.2.5) */
|
||||
/* tac, wac are used together with setac() to manage very similar chars */
|
||||
int num_ac; /* length of table (alternative chars), default=0 */
|
||||
wchar_t tac[NumAlt]; /* alternative chars, only used by setac(),getac() */
|
||||
int wac[NumAlt]; /* weight of alternative chars */
|
||||
char *tas[NumAlt]; /* alternative UTF8-strings or XML codes if tac[]=0 */
|
||||
/* replacing old obj */
|
||||
/* ToDo: (*obj)[NumAlt] + olen[NumAlt] ??? */
|
||||
/* ToDo: bitmap for possible Picture|Object|Char ??? */
|
||||
/* char *obj; */ /* pointer to text-object ... -> replaced by tas[] */
|
||||
/* ... (melted chars, barcode, picture coords, ...) */
|
||||
/* must be freed before box is freed! */
|
||||
/* do _not_ copy only the pointer to object */
|
||||
/* --------------------------------------------------------
|
||||
* extension since v0.41 js05, Store frame vectors,
|
||||
* which is a table of vectors surrounding the char and its
|
||||
* inner white holes. The advantage is the independence from
|
||||
* resolution, handling of holes, overlap and rotation.
|
||||
* --------------------------------------------------------- */
|
||||
int num_frames; /* number of frames: 1 for cfhklmnrstuvwxyz */
|
||||
/* 2 for abdegijopq */
|
||||
int frame_vol[MaxNumFrames]; /* volume inside frame +/- (black/white) */
|
||||
int frame_per[MaxNumFrames]; /* periphery, summed length of vectors */
|
||||
int num_frame_vectors[MaxNumFrames]; /* index to next frame */
|
||||
/* biggest frame should be stored first (outer frame) */
|
||||
/* biggest has the maximum pair distance */
|
||||
/* num vector loops */
|
||||
int frame_vector[MaxFrameVectors][2]; /* may be 16*int=fixpoint_number */
|
||||
|
||||
};
|
||||
typedef struct box Box;
|
||||
|
||||
/* True if the coordinate pair (a,b) lies outside the image p, i.e. outside
 * 0 <= a < (p)->x and 0 <= b < (p)->y.
 * Every parameter is parenthesized so that expression arguments such as
 * `i | 1` or `c ? u : v` keep their meaning inside the comparisons.
 * Note: a and b may each be evaluated twice; avoid arguments with side
 * effects.
 */
#define outbounds(p, a, b) ((a) < 0 || (b) < 0 || (a) >= (p)->x || (b) >= (p)->y)
|
||||
|
||||
/* ToDo: this structure seems to be obsolete, remove it */
/* NOTE(review): follow_path(), declared in this header, still takes a
 * path_t *, so the type cannot be removed without changing that prototype. */
|
||||
typedef struct path {
|
||||
int start; /* color at the beginning of the path, (0=white, 1=black) */
|
||||
int *x; /* x coordinates of transitions */
|
||||
int *y; /* y coordinates of transitions */
|
||||
int num; /* current number of entries in x or y */
|
||||
int max; /* maximum number of entries in x or y */
|
||||
/* (if more values need to be stored, the arrays are enlarged) */
|
||||
} path_t;
|
||||
|
||||
/* job_t contains all information needed for an OCR task.
 * It groups four anonymous structs: src (input), tmp (scratch state),
 * res (results) and cfg (configuration).
 * NOTE: tmp.init_time exists only when HAVE_GETTIMEOFDAY is defined, so
 * the layout of job_t depends on that macro; every translation unit that
 * shares a job_t must be compiled with the same setting.
 * NOTE: res.lines embeds a struct tlines by value (nine int arrays of
 * MAXlines entries each), which makes job_t a large object.
 */
|
||||
typedef struct job_s {
|
||||
struct { /* source data */
|
||||
char *fname; /* input filename; default value: "-" */
|
||||
pix p; /* source pixel data, pixelmap 8bit gray */
|
||||
} src;
|
||||
struct { /* temporary stuff, e.g. buffers */
|
||||
#ifdef HAVE_GETTIMEOFDAY
|
||||
struct timeval init_time; /* starting time of this job */
|
||||
#endif
|
||||
pix ppo; /* pixmap for visual debugging output, obsolete */
|
||||
|
||||
/* sometimes the recognition function is called again and again if the
|
||||
result was 0; n_run tells the pixel function to return alternative results */
|
||||
int n_run; /* num of run, if run_2 critical pattern get other results */
|
||||
/* used for 2nd try, pixel uses slower filter function etc. */
|
||||
List dblist; /* list of boxes loaded from the character database */
|
||||
} tmp;
|
||||
struct { /* results */
|
||||
List boxlist; /* store every object in a box, which contains */
|
||||
/* the characteristics of the object (see struct box) */
|
||||
List linelist; /* recognized text lines after recognition */
|
||||
|
||||
struct tlines lines; /* used to access line data (statistics) */
|
||||
/* here the positions (frames) of lines are */
|
||||
/* stored for further use */
|
||||
int avX,avY; /* average X,Y (avX=sumX/numC) */
|
||||
int sumX,sumY,numC; /* sum of all X,Y; num chars */
|
||||
} res;
|
||||
struct { /* configuration */
|
||||
int cs; /* critical grey value (pixel<cs => black pixel) */
|
||||
/* range: 0..255, 0 means autodetection */
|
||||
int spc; /* spacewidth/dots (0 = autodetect); default value: 0 */
|
||||
int mode; /* operation modes; default value: 0 */
|
||||
/* operation mode (see --help) */
|
||||
int dust_size; /* dust size; default value: 10 */
|
||||
int only_numbers; /* numbers only; default value: 0 */
|
||||
int verbose; /* verbose mode; default value: 0 */
|
||||
/* verbose option (see --help) */
|
||||
FORMAT out_format; /* output format; default value: ISO8859_1*/
|
||||
char *lc; /* debuglist of chars (_ = not recognized chars) */
|
||||
/* default value: "_" */
|
||||
char *db_path; /* pathname for database; default value: NULL */
|
||||
char *cfilter; /* char filter; default value: NULL, ex: "A-Za-z" */
|
||||
/* limit of certainty where chars are accepted as identified */
|
||||
int certainty; /* in units of 100 (percent); 0..100; default 95 */
|
||||
char *unrec_marker; /* output this string for every unrecognized char */
|
||||
} cfg;
|
||||
} job_t;
|
||||
|
||||
/* initialize job structure */
|
||||
void job_init(job_t *job);
|
||||
|
||||
/* free job structure
 * NOTE(review): whether this also frees *job itself or only what it owns
 * is not visible from this header -- confirm. */
|
||||
void job_free(job_t *job);
|
||||
|
||||
/*FIXME jb: remove JOB; */
/* global job pointer, defined in another translation unit */
|
||||
extern job_t *JOB;
|
||||
|
||||
/* calculate the overlap of the line (0-1) with black points
|
||||
* by recursive bisection
|
||||
* (possibly search for pixels in the neighbourhood dx,dy as error tolerance) (switchable) ???
|
||||
* MidPoint Line Algorithm (Bresenham) Foley: ComputerGraphics better?
|
||||
* will be replaced by vector functions
|
||||
*
* Parameter order here is point-wise: (x0, y0) then (x1, y1).
*/
|
||||
|
||||
/* straight line y=dy/dx*x+b, implicit form d=F(x,y)=dy*x-dx*y+b*dx=0
|
||||
* incremental: y(i+1)=m*(x(i)+1)+b, F(x+1,y+1)=f(F(x,y)) */
|
||||
int get_line(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
|
||||
int get_line2(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
|
||||
|
||||
/* look for white 0x02 or black 0x01 dots (0x03 = white+black)
 * Parameter order is range-wise: x0, x1 first, then y0, y1 -- unlike
 * get_line(), which takes (x0, y0, x1, y1).
 * Presumably mask selects which colours to look for, using the bit values
 * above -- TODO confirm against the implementation. */
|
||||
char get_bw(int x0, int x1, int y0, int y1,
|
||||
pix *p, int cs,int mask);
|
||||
|
||||
/* look for black crossing a line x0,y0,x1,y1
|
||||
* follow line and count crossings ([white]-black-transitions)
|
||||
* Note: the arguments are passed as x0, x1, y0, y1 (x pair first).
*/
|
||||
int num_cross(int x0, int x1, int y0, int y1,
|
||||
pix *p, int cs);
|
||||
|
||||
/* memory allocation with error checking; takes the same arguments as
 * realloc().
 * NOTE(review): what happens on allocation failure is not visible from this
 * header -- confirm before relying on a non-NULL result. */
|
||||
void *xrealloc(void *ptr, size_t size);
|
||||
|
||||
/* follow a line x0,y0,x1,y1 recording locations of transitions,
|
||||
* return count of transitions
|
||||
* The transitions are recorded in *path (see path_t).
* Note: the arguments are passed as x0, x1, y0, y1 (x pair first).
*/
|
||||
int follow_path(int x0, int x1, int y0, int y1, pix *p, int cs, path_t *path);
|
||||
|
||||
/* -------------------------------------------------------------
|
||||
* mark edge-points
|
||||
* - first move forward until b/w-edge
|
||||
* - more than 2 pixel?
|
||||
* - loop around
|
||||
* - if forward pixel : go up, rotate right
|
||||
* - if forward no pixel : rotate left
|
||||
* - stop if found first 2 pixel in same order
|
||||
* using a follow-the-right-hand-wall strategy
|
||||
* --------------------------------------------------------------
|
||||
* turmite game: inp: start-x,y, rule r_black=UP,r_white=RIght until border
|
||||
* out: last-position
|
||||
* while doing so, count steps, dead ends, xmax, ymax and the upper-right,
* lower-right, upper-left and lower-left corners
|
||||
* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
*
|
||||
* is this the right place for declaration?
|
||||
*
* x and y are pointers: start position in, last position out (see above).
* Presumably rw and rb are the rules for white and black pixels
* (r_white / r_black above) -- TODO confirm.
*/
|
||||
void turmite(pix *p, int *x, int *y,
|
||||
int x0, int x1, int y0, int y1, int cs, int rw, int rb);
|
||||
|
||||
/* test if points are connected via t-pixel (recursive!)
 * Parameter order is point-wise: (x0, y0) then (x1, y1). */
|
||||
int joined(pix *p, int x0, int y0, int x1, int y1, int cs);
|
||||
|
||||
/* move from x,y in direction r until a pixel is hit or l steps are done
|
||||
* return number of steps
|
||||
* r is one of the DIRECTION codes (UP, DO, RI, LE).
* Presumably col selects the pixel colour that stops the walk -- TODO confirm.
*/
|
||||
int loop(pix *p, int x, int y, int l, int cs, int col, DIRECTION r);
|
||||
|
||||
/* capacity of the hole[] table in holes_t */
#define MAX_HOLES 3
|
||||
/* Table of up to MAX_HOLES holes; filled through the holes argument of
 * num_hole(). */
typedef struct list_holes {
|
||||
int num; /* number of holes, initialize with 0 */
|
||||
struct hole_s {
|
||||
int size,x,y,x0,y0,x1,y1; /* size, start point, outer rectangle */
|
||||
} hole[MAX_HOLES];
|
||||
} holes_t;
|
||||
|
||||
/* look for white holes surrounded by black points
|
||||
* at the moment: a white point with black in all four directions
|
||||
* Note: the arguments are passed as x0, x1, y0, y1 (x pair first).
* NOTE(review): presumably returns the hole count and records details in
* *holes -- confirm, including whether holes may be NULL.
*/
|
||||
int num_hole(int x0, int x1, int y0, int y1, pix *p, int cs, holes_t *holes);
|
||||
|
||||
/* count for black nonconnected objects --- used for i,auml,ouml,etc. */
|
||||
int num_obj(int x0, int x1, int y0, int y1, pix *p, int cs);
|
||||
|
||||
int distance( pix *p1, struct box *box1, /* box-frame */
|
||||
pix *p2, struct box *box2, int cs);
|
||||
|
||||
/* call the OCR engine ;) */
|
||||
/* char whatletter(struct box *box1,int cs); */
|
||||
|
||||
/* declared in pixel.c */
|
||||
/* getpixel() was pixel() but it may collide with netpnm pixel declaration */
|
||||
int getpixel(pix *p, int x, int y);
|
||||
int marked(pix *p, int x, int y);
|
||||
void put(pix * p, int x, int y, int ia, int io);
|
||||
|
||||
char* PNMToText(char* buf, long size, char *outputformat, long graylevel, long dustsize, long spacewidthdots, long certainty);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
#endif
|
||||
|
||||
#endif /* __GOCR_H__ */
|
||||
90
ActiveX/ASCOfficeUtils/GOCR/headers/list.h
Normal file
90
ActiveX/ASCOfficeUtils/GOCR/headers/list.h
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
*/
|
||||
|
||||
#ifndef GOCR_LIST_H
|
||||
#define GOCR_LIST_H
|
||||
|
||||
#ifdef DEBUG
|
||||
#define g_debug(a) a
|
||||
#else
|
||||
#define g_debug(a)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Structures
|
||||
*/
|
||||
|
||||
/* One node of the doubly linked list; data is an opaque payload pointer. */
typedef struct element Element;
struct element {
  Element *next;
  Element *previous;
  void *data;
};

/* List head with two embedded sentinel nodes. */
typedef struct list List;
struct list {
  Element start;      /* simplifies for(each_element) { ...  */
  Element stop;       /* ... list_del() ... } v0.41          */
  Element **current;  /* for(each_element), indexed by level */
  int n;              /* number of elements                  */
  int level;          /* level of nested fors                */
};
|
||||
|
||||
/*
|
||||
* Functions
|
||||
*/
|
||||
|
||||
void list_init ( List *l );
|
||||
int list_app ( List *l, void *data );
|
||||
int list_ins ( List *l, void *data_after, void *data);
|
||||
Element*list_element_from_data ( List *l, void *data );
|
||||
int list_del ( List *l, void *data );
|
||||
void list_free ( List *l );
|
||||
int list_and_data_free ( List *l, void (*free_data)(void *data));
|
||||
int list_higher_level ( List *l );
|
||||
void list_lower_level ( List *l );
|
||||
void * list_next ( List *l, void *data );
|
||||
void * list_prev ( List *l, void *data );
|
||||
void list_sort ( List *l, int (*compare)(const void *, const void *) );
|
||||
|
||||
#define list_empty(l) ((l)->start.next == &(l)->stop ? 1 : 0)
|
||||
#define list_get_header(l) ((l)->start.next->data)
|
||||
#define list_get_tail(l) ((l)->stop.previous->data)
|
||||
#define list_get_current(l) ((l)->current[(l)->level]->data)
|
||||
#define list_get_cur_prev(l) ((l)->current[(l)->level]->previous == NULL ? \
|
||||
NULL : (l)->current[(l)->level]->previous->data )
|
||||
#define list_get_cur_next(l) ((l)->current[(l)->level]->next == NULL ? \
|
||||
NULL : (l)->current[(l)->level]->next->data )
|
||||
#define list_total(l) ((l)->n)
|
||||
|
||||
#define for_each_data(l) \
|
||||
if (list_higher_level(l) == 0) { \
|
||||
for ( ; (l)->current[(l)->level] \
|
||||
&& (l)->current[(l)->level]!=&(l)->stop; (l)->current[(l)->level] = \
|
||||
(l)->current[(l)->level]->next ) {
|
||||
|
||||
|
||||
#define end_for_each(l) \
|
||||
} \
|
||||
list_lower_level(l); \
|
||||
}
|
||||
|
||||
#endif
|
||||
63
ActiveX/ASCOfficeUtils/GOCR/headers/ocr0.h
Normal file
63
ActiveX/ASCOfficeUtils/GOCR/headers/ocr0.h
Normal file
@@ -0,0 +1,63 @@
|
||||
#ifndef _OCR0_H
|
||||
#define _OCR0_H
|
||||
#include "pgm2asc.h"
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
- functions with thousand of lines make the compilation very slow
|
||||
therefore the ocr0-function is split into subfunctions
|
||||
- shared data used often in ocr0-subroutines are stored
|
||||
in ocr0_shared structure.
|
||||
* ------------------------------------------------------------ */
|
||||
|
||||
typedef struct ocr0_shared { /* shared variables and properties */
|
||||
|
||||
struct box *box1; /* box in whole image */
|
||||
pix *bp; /* extracted temporarly box, cleaned */
|
||||
int cs; /* global threshold value (gray level) */
|
||||
|
||||
/* ToDo: or MACROS: X0 = box1->x0 */
|
||||
int x0, x1, y0, y1; /* box coordinates related to box1 */
|
||||
int dx, dy; /* size of box */
|
||||
int hchar, gchar; /* relation to m1..m4 */
|
||||
int aa[4][4]; /* corner points, see xX (x,y,dist^2,vector_idx) v0.41 */
|
||||
holes_t holes; /* list of holes (max MAX_HOLES) */
|
||||
|
||||
} ocr0_shared_t;
|
||||
|
||||
/* tests for umlaut */
|
||||
int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier);
|
||||
/* detect chars */
|
||||
wchar_t ocr0(struct box *box1, pix *b, int cs);
|
||||
/* detect numbers */
|
||||
wchar_t ocr0n(ocr0_shared_t *sdata);
|
||||
|
||||
/* square of an integer (plain int arithmetic, no overflow check) */
static int sq(int x)
{
  const int squared = x * x;
  return squared;
}
|
||||
|
||||
/*
|
||||
* go from vector j1 to vector j2 and measure maximum deviation of
|
||||
* the steps from the line connecting j1 and j2
|
||||
* return the squared maximum distance
|
||||
* in units of the box size times 1024
|
||||
*/
|
||||
int line_deviation( struct box *box1, int j1, int j2 );
|
||||
|
||||
/*
|
||||
* search vectors between j1 and j2 for nearest point a to point r
|
||||
* example:
|
||||
*
|
||||
* r-> $$...$$ $ - mark vectors
|
||||
* @@$..@@ @ - black pixels
|
||||
* @@$..@@ . - white pixels
|
||||
* @@@@.$@
|
||||
* a-> @@$@$@@
|
||||
* @$.@@@@
|
||||
* @@..$@@
|
||||
* @@..$@@
|
||||
* j1 --> $$...$$ <-- j2
|
||||
*
|
||||
* ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} statt rx,ry?
|
||||
* j1 and j2 must be in the same frame
|
||||
* return aa?
|
||||
*/
|
||||
int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry);
|
||||
#endif
|
||||
3
ActiveX/ASCOfficeUtils/GOCR/headers/ocr1.h
Normal file
3
ActiveX/ASCOfficeUtils/GOCR/headers/ocr1.h
Normal file
@@ -0,0 +1,3 @@
|
||||
/* #include "pgm2asc.h" */
|
||||
#include "pnm.h"
|
||||
/* wchar_t ocr1(struct box *box1, pix *b, int cs); */
|
||||
23
ActiveX/ASCOfficeUtils/GOCR/headers/otsu.h
Normal file
23
ActiveX/ASCOfficeUtils/GOCR/headers/otsu.h
Normal file
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/*======================================================================*/
|
||||
/* OTSU global thresholding routine */
|
||||
/* takes a 2D unsigned char array pointer, number of rows, and */
|
||||
/* number of cols in the array. returns the value of the threshold */
|
||||
/*======================================================================*/
|
||||
int
|
||||
otsu (unsigned char *image, int rows, int cols, int x0, int y0, int dx, int dy, int vvv);
|
||||
|
||||
|
||||
/*======================================================================*/
|
||||
/* thresholding the image (set threshold to 128+32=160=0xA0) */
|
||||
/* now we have a fixed thresholdValue good to recognize on gray image */
|
||||
/* - so lower bits can be used for other things (bad design?) */
|
||||
/*======================================================================*/
|
||||
int
|
||||
thresholding (unsigned char *image, int rows, int cols, int x0, int y0, int dx, int dy, int thresholdValue);
|
||||
37
ActiveX/ASCOfficeUtils/GOCR/headers/output.h
Normal file
37
ActiveX/ASCOfficeUtils/GOCR/headers/output.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address */
|
||||
|
||||
#ifndef OUTPUT_H
|
||||
#define OUTPUT_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "pnm.h"
|
||||
#include "gocr.h"
|
||||
#include "list.h"
|
||||
|
||||
void out_b(struct box *px, pix *b, int x0, int y0, int dx, int dy, int cs );
|
||||
void out_x(struct box *px);
|
||||
void out_x2(struct box *box1,struct box *box2);
|
||||
int output_list(job_t *job);
|
||||
int debug_img(char *fname, struct job_s *job, int opt);
|
||||
|
||||
|
||||
#endif
|
||||
9
ActiveX/ASCOfficeUtils/GOCR/headers/pcx.h
Normal file
9
ActiveX/ASCOfficeUtils/GOCR/headers/pcx.h
Normal file
@@ -0,0 +1,9 @@
|
||||
|
||||
#include "pnm.h"
|
||||
|
||||
void readpcx(char *name,pix *p,int vvv);
|
||||
|
||||
/* write 8bit palette no RLE, ToDo: obsolete? */
|
||||
void writebmp(char *name,pix p,int vvv);
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
110
ActiveX/ASCOfficeUtils/GOCR/headers/pgm2asc.h
Normal file
110
ActiveX/ASCOfficeUtils/GOCR/headers/pgm2asc.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2006 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
*/
|
||||
|
||||
#ifndef PGM2ASC_H
|
||||
#define PGM2ASC_H 1
|
||||
|
||||
#include "pnm.h"
|
||||
#include "output.h"
|
||||
#include "list.h"
|
||||
#include "unicode.h"
|
||||
|
||||
#define pixel_at(pic, xx, yy) (pic).p[(xx)+((yy)*((pic).x))]
|
||||
#define pixel_atp(pic, xx, yy) (pic)->p[(xx)+((yy)*((pic)->x))]
|
||||
|
||||
#ifndef HAVE_WCHAR_H
|
||||
wchar_t *wcschr (const wchar_t *wcs, wchar_t wc);
|
||||
wchar_t *wcscpy (wchar_t *dest, const wchar_t *src);
|
||||
size_t wcslen (const wchar_t *s);
|
||||
#endif
|
||||
#ifndef HAVE_WCSDUP
|
||||
wchar_t * wcsdup (const wchar_t *WS); /* its a gnu extension */
|
||||
#endif
|
||||
|
||||
/* declared in pgm2asc.c */
|
||||
/* set alternate chars and its weight, called from the engine
|
||||
if a char is recognized to (weight) percent */
|
||||
int setas(struct box *b, char *as, int weight); /* string + xml */
|
||||
int setac(struct box *b, wchar_t ac, int weight); /* wchar */
|
||||
|
||||
/* for qsort() call */
|
||||
int intcompare (const void *vr, const void *vs);
|
||||
|
||||
/* declared in box.c */
|
||||
int box_gt(struct box *box1, struct box *box2);
|
||||
int reset_box_ac(struct box *box); /* reset and free char table */
|
||||
struct box *malloc_box( struct box *inibox ); /* alloc memory for a box */
|
||||
int free_box( struct box *box ); /* free memory of a box */
|
||||
int copybox( pix *p, int x0, int y0, int dx, int dy, pix *b, int len);
|
||||
int reduce_vectors ( struct box *box1, int mode );
|
||||
int merge_boxes( struct box *box1, struct box *box2 );
|
||||
int cut_box( struct box *box1);
|
||||
|
||||
|
||||
/* declared in database.c */
|
||||
int load_db(void);
|
||||
wchar_t ocr_db(struct box *box1);
|
||||
|
||||
/* declared in detect.c */
|
||||
int detect_lines1(pix * p, int x0, int y0, int dx, int dy);
|
||||
int detect_lines2(pix *p,int x0,int y0,int dx,int dy,int r);
|
||||
int detect_rotation_angle(job_t *job);
|
||||
int detect_text_lines(pix * pp, int mo);
|
||||
int adjust_text_lines(pix * pp, int mo);
|
||||
int detect_pictures(job_t *job);
|
||||
|
||||
/* declared in lines.c */
|
||||
void store_boxtree_lines( int mo );
|
||||
/* free memory for internal stored textlines.
|
||||
* Needs to be called _after_ having retrieved the text.
|
||||
* After freeing, no call to getTextLine is possible any
|
||||
* more
|
||||
*/
|
||||
void free_textlines( void );
|
||||
|
||||
/* get result of ocr for a given line number.
|
||||
* If the line is out of range, the function returns 0,
|
||||
* otherwise a pointer to a complete line.
|
||||
*/
|
||||
const char *getTextLine( int );
|
||||
|
||||
/* append a string (s1) to the string buffer (buffer) of length (len)
|
||||
* if buffer is too small or len==0 realloc buffer, len+=512
|
||||
*/
|
||||
char *append_to_line(char *buffer, const char *s1, int *len);
|
||||
|
||||
/* declared in remove.c */
|
||||
int remove_dust( job_t *job );
|
||||
int remove_pictures( job_t *job);
|
||||
int remove_melted_serifs( pix *pp );
|
||||
int remove_rest_of_dust();
|
||||
int smooth_borders( job_t *job );
|
||||
|
||||
/* declared in pixel.c */
|
||||
int marked(pix * p, int x, int y);
|
||||
int pixel(pix *p, int x, int y);
|
||||
void put(pix * p, int x, int y, int ia, int io);
|
||||
|
||||
/* start ocr on a image in job.src.p */
|
||||
int pgm2asc(job_t *job);
|
||||
|
||||
#endif
|
||||
37
ActiveX/ASCOfficeUtils/GOCR/headers/pnm.h
Normal file
37
ActiveX/ASCOfficeUtils/GOCR/headers/pnm.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/* Handle PNM-files Dez98 JS
|
||||
* 0,0 = left up
|
||||
* PAM-formats
|
||||
* PAM any P7
|
||||
* PNM-formats
|
||||
* PGM gray ASCII=P2 RAW=P5 dx dy col gray
|
||||
* PPM RGB ASCII=P3 RAW=P6 dx dy col RGB
|
||||
* PBM B/W ASCII=P1 RAW=P4 dx dy bitmap
|
||||
*/
|
||||
|
||||
#ifndef GOCR_PNM_H
|
||||
#define GOCR_PNM_H 1
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/* In-memory image: raw pixel buffer plus its dimensions. */
typedef struct pixmap pix;
struct pixmap {
  unsigned char *p;   /* pointer of image buffer (pixmap) */
  int x;              /* xsize */
  int y;              /* ysize */
  int bpp;            /* bytes per pixel: 1=gray 3=rgb */
};
|
||||
|
||||
/* return 1 on multiple images (holding file open), 0 else */
|
||||
int readpgm(char *name, pix *p, int vvv);
|
||||
/* return 1 on multiple images (holding file open), 0 else */
|
||||
int readpgmFromBuffer(char* buffer, long size, pix *p);
|
||||
|
||||
/* write pgm-map to pnm-file */
|
||||
int writepgm(char *nam, pix *p);
|
||||
int writepbm(char *nam, pix *p);
|
||||
int writeppm(char *nam, pix *p); /* use lowest 3 bits for farbcoding */
|
||||
|
||||
/* ----- count colors ------ create histogram ------- */
|
||||
void makehisto(pix p, unsigned col[256], int vvv);
|
||||
|
||||
#endif
|
||||
42
ActiveX/ASCOfficeUtils/GOCR/headers/progress.h
Normal file
42
ActiveX/ASCOfficeUtils/GOCR/headers/progress.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
---------------------- progress output ----------------------
|
||||
output progress for GUIs to a pipe
|
||||
format: "counter_name" counter maxcounter time estimated_time \r|\n
|
||||
*/
|
||||
#ifndef GOCR_PROGRESS_H
|
||||
#define GOCR_PROGRESS_H "Oct06"
|
||||
#include <time.h>
|
||||
|
||||
/* initialization of progress output, fname="<fileID>","<filename>","-" */
|
||||
int ini_progress(char *fname);
|
||||
|
||||
/* ToDo: add by open_* and close_* */
|
||||
/* place to store values for progress calculation, called often, but
|
||||
* don't call systime so often
|
||||
*/
|
||||
/* Bookkeeping for one progress meter. */
struct progress_counter {
  const char *name;       /* name of counter */
  int lastprintcount;     /* last counter printed for extrapolation */
  int maxcount;           /* max counter */
  int numskip;            /* num of counts to skip before timecall 0..maxcount */
  time_t starttime;       /* start time of this counter */
  time_t lastprinttime;   /* last time printed in seconds */
};
typedef struct progress_counter progress_counter_t;
|
||||
|
||||
/* progress output p1=main_progress_0..100% p2=sub_progress_0..100% */
|
||||
/* ToDo: improved_progress: counter, maxcount(ini), counter_name(ini),
|
||||
* printinterval=10 # time before printing out progressmeter
|
||||
* *numskip=1 # if (counter-lastprintcounter<numskip) return; gettime() ...
|
||||
* *startutime, *lastprintutime, *lastprintcounter # numskip*=2 or /=2
|
||||
* only 1output/10s, + estimated endtime (test on pixelfields)
|
||||
* to stderr by default? remove subprogress, ini_progress? rm_progress?
|
||||
* test on tcl
|
||||
*/
|
||||
progress_counter_t *open_progress(int maxcount, const char *name);
|
||||
/* free counter */
|
||||
int close_progress(progress_counter_t *counter);
|
||||
/* output progress for pc */
|
||||
int progress(int counter, progress_counter_t *pc);
|
||||
/* --------------------- end of progress output ---------------------- */
|
||||
#endif
|
||||
6
ActiveX/ASCOfficeUtils/GOCR/headers/tga.h
Normal file
6
ActiveX/ASCOfficeUtils/GOCR/headers/tga.h
Normal file
@@ -0,0 +1,6 @@
|
||||
|
||||
#include "pnm.h"
|
||||
|
||||
void readtga(char *name,pix *p,int mode); // mode: 0=gray 1=RGB
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
1264
ActiveX/ASCOfficeUtils/GOCR/headers/unicode.h
Normal file
1264
ActiveX/ASCOfficeUtils/GOCR/headers/unicode.h
Normal file
File diff suppressed because it is too large
Load Diff
36
ActiveX/ASCOfficeUtils/GOCR/include/config.h
Normal file
36
ActiveX/ASCOfficeUtils/GOCR/include/config.h
Normal file
@@ -0,0 +1,36 @@
|
||||
/* include/config.h.in. Generated automatically from configure.in by autoheader. */
|
||||
|
||||
/* Define to empty if the keyword does not work. */
|
||||
#undef const
|
||||
|
||||
/* Define if the setvbuf function takes the buffering type as its second
|
||||
argument and the buffer pointer as the third, as on System V
|
||||
before release 3. */
|
||||
#undef SETVBUF_REVERSED
|
||||
|
||||
/* Define if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Define if you have the gettimeofday function. */
|
||||
#undef HAVE_GETTIMEOFDAY
|
||||
|
||||
/* Define if you have the popen function. */
|
||||
#undef HAVE_POPEN
|
||||
|
||||
/* Define if you have the wcschr function. */
|
||||
#undef HAVE_WCSCHR
|
||||
|
||||
/* Define if you have the wcsdup function. */
|
||||
#define HAVE_WCSDUP
|
||||
|
||||
/* Define if you have the <pam.h> header file. */
|
||||
#undef HAVE_PAM_H
|
||||
|
||||
/* Define if you have the <pnm.h> header file. */
|
||||
#undef HAVE_PNM_H
|
||||
|
||||
/* Define if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define if you have the <wchar.h> header file. */
|
||||
#define HAVE_WCHAR_H
|
||||
2
ActiveX/ASCOfficeUtils/GOCR/include/version.h
Normal file
2
ActiveX/ASCOfficeUtils/GOCR/include/version.h
Normal file
@@ -0,0 +1,2 @@
|
||||
#define version_string "0.48"
|
||||
#define release_string "20090802"
|
||||
846
ActiveX/ASCOfficeUtils/GOCR/src/barcode.c
Normal file
846
ActiveX/ASCOfficeUtils/GOCR/src/barcode.c
Normal file
@@ -0,0 +1,846 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2009 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for email address
|
||||
|
||||
ToDo:
|
||||
- transform special xml bar code symbols (<>&) to xml entities (&lt;&gt;&amp;)
|
||||
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
/* #include <math.h> -- we do not want unnecessary dependencies */
|
||||
#include "pgm2asc.h"
|
||||
#include "gocr.h"
|
||||
#include "pnm.h"
|
||||
|
||||
#ifndef DO_DEBUG /* can be defined outside (configure --with-debug) */
|
||||
#define DO_DEBUG 0 /* 0 is the default */
|
||||
#endif
|
||||
|
||||
#undef g_debug
|
||||
#if DO_DEBUG
|
||||
# define g_debug(a) if (JOB->cfg.verbose&1) { a }
|
||||
#else
|
||||
# define g_debug(a)
|
||||
#endif
|
||||
|
||||
/*
|
||||
detect barcode and add a XML-string to the box (obj-pointer, ToDo)
|
||||
ToDo: barcode-output stderr->stdout
|
||||
*/
|
||||
|
||||
/* square of a floating point value */
double sqr(double x)
{
  const double squared = x * x;
  return squared;
}
|
||||
|
||||
/* ----------------------------- code128 ---------------------------- *
|
||||
* "BSBSBS", B=Bar, S=Space, better using 2*6=12bit-integer? */
|
||||
#define Num128 107
/* Element widths of every Code 128 symbol value, written as digit strings
 * in the order bar,space,bar,space,bar,space ("BSBSBS").
 * 103..105 are the start symbols A/B/C, 106 is the 7-element stop symbol,
 * and the extra entry at index Num128 is the "no match" placeholder. */
const char *code128[Num128+1]={ /* can be generated by an algorithm? */
  /*   0 */ "212222", "222122", "222221", "121223",
  /*   4 */ "121322", "131222", "122213", "122312",
  /*   8 */ "132212", "221213", "221312", "231212",
  /*  12 */ "112232", "122132", "122231", "113222",
  /*  16 */ "123122", "123221", "223211", "221132",
  /*  20 */ "221231", "213212", "223112", "312131",
  /*  24 */ "311222", "321122", "321221", "312212",
  /*  28 */ "322112", "322211", "212123", "212321",
  /*  32 */ "232121", "111323", "131123", "131321",
  /*  36 */ "112313", "132113", "132311", "211313",
  /*  40 */ "231113", "231311", "112133", "112331",
  /*  44 */ "132131", "113123", "113321", "133121",
  /*  48 */ "313121", "211331", "231131", "213113",
  /*  52 */ "213311", "213131", "311123", "311321",
  /*  56 */ "331121", "312113", "312311", "332111",
  /*  60 */ "314111", "221411", "431111", "111224",
  /*  64 */ "111422", "121124", "121421", "141122",
  /*  68 */ "141221", "112214", "112412", "122114",
  /*  72 */ "122411", "142112", "142211", "241211",
  /*  76 */ "221114", "413111", "241112", "134111",
  /*  80 */ "111242", "121142", "121241", "114212",
  /*  84 */ "124112", "124211", "411212", "421112",
  /*  88 */ "421211", "212141", "214121", "412121",
  /*  92 */ "111143", "111341", "131141", "114113",
  /*  96 */ "114311", "411113", "411311", "113141",
  /* 100 */ "114131", "311141", "411131", "211412",
  /* 104 */ "211214", "211232", "2331112",
  /* 107 */ "???"
};
|
||||
|
||||
/*
|
||||
code128: see code128.tex by Petr Olsak (108 codes)
|
||||
quiet_zone: size=10 (before and after code128)
|
||||
num_bars=3*(start+chars[N]+crc+stop)+1
|
||||
B=bar S=space char=BSBSBS (size=11), stop=BSBSBSB (size=11+2)
|
||||
Width: Bar,Space=1,[2,3,4] char=11 code=11*(N+3)+2 sumB=even,sumS=odd
|
||||
startA="211412"=103 startB="211214"=104 startC="211232"=105(2dec_digits)
|
||||
mode/code 0..95 96 97 98 99 100 101 102 103 104 105 106
|
||||
1=A x20-x5f,0-x20 F3 F2 uB mC mB F4 F1 ^A ^B ^C $
|
||||
2=B x20-x7f F3 F2 uA mC F4 mA F1 ^A ^B ^C $
|
||||
3=C "00"-"95" "96" "97" "98" "99" mB mA F1 ^A ^B ^C $
|
||||
uA,uB: switch mode for next char, mA,mB: switch mode permanently
|
||||
crc=(start+1*char1+2*char2+3*char3+...+N*charN) mod 103
|
||||
$=stop="2331112"=106 (4bars, with=13) => start/end==211
|
||||
return num of chars or string
|
||||
|
||||
size B+W even-variants odd-variants num_codes
|
||||
11 = 8+3 = (1+3+4,2+2+4,2+3+3)+(1+1+1) => (6+3+3)*(1) = 12
|
||||
= 6+5 = (1+1+4,1+2+3,2+2+2)+(1+1+3,1+2+2) => (3+6+1)*(3+3) = 60
|
||||
= 4+7 = (1+1+2)+(1+2+4,1+3+3,2+2+3) => (3)*(6+3+3) = 36
|
||||
sum = 108
|
||||
*/
|
||||
/* example: barcode -E -e 128c -b 1434600120000884 >a.eps */
|
||||
/* example: barcode -E -e 128b -b 14Test41 >a.eps */
|
||||
/* example: barcode -E -e 128raw -b 105 17 14 30 >a.eps */
|
||||
|
||||
char *decode_code128(int *wb, int num_bars){
|
||||
int i, w, i1, i2, i3=0, i4, i5=0, crc, mode=1;
|
||||
double dww, dw, err, min_err; char cc, *buf;
|
||||
char *result=NULL; /* malloc and store the result */
|
||||
|
||||
for(w=i=0;i<2*num_bars-1;i++) w+=wb[i]; /* summ all bars and spaces */
|
||||
|
||||
/* test code128 characteristics, ToDo: look for correct start/stop 211 seq. */
|
||||
if ((num_bars-1)%3!=0 || num_bars<10 || w<11*(num_bars-1)/3+2)
|
||||
return 0;
|
||||
g_debug(fprintf(stderr," code128 b%d s%d b%d\n",wb[0],wb[1],wb[2]);)
|
||||
if (3*wb[0]<4* wb[1]
|
||||
|| 3*wb[0]<4* wb[2]
|
||||
|| 4*wb[0]<3*(wb[1]+wb[2])
|
||||
|| 3*wb[0]>4*(wb[1]+wb[2])) return 0; /* 211 */
|
||||
dw=3.0*w/((num_bars-1)*11+6);
|
||||
/* get enough memory for all digits in longest mode C */
|
||||
buf =(char *) malloc( (num_bars-7)/3*2+1); if (!buf) return result;
|
||||
result=(char *) malloc(256+(num_bars-7)/3*2+1);
|
||||
|
||||
dww=crc=0;
|
||||
for(i4=i1=0;i1<(num_bars-1)/3;i1++) {
|
||||
for(min_err=1e8,i3=Num128,i5=0;i5<Num128;i5++){ /* get best fit */
|
||||
for(err=i2=0;i2<6;i2++) err+=sqr(code128[i5][i2]-'0'-wb[i1*6+i2]/dw);
|
||||
if (err<min_err) { min_err=err; i3=i5; }
|
||||
} dww+=min_err;
|
||||
g_debug(fprintf(stderr,"\n %7s %3d err=%.3f ",code128[i3],i3,min_err);)
|
||||
if(i3<Num128){ /* valid symbol */
|
||||
if(i1==0){ if (i3>102 && i3<106) mode=i3-103+1; crc=i3; } /* start */
|
||||
if(i1>0 && i1<(num_bars-1)/3-2){
|
||||
crc+=i3*(i1); cc=0; /* first * 1 + second * 2 + third * 3 ... */
|
||||
i5=((mode>3)?mode>>2:mode&3); mode&=3; /* mode can be modified now */
|
||||
switch (i5) { /* mode=1..3=modeA..modeC */
|
||||
case 1: if (i3>=64 && i3<96) cc=i3-64; /* modeA: x20-x5f,0-x20 */
|
||||
else cc=i3+32;
|
||||
if (i3==101) mode=1; /* switch to mode A */
|
||||
if (i3== 99) mode=3; /* switch to mode C */
|
||||
if (i3== 98) mode|=2<<2; /* shift to mode B */
|
||||
break;
|
||||
case 2: cc=i3+32; /* modeB: x20-x7f */
|
||||
if (i3==100) mode=2; /* switch to mode B */
|
||||
if (i3== 99) mode=3; /* switch to mode C */
|
||||
if (i3== 98) mode|=1<<2; /* shift to mode A */
|
||||
break;
|
||||
case 3:
|
||||
if (i3==101) mode=1; /* switch to mode A */
|
||||
if (i3==100) mode=2; /* switch to mode B */
|
||||
}
|
||||
if (i5==3) { buf[i4]='0'+i3/10; i4++;
|
||||
buf[i4]='0'+i3%10; i4++; } /* modeC: two digits */
|
||||
else {
|
||||
if (cc>=0x20 && i3<=0x7f) { buf[i4]=cc; i4++; } /* modeA+B: one digit */
|
||||
if (cc>=0 && cc< 0x20) { buf[i4]='^'; i4++;
|
||||
buf[i4]=cc+'@'; i4++; }
|
||||
}
|
||||
}
|
||||
if(i1==(num_bars-1)/3-2){ crc=(crc+103-i3)%103; }
|
||||
if(i1==(num_bars-1)/3-1){ if(i3!=106) i3=-1; } /* stop code */
|
||||
mode &= 3; /* remove shift */
|
||||
}
|
||||
else fprintf(stderr," %s=%02d? ",buf,i5);
|
||||
}
|
||||
buf[i4]=0; /* end of string */
|
||||
if (result)
|
||||
sprintf(result,"<barcode type=\"128\" chars=\"%d\" code=\"%s\" "
|
||||
"crc=\"%d\" error=\"%.3f\" />",
|
||||
i4,buf,crc,dww/((num_bars-1)));
|
||||
free(buf);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------- UPC
|
||||
EAN 13 (UPC,(1+6+1+6+1)*2bars,size=3+6*7+5+6*7+3=95)
|
||||
EAN 8 (UPC,(1+4+1+4+1)*2bars,size=3+4*7+5+4*7+3=67)
|
||||
UPC: (10 codes)
|
||||
BSB SBSB^n SBSBS BSBS^n BSB
|
||||
bsb ...... sbsbs ...... bsb
|
||||
111 ...... 11111 ...... 111
|
||||
num_bars=2*(2*6+3) middle=SBSBS=11111 right/left=BSB=111="101"
|
||||
char: left=SBSB right=BSBS (size=7) only_dec_digits
|
||||
SS+BB = (S+S) + (B+B) => BB:SS = 5:2 or 3:4
|
||||
size ev+odd even + odd => variants
|
||||
7 = 2 + 5 = (1+1) + (1+4,2+3) => (1)*(2+2) = 4 codes
|
||||
= 4 + 3 = (1+3,2+2) + (1+3) => (2+1)*(2) = 6 codes += 10 codes
|
||||
ToDo: make it more robust
|
||||
- return error as mean deviation
|
||||
* -------------------------------------------------------------------- */
|
||||
/* example: barcode -E -e upc -b 12345678901 >a.eps # ok */
|
||||
/* example: barcode -E -e ean -b 123456789012 >a.eps # ok */
|
||||
#define NumUPC 20
/* Element widths of the UPC/EAN digits as four-element strings
 * (first n = SBSB, last n = BSBS).  Entries 0..9 are the normal
 * patterns (+0bit), entries 10..19 the mirrored ones (+1bit), and the
 * extra entry at index NumUPC is the "not found" placeholder. */
const char *codeUPC[NumUPC+1]={
  "3211",  /* 0 normal */
  "2221",  /* 1 */
  "2122",  /* 2 */
  "1411",  /* 3 */
  "1132",  /* 4 */
  "1231",  /* 5 */
  "1114",  /* 6 */
  "1312",  /* 7 */
  "1213",  /* 8 */
  "3112",  /* 9 */
  "1123",  /* 0 mirrored */
  "1222",  /* 1 */
  "2212",  /* 2 */
  "1141",  /* 3 */
  "2311",  /* 4 */
  "1321",  /* 5 */
  "4111",  /* 6 */
  "2131",  /* 7 */
  "3121",  /* 8 */
  "2113",  /* 9 */
  "????"   /* not found */
};
|
||||
|
||||
char *decode_UPC(int *wb, int num_bars){ /* ToDo: char *dest, int len */
|
||||
int i, w, i1, i2, i3, i4, i5, crc, mirrored, ean;
|
||||
double err, min_err, dw, dww=0.0; char digit;
|
||||
char *result=NULL, *buf=NULL; /* malloc and store the result */
|
||||
for(w=i=0;i<2*num_bars-1;i++) w+=wb[i];
|
||||
|
||||
dw=2.0*w/((num_bars-6)*7+2*11); /* or min(wb[]) */
|
||||
crc=0;
|
||||
if ((num_bars)%2!=0 || num_bars<10 || w<7*(num_bars-6)/2+11
|
||||
|| ((num_bars-6)/2)%2!=0) return 0; /* should be balanced */
|
||||
/* check front BSB, middle SBSBS and end BSB */
|
||||
dww=0;
|
||||
for (i=0;i<3;i++) { dww=sqr(wb[i ]/dw-1); if (dww>0.4) return 0; }
|
||||
for (i=0;i<5;i++) { dww=sqr(wb[i+ num_bars-3]/dw-1); if (dww>0.4) return 0; }
|
||||
for (i=0;i<3;i++) { dww=sqr(wb[i+2*num_bars-4]/dw-1); if (dww>0.4) return 0; }
|
||||
buf =(char *)malloc( (num_bars-6)/2+1); if (!buf) return result;
|
||||
result=(char *)malloc(256+(num_bars-6)/2+1);
|
||||
|
||||
for(ean=i5=0,i1=3;i1<2*num_bars-4;i1+=4) { /* each digit (2bars+2spaces) */
|
||||
if (i1==num_bars-3) { i1++; continue; } /* skip middle sync SBSBS */
|
||||
for (i4=NumUPC,mirrored=0,digit='?',min_err=16e8,i2=0;i2<NumUPC;i2++) {
|
||||
for (err=0,i3=0;i3<4;i3++) err+=sqr(codeUPC[i2][i3]-'0'-wb[i1+i3]/dw);
|
||||
if (err<min_err) { min_err=err; i4=i2; digit='0'+i2%10; mirrored=i2/10; }
|
||||
} dww+=min_err; crc+=(digit-'0')*((i5&1)?1:3); /* even*3+odd, last char is even */
|
||||
buf[i5++]=digit; if (i5<7) ean=(ean<<1)|mirrored;
|
||||
/* ToDo: error as deviation wb from ideal */
|
||||
g_debug(fprintf(stderr,"\nDBG: UPC digit=%c mirrored=%d err=%.3f err_m=%.3f ",
|
||||
digit,mirrored,min_err/4,dww/(i5*4));)
|
||||
}
|
||||
/* EAN has a 13th leading digit build by 3 of 6 mirorred digits */
|
||||
if (ean & 0x20) ean^=0x3f; /* UPC-E mirrored 1xxxxx => 0yyyyy */
|
||||
switch (ean) {
|
||||
case 11: ean=1; break;
|
||||
case 13: ean=2; break;
|
||||
case 14: ean=3; break;
|
||||
case 19: ean=4; break;
|
||||
case 25: ean=5; break;
|
||||
case 28: ean=6; break;
|
||||
case 21: ean=7; break;
|
||||
case 22: ean=8; break;
|
||||
case 26: ean=9; break;
|
||||
default: ean=0; /* no or invalid EAN digit or UPC-extension */
|
||||
} crc+=ean*1;
|
||||
/* ToDo: fix possible buffer OVL, complement crc */
|
||||
buf[i5]=0;
|
||||
if (result)
|
||||
sprintf(result,"<barcode type=\"UPC\" chars=\"%d\" code=\"%d%s\" "
|
||||
"crc=\"%d\" error=\"%.3f\" />",
|
||||
i5+1,ean,buf,(10-crc%10)%10,dww/((num_bars-6)*2));
|
||||
free(buf);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* EAN/UPC add-on is either 2 or 5 digits. It always starts with a
|
||||
* guard bar BSB, followed by ([digit + SB] * (N-1)) + digit. Digit is
|
||||
* SBSB. Two digit add-on's have 7 bars, and 5 digit add ons have 16.
|
||||
*/
|
||||
char *decode_UPC_addon(int *wb, int num_bars){ /* ToDo: char *dest, int len */
|
||||
int i, w, i1, i2, i3, i4, i5, digits=num_bars/3;
|
||||
double err, min_err, dw, dww=0.0; char digit;
|
||||
char *result=NULL, *buf=NULL; /* malloc and store the result */
|
||||
if (num_bars!=7 && num_bars!=16)
|
||||
return 0;
|
||||
for(w=i=0;i<2*num_bars-1;i++) w+=wb[i];
|
||||
|
||||
dw=1.0*w/(digits*7+4 + (digits-1)*2);
|
||||
/* check front BSB, and delineators SB */
|
||||
dww=0;
|
||||
for (i=0;i<2;i++) { dww=sqr(wb[i]/dw-1); if (dww>0.4) return 0; }
|
||||
dww=sqr(wb[i]*0.5/dw-1); if (dww>0.4) return 0;
|
||||
for (i=1;i<digits; i++) {
|
||||
for (i1=0; i1<2; i1++) {
|
||||
dww = sqr(wb[i*6 + 1 + i1]/dw-1);
|
||||
if (dww > 0.4) return 0;
|
||||
}
|
||||
}
|
||||
buf =(char *)malloc( digits+1); if (!buf) return result;
|
||||
result=(char *)malloc(256+digits+1);
|
||||
|
||||
for(i5=0,i1=3;i1<2*num_bars-1;i1+=6) { /* each digit (2bars+2spaces) */
|
||||
for (i4=NumUPC,digit='?',min_err=16e8,i2=0;i2<NumUPC;i2++) {
|
||||
for (err=0,i3=0;i3<4;i3++) err+=sqr(codeUPC[i2][i3]-'0'-wb[i1+i3]/dw);
|
||||
if (err<min_err) { min_err=err; i4=i2; digit='0'+i2%10; }
|
||||
}
|
||||
dww+=min_err;
|
||||
buf[i5++]=digit;
|
||||
/* ToDo: error as deviation wb from ideal */
|
||||
g_debug(fprintf(stderr,"\nDBG: UPC digit=%c err=%.3f err_m=%.3f ",
|
||||
digit, min_err/4, dww/(i5*4));)
|
||||
}
|
||||
buf[i5]=0;
|
||||
if (result)
|
||||
sprintf(result, "<barcode type=\"UPC_addon\" chars=\"%d\" code=\"%s\" "
|
||||
"error=\"%.3f\" />",
|
||||
i5, buf, dww/((num_bars-6)*2));
|
||||
free(buf);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* --------------------------------------------------------- *
|
||||
* code 3 of 9, 3 thick of 9 bars
|
||||
* BSBSBSBSB<S> size=7+3*aw aw=2(3), sumS/sumB=2/1?
|
||||
* two widths: size=1 or size=2or3, digit_width=13(16)
|
||||
* 5 bars and 4(+1) spaces per digit, last space is not used
|
||||
* with 2 (or 0) wide bars, 1 (or 3) wide spaces per digit
|
||||
* => 3 of 9 => max=9*8*7=504
|
||||
* evenBB=(0of5)+(2of5) oddSS=(1of4)+(3of4) max=44
|
||||
* ToDo: better code -...-.-.. as 046 or 083 (even,even,odd)
|
||||
*/
|
||||
#define Num39 (40+4) /* (3of9)=(2of5)(1of4)+(0of5)(3of4), (2of5)(.-..)=0..9 */
/* Code 39 pattern table: 10 bytes per entry, the character followed by
 * 5 bar widths and 4 space widths (rearranged to BBBBBSSSS), '-' = wide.
 * Entry Num39 ('?') is the fallback when no pattern matches. */
const char *code39=
  "0..--..-.."
  "1-...-.-.."
  "2.-..-.-.."
  "3--....-.."
  "4..-.-.-.."
  "5-.-...-.."
  "6.--...-.."
  "7...--.-.."
  "8-..-..-.."
  "9.-.-..-.."
  "A-...-..-."
  "B.-..-..-."
  "C--.....-."
  "D..-.-..-."
  "E-.-....-."
  "F.--....-."
  "G...--..-."
  "H-..-...-."
  "I.-.-...-."
  "J..--...-."
  "K-...-...-"
  "L.-..-...-"
  "M--......-"
  "N..-.-...-"
  "O-.-.....-"
  "P.--.....-"
  "Q...--...-"
  "R-..-....-"
  "S.-.-....-"
  "T..--....-"
  "U-...--..."
  "V.-..--..."
  "W--...-..."
  "X..-.--..."
  "Y-.-..-..."
  "Z.--..-..."
  "-...---..."
  ".-..-.-..."
  " .-.-.-..."
  "*..--.-..."
  /* (0of5)(3of4)=(.....)(3of4) store only 3of4? */
  "$.....---."
  "/.....--.-"
  "+.....-.--"
  "%......---"
  "?xxxxxxxxx";
|
||||
|
||||
/* example: barcode -E -e 39 -b 123abc | gs -sDEVICE=pnggray -r100 */
|
||||
|
||||
/* return index[] according to sorted values[], big first */
|
||||
/* Fill idx[0..len-1] with the positions of value[] ordered by descending
 * value; positions holding equal values keep their original relative order.
 * value[] itself is not modified. */
void sort(int *value, int *idx, int len){
  int pos, slot, cur;
  for (pos=0; pos<len; pos++) idx[pos]=pos;   /* identity permutation */
  for (pos=1; pos<len; pos++) {                /* stable insertion sort */
    cur=idx[pos];
    slot=pos;
    while (slot>0 && value[idx[slot-1]]<value[cur]) {
      idx[slot]=idx[slot-1];
      slot--;
    }
    idx[slot]=cur;
  }
}
|
||||
|
||||
char *decode_39(int *wb, int num_bars){ /* ToDo: char *dest, int len */
|
||||
int i, w, i1, i3, i5, crc, idx[10];
|
||||
double dw,dww,err; char *buf;
|
||||
char *result=NULL; /* malloc and store the result */
|
||||
|
||||
/* check for multiple of 5 bars and minimum start+1char+stop=15 bars */
|
||||
if ((num_bars)%5!=0 || num_bars<15) return 0;
|
||||
for(w=i=0; i<2*num_bars-1;i++ ) w+=wb[i]; /* summ width to w */
|
||||
dw=w*1.0/(16*(num_bars/5)); /* threshold = 1.5..2 */
|
||||
/* whats best and most rigorosely for dw=threshold_width?
|
||||
* - (1.5..2)*mean_width of every 5th space
|
||||
* - (1.5..2)*summ(5bars+5spaces)/(13..16)
|
||||
* - 3/4*summ(three thickest)/3
|
||||
*/
|
||||
dww=crc=0; /* error and checksum (not supported yet) */
|
||||
#if 0 /* should we exclude any non-standard code39? */
|
||||
/* check for correct start and end symbol * or NwNnWnWnN Narrow+Wide */
|
||||
i=2*num_bars-2;
|
||||
if (wb[ 0]>dw*2 || wb[ 1]<=dw*2 || wb[ 2]> dw*2) return 0;
|
||||
if (wb[i-0]>dw*2 || wb[i-1]> dw*2 || wb[i-2]<=dw*2) return 0;
|
||||
#endif
|
||||
g_debug(fprintf(stderr," code39 base=%.3f chars=%2d\n ",dw,(num_bars)/5);)
|
||||
buf =(char *)malloc( 1+(num_bars)/5); if (!buf) return result;
|
||||
result=(char *)malloc(256+(num_bars)/5);
|
||||
|
||||
for(i5=i1=0;i1<2*num_bars-3;i1+=10) {
|
||||
/* ToDo: looking for three widest bars/gaps, 0 or 2 bars, 1 or 3 spaces */
|
||||
sort(wb+i1,idx,9);
|
||||
for(err=0,i3=3;i3<9;i3++) // estimate error ??
|
||||
err+=sqr(wb[i1+idx[i3]]/dw-1.0); /* narrow=1, wide=2..3 */
|
||||
dww+=err;
|
||||
for(i3=0;i3<Num39;i3++)
|
||||
if (code39[10*i3+1+(idx[0]%2)*5+idx[0]/2]=='-'
|
||||
&& code39[10*i3+1+(idx[1]%2)*5+idx[1]/2]=='-'
|
||||
&& code39[10*i3+1+(idx[2]%2)*5+idx[2]/2]=='-') break;
|
||||
if (i5>0 && i5<num_bars/5-2) crc+=i3;
|
||||
buf[i5++]=code39[10*i3];
|
||||
|
||||
/* ToDo: check if wee have even number of black bars within 3 biggest */
|
||||
g_debug(for(i3=0;i3<9;i3++)fprintf(stderr,"%02d ",wb[i1+i3]);
|
||||
fprintf(stderr," posWide=%d,%d,%d %c err=%.3f\n ",
|
||||
idx[0],idx[1],idx[2],buf[i5-1],err/6);)
|
||||
{ int num_ws=0; // Jul09 ~codabar
|
||||
if (idx[0]&1) num_ws++;
|
||||
if (idx[1]&1) num_ws++;
|
||||
if (idx[2]&1) num_ws++;
|
||||
if ((num_ws&1)==0) { // num wide spaces must be 1 or 3
|
||||
free (buf); free(result);
|
||||
g_debug(fprintf(stderr," num wide spaces = %d, abort code39\n", num_ws);)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
buf[i5]=0;
|
||||
if (result)
|
||||
sprintf(result,"<barcode type=\"39\" chars=\"%d\" code=\"%s\" "
|
||||
"crc=\"%c\" error=\"%.3f\" />",
|
||||
i5,buf,code39[(crc%44)*10],dww/((num_bars/5)*6));
|
||||
free(buf);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* code interleaved 2 of 5 numbers-only (10 bars+spaces = 2 decimal digits)
|
||||
B B B B B <= digit1 bars=1,3,5,...
|
||||
S S S S S <= digit2 spaces=2,4,6,...
|
||||
1122447700 <= weight digit=sum(bit*weight)%11 (except 7+4=11 means 0)
|
||||
N = narrow bar, W = wide bar, n = narrow space, w = wide space
|
||||
"NnNn"+interleaved+"WnN" (odd num W, even num w)
|
||||
18 digits/inch
|
||||
see http://www.barcode-1.net/i25code.html (Jun 2009)
|
||||
minN=0.19mm=0.0075inch
|
||||
sizeN>0.02inch: sizeW=2..3*sizeN (but constant)
|
||||
sizeN<0.02inch: sizeW=2.2..3*sizeN
|
||||
quiet zones 10*sizeN or 0.25inch
|
||||
height=max(0.15*symbol_length,0.25inch)
|
||||
Len = (numChars*(2*(sizeW/sizeN)+3) + 6 + (sizeW/sizeN)) * sizeN
|
||||
*/
|
||||
#define Num25 10
/* Interleaved 2 of 5 pattern table: 6 bytes per entry, the digit followed
 * by its 5 element widths; '-' marks the two wide elements. */
const char *code25= /* is the code sorted randomly? */
  "1-...-"
  "2.-..-"
  "3--..."
  "4..-.-"
  "5-.-.."
  "6.--.."
  "7...--"
  "8-..-."
  "9.-.-."
  "0..--.";
|
||||
|
||||
/* example: barcode -E -e i25 -b 123456 >a.eps */
|
||||
|
||||
/*
|
||||
add i25, patch by: Chris Lee, 13 Jul 2009
|
||||
ToDo: check correctness
|
||||
*/
|
||||
char *decode_i25(int *wb, int num_bars){ /* ToDo: char *dest, int len */
|
||||
int i, w, i1, i3, i5, crc, idx[7], pos;
|
||||
double dw, dww, err; char *buf;
|
||||
char *result=NULL; /* malloc and store the result */
|
||||
|
||||
int *wb_temp;
|
||||
int *wb_check;
|
||||
int code_chars;
|
||||
|
||||
if ((num_bars)%5!=4) return 0; /* chars*5bars + 4 start/stop bars */
|
||||
code_chars = ((num_bars - 4) / 5) * 2;
|
||||
// dw=w*1.0/(9*(num_bars/3)); /* threshold = 1.5..2 */
|
||||
|
||||
wb_temp = (int *)malloc((code_chars * 5)*sizeof(int)); if (!wb_temp) { return NULL; }
|
||||
wb_check = (int *)malloc( 7 *sizeof(int)); if (!wb_check) { return NULL; }
|
||||
|
||||
for (i=0; i<(code_chars * 5)+7; i++) {
|
||||
if (i<4) { wb_check[i] = wb[i]; } /* start sequence NnNn... */
|
||||
else if (i > ((code_chars*5)+3)) { /* end sequence ...WnN */
|
||||
wb_check[(int)(i-(code_chars*5))] = wb[i]; }
|
||||
else {
|
||||
pos = i - 4;
|
||||
/* reinterleave 0,5,1,6,2,7,3,8,4,9,... to 0,1,2,3,4,5,6,7,8,9,... */
|
||||
// pos = (int)(10*(int)(pos/10) + 1.0*(pos%10)/2.0 + 4.5*(pos%2));
|
||||
pos = 10*(pos/10) + (pos%10)/2 + 5*(pos&1);
|
||||
wb_temp[pos] = wb[i];
|
||||
}
|
||||
}
|
||||
wb = wb_temp;
|
||||
|
||||
/* check start / finish codes */
|
||||
sort(wb_check,idx,7);
|
||||
if (idx[0] != 4 /* widest bar W must be the 4th = 1st of end */
|
||||
|| wb_check[idx[0]]==wb_check[idx[1]]) { /* exact 1 widest */
|
||||
free(wb_temp);
|
||||
free(wb_check);
|
||||
g_debug(fprintf(stderr," need exact 1 widest at start of end, abort\n");)
|
||||
return 0;
|
||||
}
|
||||
|
||||
for(w=i=0; i<5*code_chars;i++ ) w+=wb[i]; /* summ width */
|
||||
dw=w*1.0/(16*(num_bars/5)); /* threshold = 1.5..2 */
|
||||
/* whats best and most rigorosely for dw=threshold_width?
|
||||
* - (1.5..2)*mean_width of every 5th space
|
||||
* - (1.5..2)*summ(5bars+5spaces)/(13..16)
|
||||
* - 3/4*summ(three thickest)/3
|
||||
*/
|
||||
dww=crc=0; /* error and checksum (not supported yet) */
|
||||
#if 0 /* should we exclude any non-standard code39? */
|
||||
/* check for correct start and end symbol * or NwNnWnWnN Narrow+Wide */
|
||||
i=2*num_bars-2;
|
||||
if (wb[ 0]>dw*2 || wb[ 1]<=dw*2 || wb[ 2]> dw*2) return 0;
|
||||
if (wb[i-0]>dw*2 || wb[i-1]> dw*2 || wb[i-2]<=dw*2) return 0;
|
||||
#endif
|
||||
g_debug(fprintf(stderr," code25 base=%.3f chars=%2d\n ",dw,code_chars);)
|
||||
buf =malloc( code_chars); if (!buf) return result;
|
||||
result=malloc(256+code_chars);
|
||||
|
||||
for(i5=i1=0;i1<5*code_chars;i1+=5) {
|
||||
/* ToDo: looking for three widest bars/gaps */
|
||||
sort(wb+i1,idx,5);
|
||||
for(err=0,i3=2;i3<5;i3++)
|
||||
err+=sqr(wb[i1+idx[i3]]/dw-1.0); /* narrow=1, wide=2..3 */
|
||||
dww+=err;
|
||||
for(i3=0;i3<Num25;i3++)
|
||||
if (code25[6*i3+1+idx[0]]=='-'
|
||||
&& code25[6*i3+1+idx[1]]=='-') break;
|
||||
//if (i5>0 && i5<num_bars/3-2) crc+=i3;
|
||||
buf[i5++]=code25[6*i3];
|
||||
|
||||
/* ToDo: check if we have even number of black bars within 3 biggest */
|
||||
g_debug(for(i3=0;i3<5;i3++)fprintf(stderr,"%02d ",wb[i1+i3]);
|
||||
fprintf(stderr," posWide=%d,%d %c err=%.3f\n ",
|
||||
idx[0], idx[1], buf[i5-1], err/6);)
|
||||
{
|
||||
/* check that we have exact 2 widest bars, 2nd widest > 3th widest */
|
||||
if (wb[i1+idx[1]]==wb[i1+idx[2]]) {
|
||||
free(buf); free(result);
|
||||
g_debug(fprintf(stderr," need exact 2 widest, abort\n");)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
buf[i5]=0;
|
||||
if (result) // ToDo: fix CRC (not defined?)
|
||||
sprintf(result,"<barcode type=\"i25\" chars=\"%d\" code=\"%s\" crc=\"%c\""
|
||||
" error=\"%.3f\" />", i5,buf,code25[(crc%10)*10],dww/((num_bars/5)*6));
|
||||
|
||||
free(wb_temp);
|
||||
free(wb_check);
|
||||
free(buf);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* code codabar, numbers only, 4 bars per char (1*wide bar, 1*wide space)
|
||||
robust code (dot-matrix printer)
|
||||
characters have same length (distance between 1st bar to 4th space)
|
||||
??? codeproject: 6*n+2*w=12 or 5*n+3*w=14
|
||||
??? suchymips.de:
|
||||
variant 1: 18 different bar widths (Monarch code)
|
||||
variant 2: 2 different bar widths (Codabar Matrix or ABC-Codabar)
|
||||
9..11 digits/inch, N=narrow bar, W=wide bar, n=narrow space, w=wide space
|
||||
see http://www.barcodeman.com/info/codabar.php (Jul 2009)
|
||||
minN=0.17mm=0.0065inch, 11 digits/inch = 0.0909 = 14*minN
|
||||
2of7 + extensions
|
||||
extensions: 1 wide bar + 2 wide spaces (no narrow space between 2 wide)
|
||||
4 start/stop sets = a/t, b/m, c/ *, d/e
|
||||
- mean wide = thickest of 4 bars
|
||||
- mean narrow = thinnest of 4 bars, thinnest of 3 (4) spaces or every 4th
|
||||
wiki-sample: a31117013206375b (wide spaces between chars) schraeg!
|
||||
barcode: t1234567t n=N=1 w=W=3 c=12,14 (not const.)
|
||||
*/
|
||||
/* Codabar pattern table: 8 bytes per entry, the character followed by the
 * 7 element widths in scan order (bar,space,bar,space,bar,space,bar),
 * '-' = wide. 12+12 chars plus a terminating '?' entry. */
const char *code27= /* 4bars+3spaces, 12+12 chars */
  // 0..11: 3 nbar + 1 wbar + 2 nspace + 1 wspace
  "0.....--"
  "1....--."
  "2...-..-"
  "3--....."
  "4..-..-."
  "5-....-."
  "6.-....-"
  "7.-..-.."
  "8.--...."
  "9-..-..."
  "-...--.."
  "$..--..."
  // 12..15: 1 nbar + 3 wbar + 3 nspace + 0 wspace
  ":-...-.-"
  "/-.-...-"
  ".-.-.-.."
  "+..-.-.-"
  // 16..23: 3 nbar + 1 wbar + 1 nspace + 2 wspace
  "a..--.-."
  "b.-.-..-"
  "c...-.--"
  "d...---."
  "t..--.-."
  "n.-.-..-"
  "*...-.--"
  "e...---."
  // EOS
  "????????";
|
||||
|
||||
/* example: barcode -E -e cbr -b 123456 >a.eps */
|
||||
|
||||
char *decode_27(int *wb, int num_bars){ /* ToDo: char *dest, int len */
|
||||
int i, i1, i2, i3, i4, i5, b_idx[4], s_idx[3], b_w[4], s_w[3],
|
||||
max_wdiff, err=0;
|
||||
// double dw, err;
|
||||
char *buf, char27[8]="......";
|
||||
char *result=NULL; /* malloc and store the result */
|
||||
|
||||
int code_chars;
|
||||
#if 0 // ToDo: verifications nb_max < wb_min etc.
|
||||
int nb_min=99999, nb_max=0, nb_sum=0, nb_num=0; // narrow bar
|
||||
int ns_min=99999, ns_max=0, ns_sum=0, ns_num=0; // narrow space
|
||||
int wb_min=99999, wb_max=0, wb_sum=0, wb_num=0; // wide bar
|
||||
int ws_min=99999, ws_max=0, ws_sum=0, ws_num=0; // wide space
|
||||
#endif
|
||||
|
||||
if ((num_bars)%4!=0) return 0; /* chars*4bars */
|
||||
code_chars = num_bars / 4;
|
||||
// dw=w*1.0/(9*(num_bars/3)); /* threshold = 1.5..2 */
|
||||
|
||||
g_debug(fprintf(stderr," codabar chars= %d\n ", code_chars);)
|
||||
buf =malloc( code_chars); if (!buf) return result;
|
||||
result=malloc(256+code_chars);
|
||||
i5=0; // index output string
|
||||
for (i=0; i< code_chars; i++) {
|
||||
b_w[0]=wb[i*8+0]; // 1st bar
|
||||
b_w[1]=wb[i*8+2]; // 2nd bar
|
||||
b_w[2]=wb[i*8+4]; // 3th bar
|
||||
b_w[3]=wb[i*8+6]; // 4th bar
|
||||
s_w[0]=wb[i*8+1]; // 1st space
|
||||
s_w[1]=wb[i*8+3]; // 2nd space
|
||||
s_w[2]=wb[i*8+5]; // 3th space
|
||||
sort(b_w,b_idx,4); /* idx[0] points to widest bar */
|
||||
sort(s_w,s_idx,3); /* idx[0] points to widest space */
|
||||
g_debug(for(i3=0;i3<7;i3++)fprintf(stderr,"%02d ",wb[8*i+i3]);)
|
||||
if (b_w[b_idx[0]]==b_w[b_idx[3]]) { err=__LINE__; break; } // min. 1 wide + narrow
|
||||
// search max. diff between sorted widths
|
||||
i2=b_w[b_idx[0]]-b_w[b_idx[1]]; i1=1; max_wdiff=i2; // diff widest - 2nd wides
|
||||
i2=b_w[b_idx[1]]-b_w[b_idx[2]]; if (i2>max_wdiff) { i1=2; max_wdiff=i2; }
|
||||
i2=b_w[b_idx[2]]-b_w[b_idx[3]]; if (i2>max_wdiff) { i1=3; max_wdiff=i2; }
|
||||
if (i1==2) { err=__LINE__; break; } // 2 wide + 2 narrow bars not allowed
|
||||
for (i3=0;i3<7;i3++) char27[i3]='.'; // reset char
|
||||
if (i1==1) { // 1 wide bar (1 or 2 wspaces)
|
||||
if (s_w[s_idx[0]]-s_w[s_idx[1]]
|
||||
>s_w[s_idx[1]]-s_w[s_idx[2]]) { // 1 wspace
|
||||
char27[2*b_idx[0]+0]='-';
|
||||
char27[2*s_idx[0]+1]='-';
|
||||
} else { // assume 2 wspaces
|
||||
if (s_w[s_idx[2]]==s_w[s_idx[1]]) { err=__LINE__; break; }
|
||||
char27[2*b_idx[0]+0]='-';
|
||||
char27[2*s_idx[0]+1]='-';
|
||||
char27[2*s_idx[1]+1]='-';
|
||||
}
|
||||
} else { // assume 3 wbars + 0 wspaces
|
||||
char27[2*s_idx[0]+0]='-';
|
||||
char27[2*s_idx[1]+0]='-';
|
||||
char27[2*s_idx[2]+0]='-';
|
||||
}
|
||||
for(i4=24,i3=0;i3<24;i3++) {
|
||||
if (code27[8*i3+1]==char27[0]
|
||||
&& code27[8*i3+2]==char27[1]
|
||||
&& code27[8*i3+3]==char27[2]
|
||||
&& code27[8*i3+4]==char27[3]
|
||||
&& code27[8*i3+5]==char27[4]
|
||||
&& code27[8*i3+6]==char27[5]
|
||||
&& code27[8*i3+7]==char27[6]) {
|
||||
i4=i3; buf[i5++]=code27[8*i3]; break; }
|
||||
}
|
||||
g_debug(fprintf(stderr," %s c27= %c\n ", char27, ((i5)?buf[i5-1]:'?'));)
|
||||
if (i4==24) { err=__LINE__; break; }
|
||||
} // each char
|
||||
if (i>=code_chars) { // else: inconsistent char
|
||||
g_debug(fprintf(stderr," code27 base=%.3f chars=%2d\n ",0.0,code_chars);)
|
||||
} else {
|
||||
g_debug(fprintf(stderr," error %d at char %d, abort\n", err, i);)
|
||||
free(result); result=0;
|
||||
}
|
||||
buf[i5]=0;
|
||||
if (result) // ToDo: fix CRC (not defined?)
|
||||
sprintf(result,"<barcode type=\"codabar\" chars=\"%d\" code=\"%s\""
|
||||
" crc=\"%c\" error=\"%.3f\" />", i5,buf,'?',0.0);
|
||||
|
||||
free(buf);
|
||||
return result; // free at parent!
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
decode barcode
|
||||
- check main characteristics (num bars, min+max width, etc.)
|
||||
- detect code type
|
||||
- the current algorithm measures the width of bars and spaces
|
||||
called by detect_barcode()
|
||||
ToDo: - like storing sequence of widths for 1D code
|
||||
store array of bits for 2D matrix code and decode later
|
||||
*/
|
||||
char *decode_barcode(struct box *bb){ /* ToDo: char *dest, int len */
|
||||
int i, num_bars, yy, w, ww, dx, xx, cs=JOB->cfg.cs, *wb;
|
||||
char *result=NULL; /* store the result */
|
||||
yy=(bb->y0+bb->y1)/2;
|
||||
w=ww=bb->x1-bb->x0+1;
|
||||
num_bars = num_cross(bb->x0,bb->x1,yy,yy,bb->p,JOB->cfg.cs);
|
||||
if(JOB->cfg.verbose)
|
||||
fprintf(stderr,"\n# ... detect bars=%3d w=%4d",num_bars,ww);
|
||||
|
||||
/* store width of bars and spaces to buffer wb */
|
||||
wb=(int *)malloc(2*num_bars*sizeof(int)); if(!wb) return NULL;
|
||||
xx=bb->x0;
|
||||
xx-=loop(bb->p,xx,yy, 8,cs,1,LE);
|
||||
xx+=loop(bb->p,xx,yy,ww,cs,0,RI); /* start with a bar! */
|
||||
for (i=0;i<2*num_bars;i++) {
|
||||
dx=loop(bb->p,xx,yy,w,cs,1^(i&1),RI);
|
||||
xx+=dx;
|
||||
w-=dx;
|
||||
wb[i]=dx;
|
||||
} wb[2*num_bars-1]=0;
|
||||
|
||||
/* ToDo: what about probability? if not unique
|
||||
* - add argument char *result which can be modified or not,
|
||||
* - or add box2? (would reuse of this code more difficult)
|
||||
*/
|
||||
/* test code128 characteristics, ToDo: look for correct start/stop 211 seq. */
|
||||
if ((num_bars-1)%3==0 && num_bars>=10 && ww>=11*(num_bars-1)/3+2){
|
||||
if (!result) result=decode_code128(wb,num_bars);
|
||||
}
|
||||
/* test UPC/EAN characteristics */
|
||||
if ((num_bars)%2==0 && num_bars>=8 && ww>=7*(num_bars-6)/2+11
|
||||
&& ((num_bars-6)/2)%2==0){ /* should be balanced */
|
||||
if (!result) result=decode_UPC(wb,num_bars);
|
||||
}
|
||||
/* test UPC_addon by Michael van Rooyen, often on books */
|
||||
if (num_bars==7 || num_bars==16)
|
||||
if (!result) result=decode_UPC_addon(wb,num_bars);
|
||||
|
||||
/* test code39 characteristics */
|
||||
if ((num_bars)%5==0 && num_bars>14){
|
||||
if (!result) result=decode_39(wb,num_bars);
|
||||
}
|
||||
/* test i2of5 chartacteristics */
|
||||
if ((num_bars)%5==4 && num_bars>3) {
|
||||
if (!result) result=decode_i25(wb,num_bars);
|
||||
}
|
||||
|
||||
/* test codabar chartacteristics */
|
||||
if ((num_bars)%4==0 && num_bars>3) {
|
||||
if (!result) result=decode_27(wb,num_bars);
|
||||
}
|
||||
|
||||
free(wb);
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* taking the list of boxes and search for groups of bars (1D-barcodes)
|
||||
*/
|
||||
/* Walk the box list, group tall thin boxes that line up horizontally into
 * one barcode frame, decode it and replace the bars by a single PICTURE
 * box carrying the decoded text. Afterwards the box statistics
 * (numC, sumX, sumY) are recomputed without PICTURE boxes.
 * Always returns 0.
 *
 * Change against the previous version: if neither the decoder nor the
 * fallback malloc delivers a string, the NULL pointer is no longer passed
 * to fprintf("%s") and setas().
 */
int detect_barcode(job_t *job)
{
  int j=0, j2=0, bx0, by0, bx1, by1, bdx, bdy, bbx, rm,
      x0, y0, dx, dy, cs, y, yl0, yl1, yr0, yr1;
  struct box *box2, *box3;

  if(JOB->cfg.verbose)
    fprintf(stderr,"# barcode.c detect_barcode ");
  x0=y0=0; rm=0; dx=job->src.p.x; dy=job->src.p.y; cs=JOB->cfg.cs;
  for_each_data(&(JOB->res.boxlist)) {
    box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
    /* detect width (bdx) and height (bdy) of possible bar */
    /* ToDo: better check for a line */
    bdx=box2->x1-box2->x0+1 /* substract correction for skewed bars */
     -loop(box2->p,box2->x1,(box2->y0+box2->y1)/2,box2->x1-box2->x0,cs,0,LE)
     -loop(box2->p,box2->x0,(box2->y0+box2->y1)/2,box2->x1-box2->x0,cs,0,RI);
    bdy=box2->y1-box2->y0+1;
    if (box2->c == PICTURE || box2->c == UNKNOWN)
    if (box2->y0 >= y0 && box2->y1 <= y0 + dy /* within frame? */
     && box2->x0 >= x0 && box2->x1 <= x0 + dx
     && box2->y1 - box2->y0 > 19 /* min. length */
     && box2->y1 - box2->y0 > 8 * bdx
    ) { /* a bar? */
      j=1; /* number of bars */
      bx0=box2->x0; bx1=box2->x1; /* initial values for barcode frame */
      by0=box2->y0; by1=box2->y1;
      bbx=bx1-bx0+2; /* width of bar */
      /* this is for scans which are not exactly horizontal */
      yl0=yr0=by0; /* left and right upper bound */
      yl1=yr1=by1; /* left and right lower bound */
      /* --- iteratively take into account next nearest bar ---
       * this is important, because bar-boxes are not in right order */
      for (j2=1;j2;) {
        j2=0;
        /* expand a frame around the potential barcode (bx0,by0,bx1,by1) */
        for_each_data(&(JOB->res.boxlist)) {
          box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
          /* bdy=box3->y1-box3->y0+1; */
          if (box2!=box3)
          if (box3->c == PICTURE || box3->c == UNKNOWN)
          if (box3->y0 >= y0 && box3->y1 <= y0 + dy /* within image */
           && box3->x0 >= x0 && box3->x1 <= x0 + dx
           && box3->y1 - box3->y0 > 19 /* min. length */
           && box3->y1 - box3->y0 > 4 * (bdx) /* height > 4*width2 */
           && box3->x1 - box3->x0 < 4 * (bdy) /* width < height/4 = bar */
           && (( abs(box3->y0-by0)<bdy/16+4 /* within bar-box ? */
              && abs(box3->y1-by1)<bdy/2 ) /* lower ends of UPC could be longer */
             ||( abs(box3->y0-yl0)<bdy/16+4 /* left side of frame */
              && abs(box3->y1-yl1)<bdy/2
              && box3->x0 <= bx0 )
             ||( abs(box3->y0-yr0)<bdy/16+4 /* right side of frame */
              && abs(box3->y1-yr1)<bdy/2
              && box3->x0 >= bx1 ) )
           && box3->x0 > bx0 - 12*bbx /* startspace=5...10 */
           && box3->x1 < bx1 + 12*bbx
           && box3->x0 > bx0 - bdy/2 /* dont glue two barcodes together */
           && box3->x1 < bx1 + bdy/2 /* ex: ean13a.jpg */
           /* dont check bars which already within the frame twice ? */
           && ( box3->x1 > bx1 || box3->x0 < bx0 )
          ) { /* a bar? -> extend barcode frame only in x direction */
            /* take minimum of y to have valid barcode for all y */
            if (box3->x0<bx0) { bx0=box3->x0; yl0=box3->y0; yl1=box3->y1; }
            if (box3->x1>bx1) { bx1=box3->x1; yr0=box3->y0; yr1=box3->y1; }
            if (4*(box3->y1-box3->y0)>3*(by1-by0)) { /* carefull reduce */
              if (box3->y0>by0) by0=box3->y0; /* ToDo: fix for non-horizontal */
              if (box3->y1<by1) by1=box3->y1;
            }
            j++; /* found a near bar and count to num bars */
            j2=1; /* continue searching (endless loop?) */
          }
        } end_for_each(&(JOB->res.boxlist));
      }
      /* j is the num of bars found above, some inner bars are not counted */
      /* ToDo: better iterative add next nearest bars from sorted list near bars? */
      if (j>5) {
        char *code=0;
        box2->c=PICTURE; /* BARCODE */
        box2->x0=bx0; box2->y0=by0;
        box2->x1=bx1; box2->y1=by1;
        /* ToDo: add pointer to decoded text */

        y=(box2->y0+box2->y1)/2;
        if (JOB->cfg.verbose){
          int nbars;
          nbars=num_cross(box2->x0,box2->x1,y,y,box2->p,JOB->cfg.cs);
          fprintf(stderr,"\n# barcode at %3d %3d size %3d %3d nbars %d (%d)",
            bx0,by0,bx1-bx0+1,by1-by0+1,nbars,j);
          if (j!=nbars)
            fprintf(stderr,"\n# ... trouble: num_found_bars != num_cross");
          /* this is because some far bars are detected before near bars */
        }
        /* transport the info to the gocr-output (development) */
        /* ToDo: decode and print/store barcode bars=j */
        code=decode_barcode(box2); /* ToDo: char *dest, int len */
        if (!code) { /* failed */
          code=(char *)malloc(128);
          /* ToDo: analyze and output num_bars, width of bars etc. */
          if(code) strncpy(code,"<barcode type=\"unknown\" />",128);
        }
        if (JOB->cfg.verbose)
          fprintf(stderr,"\n# ... decoded as: %s", code ? code : "(null)");
        if (code)   /* out of memory: leave the box without text */
          setas(box2,code,99); /* ToDo: set a better weight */
        free(code);

        /* remove inner boxes, only if sure!? (ToDo: use cfg.certainty) */
        for_each_data(&(JOB->res.boxlist)) {
          box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
          /* bdy=box3->y1-box3->y0+1; */
          if (box2!=box3)
          if (box3->c == PICTURE || box3->c == UNKNOWN)
          if ( abs(box3->y0-by0)<bdy/16+4 /* within bar-box ? */
            && abs(box3->y1-by1)<bdy/2 /* lower ends of UPC could be longer */
            && box3->x1 <= bx1
            && box3->x0 >= bx0
          ) {
            rm++; /* count removed boxes */
            list_del(&(JOB->res.boxlist),box3);
            free_box(box3);
          }
        } end_for_each(&(JOB->res.boxlist));
        if (JOB->cfg.verbose)
          fprintf(stderr,"\n# ... removed boxes: %d", rm);
        rm=0;
      }
    }
  } end_for_each(&(JOB->res.boxlist));

  /* recalculate averages without bars */
  JOB->res.numC=JOB->res.sumX=JOB->res.sumY=j2=0;
  for_each_data(&(JOB->res.boxlist)) {
    j2++;
    box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
    if (box3->c==PICTURE) continue;
    JOB->res.numC++; /* count remaining boxes */
    JOB->res.sumX+=box3->x1-box3->x0+1;
    JOB->res.sumY+=box3->y1-box3->y0+1;
  } end_for_each(&(JOB->res.boxlist));
  if(JOB->cfg.verbose)
    fprintf(stderr,"\n# ... boxes %d nC %d\n",
      j2, JOB->res.numC);

  /* ToDo: detect DataMatrix = iec16022
   * search square of 2 lines and 2 dottet lines (first no rotation)
   * output characteristics pixel size, bytes, code type, etc.
   */

  return 0;
}
|
||||
|
||||
372
ActiveX/ASCOfficeUtils/GOCR/src/box.c
Normal file
372
ActiveX/ASCOfficeUtils/GOCR/src/box.c
Normal file
@@ -0,0 +1,372 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2009 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL address
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
/* do we need #include <math.h>? conflicts with INFINITY in unicode.h */
|
||||
#include "gocr.h"
|
||||
#include "pgm2asc.h"
|
||||
|
||||
/* for sorting letters by position on the image
|
||||
/ ToDo: - use function same line like this or include lines.m1 etc. */
|
||||
int box_gt(struct box *box1, struct box *box2) {
|
||||
// box1 after box2 ?
|
||||
if (box1->line > box2->line)
|
||||
return 1;
|
||||
if (box1->line < box2->line)
|
||||
return 0;
|
||||
if (box1->x0 > box2->x1) // before
|
||||
return 1;
|
||||
if (box1->x1 < box2->x0) // before
|
||||
return 0;
|
||||
if (box1->x0 > box2->x0) // before, overlapping!
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* --- copy part of pix p into new pix b ---- len=10000
|
||||
* Returns: 0 on success, 1 on error.
|
||||
* naming it as copybox isnt very clever, because it dont have to do with the
|
||||
* char boxes (struct box)
|
||||
*/
|
||||
/* Copy the dx*dy rectangle of p starting at (x0,y0) into b.
 *   len - number of pixels the buffer b->p can hold
 * Returns 0 on success, 1 if b has no buffer, dx or dy is negative, or
 * the rectangle does not fit into len pixels.
 *
 * Change against the previous version: the size test no longer computes
 * dx*dy, which could overflow int for large boxes and let an oversized
 * copy through; it compares dx against len/dy instead.
 */
int copybox (pix * p, int x0, int y0, int dx, int dy, pix * b, int len) {
  int x, y;

  /* test boundaries (dx > len/dy is dx*dy > len without the multiplication) */
  if (b->p == NULL || dx < 0 || dy < 0
   || len < 0 || (dy > 0 && dx > len / dy)) {
    fprintf(stderr, " error-copybox x=%5d %5d d=%5d %5d\n", x0, y0, dx, dy);
    return 1;
  }

  b->x = dx;
  b->y = dy;
  b->bpp = 1;
#ifdef FASTER_INCOMPLETE
  for (y = 0; y < dy; y++)
    memcpy(&pixel_atp(b, 0, y), &pixel_atp(p, x0, y + y0 ), dx);
  // and unmark pixels
#else
  for (y = 0; y < dy; y++)
    for (x = 0; x < dx; x++)
      pixel_atp(b, x, y) = getpixel(p, x + x0, y + y0);
#endif

  return 0;
}
|
||||
|
||||
/* reset table of alternative chars (and free memory) */
|
||||
int reset_box_ac(struct box *box){
|
||||
int i;
|
||||
for (i=0; i<box->num_ac; i++)
|
||||
if (box->tas[i]) {
|
||||
/* fprintf(stderr,"DBG free_s[%d] %p %s\n",i,box->tas[i],box->tas[i]); */
|
||||
free(box->tas[i]);
|
||||
box->tas[i]=0; /* prevent double freeing */
|
||||
}
|
||||
box->num_ac=0; /* mark as freed */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ini or copy a box: get memory for box and initialize the memory */
|
||||
struct box *malloc_box (struct box *inibox) {
|
||||
struct box *buf;
|
||||
int i;
|
||||
|
||||
buf = (struct box *) malloc(sizeof(struct box));
|
||||
if (!buf)
|
||||
return NULL;
|
||||
if (inibox) {
|
||||
memcpy(buf, inibox, sizeof(struct box));
|
||||
/* only pointer are copied, we want to copy the contents too */
|
||||
for (i=0;i<inibox->num_ac;i++) {
|
||||
if (inibox->tas[i]) {
|
||||
buf->tas[i]=(char *)malloc(strlen(inibox->tas[i])+1);
|
||||
memcpy(buf->tas[i], inibox->tas[i], strlen(inibox->tas[i])+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
else { /* ToDo: init it */
|
||||
buf->num_ac=0;
|
||||
buf->num_frames=0;
|
||||
}
|
||||
/* fprintf(stderr,"\nDBG ini_box %p",buf); */
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* free memory of box */
|
||||
/* Release a box together with its table of alternative chars.
 * A NULL pointer is accepted and ignored. Always returns 0. */
int free_box (struct box *box) {
  if (box != NULL) {
    /* fprintf(stderr,"DBG free_box %p\n",box); out_x(box); */
    reset_box_ac(box);  /* strings of the alternative char table first */
    free(box);          /* then the box itself */
  }
  return 0;
}
|
||||
|
||||
/* simplify the vectorgraph,
|
||||
* but what is the best way?
|
||||
* a) melting two neighbouring vectors with nearly same direction?
|
||||
* (nearest angle to pi)
|
||||
* b) melting three neigbours with smallest area?
|
||||
* ToDo:
|
||||
* mode = 0 - only lossless
|
||||
* mode = 1 - reduce one vector, smallest possible loss
|
||||
* mode = 2 - remove jitter (todo, or somewhere else)
|
||||
* ToDo: include also loop around (last - first element)
|
||||
* ToDo: reduce by 10..50%
|
||||
*/
|
||||
int reduce_vectors ( struct box *box1, int mode ) {
|
||||
int i1, i2, nx, ny, mx, my, len,
|
||||
minlen=1024, /* minlength of to neighbouring vectors */
|
||||
besti1=0, /* frame for best reduction */
|
||||
besti2=2; /* vector replacing its predecessor */
|
||||
double sprod, maxsprod=-1;
|
||||
if (mode!=1) fprintf(stderr,"ERR not supported yet, ToDo\n");
|
||||
for (i2=1,i1=0; i1<box1->num_frames; i1++) { /* every frame */
|
||||
for (;i2<box1->num_frame_vectors[i1]-1; i2++) { /* every vector */
|
||||
/* predecessor n */
|
||||
nx = box1->frame_vector[i2-0][0] - box1->frame_vector[i2-1][0];
|
||||
ny = box1->frame_vector[i2-0][1] - box1->frame_vector[i2-1][1];
|
||||
/* successor m */
|
||||
mx = box1->frame_vector[i2+1][0] - box1->frame_vector[i2-0][0];
|
||||
my = box1->frame_vector[i2+1][1] - box1->frame_vector[i2-0][1];
|
||||
/* angle is w = a*b/(|a|*|b|) = 1 means parallel */
|
||||
/* normalized: minimize w^2 = (a*b/(|a|*|b|)-1)^2 */
|
||||
/* -1=90grd, 0=0grd, -2=180grd */
|
||||
sprod = /* fabs */(abs(nx*mx+ny*my)*(nx*mx+ny*my)
|
||||
/(1.*(nx*nx+ny*ny)*(mx*mx+my*my))-1);
|
||||
/* we dont include math.h because INFINITY conflicts to unicode,h */
|
||||
if (sprod<0) sprod=-sprod;
|
||||
len = (mx*mx+my*my)*(nx*nx+ny*ny); /* sum lengths^2 */
|
||||
// ..c ###c ... .. ...
|
||||
// .b. len=2+2 #b.. len=2+5 #bc len=1+2 bc len=1+1 b#a len=4+5
|
||||
// a.. spr=0 a... spr=1/10 a.. spr=1/4 a. spr=1 ##c spr=9/5
|
||||
//
|
||||
if ( len* sprod* sprod* sprod* sprod
|
||||
<minlen*maxsprod*maxsprod*maxsprod*maxsprod
|
||||
|| maxsprod<0) /* Bad! ToDo! */
|
||||
{ maxsprod=sprod; besti1=i1; besti2=i2; minlen=len; }
|
||||
}
|
||||
}
|
||||
if (box1->num_frames>0)
|
||||
for (i2=besti2; i2<box1->num_frame_vectors[ box1->num_frames-1 ]-1; i2++) {
|
||||
box1->frame_vector[i2][0]=box1->frame_vector[i2+1][0];
|
||||
box1->frame_vector[i2][1]=box1->frame_vector[i2+1][1];
|
||||
}
|
||||
for (i1=besti1; i1<box1->num_frames; i1++)
|
||||
box1->num_frame_vectors[i1]--;
|
||||
// fprintf(stderr,"\nDBG_reduce_vectors i= %d nv= %d sprod=%f len2=%d\n# ...",
|
||||
// besti2,box1->num_frame_vectors[ box1->num_frames-1 ],maxsprod,minlen);
|
||||
// out_x(box1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* add the contents of box2 to box1
|
||||
* especially add vectors of box2 to box1
|
||||
*/
|
||||
int merge_boxes( struct box *box1, struct box *box2 ) {
|
||||
int i1, i2, i3, i4;
|
||||
struct box tmpbox, *bsmaller, *bbigger; /* for mixing and sorting */
|
||||
/* DEBUG, use valgrind to check uninitialized memory */
|
||||
#if 0
|
||||
fprintf(stderr,"\nDBG merge_boxes_input:"); out_x(box1); out_x(box2);
|
||||
#endif
|
||||
/* pair distance is to expendable, taking borders is easier */
|
||||
if ((box2->x1 - box2->x0)*(box2->y1 - box2->y0)
|
||||
>(box1->x1 - box1->x0)*(box1->y1 - box1->y0)) {
|
||||
bbigger=box2; bsmaller=box1; }
|
||||
else {
|
||||
bbigger=box1; bsmaller=box2; }
|
||||
/* ToDo: does not work if a third box is added */
|
||||
if (box2->y0>box1->y1 || box2->y1<box1->y0
|
||||
|| box2->x0>box1->x1 || box2->x1<box1->x0) {
|
||||
box1->num_boxes += box2->num_boxes; /* num seperate objects 2=ij */
|
||||
} else {
|
||||
if (box2->num_boxes>box1->num_boxes) box1->num_boxes=box2->num_boxes;
|
||||
box1->num_subboxes += box2->num_subboxes+1; /* num holes 1=abdepq 2=B */
|
||||
}
|
||||
box1->dots += box2->dots; /* num i-dots */
|
||||
if ( box2->x0 < box1->x0 ) box1->x0 = box2->x0;
|
||||
if ( box2->x1 > box1->x1 ) box1->x1 = box2->x1;
|
||||
if ( box2->y0 < box1->y0 ) box1->y0 = box2->y0;
|
||||
if ( box2->y1 > box1->y1 ) box1->y1 = box2->y1;
|
||||
i1 = i2 = 0;
|
||||
if (bbigger->num_frames)
|
||||
i1 = bbigger->num_frame_vectors[ bbigger->num_frames - 1 ];
|
||||
if (bsmaller->num_frames)
|
||||
i2 = bsmaller->num_frame_vectors[ bsmaller->num_frames - 1 ];
|
||||
while (i1+i2 > MaxFrameVectors) {
|
||||
if (i1>i2) { reduce_vectors( bbigger, 1 ); i1--; }
|
||||
else { reduce_vectors( bsmaller, 1 ); i2--; }
|
||||
}
|
||||
/* if i1+i2>MaxFrameVectors simplify the vectorgraph */
|
||||
/* if sum num_frames>MaxNumFrames through shortest graph away and warn */
|
||||
/* first copy the bigger box */
|
||||
memcpy(&tmpbox, bbigger, sizeof(struct box));
|
||||
/* attach the smaller box */
|
||||
for (i4=i3=0; i3<bsmaller->num_frames; i3++) {
|
||||
if (tmpbox.num_frames>=MaxNumFrames) break;
|
||||
|
||||
for (; i4<bsmaller->num_frame_vectors[i3]; i4++) {
|
||||
memcpy(tmpbox.frame_vector[i1],
|
||||
bsmaller->frame_vector[i4],2*sizeof(int));
|
||||
i1++;
|
||||
}
|
||||
tmpbox.num_frame_vectors[ tmpbox.num_frames ] = i1;
|
||||
tmpbox.frame_vol[ tmpbox.num_frames ] = bsmaller->frame_vol[ i3 ];
|
||||
tmpbox.frame_per[ tmpbox.num_frames ] = bsmaller->frame_per[ i3 ];
|
||||
tmpbox.num_frames++;
|
||||
if (tmpbox.num_frames>=MaxNumFrames) {
|
||||
if (JOB->cfg.verbose)
|
||||
fprintf(stderr,"\nDBG merge_boxes MaxNumFrames reached");
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* copy tmpbox to destination */
|
||||
box1->num_frames = tmpbox.num_frames;
|
||||
memcpy(box1->num_frame_vectors,
|
||||
tmpbox.num_frame_vectors,sizeof(int)*MaxNumFrames);
|
||||
memcpy(box1->frame_vol,
|
||||
tmpbox.frame_vol,sizeof(int)*MaxNumFrames);
|
||||
memcpy(box1->frame_per,
|
||||
tmpbox.frame_per,sizeof(int)*MaxNumFrames);
|
||||
memcpy(box1->frame_vector,
|
||||
tmpbox.frame_vector,sizeof(int)*2*MaxFrameVectors);
|
||||
#if 0
|
||||
if (JOB->cfg.verbose)
|
||||
fprintf(stderr,"\nDBG merge_boxes_result:"); out_x(box1);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* used for division of glued chars
|
||||
* after a box is splitted into 2, where vectors are copied to both,
|
||||
* vectors outside the new box are cutted and thrown away,
|
||||
* later replaced by
|
||||
* - 1st remove outside vectors with outside neighbours (complete frames?)
|
||||
* add vector on outside vector with inside neighbours
|
||||
* care about connections through box between outside vectors
|
||||
* - 2nd reduce outside crossings (inclusive splitting frames if necessary)
|
||||
* depending on direction (rotation) of outside connections
|
||||
* - 3rd shift outside vectors to crossing points
|
||||
* - split add this points, connect only in-out...out-in,
|
||||
* - cutting can result in more objects
|
||||
* ToDo:
|
||||
* dont connect --1---2--------3----4-- new-y1 (inside above not drawn)
|
||||
* \ \->>>>-/ / outside
|
||||
* \----<<<<-----/ old-y1
|
||||
* |======| subtractable?
|
||||
*
|
||||
* only connect --1---2--------3----4-- new-y1
|
||||
* \>>/ \>>>/ old-y1 outside
|
||||
* ToDo: what about cutting 2 frames (example: 2fold melted MN)
|
||||
* better restart framing algo?
|
||||
*
|
||||
* ToDo: new vol, per
|
||||
*/
|
||||
int cut_box( struct box *box1) {
|
||||
int i1, i2, i3, i4, x, y, lx, ly, dbg=0;
|
||||
if (JOB->cfg.verbose) dbg=1; // debug level, enlarge to get more output
|
||||
if (dbg) fprintf(stderr,"\n cut box x= %3d %3d", box1->x0, box1->y0);
|
||||
/* check if complete frames are outside the box */
|
||||
for (i1=0; i1<box1->num_frames; i1++){
|
||||
if (dbg>2) fprintf(stderr,"\n checking frame %d outside", i1);
|
||||
i2 = ((i1)?box1->num_frame_vectors[ i1-1 ]:0); // this frame
|
||||
i3 = box1->num_frame_vectors[ i1 ]; // next frame
|
||||
for (i4=i2; i4 < i3; i4++) {
|
||||
x = box1->frame_vector[i4][0];
|
||||
y = box1->frame_vector[i4][1];
|
||||
/* break, if one vector is lying inside */
|
||||
if (x>=box1->x0 && x<=box1->x1 && y>=box1->y0 && y<=box1->y1) break;
|
||||
}
|
||||
if (i4==i3) { /* all vectors outside */
|
||||
if (dbg>1) fprintf(stderr,"\n remove frame %d",i1);
|
||||
/* replace all frames i1,i1+1,... by i1+1,i1+2,... */
|
||||
/* replace (x,y) pairs first */
|
||||
for (i4=i2; i4<box1->num_frame_vectors[ box1->num_frames-1 ]-(i3-i2);
|
||||
i4++) {
|
||||
box1->frame_vector[i4][0] = box1->frame_vector[i4+i3-i2][0];
|
||||
box1->frame_vector[i4][1] = box1->frame_vector[i4+i3-i2][1];
|
||||
}
|
||||
/* replace the num_frame_vectors */
|
||||
for (i4=i1; i4<box1->num_frames-1; i4++)
|
||||
box1->num_frame_vectors[ i4 ] =
|
||||
box1->num_frame_vectors[ i4+1 ]-(i3-i2);
|
||||
box1->num_frames--; i1--;
|
||||
}
|
||||
}
|
||||
/* remove vectors outside the box */
|
||||
i3=0;
|
||||
for (i1=0; i1<box1->num_frames; i1++){
|
||||
if (dbg>2) fprintf(stderr,"\n check cutting vectors on frame %d", i1);
|
||||
x = box1->frame_vector[0][0]; /* last x */
|
||||
y = box1->frame_vector[0][1]; /* last y */
|
||||
/* ToDo: start inside to get a closed object */
|
||||
if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) i3=1;
|
||||
for (i2=0; i2<box1->num_frame_vectors[ i1 ]; i2++) {
|
||||
lx = x; /* last x */
|
||||
ly = y; /* last y */
|
||||
x = box1->frame_vector[i2][0];
|
||||
y = box1->frame_vector[i2][1];
|
||||
// fprintf(stderr,"DBG LEV3 i2= %3d xy= %3d %3d",i2,x,y);
|
||||
/* check if outside */
|
||||
if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) {
|
||||
/* replace by nearest point at border, ToDo: better crossingpoint */
|
||||
if (i3==0) { /* wrong if it starts outside */
|
||||
if (x < box1->x0) x = box1->frame_vector[i2][0] = box1->x0;
|
||||
if (x > box1->x1) x = box1->frame_vector[i2][0] = box1->x1;
|
||||
if (y < box1->y0) y = box1->frame_vector[i2][1] = box1->y0;
|
||||
if (y > box1->y1) y = box1->frame_vector[i2][1] = box1->y1;
|
||||
} else {
|
||||
/* remove vector */
|
||||
if (dbg>1) fprintf(stderr,"\n remove vector[%d][%d] x= %2d %2d",i1,i2,x-box1->x0,y-box1->y0);
|
||||
for (i4=i2;i4<box1->num_frame_vectors[ box1->num_frames-1 ]-1;i4++) {
|
||||
box1->frame_vector[i4][0] = box1->frame_vector[i4+1][0];
|
||||
box1->frame_vector[i4][1] = box1->frame_vector[i4+1][1];
|
||||
}
|
||||
for (i4=i1; i4<box1->num_frames; i4++)
|
||||
box1->num_frame_vectors[ i4 ]--;
|
||||
i2--; /* next element is shiftet now, setting back the counter */
|
||||
}
|
||||
i3++;
|
||||
// fprintf(stderr," outside i3= %d\n",i3);
|
||||
continue;
|
||||
}
|
||||
// fprintf(stderr," inside i3= %d",i3);
|
||||
if (i3) { /* ToDo: better crossing point last vector and border */
|
||||
if (lx < box1->x0) lx = box1->x0;
|
||||
if (lx > box1->x1) lx = box1->x1;
|
||||
if (ly < box1->y0) ly = box1->y0;
|
||||
if (ly > box1->y1) ly = box1->y1;
|
||||
x = box1->frame_vector[i2][0] = lx;
|
||||
y = box1->frame_vector[i2][1] = ly;
|
||||
i3 = 0;
|
||||
}
|
||||
// fprintf(stderr," xy= %3d %3d\n",x,y);
|
||||
}
|
||||
}
|
||||
if (dbg>2) { fprintf(stderr,"\nDBG cut_box_result:"); out_x(box1); }
|
||||
return 0;
|
||||
}
|
||||
|
||||
462
ActiveX/ASCOfficeUtils/GOCR/src/database.c
Normal file
462
ActiveX/ASCOfficeUtils/GOCR/src/database.c
Normal file
@@ -0,0 +1,462 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2009 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL address
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "gocr.h"
|
||||
#include "pnm.h"
|
||||
#include "pgm2asc.h"
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#define Blen 256
|
||||
|
||||
// load boxes from database into boxlist (for faster access)
|
||||
// used as alternate engine, comparing chars with database
|
||||
int load_db(void) {
|
||||
FILE *f1;
|
||||
char s1[Blen+1],
|
||||
s2[Blen+1] = "./db/", /* ToDo: replace by constant! by configure */
|
||||
*s3;
|
||||
int i, j, ii, i2, line;
|
||||
struct box *box1;
|
||||
pix *pp;
|
||||
|
||||
if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
|
||||
i2=strlen(s2);
|
||||
if (JOB->cfg.verbose)
|
||||
fprintf(stderr, "# load database %s %s ... ",s2,JOB->cfg.db_path);
|
||||
|
||||
strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
|
||||
f1 = fopen(s2, "r");
|
||||
if (!f1) {
|
||||
fprintf(stderr, " DB %s not found\n",s2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
line = 0; /* line counter for better error report */
|
||||
for (ii = 0; !feof(f1); ii++) {
|
||||
/* bbg: should write a better input routine */
|
||||
if (!fgets(s1, Blen, f1)) break; line++;
|
||||
j = strlen(s1);
|
||||
/* remove carriage return sequences from line */
|
||||
while (j > 0 && (s1[j - 1] == '\r' || s1[j - 1] == '\n'))
|
||||
s1[--j] = 0;
|
||||
if (!j) continue; /* skip empty line */
|
||||
if (s1[0]=='#') continue; /* skip comments (v0.44) */
|
||||
/* copy file name */
|
||||
for (i = 0; i < j && i+i2 < Blen && strchr(" \t,;",s1[i]) == 0; i++)
|
||||
s2[i2 + i] = s1[i];
|
||||
s2[i2+i]=0;
|
||||
/* skip spaces */
|
||||
for (; i < j && strchr(" \t",s1[i]) != 0; i++);
|
||||
/* by now: read pix, fill box, goto next ??? */
|
||||
pp = (pix *)malloc(sizeof(pix));
|
||||
if( !pp ) fprintf(stderr,"malloc error in load_db pix\n");
|
||||
|
||||
// if (JOB->cfg.verbose) fprintf(stderr,"\n# readpgm %s ",s2);
|
||||
if (readpgm(s2, pp, 0 * JOB->cfg.verbose)!=0) {
|
||||
fprintf(stderr,"\ndatabase error: readpgm %s\n", s2);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
box1 = (struct box *)malloc_box(NULL);
|
||||
if(!box1) fprintf(stderr,"malloc error in load_db box1\n");
|
||||
box1->x0 = 0;
|
||||
box1->x1 = pp->x-1; // white border 1 pixel width
|
||||
box1->y0 = 0;
|
||||
box1->y1 = pp->y-1;
|
||||
box1->x = 1;
|
||||
box1->y = 1;
|
||||
box1->dots = 0;
|
||||
box1->c = 0;
|
||||
box1->modifier = 0; /* ToDo: obsolete */
|
||||
box1->tas[0]=NULL;
|
||||
box1->tac[0]=0;
|
||||
box1->wac[0]=100; /* really 100% sure? */
|
||||
box1->num_ac=1;
|
||||
if (s1[i]=='"'){ /* parse a string */
|
||||
j=strrchr(s1+i+1,'"')-(s1+i+1); /* we only look for first and last "" */
|
||||
if (j>=1) {
|
||||
s3=(char *)malloc(j+1);
|
||||
if (!s3) fprintf (stderr, "malloc error in load_db s3\n");
|
||||
if (s3) {
|
||||
memcpy(s3,s1+i+1,j);
|
||||
s3[j]=0;
|
||||
box1->tas[0]=s3;
|
||||
// fprintf(stderr,"\nstring=%s",s3);
|
||||
}
|
||||
} else { fprintf(stderr,"load_db: string parse error L%d\n",line); }
|
||||
} else {
|
||||
box1->tac[0] = box1->c = s1[i]; /* try to interpret as ASCII */
|
||||
/* we can live without hexcode in future if we use UTF8-strings */
|
||||
s3=s1+i;
|
||||
j=strtol( s1+i, &s3, 16); /* try to read 4 to 8 digit hex unicode */
|
||||
/* if its an hexcode, ASCII interpretation is overwritten */
|
||||
if( j && i+3<=Blen && s3-s1-i>3 ) box1->tac[0] = box1->c = j;
|
||||
// fprintf(stderr,"\nhexcode=%04x=%04x %d",(int)j,(int)box1->c,s3-s1-i);
|
||||
}
|
||||
box1->num = 0;
|
||||
box1->line = -1;
|
||||
box1->m1 = 0; /* ToDo: should be given too in the database! */
|
||||
box1->m2 = 0;
|
||||
box1->m3 = 0;
|
||||
box1->m4 = 0;
|
||||
box1->p = pp;
|
||||
list_app(&JOB->tmp.dblist, box1); // append to list
|
||||
#if 0
|
||||
out_x(box1);
|
||||
#endif
|
||||
}
|
||||
fclose(f1);
|
||||
if (JOB->cfg.verbose)
|
||||
fprintf(stderr, " %d chars loaded\n", ii);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// expand database from box/boxlist name=db_$utime.pbm
|
||||
// this is added in version v0.3.3
|
||||
int store_db(struct box *box1) {
|
||||
FILE *f1;
|
||||
char s2[Blen+1] = "./db/", s3[Blen+1];
|
||||
int i2, dx, dy;
|
||||
unsigned c_out;
|
||||
pix b; /* temporary mini page */
|
||||
|
||||
if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
|
||||
i2=strlen(s2);
|
||||
|
||||
/* add (first) char and time to the file name for better debugging */
|
||||
|
||||
/* decide between 7bit ASCII and UTF8-char or string */
|
||||
c_out = ((box1->num_ac && box1->tas[0]) ?
|
||||
(unsigned char )box1->tas[0][0] /* char */ :
|
||||
box1->c /* wchar */);
|
||||
/* (unsigned int)(( char)0x80) = 0xffffff80 */
|
||||
/* (unsigned int)((unsigned char)0x80) = 0x00000080 */
|
||||
|
||||
/* name generation can cause problems, if called twice within a second */
|
||||
sprintf(s3,"db_%04x_%08lx.pbm", c_out, (unsigned long)time(NULL));
|
||||
/* ToDo: the file name may be not unique */
|
||||
|
||||
strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
|
||||
f1 = fopen(s2, "a");
|
||||
if (!f1) {
|
||||
fprintf(stderr, " could not access %s\n",s2);
|
||||
return 1;
|
||||
}
|
||||
strncpy(s2+i2,s3,strlen(s3)); s2[i2+strlen(s3)]=0;
|
||||
/* store image and infos about the char */
|
||||
/* ToDo: store the vector list instead of the pixelarray */
|
||||
|
||||
if (JOB->cfg.verbose)
|
||||
fprintf(stderr, "store_db: add file %s to database (nac=%d c=%04x)"
|
||||
"\n#",s3, box1->num_ac, c_out);
|
||||
|
||||
dx=box1->x1-box1->x0+1;
|
||||
dy=box1->y1-box1->y0+1;
|
||||
b.p = (unsigned char *) malloc( dx * dy );
|
||||
if( !b.p ){
|
||||
fprintf( stderr, "\nFATAL: malloc failed, skip store_db" );
|
||||
return 2;
|
||||
}
|
||||
if (copybox(box1->p, box1->x0, box1->y0, dx, dy, &b, dx * dy))
|
||||
return -1;
|
||||
|
||||
writepbm(s2,&b); /* What is to do on error? */
|
||||
free(b.p);
|
||||
|
||||
/* store the database line */
|
||||
/* some infos about box1->m1,..,m4 should added (base line, high etc.) */
|
||||
if (box1->num_ac && box1->tas[0]) {
|
||||
fprintf(f1, "%s \"%s\"\n",s3,box1->tas[0]);
|
||||
/* ToDo: what if tas contains '"'? */
|
||||
} else {
|
||||
if( (box1->c >= '0' && box1->c <= '9')
|
||||
|| (box1->c >= 'A' && box1->c <= 'Z')
|
||||
|| (box1->c >= 'a' && box1->c <= 'z') )
|
||||
fprintf(f1, "%s %c\n",s3,(char)box1->c);
|
||||
else {
|
||||
if (((box1->c)>>16)>>16)
|
||||
fprintf(f1, "%s %08x\n",s3,(unsigned int)box1->c);
|
||||
else
|
||||
fprintf(f1, "%s %04x\n",s3,(unsigned int)box1->c);
|
||||
}
|
||||
}
|
||||
fclose(f1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* function is only for user prompt on console to identify chars
|
||||
it prints out a part of pixmap b at point x0,y0 to stderr
|
||||
using dots .,; if no pixel, and @xoO for pixels
|
||||
*/
|
||||
void out_env(struct box *px ){
|
||||
int x0,y0,x1,y1,dx,dy,x,y,x2,y2,yy0,tx,ty,i,cs;
|
||||
char c1, c2; pix *b;
|
||||
cs=JOB->cfg.cs;
|
||||
yy0=px->y0;
|
||||
{ /* overwrite rest of arguments */
|
||||
b=px->p;
|
||||
x0=px->x0; x1=px->x1; dx=x1-x0+1;
|
||||
y0=px->y0; y1=px->y1; dy=y1-y0+1;
|
||||
y0-=2; y1+=2;
|
||||
if (px->m4 && y0>px->m1) y0=px->m1;
|
||||
if (px->m4 && y1<px->m4) y1=px->m4;
|
||||
if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
|
||||
if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
|
||||
if (x1-x0+1<62) { x0-=5; x1+=5; }
|
||||
if (y1-y0+1<10) { y0-= 4; y1+= 4; } /* fragment? */
|
||||
if (x0<0) x0=0; if (x1>=b->x) x1=b->x-1;
|
||||
if (y0<0) y0=0; if (y1>=b->y) y1=b->y-1;
|
||||
dx=x1-x0+1;
|
||||
dy=y1-y0+1; yy0=y0;
|
||||
fprintf(stderr,"\n# show box + environment");
|
||||
fprintf(stderr,"\n# show box x= %4d %4d d= %3d %3d r= %d %d",
|
||||
px->x0, px->y0, px->x1 - px->x0 + 1, px->y1 - px->y0 + 1,
|
||||
px->x - px->x0, px->y - px->y0);
|
||||
if (px->num_ac){ /* output table of chars and its probabilities */
|
||||
fprintf(stderr,"\n# list box char: ");
|
||||
for(i=0;i<px->num_ac && i<NumAlt;i++)
|
||||
/* output the (xml-)string (picture position, barcodes, glyphs, ...) */
|
||||
if (px->tas[i])
|
||||
fprintf(stderr," %s(%d)", px->tas[i] ,px->wac[i]);
|
||||
else
|
||||
fprintf(stderr," %s(%d)",decode(px->tac[i],ASCII),px->wac[i]);
|
||||
}
|
||||
fprintf(stderr,"\n");
|
||||
if (px->dots && px->m2 && px->m1<y0) { yy0=px->m1; dy=px->y1-yy0+1; }
|
||||
}
|
||||
tx=dx/80+1;
|
||||
ty=dy/40+1; // step, usually 1, but greater on large maps
|
||||
fprintf(stderr,"# show pattern x= %4d %4d d= %3d %3d t= %d %d\n",
|
||||
x0,y0,dx,dy,tx,ty);
|
||||
if (dx>0)
|
||||
for(y=yy0;y<yy0+dy;y+=ty) { /* reduce the output to max 78x40 */
|
||||
|
||||
/* image is the boxframe + environment in the original bitmap */
|
||||
for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
|
||||
c1='.';
|
||||
for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
|
||||
for(x2=x;x2<x+tx && x2<x0+dx;x2++)
|
||||
{ if((getpixel(b,x2,y2)<cs)) c1='#'; }
|
||||
// show pixels outside the box thinner/weaker
|
||||
if (x+tx-1 < px->x0 || x > px->x1
|
||||
|| y+ty-1 < px->y0 || y > px->y1) c1=((c1=='#')?'O':',');
|
||||
fprintf(stderr,"%c", c1 );
|
||||
}
|
||||
|
||||
c1=c2=' ';
|
||||
/* mark lines with < */
|
||||
if (px) if (y==px->m1 || y==px->m2 || y==px->m3 || y==px->m4) c1='<';
|
||||
if (y==px->y0 || y==px->y1) c2='-'; /* boxmarks */
|
||||
fprintf(stderr,"%c%c\n",c1,c2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
// second variant, for database (with slightly other behaviour)
|
||||
// new variant
|
||||
// look at the environment of the pixel too (contrast etc.)
|
||||
// detailed analysis only of diff pixels!
|
||||
//
|
||||
// 100% * distance, 0 is best fit
|
||||
// = similarity of 2 chars for recognition of noisy chars
|
||||
// weight of pixels with only one same neighbour set to 0
|
||||
// look at contours too!
|
||||
ToDo: especially on small boxes distance should only be 0 if
|
||||
characters are 100% identical!
|
||||
*/
|
||||
// #define DEBUG 2
|
||||
/* Compare the pattern of box1 in bitmap p1 with the pattern of box2 in
 * bitmap p2.
 *
 * cs - gray threshold; a pixel counts as set if getpixel() < cs.
 * Returns a distance in percent, 0 = best fit, at most 100.
 *
 * The pattern of box1 is sampled on a raster of about 16x32 points and
 * the pattern of box2 at the correspondingly scaled positions.  Equal
 * samples count as good; different samples count as bad, more so when
 * the 8 surrounding samples differ as well.  A different width/height
 * ratio adds to the distance.  If one of the boxes has zero height the
 * ratio cannot be computed and the maximum distance 100 is returned
 * instead of dividing by zero.
 */
int distance2( pix *p1, struct box *box1,
               pix *p2, struct box *box2, int cs){
  int rc=0,x,y,v1,v2,i1,i2,rgood=0,rbad=0,
      x1,y1,x2,y2,dx,dy,dx1,dy1,dx2,dy2,tx,ty;
#if DEBUG == 2
  if(JOB->cfg.verbose)
    fprintf(stderr," DEBUG: distance2\n");
#endif
  x1=box1->x0;y1=box1->y0;x2=box2->x0;y2=box2->y0;
  dx1=box1->x1-box1->x0+1; dx2=box2->x1-box2->x0+1; dx=dx1;
  dy1=box1->y1-box1->y0+1; dy2=box2->y1-box2->y0+1; dy=dy1;
  if(abs(dx1-dx2)>1+dx/16 || abs(dy1-dy2)>1+dy/16) rbad++; /* how to weight? */
  /* compare relations to baseline and upper line */
  if(box1->m4>0 && box2->m4>0){  /* used ??? */
    if(2*box1->y1>box1->m3+box1->m4 && 2*box2->y1<box2->m3+box2->m4) rbad+=128;
    if(2*box1->y0>box1->m1+box1->m2 && 2*box2->y0<box2->m1+box2->m2) rbad+=128;
  }
  tx=dx/16; if(dx<17)tx=1; /* raster */
  ty=dy/32; if(dy<33)ty=1;
  /* compare pixels; the loops do not run if dx or dy is not positive */
  for( y=0;y<dy;y+=ty )
  for( x=0;x<dx;x+=tx ) {	/* try global shift too ??? */
    v1=((getpixel(p1,x1+x*dx1/dx,y1+y*dy1/dy)<cs)?1:0); /* better gray? */
    v2=((getpixel(p2,x2+x*dx2/dx,y2+y*dy2/dy)<cs)?1:0); /* better gray? */
    if(v1==v2) { rgood+=16; continue; } /* all things are right! */
    /* what about different pixel??? */
    /* test overlap of surrounding pixels ??? */
    rbad+=4;
    /* v1 = number of differing neighbours - 1, so a single differing
     * neighbour adds no extra badness */
    v1=-1;
    for(i1=-1;i1<2;i1++)
    for(i2=-1;i2<2;i2++)if(i1!=0 || i2!=0){
      if( ((getpixel(p1,x1+x*dx1/dx+i1*(1+dx1/32),y1+y*dy1/dy+i2*(1+dy1/32))<cs)?1:0)
        !=((getpixel(p2,x2+x*dx2/dx+i1*(1+dx2/32),y2+y*dy2/dy+i2*(1+dy2/32))<cs)?1:0) ) v1++;
    }
    if(v1>0)
      rbad+=16*v1;
  }
  if(rgood+rbad) rc= 100*rbad/(rgood+rbad); else rc=99;
  /* if width/high is not correct add badness */
  if (dy1!=0 && dy2!=0)
    rc += ( abs(dx1*dy2-dx2*dy1) * 10 ) / (dy1*dy2);
  else
    rc = 100;  /* degenerate box, no ratio available: worst distance */
  if (rc>100) rc=100;
  if(/* rc<10 && */ JOB->cfg.verbose /* &1024 */){
#if DEBUG == 2
    fprintf(stderr," distance2 rc=%d rgood=%d rbad=%d\n",rc,rgood,rbad);
    out_x(box1);
    out_x(box2);
#endif
  }
  return rc;
}
|
||||
|
||||
wchar_t ocr_db(struct box *box1) {
|
||||
int dd = 1000, dist = 1000;
|
||||
wchar_t c = UNKNOWN;
|
||||
unsigned char buf[200]; /* Oct08 JS: add unsigned to avoid UTF problems */
|
||||
Box *box2, *box3;
|
||||
|
||||
if (!list_empty(&JOB->tmp.dblist)){
|
||||
box3 = (Box *)list_get_header(&JOB->tmp.dblist);
|
||||
if(JOB->cfg.verbose)
|
||||
fprintf(stderr,"\n#DEBUG: ocr_db (%d,%d) ",box1->x0, box1->y0);
|
||||
|
||||
for_each_data(&JOB->tmp.dblist) {
|
||||
box2 = (Box *)list_get_current(&JOB->tmp.dblist);
|
||||
/* do preselect!!! distance() slowly */
|
||||
dd = distance2( box2->p, box2, box1->p, box1, JOB->cfg.cs);
|
||||
if (dd <= dist) { /* new best fit */
|
||||
dist = dd;
|
||||
box3 = box2; /* box3 is a pointer and not copied box2 */
|
||||
|
||||
if (dist<100 && 100-dist >= JOB->cfg.certainty) {
|
||||
/* some deviation of the pattern is tolerated */
|
||||
int i, wa;
|
||||
for (i=0;i<box3->num_ac;i++) {
|
||||
wa = (100-dist)*box3->wac[i]/100; /* weight *= (100-dist) */
|
||||
if (box3->tas[i]) setas(box1,box3->tas[i],wa);
|
||||
else setac(box1,box3->tac[i],wa);
|
||||
}
|
||||
if (box3->num_ac) c=box3->tac[0]; /* 0 for strings (!UNKNOWN) */
|
||||
if (JOB->cfg.verbose)
|
||||
fprintf(stderr, " dist=%4d c= %c 0x%02x %s wc= %3d", dist,
|
||||
((box3->c>32 && box3->c<127) ? (char) box3->c : '.'),
|
||||
(int)box3->c, ((box3->tas[0])?box3->tas[0]:""), box3->wac[0]);
|
||||
}
|
||||
if (dd<=0 && ((box3->num_ac && box3->tas[0]) || box3->c >= 128
|
||||
|| !strchr ("l1|I0O", box3->c)))
|
||||
break; /* speedup if found */
|
||||
}
|
||||
} end_for_each(&JOB->tmp.dblist);
|
||||
|
||||
}
|
||||
|
||||
if( (JOB->cfg.mode&128) != 0 && c == UNKNOWN ) { /* prompt the user */
|
||||
/* should the output go to stderr or special pipe??? */
|
||||
int utf8_ok=0; /* trigger this flag if input is ok */
|
||||
int i, endchar; /* index */
|
||||
out_env(box1); /* old: out_x(box1); */
|
||||
fprintf(stderr,"The above pattern was not recognized.\n"
|
||||
"Enter UTF8 char or string for above pattern. Leave empty if unsure.\n"
|
||||
"Press RET at the end (ALT+RET to store into RAM only) : "
|
||||
); /* ToDo: empty + alt-return (0x1b 0x0a) for help? ^a for skip all */
|
||||
/* UTF-8 (man 7 utf-8):
|
||||
* 7bit = 0xxxxxxx (0000-007F)
|
||||
* 11bit = 110xxxxx 10xxxxxx (0080-07FF)
|
||||
* 16bit = 1110xxxx 10xxxxxx 10xxxxxx (0800-FFFF)
|
||||
* 21bit = 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
* 26bit = 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
* 31bit = 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
*/
|
||||
buf[0]=0;
|
||||
/* shift/ctrl/altgr-enter acts like enter or ^j or ^m,
|
||||
* alt-enter returns 0x1b 0x0a and returns from fgets()
|
||||
* ^d (EOF) returns (nil) from fgets()
|
||||
* x+(2*)ctrl-d returns from fgets() without returning a 0x0a
|
||||
* if not UTF-input-mode, we are in trouble?
|
||||
* ^a=0x01, ^b=0x02, ^e=05, ..., ToDo: meaning of no-input or <=space
|
||||
*/
|
||||
fgets((char *)buf,200,stdin); /* including \n=0x0a */
|
||||
dd=strlen((char *)buf);
|
||||
/* output hexcode if verbose set */
|
||||
if (JOB->cfg.verbose) {
|
||||
fprintf(stderr, "\n# fgets [%d]:", dd);
|
||||
for(i=0; i<dd; i++)
|
||||
fprintf(stderr, " %02x", (unsigned)((unsigned char)buf[i]));
|
||||
fprintf(stderr, "\n#");
|
||||
}
|
||||
/* we dont accept chars which could destroy database file */
|
||||
for (i=0; i<dd; i++) if (buf[i]<32) break; /* need unsigned char here */
|
||||
endchar=buf[i]; /* last char is 0x0a (ret) 0x00 (EOF) or 0x1b (alt+ret) */
|
||||
if (endchar==0x01) { i=0;JOB->cfg.mode&=~128; } /* skip all */
|
||||
buf[dd=i]=0; /* replace final 0x0a or other special codes */
|
||||
if (dd==1 && !(buf[0]&128)) { c=buf[0]; utf8_ok=1; } /* single char */
|
||||
if (dd>1 && dd<7) { /* try to decode single wide char (utf8) */
|
||||
int u0, u1; /* define UTF8-start sequences, u0=0bits u1=1bits */
|
||||
u0= 1<<(7-dd); /* compute start byte from UTF8-length */
|
||||
u1=255&~((1<<(8-dd))-1);
|
||||
/* count number of following 10xxxxxx bytes to i */
|
||||
for (i=1;i<dd;i++) if ((buf[i]&0xc0)!=0x80) break; /* 10xxxxxx */
|
||||
if (i==dd && (buf[0]&(u0|u1))==u1) { utf8_ok=1;
|
||||
c=buf[0]&(u0-1); /* 11..0x.. */
|
||||
for (i=1;i<dd;i++) { c<<=6; c|=buf[i]&0x3F; } /* 10xxxxxx */
|
||||
}
|
||||
}
|
||||
if (dd>0){ /* ToDo: skip space and tab too? */
|
||||
if (utf8_ok==1) { setac(box1, c, 100); } /* store single wchar */
|
||||
if (utf8_ok==0) { /* store a string of chars (UTF8-string) */
|
||||
c='_'; /* what should we do with c? probably a bad idea? */
|
||||
setas(box1, (char *)buf, 100);
|
||||
}
|
||||
/* decide between
|
||||
* 0) just help gocr to find the results and (dont remember, 0x01)
|
||||
* 1) help and remember in the same run (store to memory, 0x1b)
|
||||
* 2) expand the database (dont store ugly chars to the database!)
|
||||
*/
|
||||
if (endchar!=0x01){ /* ^a before hit return */
|
||||
/* is there a reason to dont store to memory? */
|
||||
list_app(&JOB->tmp.dblist, box1); /* append to list for 1+2 */
|
||||
}
|
||||
if (endchar!=0x01 && endchar!=0x1b){
|
||||
store_db(box1); /* store to disk for 2 */
|
||||
}
|
||||
if (JOB->cfg.verbose)
|
||||
fprintf(stderr, " got char= %c 16bit= 0x%04x string= \"%s\"\n",
|
||||
((c>32 && c<127)?(char)c:'.'), (int)c, buf);
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
943
ActiveX/ASCOfficeUtils/GOCR/src/detect.c
Normal file
943
ActiveX/ASCOfficeUtils/GOCR/src/detect.c
Normal file
@@ -0,0 +1,943 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2007 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
check README for my email address
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h> // toupper, tolower
|
||||
#include "pgm2asc.h"
|
||||
#include "gocr.h"
|
||||
|
||||
// ----- detect lines ---------------
|
||||
/* suggestion: Fourier transform and set line frequency where the
|
||||
amplitude has a maximum (JS: slow and not smarty enough).
|
||||
|
||||
option: range for line numbers 1..1000 or similar
|
||||
todo: look for thickest line, and divide if thickness=2*mean_thickness
|
||||
Set these elements of the box structs:
|
||||
|
||||
m1 <-- top of upper case letters and (bdfhkl) (can differ)
|
||||
m2 <-- top of letters (acegmnopqrsuvwxyz)
|
||||
m3 <-- baseline
|
||||
m4 <-- bottom of hanging letters (gqpy)
|
||||
|
||||
performance can be improved by working with a temporary
|
||||
list of boxes of the special text line
|
||||
|
||||
- Jun23,00 more robustness of m3 (test liebfrau1)
|
||||
- Feb01,02 more robustness of m4 (test s46_084.pgm)
|
||||
- Dec03,12 fix problems with footnotes
|
||||
ToDo:
|
||||
- generate lists of boxes per line (faster access)
|
||||
- use statistics
|
||||
- for each box look at it neighbours and set box-m1..m4
|
||||
- m[1..4].max .min if m4.min-m3.max<1 probability lower
|
||||
*/
|
||||
/*
 * detect_lines1 - find text lines inside the region x0,y0 (size dx*dy) and
 * append them to JOB->res.lines.
 *
 * The boxes of JOB->res.boxlist lying inside the region are scanned from top
 * to bottom. For every line found the four vertical limits m1..m4, the
 * horizontal extent x0/x1, the default pitch/mono values and a weight wt
 * are stored at index i of the line table; lines->num is updated at the end.
 * If the line table is empty, a dummy line 0 is created first.
 *
 *  p       image, only p->x is read here (initial x0 of the dummy line)
 *  x0,y0   upper left corner of the region
 *  dx,dy   size of the region
 *
 * Returns 0 in all cases.
 */
int detect_lines1(pix * p, int x0, int y0, int dx, int dy)
{
  int i, jj, y, yy, my, mi, i1, i2, i3, i4,
      m1, m2, m3, m4, ma1, ma2, ma3, ma4, m3pre, m4pre;
  struct box *box2, *box3;  /* box3 = most upper box starting the next line */
  struct tlines *lines = &JOB->res.lines;

  /* ToDo: optional read line-data from external source??? */
  if (lines->num == 0) {     // initialize one dummy-line for pictures etc.
    lines->m4[0] = 0;
    lines->m3[0] = 0;
    lines->m2[0] = 0;
    lines->m1[0] = 0;
    lines->x0[0] = p->x;     /* expand to left end during detection */
    lines->x1[0] = 0;        /* expand to right end */
    lines->pitch[0] = JOB->cfg.spc;  /* default word pitch */
    lines->mono[0] = 0;      /* default spacing, 0 = prop */
    lines->num++;
  }
  i = lines->num;
  /* The table may already be full from an earlier call (detect_lines2 calls
   * this function once per zone). The loop below writes lines->x0[i] etc.
   * before it checks i against MAXlines, so bail out here. */
  if (i >= MAXlines)
    return 0;
  if (dy < 4)
    return 0;                /* image is too low for latin chars */
  my = jj = 0;
  // get the mean height of all hollow chars
  // (better than mean value of everything including bg-pattern or dust?)
  for_each_data(&(JOB->res.boxlist)) {
    box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
    if (    box2->c != PICTURE
         && box2->num_frames>1 && box2->num_frames<3 /* 1 or 2 holes */
         && box2->y0 >= y0 && box2->y1 <= y0 + dy
         && box2->x0 >= x0 && box2->x1 <= x0 + dx
         && box2->frame_vol[0]>0
         && box2->frame_vol[1]<0
       ) {
      jj++;
      my += box2->y1 - box2->y0 + 1;
    }
  } end_for_each(&(JOB->res.boxlist));
  if (jj==0) {
    // get the mean height of all chars
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (    box2->c != PICTURE
           && box2->y1 - box2->y0 + 1 >= 4 /* 4x6 font */
           && box2->y0 >= y0 && box2->y1 <= y0 + dy
           && box2->x0 >= x0 && box2->x1 <= x0 + dx ) {
        jj++;
        my += box2->y1 - box2->y0 + 1;
      }
    } end_for_each(&(JOB->res.boxlist));
  }
  if (jj == 0)
    return 0;                /* no chars detected */


  /* ToDo: a better way could be to mark good boxes (of typical height a-zA-Z0-9)
   *   first and handle only marked boxes for line scan, exclude ?!,.:;etc
   *   but without detecting the chars itself (using good statistics)
   *   see adjust_text_lines()
   */
  my /= jj;          /* we only care about chars with height around my */
  if (JOB->cfg.verbose & 16)
    fprintf(stderr,"\n# detect_lines1(%d %d %d %d) vvv&16 chars=%d my=%d\n# ",
      x0, y0, dx, dy, jj, my);
  // "my" is the average over the whole image (bad, if different fontsizes)

  if (my < 4)
    return 0;        /* mean height is too small => error */

  m4pre=m3pre=y0;    /* lower bound of upper line */
  // better function for scanning line around a letter ???
  // or define lines around known chars "eaTmM"
  for (y = y0; y < y0 + dy; y++) {
    // look for max. of upper and lower bound of next line
    m1 = y0 + dy;
    jj = 0;
#if 1
    /* this is only for test runs */
    if (JOB->cfg.verbose & 16)
      fprintf(stderr,"searching new line %d\n# ",i /* lines->num */);
#endif

    box3 = NULL;     /* mark the most upper box starting next line */
    // find highest point of next line => store to m1-min (m1>=y)
    // only objects greater 2/3*my and smaller 3*my are allowed
    // a higher "!" at end of line can result in a too low m1
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (box2->line>0 || box2->c == PICTURE) continue;
      if (lines->dx)
        yy = lines->dy * box2->x0 / (lines->dx);  /* correct crooked lines */
      else yy=0;
      if (    box2->y0 >= y + yy && box2->y1 < y0 + dy  // lower than y
           && box2->x0 >= x0 && box2->x1 < x0 + dx      // within box ?
           && box2->c != PICTURE                        // no picture
           && box2->num_boxes <= 1          // ignore 2 for "!?i" 3 for "ä"
           && 3 * (box2->y1 - box2->y0) > 2 * my        // not too small
           &&     (box2->y1 - box2->y0) < 3 * my        // not too big
           &&     (box2->y1 - box2->y0) > 4)            // minimum absolute size
      {
        if (box2->y0 < m1 + yy) {
          m1 = box2->y0 - yy;  /* highest upper boundary */
          box3 = box2;
        }
        // fprintf(stderr,"\n %3d %3d %+3d %d m1= %3d",
        //   box2->x0, box2->y0, box2->y1 - box2->y0 + 1, box2->num_boxes, m1);
      }
    } end_for_each(&(JOB->res.boxlist));
    if (!box3 || m1 >= y0+dy) break;  /* no further line found */
    if (JOB->cfg.verbose & 16)
      fprintf(stderr," most upper box at new line xy= %4d %4d %+4d %+4d\n# ",
        box3->x0, box3->y0, box3->x1-box3->x0, box3->y1-box3->y0);

    // at the moment values depend from single chars, which can
    // result in bad values (ex: 4x6 /\=)
    // ToDo: 2) mean size of next line (store list of y0,y1)
    // ToDo: 3) count num0[(y0-m1)*16/my], num1[(y1-m1)*16/my]
    // ToDo:    or down-top search horizontal nearest neighbours
    lines->x0[i] = x0 + dx - 1;  /* expand during operation to left end */
    lines->x1[i] = x0;           /* expand to the right end of line */
    m4=m2=m1; mi=m1+my; m3=m1+2*my; jj=0;
    // find limits for upper bound, base line and ground line
    //    m2-max m3-min m4-max
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (box2->line>0 || box2->c == PICTURE) continue;
      if ( box2->y0 < y0 || box2->y1 >= y0 + dy
        || box2->x0 < x0 || box2->x1 >= x0 + dx ) continue;  // out of image
      if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx);
      else yy=0;
      /* check for ij-dots, used if chars of same height */
      if (    box2->y0 >= y + yy
           && box2->y0 >= y
           && (box2->y1 - box2->y0) < my
           && box2->y1 < m1 + yy + my/4
           && box2->y0 < mi + yy ) {
        mi = box2->y0 - yy;  /* highest upper boundary i-dot */
      }
      // fprintf(stderr,"\n check %3d %3d-%3d y=%d yy=%d m1=%d", box2->x0, box2->y0, box2->y1, y, yy, m1);
      /* get m2-max m3-min m4-max */
      if (    box2->y0 >= y + yy                        // lower than y
           && 3 * (box2->y1 - box2->y0 + 1) > 2 * my    // right size ?
           &&     (box2->y1 - box2->y0 + 1) < 3 * my    // font mix, size = 2.6*my
           &&     (box2->y1 - box2->y0 + 1) > 3         // 4x6 lowercase=4
           && box2->y0 >= m1                            // in m1 range?
           && box2->y0 <= m1 + yy + 9 * my / 8  // my can be too small if mixed
           // ToDo: we need a better (local?) algorithm for big headlines > 2*my
           && box2->y1 <= m1 + yy + 3 * my
           && box2->y1 >= m1 + yy + my / 2
           // lines can differ in height, my may be too small (smaller headlines)
           && box2->y0+box2->y1 <= 2*box3->y1
         )
      {
        jj++;  // count chars for debugging purpose
        if (box2->y0 > m2 + yy) {
          m2 = box2->y0 - yy;  /* highest upper boundary */
          if (JOB->cfg.verbose & 16)
            fprintf(stderr," set m2= %d yy= %d\n# ",m2, yy);
        }
        if (box2->y1 > m4 + yy && (my>6 || box2->y1 < m3+my)) {
          m4 = box2->y1 - yy;  /* lowest lower boundary, small font lines can touch */
        }
        if (   box2->y1 < m3 + yy
            && ( ( 2*box2->y1 > m2+ m4+yy && m2>m1)
              || ( 4*box2->y1 > m1+3*m4+yy) ) )  // care for TeX: \(^1\)Footnote 2003
        /* "'!?" could cause trouble here, therefore this lines */
        /* ToDo: get_bw costs time, check pre and next */
        if(   get_bw(box2->x0,box2->x1,box2->y1+1 ,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 0
           || get_bw(box2->x0,box2->x1,box2->y1+my/2,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 1
           || num_cross(box2->x0,box2->x1,(box2->y0+box2->y1)/2,(box2->y0+box2->y1)/2,box2->p,JOB->cfg.cs)>2 )
        {
          m3 = box2->y1 - yy;  /* highest lower boundary */
          // printf("\n# set1 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
          // out_x(box2);
        }
        if (   box2->y0 + box2->y1 > 2*(m3 + yy)
            && box2->y1 < m4 + yy - my/4 -1
            && box2->y1 >= (m2 + m4)/2  // care for TeX: \(^1\)Footnote 2003
            && m2 > m1 )                // be sure to not use ', m2 must be ok
        {
          m3 = box2->y1 - yy;  /* highest lower boundary */
          // printf("\n# set2 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
          // out_x(box2);
        }
        if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1;  /* right end */
        if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0;  /* left end */
        // printf(" m=%3d %+2d %+2d %+2d yy=%3d\n",m1,m2-m1,m3-m1,m4-m1,yy);
      }
    } end_for_each(&(JOB->res.boxlist));

#if 1
    /* this is only for test runs */
    if (JOB->cfg.verbose & 16)
      fprintf(stderr," step 1 y=%4d m= %4d %+3d %+3d %+3d"
        " my=%2d chars=%3d\n# ",
        y, m1, m2-m1, m3-m1, m4-m1, my, jj);
#endif

    if (m3 == m1)
      break;
#if 1  /* make averages about the line */
    // same again, better estimation
    ma1 = ma2 = ma3 = ma4 = i1 = i2 = i3 = i4 = jj = 0;
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (box2->line>0 || box2->c == PICTURE) continue;
      if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
      if (   box2->y0 >= y + yy && box2->y1 < y0 + dy  // lower than y
          && box2->x0 >= x0 && box2->x1 < x0 + dx      // in box ?
          && box2->c != PICTURE                        // no picture
          && 2 * (box2->y1 - box2->y0) > my            // right size ?
          &&     (box2->y1 - box2->y0) < 4 * my) {
        if (   box2->y0 - yy >= m1-my/4
            && box2->y0 - yy <= m2+my/4
            && box2->y1 - yy >= m3-my/4
            && box2->y1 - yy <= m4+my/4 ) {  /* its within allowed range! */
          // jj++; // not used
          if (abs(box2->y0 - yy - m1) <= abs(box2->y0 - yy - m2))
               { i1++; ma1 += box2->y0 - yy; }
          else { i2++; ma2 += box2->y0 - yy; }
          if (abs(box2->y1 - yy - m3) < abs(box2->y1 - yy - m4))
               { i3++; ma3 += box2->y1 - yy; }
          else { i4++; ma4 += box2->y1 - yy; }
          if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1;  /* right end */
          if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0;  /* left end */
        }
      }
    } end_for_each(&(JOB->res.boxlist));

    if (i1) m1 = (ma1+i1/2) / i1;  /* best rounded */
    if (i2) m2 = (ma2+i2/2) / i2;
    if (i3) m3 = (ma3+i3-1) / i3;  /* round up */
    if (i4) m4 = (ma4+i4-1) / i4;
    // printf("\n# .. set3 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);

#endif

    /* expand right and left end of line */
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (box2->line>0 || box2->c == PICTURE) continue;
      if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
      if (   box2->y0 >= y0 && box2->y1 < y0 + dy
          && box2->x0 >= x0 && box2->x1 < x0 + dx  // in box ?
          && box2->c != PICTURE                    // no picture
          && box2->y0 >= m1-1
          && box2->y0 <= m4
          && box2->y1 >= m1
          && box2->y1 <= m4+1 ) {  /* its within line */
        if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1;  /* right end */
        if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0;  /* left end */
      }
    } end_for_each(&(JOB->res.boxlist));

#if 1
    /* this is only for test runs */
    if (JOB->cfg.verbose & 16)
      fprintf(stderr," step 2 y=%4d m= %4d %+3d %+3d %+3d\n# ",
        y,m1,m2-m1,m3-m1,m4-m1);
#endif

    if (m4 == m1) {
      if(m3+m4>2*y) y = (m4+m3)/2;  /* lower end may overlap the next line */
      continue;
    }
    jj=0;
    lines->wt[i] = 100;
    if (5 * (m2 - m1 +1) < m3 - m2 || (m2 - m1) < 2) jj|=1;  /* same height */
    if (5 * (m4 - m3 +1) < m3 - m2 || (m4 - m3) < 1) jj|=2;  /* same base */
    if (jj&1) lines->wt[i] = 75*lines->wt[i]/100;
    if (jj&2) lines->wt[i] = 75*lines->wt[i]/100;
    if (jj>0 && JOB->cfg.verbose) {
      fprintf(stderr," trouble on line %d, wt*100= %d\n",i,lines->wt[i]);
      fprintf(stderr,"# m= %4d %+3d %+3d %+3d\n",m1,m2-m1,m3-m1,m4-m1);
      fprintf(stderr,"# i= %3d %3d %3d %3d (counts)\n",i1,i2,i3,i4);
      if (jj==3) fprintf(stderr,"# all boxes of same high!\n# ");
      if (jj==1) fprintf(stderr,"# all boxes of same upper bound!\n# ");
      if (jj==2) fprintf(stderr,"# all boxes of same lower bound!\n# ");
    }
    /* ToDo: check for dots ij,. to get the missing information */
#if 1
    /* jj=3: ABCDEF123456 or mnmno or gqpy or lkhfdtb => we are in trouble */
    if (jj==3 && (m4-m1)>my) { jj=0; m2=m1+my/8+1; m4=m3+my/8+1; }  /* ABC123 */
    /* using idots, may fail on "ABCDEFGÄÜÖ" */
    if (jj==3 && mi>0 && mi<m1 && mi>m4pre) { jj=2; m1=mi; }  /* use ij dots */
    if (jj==1 && m2-(m3-m2)/4>m3pre ) {  /* expect: acegmnopqrsuvwxyz */
      if (m1-m4pre<m4-m1)                /* fails for 0123ABCD+Q$ */
        m1 = ( m2 + m4pre ) / 2 ;
      else
        m1 = ( m2 - (m3 - m2) / 4 );
    }
    if (jj==3)
      m2 = m1 + (m3 - m1) / 4 + 1;  /* expect: 0123456789ABCDEF */
    if ( (m2 - m1) < 2)
      m2 = m1 + 2;                  /* font height < 8 pixel ? */
    if (jj&2)
      m4 = m3 + (m4 - m1) / 4 + 1;  /* chars have same lower base */
    if (jj>0 && JOB->cfg.verbose & 16) {
      fprintf(stderr," m= %4d %+2d %+2d %+2d my= %4d\n# ",
        m1, m2-m1, m3-m1, m4-m1, my);
    }
#endif


    {  // empty space between lines
      lines->m4[i] = m4;
      lines->m3[i] = m3;
      lines->m2[i] = m2;
      lines->m1[i] = m1;
      lines->pitch[i] = JOB->cfg.spc;  /* default word pitch */
      lines->mono[i] = 0;              /* default spacing, 0=prop, 1=mono */
      if (JOB->cfg.verbose & 16)
        fprintf(stderr, " m= %4d %+3d %+3d %+3d w= %d (line=%d)\n# ",
          m1, m2 - m1, m3 - m1, m4 - m1, lines->wt[i], i);
      /* keep the entry only if the line is more than 4 pixels high */
      if (i < MAXlines && m4 - m1 > 4)
        i++;
      if (i >= MAXlines) {
        fprintf(stderr, "Warning: lines>MAXlines\n");
        break;
      }
    }
    if (m3+m4>2*y) y = (m3+m4)/2;  /* lower end may overlap the next line */
    if (m3>m3pre) m3pre = m3; else m3=y0;  /* set for next-line scan */
    if (m4>m4pre) m4pre = m4; else m4=y0;  /* set for next-line scan */
  }
  lines->num = i;
  if (JOB->cfg.verbose)
    fprintf(stderr, " num_lines= %d", lines->num-1);
  return 0;
}
|
||||
|
||||
// ----- layout analysis of dx*dy region at x0,y0 -----
|
||||
// ----- detect lines via recursive division (new version) ---------------
|
||||
// what about text in frames???
|
||||
// ToDo: change to bottom-top analyse or/and take rotation into account
|
||||
/*
 * detect_lines2 - zoning: recursively split the region x0,y0 (size dx*dy)
 * at its widest gap and run detect_lines1() on every resulting zone.
 *
 *  p       image; p->y is read for the dust checks, p is passed on
 *  x0,y0   upper left corner of the region
 *  dx,dy   size of the region
 *  r       recursion depth; splitting is only attempted while r<8
 *
 * Returns 0 for empty or dust-like regions, -1 if r>1000, otherwise the
 * result of the last recursive call or of detect_lines1().
 * The result of the first recursive call of each split is discarded.
 */
int detect_lines2(pix *p,int x0,int y0,int dx,int dy,int r){
|
||||
int i,x2,y2,x3,y3,x4,y4,x5,y5,y6,mx,my,x30,x31,y30,y31;
|
||||
struct box *box2,*box3;
|
||||
// shrink box
|
||||
if(dx<=0 || dy<=0) return 0;
|
||||
if(y0+dy< p->y/128 && y0==0) return 0; /* looks like dust */
|
||||
if(y0>p->y-p->y/128 && y0+dy==p->y) return 0; /* looks like dust */
|
||||
|
||||
if(r>1000){ return -1;} // something is wrong
|
||||
if(JOB->cfg.verbose)fprintf(stderr,"\n# r=%2d ",r);
|
||||
|
||||
mx=my=i=0; // mean thickness
|
||||
// remove border, shrink size
|
||||
x2=x0+dx-1; // min x
|
||||
y2=y0+dy-1; // min y
|
||||
x3=x0; // max x
|
||||
y3=y0; // max y
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
if(box3->y0>=y0 && box3->y1<y0+dy &&
|
||||
box3->x0>=x0 && box3->x1<x0+dx)
|
||||
{
|
||||
if( box3->x1 > x3 ) x3=box3->x1; // max x
|
||||
if( box3->x0 < x2 ) x2=box3->x0; // min x
|
||||
if( box3->y1 > y3 ) y3=box3->y1; // max y
|
||||
if( box3->y0 < y2 ) y2=box3->y0; // min y
|
||||
if(box3->c!=PICTURE)
|
||||
if( box3->y1 - box3->y0 > 4 )
|
||||
{
|
||||
i++;
|
||||
mx+=box3->x1-box3->x0+1; // mean x
|
||||
my+=box3->y1-box3->y0+1; // mean y
|
||||
}
|
||||
}
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
/* shrink the region to the bounding box of all boxes found inside it */
x0=x2; dx=x3-x2+1;
|
||||
y0=y2; dy=y3-y2+1;
|
||||
|
||||
if(i==0 || dx<=0 || dy<=0) return 0;
|
||||
/* mx,my = mean width and height of the i non-picture boxes with y1-y0>4 */
mx/=i;my/=i;
|
||||
// better look for widest h/v-gap, ToDo: vertical lines?
|
||||
if(r<8){ // max. depth
|
||||
|
||||
// detect widest horizontal gap
|
||||
/* from here on: x3,y3 = size of the widest gap, x2,y2 = its middle position;
   x5, y5 and y6 are initialized but not used afterwards in this function */
y2=y3=y4=y5=y6=0;
|
||||
x2=x3=x4=x5=y5=0;// min. 3 lines
|
||||
// position and thickness of gap, y6=num_gaps, nbox^2 ops
|
||||
for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
|
||||
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
if( box2->c!=PICTURE ) /* ToDo: not sure, that this is a good idea */
|
||||
if( box2->y0>=y0 && box2->y1<y0+dy
|
||||
&& box2->x0>=x0 && box2->x1<x0+dx
|
||||
&& box2->y1-box2->y0>my/2 ){ // no pictures & dust???
|
||||
|
||||
y4=y0+dy-1; // nearest vert. box
|
||||
x4=x0+dx-1;
|
||||
// ToDo: rotate back box2->x1,y1 to x21,y21
|
||||
// look for nearest lowest (y4) and right (x4) neighbour
|
||||
// of every box (box2)
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
if(box3!=box2)
|
||||
if(box3->y0>=y0 && box3->y1<y0+dy)
|
||||
if(box3->x0>=x0 && box3->x1<x0+dx)
|
||||
if(box3->c!=PICTURE) /* ToDo: not sure, that this is a good idea */
|
||||
if(box3->y1-box3->y0>my/2 ){
|
||||
// ToDo: here we need the rotation around box2
|
||||
x30=box3->x0;
|
||||
x31=box3->x1;
|
||||
y30=box3->y0;
|
||||
y31=box3->y1;
|
||||
// get min. distances to lower and to right direction
|
||||
if( y31 > box2->y1 && y30 < y4 ) y4=y30-1;
|
||||
if( x31 > box2->x1 && x30 < x4 ) x4=x30-1;
|
||||
}
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
// set the width and position of largest hor./vert. gap
|
||||
// largest gap: width position
|
||||
if( y4-box2->y1 > y3 ) { y3=y4-box2->y1; y2=(y4+box2->y1)/2; }
|
||||
if( x4-box2->x1 > x3 ) { x3=x4-box2->x1; x2=(x4+box2->x1)/2; }
|
||||
}
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
// fprintf(stderr,"\n widest y-gap= %4d %4d",y2,y3);
|
||||
// fprintf(stderr,"\n widest x-gap= %4d %4d",x2,x3);
|
||||
|
||||
i=0; // i=1 at x, i=2 at y
|
||||
// this is the critical point
|
||||
// is this a good decision or not???
|
||||
if(x3>0 || y3>0){
|
||||
if(x3>mx && x3>2*y3 && (dy>5*x3 || (x3>10*y3 && y3>0))) i=1; else
|
||||
if(dx>5*y3 && y3>my) i=2;
|
||||
}
|
||||
|
||||
// compare with largest box???
|
||||
/* a PICTURE box spanning nearly the whole width (or height) of the region
   overrides the gap decision: split directly beside that box */
for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
|
||||
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
if( box2->c == PICTURE )
|
||||
if( box2->y0>=y0 && box2->y1<y0+dy
|
||||
&& box2->x0>=x0 && box2->x1<x0+dx )
|
||||
{ // hline ???
|
||||
// largest gap: width position
|
||||
if( box2->x1-box2->x0+4 > dx && box2->y1+4<y0+dy ) { y3=1; y2=box2->y1+1; i=2; break; }
|
||||
if( box2->x1-box2->x0+4 > dx && box2->y0-4>y0 ) { y3=1; y2=box2->y0-1; i=2; break; }
|
||||
if( box2->y1-box2->y0+4 > dy && box2->x1+4<x0+dx ) { x3=1; x2=box2->x1+1; i=1; break; }
|
||||
if( box2->y1-box2->y0+4 > dy && box2->x0-4>x0 ) { x3=1; x2=box2->x0-1; i=1; break; }
|
||||
}
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
if(JOB->cfg.verbose)fprintf(stderr," i=%d",i);
|
||||
|
||||
if(JOB->cfg.verbose && i) fprintf(stderr," divide at %s x=%4d y=%4d dx=%4d dy=%4d",
|
||||
((i)?( (i==1)?"x":"y" ):"?"),x2,y2,x3,y3);
|
||||
// divide horizontally if v-gap is thicker than h-gap
|
||||
// and length is larger 5*width
|
||||
if(i==1){ detect_lines2(p,x0,y0,x2-x0+1,dy,r+1);
|
||||
return detect_lines2(p,x2,y0,x0+dx-x2+1,dy,r+1); }
|
||||
// divide vertically
|
||||
if(i==2){ detect_lines2(p,x0,y0,dx,y2-y0+1,r+1);
|
||||
return detect_lines2(p,x0,y2,dx,y0+dy-y2+1,r+1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* no split was made: this region is treated as one zone of text */
if(JOB->cfg.verbose) if(dx<5 || dy<7)fprintf(stderr," empty box");
|
||||
if(dx<5 || dy<7) return 0; // do not care about dust
|
||||
if(JOB->cfg.verbose)fprintf(stderr, " box detected at %4d %4d %4d %4d",x0,y0,dx,dy);
|
||||
/* if JOB->tmp.ppo has pixel data, draw the frame of the zone into it
   (presumably a debug output image - TODO confirm) */
if(JOB->tmp.ppo.p){
|
||||
for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0 ,255,16);
|
||||
for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0+dy-1,255,16);
|
||||
for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0 ,y0+i ,255,16);
|
||||
for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0+dx-1,y0+i ,255,16);
|
||||
// writebmp("out10.bmp",p2,JOB->cfg.verbose); // colored should be better
|
||||
}
|
||||
/* the 0*n terms are no-ops; the zone is passed on unchanged */
return detect_lines1(p,x0-0*1,y0-0*2,dx+0*2,dy+0*3);
|
||||
|
||||
/*
|
||||
struct tlines *lines = &JOB->res.lines;
|
||||
i=lines->num; lines->num++;
|
||||
lines->m1[i]=y0; lines->m2[i]=y0+5*dy/16;
|
||||
lines->m3[i]=y0+12*dy/16; lines->m4[i]=y0+dy-1;
|
||||
lines->x0[i]=x0; lines->x1[i]=x0+dx-1;
|
||||
if(JOB->cfg.verbose)fprintf(stderr," - line= %d",lines->num);
|
||||
return 0;
|
||||
*/
|
||||
}
|
||||
|
||||
/* ToDo: Heron's algorithm for square root x=(x+y/x)/2 is more efficient
|
||||
* than interval subdivision (?) (germ.: Intervallschachtelung)
|
||||
* without using the math library
|
||||
* see http://www.math.vt.edu/people/brown/doc/sqrts.pdf
|
||||
*/
|
||||
/*
 * my_sqrt - integer square root by interval bisection.
 *
 * Returns the largest r with r*r <= x, i.e. floor(sqrt(x)).
 * Returns 0 for x <= 0.
 *
 * The previous version returned root-1 for perfect squares (16 -> 3),
 * 0 for x == 1, and overflowed in ym*ym for large x.
 */
int my_sqrt(int x){
  int lo, hi, mid;
  if (x <= 0) return 0;
  if (x == 1) return 1;
  /* invariant: lo*lo <= x < hi*hi (holds for lo=1, hi=x because x >= 2) */
  lo = 1;
  hi = x;
  while (lo < hi - 1) {
    mid = lo + (hi - lo) / 2;        /* mid >= 2 here, never 0 */
    /* mid <= x/mid is the same as mid*mid <= x, but cannot overflow */
    if (mid <= x / mid) lo = mid; else hi = mid;
  }
  return lo;
}
|
||||
|
||||
/*
|
||||
** Detect rotation angle (one for whole image)
|
||||
** old: longest text-line and determining the angle of this line.
|
||||
*
|
||||
* search right nearest neighbour of each box and average vectors
|
||||
* to get the text orientation,
|
||||
* upside down decision is not made here (I dont know how to do it)
|
||||
* ToDo: set job->res.lines.{dx,dy}
|
||||
* pass 1: get mean vector to nearest char
|
||||
* pass 2: get mean vector to nearest char without outriders to pass 1
|
||||
* estimate direction as (dx,dy,num)[pass]
|
||||
* ToDo: estimate an error, boxes only work fine for zero-rotation
|
||||
* for 45 degree use vectors, not boxes to get base line
|
||||
*/
|
||||
#define INorm 1024 /* integer unit 1.0 */
|
||||
/*
 * detect_rotation_angle - estimate the text direction of the whole image.
 *
 * For each non-picture box at least 5 pixels high, the nearest neighbour to
 * the right is searched, and the vectors between the box centres are averaged.
 * This is repeated for up to 4 passes; from the second pass on, candidate
 * neighbours whose direction deviates from the previous pass' mean vector by
 * more than that pass' mean deviation er[pass-1] are rejected.
 *
 * The mean vector of the last pass that found any pairs (default 1024,0) is
 * stored in JOB->res.lines.dx / .dy. Always returns 0.
 *
 * NOTE(review): boxes are read via the parameter `job`, but verbosity and
 * the result go through the global JOB - presumably both refer to the same
 * job; confirm against the callers.
 */
int detect_rotation_angle(job_t *job){
  struct box *box2, *box3,
             *box_nn;   /* nearest neighbour box */
  int x2, y2, x3, y3, dist, mindist, pass,
      rx=0, ry=0, re=0, // final result
      /* to avoid 2nd run, we store pairs in 2 different categories */
      nn[4]={0,0,0,0},  /* num_pairs used for estimation [(pass-1)%2,pass%2] */
      dx[4]={0,0,0,0},  /* x-component of rotation vector per pass */
      dy[4]={0,0,0,0},  /* y-component of rotation vector per pass */
      er[4]={INorm/4,0,0,0};  /* mean angle deviation to pass-1 (radius^2) */
  // de;  /* ToDo: absolute maximum error (dx^2+dy^2) */
  // ToDo: next pass: go to bigger distances and reduce max error
  // error is diff between passes? or diff of bottoms and top borders (?)

  rx=1024; ry=0;  // default
  for (pass=0;pass<4;pass++) {
    for_each_data(&(job->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(job->res.boxlist));
      if (box2->c==PICTURE) continue;
      /* subfunction probability of char */
      // i?
      // if (box2->x1 - box2->x0 < 3) continue;  /* smallest font is 4x6 */
      if (box2->y1 - box2->y0 < 4) continue;
      /* set maximum possible distance */
      box_nn=box2;  // initial box to compare with

      // ToDo: clustering or majority
      // the algorithm is far from being perfect, pitfalls are likely
      // but its better than the old algorithm, ToDo: database-rotated-images
      mindist = job->src.p.x * job->src.p.x + job->src.p.y * job->src.p.y;
      /* get middle point of the box */
      x2 = (box2->x0 + box2->x1)/2;
      y2 = (box2->y0 + box2->y1)/2;
      re=0;
      /* search for nearest neighbour box_nn[pass+1] of box_nn[pass] */
      for_each_data(&(job->res.boxlist)) {
        box3 = (struct box *)list_get_current(&(job->res.boxlist));
        /* try to select only potential neighbouring chars */
        /* select out all senseless combinations */
        if (box3->c==PICTURE || box3==box2) continue;
        x3 = (box3->x0 + box3->x1)/2;
        y3 = (box3->y0 + box3->y1)/2;  /* get middle point of the box */
        if (x3<x2) continue;  /* simplify by going right only */
        /* The minimum-distance filter must run before the scalar product
         * below: for two boxes with the same centre dist is 0 and the
         * normalisation would compute 0/0 and cast NaN to int. */
        dist = (y3-y2)*(y3-y2) + (x3-x2)*(x3-x2);
        if (dist<9) continue;  /* minimum distance^2 is 3^2 */
        // throw away deviation of angles if > pass-1?
        // scalprod max in direction, cross prod min in direction
        // a,b (vectors): <a,b>^2/(|a|*|b|)^2 = 0(90deg)..0.5(45deg).. 1(0deg)
        // * 1024 ??
        if (pass>0) {  // new variant = scalar product
          // danger of int overflow, ToDo: use int fraction
          re =(int) ((1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])
                    *(1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])*INorm
                   /(1.*((x3-x2)*(x3-x2)+(y3-y2)*(y3-y2))
                    *(1.*dx[pass-1]*dx[pass-1]+dy[pass-1]*dy[pass-1])));
          if (INorm-re>er[pass-1]) continue;  // hits mean deviation
        }
        /* neighbours should have same order of size (?) */
        if (3*(box3->y1-box3->y0+4) < 2*(box2->y1-box2->y0+1)) continue;
        if (2*(box3->y1-box3->y0+1) > 3*(box2->y1-box2->y0+4)) continue;
        if (2*(box3->x1-box3->x0+1) > 5*(box2->x1-box2->x0+4)) continue;
        if (5*(box3->x1-box3->x0+4) < 2*(box2->x1-box2->x0+1)) continue;
        /* should be in right range, Idea: center3 outside box2? noholes */
        if ((x3<box2->x1-1) && (x3>box2->x0+1)
         && (y3<box2->y1-1) && (y3>box2->y0+1)) continue;
        // if chars are of different size, connect careful
        if ( abs(x3-x2) > 2*(box2->x1 - box2->x0 + box3->x1 - box3->x0 + 2)) continue;
        if ( abs(y3-y2) > (box2->x1 - box2->x0 + box3->x1 - box3->x0 + 2)) continue;
        // make distances in pass-1 directions shorter or continue if not in pass-1 range?
        if (dist<mindist) { mindist=dist; box_nn=box3;}
        // fprintf(stderr,"x y %d %d %d %d dist %d min %d\n",
        //   x2,y2,x3,y3,dist,mindist);
      } end_for_each(&(job->res.boxlist));

      if (box_nn==box2) continue;  /* has no neighbour, next box */

      box3=box_nn; dist=mindist;
      x3 = (box3->x0 + box3->x1)/2;
      y3 = (box3->y0 + box3->y1)/2;  /* get middle point of the box */
      // dist = my_sqrt(1024*((x3-x2)*(x3-x2)+(y3-y2)*(y3-y2)));
      // compare with first box
      x2 = (box2->x0 + box2->x1)/2;
      y2 = (box2->y0 + box2->y1)/2;
      // if the height of neighbouring boxes differ, use min diff (y0,y1)
      if (pass>0 && 16*abs(dy[pass-1]) < dx[pass-1])  // does not work for strong rot.
      if (abs(box2->y1-box2->y0-box3->y1+box3->y0)>(box2->y1-box2->y0)/8) {
        // ad eh ck ...
        if (abs(box2->y1-box3->y1)<abs(y3-y2)) { y2=box2->y1; y3=box3->y1; }
        // ag ep qu ...
        if (abs(box2->y0-box3->y0)<abs(y3-y2)) { y2=box2->y0; y3=box3->y0; }
      }
      if (abs(x3-x2)<4) continue;
      dx[pass]+=(x3-x2)*1024;  /* normalized before averaging */
      dy[pass]+=(y3-y2)*1024;  /* 1024 is for the precision */
      nn[pass]++;
      if (pass>0) {  // set error = mean deviation from pass -1
        /* x3-x2 >= 4 here, so neither denominator factor can be zero */
        re = INorm-(int)((1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])
                        *(1.*(x3-x2)*dx[pass-1]+(y3-y2)*dy[pass-1])*INorm
                       /((1.*(x3-x2)*(x3-x2)+(y3-y2)*(y3-y2))
                        *(1.*dx[pass-1]*dx[pass-1]+dy[pass-1]*dy[pass-1]))
                        );
        er[pass]+=re;
      }
#if 0
      if(JOB->cfg.verbose)
        fprintf(stderr,"# next nb (x,y,dx,dy,re) %6d %6d %5d %5d %5d pass %d\n",
          x2, y2, x3-x2, y3-y2, re, pass+1);
#endif
    } end_for_each(&(job->res.boxlist));
    if (!nn[pass]) break;  /* no pairs in this pass: keep the previous result */
    /* meanvalues */
    rx=dx[pass]/=nn[pass];
    ry=dy[pass]/=nn[pass];
    if (pass>0) er[pass]/=nn[pass];
    if(JOB->cfg.verbose)
      fprintf(stderr,"# rotation angle (x,y,maxr,num)"
        " %6d %6d %6d %4d pass %d\n",
        rx, ry, er[pass], nn[pass], pass+1);
  }
  if (abs(ry*100)>abs(rx*50))
    fprintf(stderr,"<!-- gocr will fail, strong rotation angle detected -->\n");
  /* ToDo: normalize to 2^10 bit (square fits to 32 bit) */
  JOB->res.lines.dx=rx;
  JOB->res.lines.dy=ry;
  return 0;
}
|
||||
|
||||
/* ----- detect lines --------------- */
|
||||
/*
 * detect_text_lines - entry point of the line detection.
 *
 *  pp  image; the whole area 0,0 .. pp->x,pp->y is analysed
 *  mo  mode bits; if bit 4 is set the zoning variant detect_lines2() is
 *      used, otherwise detect_lines1() runs directly on the whole image
 *
 * Always returns 0; the results of the called functions are ignored.
 */
int detect_text_lines(pix * pp, int mo) {
  const int use_zoning = ((mo & 4) != 0);

  if (JOB->cfg.verbose) {
    fprintf(stderr, "# detect.c detect_text_lines (vvv=16 for more info) ");
  }

  if (!use_zoning) {
    /* old algo: one pass over the complete image */
    detect_lines1(pp, 0, 0, pp->x, pp->y);
  } else {
    if (JOB->cfg.verbose) {
      fprintf(stderr, "# zoning\n# ... ");
    }
    /* later replaced by better algo */
    detect_lines2(pp, 0, 0, pp->x, pp->y, 0);
  }

  if (JOB->cfg.verbose) {
    fprintf(stderr,"\n");
  }
  return 0;
}
|
||||
|
||||
|
||||
/* ----- adjust lines --------------- */
|
||||
// rotation angle? JOB->res.lines.dy, .x0 removed later
|
||||
// this is for cases, where m1..m4 is not very sure detected before
|
||||
// chars are recognized
|
||||
/*
 * adjust_text_lines - refine m1..m4 of the detected lines using chars that
 * were already recognized with high weight (wac[0] >= 95).
 *
 * Pass 1 collects, per line, sums and counts of the skew-corrected upper and
 * lower bounds of chars whose shape fixes certain limits:
 *   "aemnr"                        -> m2 and m3
 *   "bdhklABDEFGHIKLMNRT123456789" -> m1 and m3
 *   "gq"                           -> m2 and m4
 * Then, for every line except the dummy line 0, the limits are replaced by
 * the rounded means (where samples exist) and the line weight is updated.
 * Pass 2 removes boxes from lines they clearly do not fit, corrects the case
 * of chars of "cCoOpPsSuUvVwWxXyYzZ" by their height, and copies the line
 * limits back into the boxes.
 *
 *  pp, mo  not used in this function
 *
 * Returns the number of chars whose case was changed (0 if there are fewer
 * than 2 lines or if malloc fails).
 *
 * Layout of m[line*16 + k]:  k=0..3 sum m1..m4,  k=4..7 num m1..m4,
 *                            k=8..11 min m1..m4, k=12..15 max m1..m4
 */
int adjust_text_lines(pix * pp, int mo) {
  struct box *box2;
  int *m,  /* summ m1..m4, num_chars for m1..m4, min m1..m4, max. m1..m4 */
      l, i, dy, dx, diff=0, y0, y1,
      c0;  /* best char of the box if it is 7-bit ASCII, else 0 */

  if ((l=JOB->res.lines.num)<2) return 0;  // ???
  if (JOB->cfg.verbose)
    fprintf(stderr, "# adjust text lines ");
  m=(int *)malloc(l*16*sizeof(int));
  if (!m) { fprintf(stderr," malloc failed\n"); return 0;}
  for (i=0;i<16*l;i++) m[i]=0;  /* initialize */
  dy=JOB->res.lines.dy;  /* tan(alpha) of skewing */
  dx=JOB->res.lines.dx;  /* old: width of image */
  // js: later skewing is replaced by one transformation of vectorized image

  if (dx)
  for_each_data(&(JOB->res.boxlist)) {
    box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
    if (box2->line<=0) continue;
    if (box2->line>=l) continue;  /* would index beyond the m[] array */
    if (box2->num_ac<1) continue;
    if (box2->wac[0]<95) continue;
    if (box2->m2==0 || box2->y1<box2->m2) continue;  // char outside line
    /* NOTE(review): "m3==4" looks like a typo for "m3==0" (compare the line
     * above); left unchanged because the intent cannot be confirmed here. */
    if (box2->m3==4 || box2->y0>box2->m3) continue;  // char outside line
    /* Only 7-bit ASCII codes may be passed to strchr(): a code that
     * truncates to 0 would match the terminating NUL of every set, and other
     * wide codes could alias ASCII letters after the cast to char. */
    c0 = (box2->tac[0] > 0 && box2->tac[0] < 128) ? (int)box2->tac[0] : 0;
    if (!c0) continue;
    y0=box2->y0-((box2->x1)*dy/dx);  /* corrected by page skewing */
    y1=box2->y1-((box2->x1)*dy/dx);
    if (strchr("aemnr",c0)) {  // cC vV sS oO ... is unsure!
      m[box2->line*16+1]+=y0; m[box2->line*16+5]++;  // num m2
      m[box2->line*16+2]+=y1; m[box2->line*16+6]++;  // num m3
      if (m[box2->line*16+ 9]>y0) m[box2->line*16+ 9]=y0;  /* min m2 */
      if (m[box2->line*16+13]<y0) m[box2->line*16+13]=y0;  /* max m2 */
      if (m[box2->line*16+10]>y1) m[box2->line*16+10]=y1;  /* min m3 */
      if (m[box2->line*16+14]<y1) m[box2->line*16+14]=y1;  /* max m3 */
    }
    if (strchr("bdhklABDEFGHIKLMNRT123456789",c0)) {
      m[box2->line*16+0]+=y0; m[box2->line*16+4]++;  // num m1
      m[box2->line*16+2]+=y1; m[box2->line*16+6]++;  // num m3
      if (m[box2->line*16+ 8]>y0) m[box2->line*16+ 8]=y0;  /* min m1 */
      if (m[box2->line*16+12]<y0) m[box2->line*16+12]=y0;  /* max m1 */
      if (m[box2->line*16+10]>y1) m[box2->line*16+10]=y1;  /* min m3 */
      if (m[box2->line*16+14]<y1) m[box2->line*16+14]=y1;  /* max m3 */
    }
    if (strchr("gq",c0)) {
      m[box2->line*16+1]+=y0; m[box2->line*16+5]++;  // num m2
      m[box2->line*16+3]+=y1; m[box2->line*16+7]++;  // num m4
      if (m[box2->line*16+ 9]>y0) m[box2->line*16+ 9]=y0;  /* min m2 */
      if (m[box2->line*16+13]<y0) m[box2->line*16+13]=y0;  /* max m2 */
      if (m[box2->line*16+11]>y1) m[box2->line*16+11]=y1;  /* min m4 */
      if (m[box2->line*16+15]<y1) m[box2->line*16+15]=y1;  /* max m4 */
    }
  } end_for_each(&(JOB->res.boxlist));

  for (i=1;i<l;i++) {
    diff=0;  // show diff per line
    if (m[i*16+4]) diff+=abs(JOB->res.lines.m1[i]-m[i*16+0]/m[i*16+4]);
    if (m[i*16+5]) diff+=abs(JOB->res.lines.m2[i]-m[i*16+1]/m[i*16+5]);
    if (m[i*16+6]) diff+=abs(JOB->res.lines.m3[i]-m[i*16+2]/m[i*16+6]);
    if (m[i*16+7]) diff+=abs(JOB->res.lines.m4[i]-m[i*16+3]/m[i*16+7]);
    /* recalculate sureness, empirically */
    if (m[i*16+4]*m[i*16+5]*m[i*16+6]*m[i*16+7] > 0)
      JOB->res.lines.wt[i]=(JOB->res.lines.wt[i]+100)/2;
    else
      JOB->res.lines.wt[i]=(JOB->res.lines.wt[i]*90)/100;
    // set mean values of sure detected bounds (rounded precisely)
    if ( m[i*16+4]) JOB->res.lines.m1[i]=(m[i*16+0]+m[i*16+4]/2)/m[i*16+4];
    if ( m[i*16+5]) JOB->res.lines.m2[i]=(m[i*16+1]+m[i*16+5]/2)/m[i*16+5];
    if ( m[i*16+6]) JOB->res.lines.m3[i]=(m[i*16+2]+m[i*16+6]/2)/m[i*16+6];
    if ( m[i*16+7]) JOB->res.lines.m4[i]=(m[i*16+3]+m[i*16+7]/2)/m[i*16+7];
    // care about very small fonts
    if (JOB->res.lines.m2[i]-JOB->res.lines.m1[i]<=1 && m[i*16+5]==0 && m[i*16+4])
      JOB->res.lines.m2[i]=JOB->res.lines.m1[i]+2;
    if (JOB->res.lines.m2[i]-JOB->res.lines.m1[i]<=1 && m[i*16+4]==0 && m[i*16+5])
      JOB->res.lines.m1[i]=JOB->res.lines.m2[i]-2;
    if (JOB->res.lines.m4[i]-JOB->res.lines.m3[i]<=1 && m[i*16+7]==0 && m[i*16+6])
      JOB->res.lines.m4[i]=JOB->res.lines.m3[i]+2;
    if (JOB->res.lines.m4[i]-JOB->res.lines.m3[i]<=1 && m[i*16+6]==0 && m[i*16+7])
      JOB->res.lines.m3[i]=JOB->res.lines.m4[i]-2;
    if ( m[i*16+7]<1 &&
         JOB->res.lines.m4[i]
         <=JOB->res.lines.m3[i]+(JOB->res.lines.m3[i]-JOB->res.lines.m2[i])/4 )
      JOB->res.lines.m4[i]=
        JOB->res.lines.m3[i]+(JOB->res.lines.m3[i]-JOB->res.lines.m2[i])/4;
    if ( m[i*16+7]<1 && m[i*16+12+2]>0 &&  // m4 < max.m3+..
         JOB->res.lines.m4[i] < 2*m[i*16+12+2]-JOB->res.lines.m3[i]+2 )
      JOB->res.lines.m4[i] = 2*m[i*16+12+2]-JOB->res.lines.m3[i]+2;
    if (JOB->res.lines.m4[i]<=JOB->res.lines.m3[i])
      JOB->res.lines.m4[i]= JOB->res.lines.m3[i]+1;  /* 4x6 */

    if (JOB->cfg.verbose & 17)
      fprintf(stderr, "\n# line= %3d m= %4d %+3d %+3d %+3d "
        " n= %2d %2d %2d %2d w= %3d diff= %d",
        i, JOB->res.lines.m1[i],
        JOB->res.lines.m2[i] - JOB->res.lines.m1[i],
        JOB->res.lines.m3[i] - JOB->res.lines.m1[i],
        JOB->res.lines.m4[i] - JOB->res.lines.m1[i],
        m[i*16+4],m[i*16+5],m[i*16+6],m[i*16+7],
        JOB->res.lines.wt[i], diff);
  }
  diff=0;  // count adjusted chars
#if 1
  if (dx)
  for_each_data(&(JOB->res.boxlist)) {
    box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
    if (box2->line<=0) continue;
    /* check if box was on the wrong line, ToDo: search a better line */
    if (2*box2->y0<2*JOB->res.lines.m1[box2->line]
                    -JOB->res.lines.m4[box2->line]
                    +JOB->res.lines.m1[box2->line]) box2->line=0;
    if (2*box2->y1>2*JOB->res.lines.m4[box2->line]
                    +JOB->res.lines.m4[box2->line]
                    -JOB->res.lines.m1[box2->line]) box2->line=0;
    /* do adjustments */
    /* The range check is on the char code tac[0]; the former test
     * "num_ac > 31" was a typo that kept this branch from ever running. */
    if (box2->num_ac>0
      && box2->tac[0] > 31 && box2->tac[0] < 127  /* islower(>256) may SIGSEGV */
      && strchr("cCoOpPsSuUvVwWxXyYzZ",(char)box2->tac[0])) {  // no_wchar
      if (box2->y0-((box2->x1)*dy/dx)
          < (JOB->res.lines.m1[box2->line]+JOB->res.lines.m2[box2->line])/2
         && islower(box2->tac[0])
         ) { setac(box2,toupper((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
      if (box2->y0-((box2->x1)*dy/dx)
          > (JOB->res.lines.m1[box2->line]+JOB->res.lines.m2[box2->line]+1)/2
         && isupper(box2->tac[0])
         ){ setac(box2,tolower((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
    }
    /* copy the (re-skewed) line limits into the box */
    box2->m1=JOB->res.lines.m1[box2->line]+((box2->x1)*dy/dx);
    box2->m2=JOB->res.lines.m2[box2->line]+((box2->x1)*dy/dx);
    box2->m3=JOB->res.lines.m3[box2->line]+((box2->x1)*dy/dx);
    box2->m4=JOB->res.lines.m4[box2->line]+((box2->x1)*dy/dx);
  } end_for_each(&(JOB->res.boxlist));
#endif

  free(m);
  if(JOB->cfg.verbose) fprintf(stderr,"\n# changed_chars= %d\n",diff);
  return(diff);
}
|
||||
|
||||
/* ---- measure mean character
|
||||
* recalculate mean width and height after changes in boxlist
|
||||
* ToDo: only within a Range?
|
||||
*/
|
||||
int calc_average() {
|
||||
int i = 0, x0, y0, x1, y1;
|
||||
struct box *box4;
|
||||
|
||||
JOB->res.numC = 0;
|
||||
JOB->res.sumY = 0;
|
||||
JOB->res.sumX = 0;
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
box4 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
if( box4->c != PICTURE ){
|
||||
x0 = box4->x0; x1 = box4->x1;
|
||||
y0 = box4->y0; y1 = box4->y1;
|
||||
i++;
|
||||
if (JOB->res.avX * JOB->res.avY > 0) {
|
||||
if (x1 - x0 + 1 > 4 * JOB->res.avX
|
||||
&& y1 - y0 + 1 > 4 * JOB->res.avY) continue; /* small picture */
|
||||
if (4 * (y1 - y0 + 1) < JOB->res.avY || y1 - y0 < 2)
|
||||
continue; // dots .,-_ etc.
|
||||
}
|
||||
if (x1 - x0 + 1 < 4
|
||||
&& y1 - y0 + 1 < 6 ) continue; /* dots etc */
|
||||
JOB->res.sumX += x1 - x0 + 1;
|
||||
JOB->res.sumY += y1 - y0 + 1;
|
||||
JOB->res.numC++;
|
||||
}
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
if ( JOB->res.numC ) { /* avoid div 0 */
|
||||
JOB->res.avY = (JOB->res.sumY+JOB->res.numC/2) / JOB->res.numC;
|
||||
JOB->res.avX = (JOB->res.sumX+JOB->res.numC/2) / JOB->res.numC;
|
||||
}
|
||||
if (JOB->cfg.verbose){
|
||||
fprintf(stderr, "# averages: mXmY= %d %d nC= %d n= %d\n",
|
||||
JOB->res.avX, JOB->res.avY, JOB->res.numC, i);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ---- analyse boxes, find pictures and mark (do this first!!!)
|
||||
*/
|
||||
/* Mark unusually large boxes as PICTURE (meant to run before recognition).
 *
 * Returns -1 (with a warning when verbose) if job->res.numC is 0; otherwise
 * refreshes avX/avY from the current sums, flags every box that is more
 * than 4x the average width or height unless more than 4 boxes of
 * comparable height share its vertical position (big headlines), calls
 * calc_average() and returns 0.
 */
int detect_pictures(job_t *job) {
  int marked = 0;               /* boxes newly flagged as PICTURE */
  int top, bottom, span, half, peers, other_span;
  struct box *cand, *other;

  if (job->res.numC == 0) {
    if (job->cfg.verbose)
      fprintf(stderr, "# detect.c L%d Warning: numC=0\n", __LINE__);
    return -1;
  }

  /* ToDo: set Y to uppercase mean value? */
  job->res.avY = (job->res.sumY + job->res.numC / 2) / job->res.numC;
  job->res.avX = (job->res.sumX + job->res.numC / 2) / job->res.numC;
  /* ToDo: two highest volumes? crosses, on extreme volume + on border */
  if (job->cfg.verbose)
    fprintf(stderr, "# detect.c L%d pictures, frames, mXmY= %d %d ... ",
            __LINE__, job->res.avX, job->res.avY);

  for_each_data(&(job->res.boxlist)) {
    cand = (struct box *)list_get_current(&(job->res.boxlist));
    if (cand->c == PICTURE) continue;

    top    = cand->y0;
    bottom = cand->y1;
    span   = bottom - top;

    /* only boxes of unusual size are picture candidates */
    if (cand->x1 - cand->x0 + 1 <= 4 * job->res.avX
        && span + 1 <= 4 * job->res.avY) continue;

    /* count objects on same baseline which could be chars,
     * else: big headlines could be misinterpreted as pictures */
    half  = (span + 1) / 2;
    peers = 0;
    for_each_data(&(job->res.boxlist)) {
      other = (struct box *)list_get_current(&(job->res.boxlist));
      if (other->c != PICTURE) {
        other_span = other->y1 - other->y0;
        if (other_span <= 2 * span            /* not much taller   */
            && 2 * other_span >= span         /* not much smaller  */
            && other->y0 <= top + half
            && other->y0 >= top - half
            && other->y1 <= bottom + half
            && other->y1 >= bottom - half) {
          // ToDo: continue if numcross() only 1, example: |||IIIll|||
          peers++;
        }
      }
    } end_for_each(&(job->res.boxlist));

    if (peers <= 4) {
      cand->c = PICTURE;
      marked++;
    }
    /* ToDo: pictures could have low contrast=Sum((pixel(p,x,y)-160)^2) */
  } end_for_each(&(job->res.boxlist));

  // start second iteration
  if (job->cfg.verbose) {
    fprintf(stderr, " %d - boxes %d\n", marked, job->res.numC - marked);
  }
  calc_average();
  return 0;
}
|
||||
432
ActiveX/ASCOfficeUtils/GOCR/src/gocr.c
Normal file
432
ActiveX/ASCOfficeUtils/GOCR/src/gocr.c
Normal file
@@ -0,0 +1,432 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2009 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
sometimes I have written comments in german language, sorry for that
|
||||
|
||||
This file was retrieved from pgm2asc.cc of Joerg, in order to have
|
||||
a library of the ocr-engine from Klaas Freitag
|
||||
|
||||
*/
|
||||
#include "config.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#ifdef HAVE_GETTIMEOFDAY
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "pnm.h"
|
||||
#include "pgm2asc.h"
|
||||
#include "pcx.h"
|
||||
#include "ocr0.h" /* only_numbers */
|
||||
#include "progress.h"
|
||||
#include "version.h"
|
||||
|
||||
static void out_version(int v) {
|
||||
fprintf(stderr, " Optical Character Recognition --- gocr "
|
||||
version_string " " release_string "\n"
|
||||
" Copyright (C) 2001-2009 Joerg Schulenburg GPG=1024D/53BDFBE3\n"
|
||||
" released under the GNU General Public License\n");
|
||||
/* as recommended, (c) and license should be part of the binary */
|
||||
/* no email because of SPAM, see README for contacting the author */
|
||||
if (v)
|
||||
fprintf(stderr, " use option -h for help\n");
|
||||
if (v & 2)
|
||||
exit(1);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Print the banner and the command line summary to stderr, then exit(0).
 * The text is emitted in several pieces so that every string literal stays
 * below the 509 byte limit of ISO C89. */
static void help(void) {
  out_version(0);
  /* output is shortened to essentials, see manual page for details */
  fputs(
    " using: gocr [options] pnm_file_name # use - for stdin\n"
    " options (see gocr manual pages for more details):\n"
    " -h, --help\n"
    " -i name - input image file (pnm,pgm,pbm,ppm,pcx,...)\n"
    " -o name - output file (redirection of stdout)\n"
    " -e name - logging file (redirection of stderr)\n"
    " -x name - progress output to fifo (see manual)\n"
    " -p name - database path including final slash (default is ./db/)\n",
    stderr);
  fputs(
    " -f fmt - output format (ISO8859_1 TeX HTML XML UTF8 ASCII)\n"
    " -l num - threshold grey level 0<160<=255 (0 = autodetect)\n"
    " -d num - dust_size (remove small clusters, -1 = autodetect)\n"
    " -s num - spacewidth/dots (0 = autodetect)\n"
    " -v num - verbose (see manual page)\n"
    " -c string - list of chars (debugging, see manual)\n"
    " -C string - char filter (ex. hexdigits: ""0-9A-Fx"", only ASCII)\n"
    " -m num - operation modes (bitpattern, see manual)\n",
    stderr);
  fputs(
    " -a num - value of certainty (in percent, 0..100, default=95)\n"
    " -u string - output this string for every unrecognized character\n",
    stderr);
  fputs(
    " examples:\n"
    "\tgocr -m 4 text1.pbm # do layout analyzis\n"
    "\tgocr -m 130 -p ./database/ text1.pbm # extend database\n"
    "\tdjpeg -pnm -gray text.jpg | gocr - # use jpeg-file via pipe\n"
    "\n",
    stderr);
  fputs(" webpage: http://jocr.sourceforge.net/\n", stderr);
  exit(0);
}
|
||||
|
||||
#ifdef HAVE_GETTIMEOFDAY
|
||||
/* from the glibc documentation */
|
||||
/* Compute *result = *x - *y (from the glibc documentation).
 * *y is modified in place while the microsecond carry is normalised.
 * Returns 1 if the difference is negative, otherwise 0. */
static int timeval_subtract (struct timeval *result, struct timeval *x,
                             struct timeval *y) {
  int carry;    /* whole seconds moved between y->tv_usec and y->tv_sec */

  /* x has fewer microseconds than y: borrow seconds into y */
  if (x->tv_usec < y->tv_usec) {
    carry = (y->tv_usec - x->tv_usec) / 1000000 + 1;
    y->tv_usec -= 1000000 * carry;
    y->tv_sec  += carry;
  }
  /* microsecond gap exceeds one second: move whole seconds the other way */
  if (x->tv_usec - y->tv_usec > 1000000) {
    carry = (x->tv_usec - y->tv_usec) / 1000000;
    y->tv_usec += 1000000 * carry;
    y->tv_sec  -= carry;
  }

  /* after the adjustments above the tv_usec difference is positive */
  result->tv_sec  = x->tv_sec  - y->tv_sec;
  result->tv_usec = x->tv_usec - y->tv_usec;

  return x->tv_sec < y->tv_sec;
}
|
||||
#endif
|
||||
|
||||
/* Parse the command line into *job.
 *
 * With no arguments the banner is printed and the process exits.
 * "--help" / "-h" print the help text and exit.  Every other option of the
 * form "-X" takes the following argument as its value ("" when it is the
 * last argument) and skips over it.  An argument that does not start with
 * '-', or that is exactly "-", is taken as the input file name.
 */
static void process_arguments(job_t *job, int argn, char *argv[])
{
  int i;
  int consumed;     /* 1 if the option used up the following argument */
  char *arg;        /* argument currently examined */
  char *val;        /* value belonging to the option, "" if none follows */

  assert(job);

  if (argn <= 1) {
    out_version(1);
    exit(0);
  }
#ifdef HAVE_PGM_H
  pnm_init(&argn, &argv);
#endif

  /* process arguments */
  for (i = 1; i < argn; i++) {
    arg = argv[i];
    if (strcmp(arg, "--help") == 0)
      help(); /* and quits */

    /* argument can be filename v0.2.5 */
    if (arg[0] != '-' || arg[1] == '\0') {
      job->src.fname = arg;
      continue;
    }

    val = (i + 1 < argn) ? argv[i + 1] : "";
    consumed = 1;
    switch (arg[1]) {
    case 'h': /* help */
      help();
      consumed = 0;
      break;
    case 'i': /* input image file */
      job->src.fname = val;
      break;
    case 'e': /* logging file */
      if (strcmp(val, "-") == 0) {
#ifdef HAVE_UNISTD_H
        dup2(STDOUT_FILENO, STDERR_FILENO); /* -e /dev/stdout works */
#else
        fprintf(stderr, "stderr redirection not possible without unistd.h\n");
#endif
      } else if (freopen(val, "w", stderr) == NULL) {
        fprintf(stderr, "stderr redirection to %s failed\n", val);
      }
      break;
    case 'p': /* database path */
      job->cfg.db_path = val;
      break;
    case 'o': /* output file, "-" keeps the default */
      if (strcmp(val, "-") != 0 && freopen(val, "w", stdout) == NULL) {
        fprintf(stderr, "stdout redirection to %s failed\n", val);
      }
      break;
    case 'f': /* output format */
      if (strcmp(val, "ISO8859_1") == 0)  job->cfg.out_format = ISO8859_1;
      else if (strcmp(val, "TeX") == 0)   job->cfg.out_format = TeX;
      else if (strcmp(val, "HTML") == 0)  job->cfg.out_format = HTML;
      else if (strcmp(val, "XML") == 0)   job->cfg.out_format = XML;
      else if (strcmp(val, "SGML") == 0)  job->cfg.out_format = SGML;
      else if (strcmp(val, "UTF8") == 0)  job->cfg.out_format = UTF8;
      else if (strcmp(val, "ASCII") == 0) job->cfg.out_format = ASCII;
      else fprintf(stderr,"Warning: unknown format (-f %s)\n",val);
      break;
    case 'c': /* list of chars (_ = not recognized chars) */
      job->cfg.lc = val;
      break;
    case 'C': /* char filter, default: NULL (all chars) */
      /* ToDo: UTF8 input, wchar */
      job->cfg.cfilter = val;
      break;
    case 'd': /* dust size */
      job->cfg.dust_size = atoi(val);
      break;
    case 'l': /* grey level 0<160<=255, 0 for autodetect */
      job->cfg.cs = atoi(val);
      break;
    case 's': /* spacewidth/dots (0 = autodetect) */
      job->cfg.spc = atoi(val);
      break;
    case 'v': /* verbose mode, bits are accumulated */
      job->cfg.verbose |= atoi(val);
      break;
    case 'm': /* operation modes, bits are accumulated */
      job->cfg.mode |= atoi(val);
      break;
    case 'n': /* numbers only */
      job->cfg.only_numbers = atoi(val);
      break;
    case 'x': /* initialize progress output val=fname */
      ini_progress(val);
      break;
    case 'a': /* set certainty */
      job->cfg.certainty = atoi(val);
      break;
    case 'u': /* output marker for unrecognized chars */
      job->cfg.unrec_marker = val;
      break;
    default:
      fprintf(stderr, "# unknown option use -h for help\n");
      consumed = 0;
    }
    i += consumed;  /* step over the option's value */
  }
}
|
||||
|
||||
/* Print start-up diagnostics to stderr when job->cfg.verbose is set:
 * banner, compiler/platform information, the effective options and the
 * input file name.  With HAVE_GETTIMEOFDAY the start time is stored in
 * job->tmp.init_time.  Does nothing when verbose is 0.
 */
static void mark_start(job_t *job) {
  const char *cfilter;

  assert(job);

  if (job->cfg.verbose) {
    out_version(0);
    /* insert some helpful info for support */
    fprintf(stderr, "# compiled: " __DATE__ );
#if defined(__GNUC__)
    fprintf(stderr, " GNUC-%d", __GNUC__ );
#endif
#ifdef __GNUC_MINOR__
    fprintf(stderr, ".%d", __GNUC_MINOR__ );
#endif
#if defined(__linux)
    fprintf(stderr, " linux");
#elif defined(__unix)
    fprintf(stderr, " unix");
#endif
#if defined(__WIN32) || defined(__WIN32__)
    fprintf(stderr, " WIN32");
#endif
#if defined(__WIN64) || defined(__WIN64__)
    fprintf(stderr, " WIN64");
#endif
#if defined(__VERSION__)
    fprintf(stderr, " version " __VERSION__ );
#endif
    fprintf(stderr, "\n");
    /* cfilter stays NULL unless -C was given; handing NULL to %s is
     * undefined behaviour, so print an explicit placeholder instead */
    cfilter = job->cfg.cfilter ? job->cfg.cfilter : "(null)";
    fprintf(stderr,
      "# options are: -l %d -s %d -v %d -c %s -m %d -d %d -n %d -a %d -C \"%s\"\n",
      job->cfg.cs, job->cfg.spc, job->cfg.verbose, job->cfg.lc, job->cfg.mode,
      job->cfg.dust_size, job->cfg.only_numbers, job->cfg.certainty,
      cfilter);
    fprintf(stderr, "# file: %s\n", job->src.fname);
#ifdef USE_UNICODE
    fprintf(stderr,"# using unicode\n");
#endif
#ifdef HAVE_GETTIMEOFDAY
    gettimeofday(&job->tmp.init_time, NULL);
#endif
  }
}
|
||||
|
||||
/* With HAVE_GETTIMEOFDAY and verbose output enabled, print the time that
 * elapsed since job->tmp.init_time to stderr.  Otherwise only asserts that
 * job is non-NULL. */
static void mark_end(job_t *job) {
  assert(job);

#ifdef HAVE_GETTIMEOFDAY
  /* show elapsed time */
  if (job->cfg.verbose) {
    struct timeval now, elapsed;
    int minutes, seconds;

    gettimeofday(&now, NULL);
    timeval_subtract(&elapsed, &now, &job->tmp.init_time);
    minutes = (int)elapsed.tv_sec / 60;
    seconds = (int)elapsed.tv_sec % 60;
    fprintf(stderr,"Elapsed time: %d:%02d:%3.3f.\n", minutes, seconds,
            (float)elapsed.tv_usec/1000);
  }
#endif
}
|
||||
|
||||
/* Load the image named job->src.fname into job->src.p.
 * File names containing ".pcx" go through readpcx() and yield 0; everything
 * else goes through readpgm(), whose result is returned
 * (1 for multiple images, 0 else). */
static int read_picture(job_t *job) {
  assert(job);

  if (strstr(job->src.fname, ".pcx") != NULL) {
    readpcx(job->src.fname, &job->src.p, job->cfg.verbose);
    return 0;
  }
  return readpgm(job->src.fname, &job->src.p, job->cfg.verbose);
}
|
||||
|
||||
/* Load an image from the memory buffer buf (size bytes) into job->src.p
 * via readpgmFromBuffer() and hand back its result
 * (1 for multiple images, 0 else). */
static int read_picture2(job_t *job, char* buf, long size) {
  int status;

  assert(job);
  status = readpgmFromBuffer(buf, size, &job->src.p);
  return status;
}
|
||||
|
||||
/* subject of change, we need more output for XML (ToDo) */
|
||||
/* Write all recognised text lines to stdout and release them afterwards.
 * HTML output gets "<br />" appended to each line; every format except XML
 * gets a trailing newline. */
void print_output(job_t *job) {
  int idx;
  const char *text;

  assert(job);

  /* notice: decode() is shiftet to getTextLine since 0.38 */
  for (idx = 0; (text = getTextLine(idx)) != NULL; idx++) {
    fputs(text, stdout);
    if (job->cfg.out_format == HTML)
      fputs("<br />", stdout);
    if (job->cfg.out_format != XML)
      fputc('\n', stdout);
  }
  free_textlines();
}
|
||||
|
||||
/* subject of change, we need more output for XML (ToDo) */
|
||||
/* Collect all recognised text lines into one heap string and release the
 * line list afterwards.  HTML output gets "<br />" after each line; every
 * format except XML gets a newline.  Returns the malloc'ed string (to be
 * freed by the caller) or NULL if the initial allocation fails. */
char* print_output2(job_t *job) {
  int idx;
  const char *text;
  int cap = 1024;               // initial buffer length for text line
  char *out = (char *)malloc(cap);

  if (out == NULL) {
    fprintf(stderr,"malloc failed!\n"); // ToDo: index_to_error_list
    return NULL;
  }
  out[0] = '\0';

  assert(job);

  /* notice: decode() is shiftet to getTextLine since 0.38 */
  for (idx = 0; (text = getTextLine(idx)) != NULL; idx++) {
    out = append_to_line(out, text, &cap);
    if (job->cfg.out_format == HTML)
      out = append_to_line(out, "<br />", &cap);
    if (job->cfg.out_format != XML)
      out = append_to_line(out, "\n", &cap);
  }
  free_textlines();

  return out;
}
|
||||
|
||||
/* FIXME jb: remove JOB; */
|
||||
job_t *JOB;
|
||||
|
||||
char* PNMToText(char* buf, long size, char *outputformat, long graylevel, long dustsize, long spacewidthdots, long certainty) {
|
||||
int multipnm=1;
|
||||
job_t job;
|
||||
|
||||
char *tmp = NULL;
|
||||
|
||||
JOB = &job;
|
||||
setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
|
||||
|
||||
while (multipnm==1) {
|
||||
|
||||
job_init(&job);
|
||||
|
||||
/* output format */
|
||||
if (strcmp(outputformat, "ISO8859_1") == 0) job.cfg.out_format=ISO8859_1;
|
||||
else if (strcmp(outputformat, "TeX") == 0) job.cfg.out_format=TeX;
|
||||
else if (strcmp(outputformat, "HTML") == 0) job.cfg.out_format=HTML;
|
||||
else if (strcmp(outputformat, "XML") == 0) job.cfg.out_format=XML;
|
||||
else if (strcmp(outputformat, "SGML") == 0) job.cfg.out_format=SGML;
|
||||
else if (strcmp(outputformat, "UTF8") == 0) job.cfg.out_format=UTF8;
|
||||
else if (strcmp(outputformat, "ASCII") == 0) job.cfg.out_format=ASCII;
|
||||
|
||||
/* grey level 0<160<=255, 0 for autodetect */
|
||||
job.cfg.cs = graylevel;
|
||||
/* dust size */
|
||||
job.cfg.dust_size = dustsize;
|
||||
/* spacewidth/dots (0 = autodetect) */
|
||||
job.cfg.spc = spacewidthdots;
|
||||
/* set certainty */
|
||||
job.cfg.certainty = certainty;
|
||||
|
||||
// process_arguments(&job, argn, argv);
|
||||
|
||||
mark_start(&job);
|
||||
// multipnm = read_picture(&job);
|
||||
multipnm = read_picture2(&job, buf, size);
|
||||
/* separation of main and rest for using as lib
|
||||
this will be changed later => introduction of set_option()
|
||||
for better communication to the engine */
|
||||
if (multipnm<0) break; /* read error */
|
||||
|
||||
/* call main loop */
|
||||
pgm2asc(&job);
|
||||
|
||||
mark_end(&job);
|
||||
|
||||
tmp=print_output2(&job);
|
||||
|
||||
job_free(&job);
|
||||
|
||||
}
|
||||
|
||||
return tmp;
|
||||
}
|
||||
168
ActiveX/ASCOfficeUtils/GOCR/src/jconv.c
Normal file
168
ActiveX/ASCOfficeUtils/GOCR/src/jconv.c
Normal file
@@ -0,0 +1,168 @@
|
||||
/* OCR Aug00 JS
|
||||
// PGM gray ASCII=P2 RAW=P5
|
||||
// PPM RGB ASCII=P3 RAW=P6
|
||||
// PBM B/W ASCII=P1 RAW=P4
|
||||
// ToDo:
|
||||
// - pbm-raw to pgm also for x!=0 (mod 8)
|
||||
// v0.01 bug eliminated
|
||||
// v0.02 convert renamed into jconv because ImageMagick uses same name
|
||||
// v0.03 code review bbg
|
||||
// program is not used anymore, use "convert -verbose -crop 0x0+1+1" instead
|
||||
*/
|
||||
|
||||
// #include <iostream.h>
|
||||
#include "config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "pnm.h"
|
||||
#ifdef HAVE_PAM_H
|
||||
# include <pam.h>
|
||||
#endif
|
||||
#include "pcx.h"
|
||||
#include "tga.h"
|
||||
|
||||
/* Print the jconv usage text to stdout and terminate with exit status 1. */
void help( void ) {
  static const char usage[] =
    "jconv version Aug2000 JS (pnm-raw,pcx8,tga24)\n"
    "use: jconv [options] ?infile.pnm? ?outfile.pgm? ?ox? ?oy? ?dx? ?dy?\n"
    "options: -shrink -pbm -? -help\n"
    "example: jconv -shrink -pbm font.pbm font.pbm 0 0 0 0\n";

  fputs(usage, stdout);
  exit(1);
}
|
||||
|
||||
int main(int argn, char *argv[])
|
||||
{
|
||||
char *inam, *onam;
|
||||
pix bild;
|
||||
int ox, oy, dx, dy, x, y, i, vvv = 0;
|
||||
|
||||
#ifdef HAVE_PAM_H
|
||||
pnm_init(&argn, argv);
|
||||
#endif
|
||||
// skip options
|
||||
for (i = 1; i < argn; i++) {
|
||||
if (argv[i][0] != '-')
|
||||
break;
|
||||
if (!strcmp(argv[i], "-?"))
|
||||
help();
|
||||
else if (!strcmp(argv[i], "-help"))
|
||||
help();
|
||||
else if (!strcmp(argv[i], "-shrink"))
|
||||
vvv |= 2;
|
||||
else if (!strcmp(argv[i], "-pbm"))
|
||||
vvv |= 4;
|
||||
else
|
||||
printf("unknown option: %s\n", argv[i]);
|
||||
}
|
||||
|
||||
if (argn - i != 6)
|
||||
help();
|
||||
inam = argv[i++];
|
||||
onam = argv[i++];
|
||||
ox = atoi(argv[i++]);
|
||||
oy = atoi(argv[i++]);
|
||||
dx = atoi(argv[i++]);
|
||||
dy = atoi(argv[i++]);
|
||||
printf("# in=%s out=%s offs=%d,%d len=%d,%d vvv=%d\n",
|
||||
inam, onam, ox, oy, dx, dy, vvv);
|
||||
|
||||
// ----- read picture
|
||||
if (strstr(inam, ".pbm") ||
|
||||
strstr(inam, ".pgm") ||
|
||||
strstr(inam, ".ppm") ||
|
||||
strstr(inam, ".pnm") ||
|
||||
strstr(inam, ".pam"))
|
||||
readpgm(inam, &bild, 1);
|
||||
else if (strstr(inam, ".pcx"))
|
||||
readpcx(inam, &bild, 1);
|
||||
else if (strstr(inam, ".tga"))
|
||||
readtga(inam, &bild, ((vvv > 1) ? 0 : 1));
|
||||
else {
|
||||
printf("Error: unknown suffix\n");
|
||||
exit(1);
|
||||
}
|
||||
if (ox < 0 || ox >= bild.x)
|
||||
ox = 0;
|
||||
if (oy < 0 || ox >= bild.y)
|
||||
oy = 0;
|
||||
if (dx <= 0 || ox + dx > bild.x)
|
||||
dx = bild.x - ox;
|
||||
if (dy <= 0 || oy + dy > bild.y)
|
||||
dy = bild.y - oy;
|
||||
if ((vvv & 2) == 2 && bild.bpp == 1) { // -shrink
|
||||
int x, y;
|
||||
printf("# shrinking PGM: offs=%d,%d len=%d,%d\n", ox, oy, dx, dy);
|
||||
for (y = 0; y < dy; y++) { // shrink upper border
|
||||
for (x = 0; x < dx; x++)
|
||||
if (bild.p[x + ox + (y + oy) * bild.x] < 127)
|
||||
break;
|
||||
if (x < dx) {
|
||||
if (y > 0)
|
||||
y--;
|
||||
oy += y;
|
||||
dy -= y;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (y = 0; y < dy; y++) { // shrink lower border
|
||||
for (x = 0; x < dx; x++)
|
||||
if (bild.p[ox + x + (oy + dy - y - 1) * bild.x] < 127)
|
||||
break;
|
||||
if (x < dx) {
|
||||
if (y > 0)
|
||||
y--;
|
||||
dy -= y;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (x = 0; x < dx; x++) { // shrink left border
|
||||
for (y = 0; y < dy; y++)
|
||||
if (bild.p[x + ox + (y + oy) * bild.x] < 127)
|
||||
break;
|
||||
if (y < dy) {
|
||||
if (x > 0)
|
||||
x--;
|
||||
ox += x;
|
||||
dx -= x;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (x = 0; x < dx; x++) { // shrink right border
|
||||
for (y = 0; y < dy; y++)
|
||||
if (bild.p[ox + dx - x - 1 + (oy + y) * bild.x] < 127)
|
||||
break;
|
||||
if (y < dy) {
|
||||
if (x > 0)
|
||||
x--;
|
||||
dx -= x;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("# final dimension: offs=%d,%d len=%d,%d bpp=%d\n",
|
||||
ox, oy, dx, dy, bild.bpp);
|
||||
|
||||
/* bbg: could be changed to memmoves */
|
||||
// ---- new size
|
||||
for (y = 0; y < dy; y++)
|
||||
for (x = 0; x < dx; x++)
|
||||
for (i = 0; i < 3; i++)
|
||||
bild.p[i + bild.bpp * (x + dx * y)] =
|
||||
bild.p[i + bild.bpp * (x + ox + (y + oy) * bild.x)];
|
||||
bild.x = dx;
|
||||
bild.y = dy;
|
||||
// ---- write internal picture of textsite
|
||||
printf("# write %s\n", onam);
|
||||
if (strstr(onam, ".pbm"))
|
||||
writepbm(onam, &bild);
|
||||
else if (strstr(onam, ".pgm"))
|
||||
writepgm(onam, &bild);
|
||||
else if (strstr(onam, ".ppm"))
|
||||
writeppm(onam, &bild);
|
||||
else if (strstr(onam, ".pnm"))
|
||||
writepgm(onam, &bild);
|
||||
else
|
||||
printf("Error: unknown suffix");
|
||||
free( bild.p );
|
||||
}
|
||||
84
ActiveX/ASCOfficeUtils/GOCR/src/job.c
Normal file
84
ActiveX/ASCOfficeUtils/GOCR/src/job.c
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2006 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for email address */
|
||||
|
||||
#include "pgm2asc.h"
|
||||
#include "gocr.h"
|
||||
|
||||
/* initialize job structure */
|
||||
/* Put *job into its default state: no image loaded, empty lists,
 * built-in average character size of 5x8 and default configuration. */
void job_init(job_t *job) {
  /* ---- lists (results and temporaries) ---- */
  list_init( &job->res.boxlist );
  list_init( &job->res.linelist );
  list_init( &job->tmp.dblist );

  /* ---- source image ---- */
  job->src.fname = "-";
  job->src.p.p   = NULL;        /* FIXME jb: init pix */

  /* ---- results ---- */
  job->res.numC = 0;
  job->res.sumX = 0;
  job->res.sumY = 0;
  job->res.avX  = 5;
  job->res.avY  = 8;
  job->res.lines.num = 0;
  job->res.lines.dy  = 0;

  /* ---- temporaries ---- */
  job->tmp.n_run = 0;
  job->tmp.ppo.p = NULL;        /* FIXME jb: init ppo */
  job->tmp.ppo.x = 0;
  job->tmp.ppo.y = 0;

  /* ---- configuration ---- */
  job->cfg.verbose      = 0;
  job->cfg.mode         = 0;
  job->cfg.cs           = 0;
  job->cfg.spc          = 0;
  job->cfg.dust_size    = -1;   /* auto detect */
  job->cfg.only_numbers = 0;
  job->cfg.certainty    = 95;
  job->cfg.out_format   = UTF8; /* old: ISO8859_1; */
  job->cfg.lc           = "_";
  job->cfg.unrec_marker = "_";
  job->cfg.db_path      = (char*)NULL;
  job->cfg.cfilter      = (char*)NULL;
}
|
||||
|
||||
/* free job structure */
|
||||
/* Release what *job owns: the box list including its boxes, the source
 * image and the temporary image.  Freed pointers are reset to NULL. */
void job_free(job_t *job) {

  /* tmp may be just a copy of the pointer to the original image;
   * forget the alias so that the memory is released only once */
  if (job->tmp.ppo.p == job->src.p.p)
    job->tmp.ppo.p = NULL;

  /* FIMXE jb: free lists
   * list_free( &job->res.linelist );
   * list_free( &job->tmp.dblist );
   */
  list_and_data_free(&(job->res.boxlist), (void (*)(void *))free_box);

  /* FIXME jb: free pix */
  if (job->tmp.ppo.p != NULL) {
    free(job->tmp.ppo.p);
    job->tmp.ppo.p = NULL;
  }
  if (job->src.p.p != NULL) {
    free(job->src.p.p);
    job->src.p.p = NULL;
  }
}
|
||||
353
ActiveX/ASCOfficeUtils/GOCR/src/lines.c
Normal file
353
ActiveX/ASCOfficeUtils/GOCR/src/lines.c
Normal file
@@ -0,0 +1,353 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2009 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <assert.h>
|
||||
#include "pgm2asc.h"
|
||||
#include "gocr.h"
|
||||
#include "unicode.h"
|
||||
|
||||
const char *getTextLine (int line) {
|
||||
int i;
|
||||
Element *elem;
|
||||
|
||||
if (line < 0 || line > list_total(&(JOB->res.linelist)))
|
||||
return NULL;
|
||||
|
||||
for ( i = 0, elem = JOB->res.linelist.start.next; i < line && elem != NULL; i++ )
|
||||
elem = elem->next;
|
||||
|
||||
if ( elem != NULL )
|
||||
return (const char *)elem->data;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void free_textlines(void) {
|
||||
for_each_data(&(JOB->res.linelist)) {
|
||||
if (list_get_current(&(JOB->res.linelist)))
|
||||
free(list_get_current(&(JOB->res.linelist)));
|
||||
} end_for_each(&(JOB->res.linelist));
|
||||
list_free(&(JOB->res.linelist));
|
||||
}
|
||||
|
||||
/* append a string (s1) to the string buffer (buffer) of length (len)
|
||||
* if buffer is too small or len==0 realloc buffer, len+=512
|
||||
*/
|
||||
/* Append the string s1 to the string held in buffer.
 *
 * buffer  heap buffer containing a NUL-terminated string; only read when
 *         *len > 0 (with *len == 0 it may be NULL)
 * s1      string to append; NULL or "" is reported on stderr and ignored
 * len     in/out: allocated size of buffer in bytes
 *
 * If the result does not fit, the buffer is enlarged with realloc() by 512
 * bytes, or by strlen(s1)+1 when that is larger, and *len is updated.
 * Returns the (possibly moved) buffer.  If realloc() fails, a message is
 * printed and the old buffer is returned unchanged with *len untouched.
 */
char *append_to_line(char *buffer, const char *s1, int *len) {
  char *temp;
  int slen=0, alen, grow;

  if( s1==NULL || s1[0] == 0 ){
    fprintf(stderr,"\n#BUG: appending 0 to a line makes no sense!");
    return buffer;
  }
  if ( *len>0 ) slen= strlen(buffer); // used buffer
  alen = strlen(s1);
  if ( slen+alen+1 >= *len ) {
    grow = (alen+1<=512) ? 512 : alen+1;
    temp = (char *)realloc(buffer, *len+grow);
    if( !temp ) {
      /* *len is only raised after a successful realloc; the old code
       * always rolled back by 512, even after growing by alen+1, which
       * left *len larger than the real allocation */
      fprintf(stderr,"realloc failed!\n");
      return buffer;
    }
    buffer = temp; // buffer successfull enlarged
    *len += grow;
  }
  temp = buffer + slen; // end of buffered string
  memcpy(temp,s1,alen+1); // copy including end sign '\0'
  return buffer;
}
|
||||
|
||||
int calc_median_gap(struct tlines * lines) {
|
||||
int gaps[MAXlines], l;
|
||||
if (lines->num<2) return 0;
|
||||
for (l = 0; l < lines->num - 1; l++)
|
||||
gaps[l] = lines->m2[l + 1] - lines->m3[l];
|
||||
qsort(gaps, lines->num - 1, sizeof(gaps[0]), intcompare);
|
||||
return gaps[(lines->num - 1) / 2];
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the indent in pixels of the least-indented line.
|
||||
* Will be subtracted as base_indent to avoid negative indent.
|
||||
*
|
||||
* This is adjusted to account for an angle on the page as
|
||||
* a whole. For instance, if the page is rotated clockwise,
|
||||
* lower lines may be physically closer to the left edge
|
||||
* than higher lines that are logically less indented.
|
||||
* We rotate around (0,0). Note that this rotation could
|
||||
* rotate lines "off the left margin", leading to a negative
|
||||
* indent.
|
||||
*
|
||||
* boxlist -- list of character boxes.
|
||||
* dx, dy -- rotation angle as vector
|
||||
*/
|
||||
/* Return the smallest rotation-adjusted left edge (x0 + y0*dy/dx) over all
 * boxes in boxlist whose num is not -1 (space/newline boxes are skipped).
 * With dx == 0 the plain x0 is used.  Returns INT_MAX if no box qualifies.
 * Progress is reported on stderr when JOB->cfg.verbose is set. */
int get_least_line_indent(List * boxlist, int dx, int dy) {
  int least = INT_MAX;
  int indent;
  struct box *cur;

  if (JOB->cfg.verbose)
    fprintf(stderr, "get_least_line_indent: rot.vector dxdy %d %d\n",
            dx, dy);

  for_each_data(boxlist) {
    cur = (struct box *)list_get_current(boxlist);
    /* if num == -1, indicates this is a space or newline box,
     * inserted in list_insert_spaces. */
    if (cur->num == -1) continue;

    indent = cur->x0;
    if (dx) indent += cur->y0 * dy / dx;
    if (indent >= least) continue;      /* not a new minimum */

    least = indent;
    if (dy!=0 && JOB->cfg.verbose)
      fprintf(stderr,
              "# Line %2d, unadjusted xy %3d %3d, adjusted x %2d\n",
              cur->line, cur->x0, cur->y0, indent);
  } end_for_each(boxlist);

  if (JOB->cfg.verbose)
    fprintf(stderr, "# Minimum adjusted x: %d (min_indent)\n", least);
  return least;
}
|
||||
|
||||
/* collect all the chars from the box tree and write them to a string buffer
|
||||
mo is the mode: mode&8 means, use chars even if unsure recognized
|
||||
ToDo: store full text(?), store decoded text+boxes+position chars (v0.4)
|
||||
(HTML,UTF,ASCII,XML), not wchar incl. descriptions (at<95% in red)
|
||||
remove decode(*c, job->cfg.out_format) from gocr.c!
|
||||
XML add alternate-tags, format tags and position tags
|
||||
ToDo: better output XML to stdout instead of circumstantial store to lines
|
||||
not all texts/images follow the line concept?
|
||||
Better use a tree of objects where leafes are chars instead of simple list.
|
||||
Chars or objects are taken into account. Objects can be text strings
|
||||
or XML strings.
|
||||
*/
|
||||
void store_boxtree_lines(int mo) {
|
||||
char *buffer; /* temp buffer for text */
|
||||
int i = 0, j = 0;
|
||||
int len = 1024; // initial buffer length for text line
|
||||
struct box *box2;
|
||||
int median_gap = 0;
|
||||
int max_single_space_gap = 0;
|
||||
struct tlines line_info;
|
||||
int line, line_gap, oldline=-1;
|
||||
int left_margin;
|
||||
int i1=0, i2=0;
|
||||
|
||||
buffer = (char *)malloc(len);
|
||||
if ( !buffer ) {
|
||||
fprintf(stderr,"malloc failed!\n"); // ToDo: index_to_error_list
|
||||
return;
|
||||
}
|
||||
*buffer = 0;
|
||||
|
||||
if ( JOB->cfg.verbose&1 )
|
||||
fprintf(stderr,"# store boxtree to lines ...");
|
||||
|
||||
/* wew: calculate the median line gap, to determine line spacing
|
||||
* for the text output. The line gap used is between one line's
|
||||
* m3 (baseline) and the next line's m2 (height of non-rising
|
||||
* lowercase). We use these lines as they are the least likely
|
||||
* to vary according to actual character content of lines.
|
||||
*/
|
||||
median_gap = calc_median_gap(&JOB->res.lines);
|
||||
if (median_gap <= 0) {
|
||||
fprintf(stderr, "# Warning: non-positive median line gap of %d\n",
|
||||
median_gap);
|
||||
median_gap = 8;
|
||||
max_single_space_gap = 12; /* arbitrary */
|
||||
} else {
|
||||
max_single_space_gap = median_gap * 7 / 4;
|
||||
}
|
||||
|
||||
// Will be subtracted as base_indent to avoid negativ indent.
|
||||
left_margin = get_least_line_indent(&JOB->res.boxlist,
|
||||
JOB->res.lines.dx,
|
||||
JOB->res.lines.dy);
|
||||
|
||||
if (JOB->cfg.out_format==XML) { /* subject of change */
|
||||
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
|
||||
/* output lot of usefull information for XML filter */
|
||||
sprintf(s1,"<page x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\">\n",
|
||||
0,0,0,0);
|
||||
buffer=append_to_line(buffer,s1,&len);
|
||||
sprintf(s1,"<block x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\">\n",
|
||||
0,0,0,0);
|
||||
buffer=append_to_line(buffer,s1,&len);
|
||||
}
|
||||
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
line = box2->line;
|
||||
line_info = JOB->res.lines;
|
||||
/* reset the output char if certainty is below the limit v0.44 */
|
||||
if (box2->num_ac && box2->wac[0]<JOB->cfg.certainty) box2->c=UNKNOWN;
|
||||
if (line!=oldline) {
|
||||
if (JOB->cfg.out_format==XML && oldline>-1) { /* subject of change */
|
||||
buffer=append_to_line(buffer,"</line>\n",&len);
|
||||
list_app( &(JOB->res.linelist), (void *)strdup(buffer) ); // wcsdup
|
||||
memset(buffer, 0, len);
|
||||
j=0; // reset counter for new line
|
||||
}
|
||||
if (JOB->cfg.out_format==XML) { /* subject of change */
|
||||
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
|
||||
/* output lot of usefull information for XML filter */
|
||||
sprintf(s1,"<line x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" value=\"%d\">\n",
|
||||
line_info.x0[line],line_info.m1[line],
|
||||
line_info.x1[line]-line_info.x0[line]+1,
|
||||
line_info.m4[line]-line_info.m1[line],line);
|
||||
buffer=append_to_line(buffer,s1,&len);
|
||||
}
|
||||
oldline=line;
|
||||
}
|
||||
if (box2->c > ' ' &&
|
||||
box2->c <= 'z') i1++; /* count non-space chars */
|
||||
if (box2->c == '\n') {
|
||||
if (JOB->cfg.out_format!=XML) { /* subject of change */
|
||||
line_info = JOB->res.lines;
|
||||
line = box2->line;
|
||||
if (line > 0) {
|
||||
line_gap = line_info.m2[line] - line_info.m3[line - 1];
|
||||
for (line_gap -= max_single_space_gap; line_gap > 0;
|
||||
line_gap -= median_gap) {
|
||||
buffer=append_to_line(buffer,"\n",&len);
|
||||
j++; /* count chars in line */
|
||||
}
|
||||
}
|
||||
list_app( &(JOB->res.linelist), (void *)strdup(buffer) ); // wcsdup
|
||||
memset(buffer, 0, len);
|
||||
j=0; // reset counter for new line
|
||||
}
|
||||
}
|
||||
if (box2->c == ' ') // fill large gaps with spaces
|
||||
{
|
||||
if (JOB->res.avX) { /* avoid SIGFPE */
|
||||
if (JOB->cfg.out_format==XML) { /* subject of change */
|
||||
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
|
||||
/* output lot of usefull information for XML filter */
|
||||
sprintf(s1," <space x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" />\n",
|
||||
box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1);
|
||||
buffer=append_to_line(buffer,s1,&len);
|
||||
} else
|
||||
for (i = (box2->x1 - box2->x0) / (2 * JOB->res.avX) + 1; i > 0; i--) {
|
||||
buffer=append_to_line(buffer," ",&len);
|
||||
j++; /* number of chars in line */
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (box2->c != '\n') {
|
||||
if (j==0 && JOB->res.avX) /* first char in new line? */ {
|
||||
int indent = box2->x0 - JOB->res.lines.x0[box2->line];
|
||||
/* correct for angle of page as a whole. */
|
||||
if (JOB->res.lines.dx)
|
||||
indent += box2->y0 * JOB->res.lines.dy / JOB->res.lines.dx;
|
||||
/* subtract the base margin. */
|
||||
indent -= left_margin;
|
||||
if (JOB->cfg.out_format==XML) { /* subject of change */
|
||||
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
|
||||
/* output lot of usefull information for XML filter */
|
||||
sprintf(s1," <space x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" />\n",
|
||||
box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1);
|
||||
buffer=append_to_line(buffer,s1,&len);
|
||||
} else
|
||||
for (i = indent / JOB->res.avX; i > 0; i--) {
|
||||
buffer=append_to_line(buffer," ",&len); j++;
|
||||
}
|
||||
}
|
||||
if (JOB->cfg.out_format==XML) { /* subject of change */
|
||||
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
|
||||
/* output lot of usefull information for XML filter */
|
||||
sprintf(s1," <box x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" value=\"",
|
||||
box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1);
|
||||
buffer=append_to_line(buffer,s1,&len);
|
||||
if (box2->num_ac>1) { /* ToDo: output a list of alternatives */
|
||||
}
|
||||
}
|
||||
if (box2->c != UNKNOWN && box2->c != 0) {
|
||||
buffer=
|
||||
append_to_line(buffer,decode(box2->c,JOB->cfg.out_format),&len);
|
||||
if (box2->c > ' ' &&
|
||||
box2->c <= 'z') i2++; /* count non-space chars */
|
||||
} else { /* c == UNKNOWN or 0 */
|
||||
wchar_t cc; cc=box2->c;
|
||||
if (box2->num_ac>0 && box2->tas[0]
|
||||
&& (JOB->cfg.out_format!=XML || box2->tas[0][0]!='<')) {
|
||||
/* output glued chars or ... (?) Jan08 */
|
||||
buffer=append_to_line(buffer,box2->tas[0],&len);
|
||||
j+=strlen(box2->tas[0]);
|
||||
} else { /* ToDo: leave string empty? set placeholder per option */
|
||||
/* output dummy string to mark UNKNOWN */
|
||||
if(JOB->cfg.unrec_marker[0])
|
||||
buffer = append_to_line(buffer, JOB->cfg.unrec_marker, &len);
|
||||
}
|
||||
}
|
||||
if (JOB->cfg.out_format==XML) {
|
||||
if (box2->num_ac>0) {
|
||||
/* output alist ToDo: separate <altbox ...> */
|
||||
int i1; char s1[256];
|
||||
sprintf(s1,"\" numac=\"%d\" weights=\"",box2->num_ac);
|
||||
buffer=append_to_line(buffer,s1,&len);
|
||||
for (i1=0;i1<box2->num_ac;i1++) {
|
||||
sprintf(s1,"%d",box2->wac[i1]);
|
||||
buffer=append_to_line(buffer,s1,&len);
|
||||
if (i1+1<box2->num_ac) buffer=append_to_line(buffer,",",&len);
|
||||
}
|
||||
if (box2->num_ac>1)
|
||||
buffer=append_to_line(buffer,"\" achars=\"",&len);
|
||||
for (i1=1;i1<box2->num_ac;i1++) {
|
||||
if (box2->tas[i1] && box2->tas[i1][0]!='<')
|
||||
buffer=append_to_line(buffer,box2->tas[i1],&len);
|
||||
else
|
||||
buffer=append_to_line(buffer,
|
||||
decode(box2->tac[i1],JOB->cfg.out_format),&len);
|
||||
// ToDo: add tas[] (achars->avalues or alternate_strings?
|
||||
if (i1+1<box2->num_ac) buffer=append_to_line(buffer,",",&len);
|
||||
}
|
||||
}
|
||||
buffer=append_to_line(buffer,"\" />\n",&len);
|
||||
}
|
||||
if (box2->num_ac && box2->tas[0]) {
|
||||
if (box2->tas[0][0]=='<') { /* output special XML object */
|
||||
buffer=append_to_line(buffer,box2->tas[0],&len);
|
||||
buffer=append_to_line(buffer,"\n",&len);
|
||||
j+=strlen(box2->tas[0]);
|
||||
}
|
||||
}
|
||||
j++; /* number of chars in line */
|
||||
}
|
||||
i++;
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
if (JOB->cfg.out_format==XML && oldline>-1) { /* subject of change */
|
||||
buffer=append_to_line(buffer,"</line>\n",&len);
|
||||
}
|
||||
if (JOB->cfg.out_format==XML) { /* subject of change */
|
||||
buffer=append_to_line(buffer,"</block>\n</page>\n",&len);
|
||||
}
|
||||
|
||||
/* do not forget last line */
|
||||
// is there no \n in the last line? If there is, delete next line.
|
||||
list_app( &(JOB->res.linelist), (void *)strdup(buffer) );
|
||||
free(buffer);
|
||||
if( JOB->cfg.verbose&1 )
|
||||
fprintf(stderr,"... %d lines, boxes= %d, chars= %d\n",i,i1,i2);
|
||||
}
|
||||
334
ActiveX/ASCOfficeUtils/GOCR/src/list.c
Normal file
334
ActiveX/ASCOfficeUtils/GOCR/src/list.c
Normal file
@@ -0,0 +1,334 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2006 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for email address
|
||||
|
||||
***********************************IMPORTANT*********************************
|
||||
Notes to the developers: read the following notes before using these
|
||||
functions.
|
||||
* Be careful when using for_each_data() recursively and calling list_del.
|
||||
It may mangle with the current[] pointers, and possibly segfault or do an
|
||||
unpredictable or just undesirable behavior. We have been working on a
|
||||
solution for this problem, and solved some of the biggest problems.
|
||||
In a few words, the problem is this: when you delete a node, it may be
|
||||
the current node of a lower level loop. The current code takes care of
|
||||
access to previous/next elements of the now defunct node. So, if you do
|
||||
something like:
|
||||
|
||||
for_each_data(l) {
|
||||
for_each_data(l) {
|
||||
list_del(l, header_data);
|
||||
free(header_data);
|
||||
} end_for_each(l);
|
||||
+ tempnode = list_cur_next(l);
|
||||
} end_for_each(l);
|
||||
|
||||
It will work, even though the current node in the outer loop was deleted.
|
||||
However, if you replace the line marked with + with the following code:
|
||||
|
||||
tempnode = list_next(l, list_get_current(l));
|
||||
|
||||
it will break, since list_get_current is likely to return NULL or garbage,
|
||||
since you deleted header_data().
|
||||
Conclusion: use list_del carefully. The best way to avoid this problem is
|
||||
to not use list_del inside a big stack of loops.
|
||||
* If you have two elements with the same data, the functions will assume
|
||||
that the first one is the wanted one. Not a bug, a feature. ;-)
|
||||
* avoid calling list_prev and list_next. They are intensive and slow
|
||||
functions. Keep the result in a variable or, if you need something more,
|
||||
use list_get_element_from_data.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "list.h"
|
||||
#include "progress.h"
|
||||
|
||||
/* Put list l into the empty state: the start and stop sentinels point at
   each other, no loop level is open and the element count is zero. */
void list_init( List *l ) {
  if ( l ) {
    /* sentinels carry no data and have no outer neighbours */
    l->start.data     = NULL;
    l->stop.data      = NULL;
    l->start.previous = NULL;
    l->stop.next      = NULL;
    /* empty chain: start <-> stop */
    l->start.next     = &l->stop;
    l->stop.previous  = &l->start;
    /* no for_each level active yet */
    l->current = NULL;
    l->level   = -1;
    l->n       = 0;
  }
}
|
||||
|
||||
/* inserts data before data_after. If data_after == NULL, appends.
|
||||
Returns 1 on error, 0 if OK. */
|
||||
int list_ins( List *l, void *data_after, void *data) {
|
||||
Element *e, *after_element;
|
||||
|
||||
/* test arguments */
|
||||
if ( !l || !data )
|
||||
return 1;
|
||||
|
||||
if ( !data_after || !l->n )
|
||||
return list_app(l, data);
|
||||
|
||||
/* get data_after element */
|
||||
if ( !(after_element = list_element_from_data(l, data_after)) )
|
||||
return 1;
|
||||
|
||||
/* alloc a new element */
|
||||
if( !(e = (Element *)malloc(sizeof(Element))) )
|
||||
return 1;
|
||||
e->data = data;
|
||||
e->next = after_element;
|
||||
e->previous = after_element->previous;
|
||||
after_element->previous->next = e;
|
||||
after_element->previous = e;
|
||||
l->n++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* appends data to the list. Returns 1 on error, 0 if OK. */
|
||||
/* same as list_ins(l,NULL,data) ??? */
|
||||
int list_app( List *l, void *data ) {
|
||||
Element *e;
|
||||
|
||||
if ( !l || !data )
|
||||
return 1;
|
||||
if ( !(e = (Element *)malloc(sizeof(Element))) )
|
||||
return 1;
|
||||
|
||||
e->data = data;
|
||||
e->previous = l->stop.previous;
|
||||
e->next = l->stop.previous->next;
|
||||
l->stop.previous->next = e;
|
||||
l->stop.previous = e;
|
||||
l->n++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* returns element associated with data. */
|
||||
Element *list_element_from_data( List *l, void *data ) {
|
||||
Element *temp;
|
||||
|
||||
if ( !l || !data || !l->n)
|
||||
return NULL;
|
||||
|
||||
temp = l->start.next;
|
||||
|
||||
while ( temp->data != data ) {
|
||||
if ( !temp || temp==&l->stop )
|
||||
return NULL;
|
||||
temp = temp->next;
|
||||
}
|
||||
return temp;
|
||||
}
|
||||
|
||||
/* deletes (first) element with data from list. User must free data.
|
||||
Returns 0 if OK, 1 on error.
|
||||
This is the internal version, that shouldn't be called usually. Use the
|
||||
list_del() macro instead.
|
||||
*/
|
||||
int list_del( List *l, void *data ) {
|
||||
Element *temp;
|
||||
int i;
|
||||
|
||||
if (!data) return 1; /* do not delete start or stop element */
|
||||
|
||||
/* find element associated with data */
|
||||
if ( !(temp = list_element_from_data(l, data)) )
|
||||
return 1;
|
||||
|
||||
/* test if the deleted node is current in some nested loop, and fix it. */
|
||||
for ( i = l->level; i >= 0; i-- ) {
|
||||
if ( l->current[i] == temp ) {
|
||||
l->current[i] = temp->previous;
|
||||
}
|
||||
}
|
||||
|
||||
temp->previous->next = temp->next;
|
||||
temp->next->previous = temp->previous;
|
||||
temp->previous = temp->next = NULL; /* mark as freed */
|
||||
/*
|
||||
fprintf(stderr,"\n# list_del=%p start=%p stop=%p",temp,&l->start,&l->stop);
|
||||
*/
|
||||
|
||||
/* and free stuff */
|
||||
free(temp); /* element pointing to data, fixed mem-leak 0.41 */
|
||||
l->n--;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* frees list. See also list_and_data_free() */
|
||||
void list_free( List *l ) {
|
||||
Element *temp, *temp2;
|
||||
|
||||
if ( !l || !l->n )
|
||||
return;
|
||||
|
||||
if ( l->current ) {
|
||||
free(l->current);
|
||||
}
|
||||
l->current = NULL;
|
||||
|
||||
temp = l->start.next;
|
||||
while ( temp && temp!=&l->stop) {
|
||||
temp2 = temp->next;
|
||||
free(temp);
|
||||
temp = temp2;
|
||||
}
|
||||
l->start.next = &l->stop;
|
||||
l->stop.previous = &l->start;
|
||||
}
|
||||
|
||||
/* setup a new level of for_each */
|
||||
int list_higher_level( List *l ) {
|
||||
Element **newcur;
|
||||
|
||||
if ( !l ) return(1);
|
||||
|
||||
/*
|
||||
Security-check: NULL pointer passed to realloc.
|
||||
ANSI allows this, but it may cause portability problems.
|
||||
*/
|
||||
newcur = (Element **)realloc(l->current, (l->level+2)*sizeof(Element *));
|
||||
if (newcur) {
|
||||
l->current = newcur;
|
||||
l->level++;
|
||||
l->current[l->level] = l->start.next;
|
||||
}
|
||||
g_debug(fprintf(stderr, " level++=%d current[]=%p\n",
|
||||
l->level, l->current);)
|
||||
if ( !newcur ) {
|
||||
fprintf(stderr, " realloc failed! abort\n"); return(1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Closes the innermost for_each nesting level: the current[] array loses
   its last slot, or is released entirely when the outermost level ends.
   Does nothing if l is NULL or if no level is open. */
void list_lower_level( List *l ) {
  Element **shrunk;

  if ( !l )
    return;

  /* unbalanced call: there is no open level to close */
  if ( l->level < 0 )
    return;

  if (!l->level) {
    free(l->current); /* calm -lefence */
    l->current = NULL; /* could be important */
  } else {
    /* On failure realloc leaves the old block valid, so keep using it
       rather than overwriting the only pointer with NULL. */
    shrunk = (Element **)realloc(l->current, l->level*sizeof(Element *));
    if ( shrunk )
      l->current = shrunk;
  }
  l->level--;
  g_debug(fprintf(stderr, " level--=%d current[]=%p\n", l->level,
                  l->current);)
}
|
||||
|
||||
/* returns the next item data */
|
||||
void *list_next( List *l, void *data ) {
|
||||
Element *temp;
|
||||
|
||||
if ( !l || !(temp = list_element_from_data(l, data)) )
|
||||
return NULL;
|
||||
if( !temp->next ) return NULL;
|
||||
return (temp->next->data);
|
||||
}
|
||||
|
||||
/* returns the previous item data */
|
||||
void *list_prev( List *l, void *data ) {
|
||||
Element *temp;
|
||||
|
||||
if ( !l || !(temp = list_element_from_data(l, data)) )
|
||||
return NULL;
|
||||
if( !temp->previous ) return NULL;
|
||||
return (temp->previous->data);
|
||||
}
|
||||
|
||||
/* Similar to qsort. Sorts list, using the (*compare) function, which is
|
||||
provided by the user. The comparison function must return an integer less
|
||||
than, equal to, or greater than zero if the first argument is considered to
|
||||
be respectively less than, equal to, or greater than the second.
|
||||
Uses the bubble sort algorithm.
|
||||
*/
|
||||
void list_sort( List *l, int (*compare)(const void *, const void *) ) {
|
||||
Element *temp, *prev;
|
||||
int i, sorted;
|
||||
progress_counter_t *pc = NULL;
|
||||
|
||||
if ( !l )
|
||||
return;
|
||||
|
||||
/* start progress meter, sorting is slow for huge number of elements */
|
||||
/* l->n is the worst case, real time is less or equal estimated time */
|
||||
pc = open_progress(l->n,"list_sort");
|
||||
|
||||
for (i = 0; i < l->n; i++ ) {
|
||||
sorted = 1; /* Flag for early break */
|
||||
for ( temp = l->start.next->next;
|
||||
temp != NULL && temp != &l->stop; temp = temp->next ) {
|
||||
if ( temp->previous == &l->start ) continue;
|
||||
if ( compare((const void *)temp->previous->data,
|
||||
(const void *)temp->data) > 0 ) {
|
||||
|
||||
sorted = 0; /* rest flag */
|
||||
/* swap with the previous node */
|
||||
prev = temp->previous;
|
||||
prev->previous->next = temp;
|
||||
temp->next->previous = prev;
|
||||
temp->previous = prev->previous;
|
||||
prev->next = temp->next;
|
||||
prev->previous = temp;
|
||||
temp->next = prev;
|
||||
/* and make sure the node in the for loop is correct */
|
||||
temp = prev;
|
||||
|
||||
#ifdef SLOWER_BUT_KEEP_BY_NOW
|
||||
/* this is a slower version, but guaranteed to work */
|
||||
void *data;
|
||||
|
||||
data = temp->data;
|
||||
prev = temp->previous;
|
||||
list_del(l, data);
|
||||
list_ins(l, prev->data, data);
|
||||
temp = prev;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if (sorted) break;
|
||||
progress(i,pc); /* progress meter */
|
||||
}
|
||||
|
||||
close_progress(pc);
|
||||
g_debug(fprintf(stderr, "list_sort()\n");)
|
||||
}
|
||||
|
||||
/* calls free_data() for each data in list l,
|
||||
* before free list with list_free() */
|
||||
int list_and_data_free( List *l, void (*free_data)(void *data)) {
|
||||
void *data;
|
||||
|
||||
if ( !l ) return 0;
|
||||
if ( !free_data ) return 1;
|
||||
|
||||
for_each_data(l) {
|
||||
if ((data = list_get_current(l)))
|
||||
free_data(data);
|
||||
} end_for_each(l);
|
||||
|
||||
list_free(l);
|
||||
|
||||
g_debug(fprintf(stderr, "list_and_data_free()\n");)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
6756
ActiveX/ASCOfficeUtils/GOCR/src/ocr0.c
Normal file
6756
ActiveX/ASCOfficeUtils/GOCR/src/ocr0.c
Normal file
File diff suppressed because it is too large
Load Diff
1475
ActiveX/ASCOfficeUtils/GOCR/src/ocr0n.c
Normal file
1475
ActiveX/ASCOfficeUtils/GOCR/src/ocr0n.c
Normal file
File diff suppressed because it is too large
Load Diff
85
ActiveX/ASCOfficeUtils/GOCR/src/ocr1.c
Normal file
85
ActiveX/ASCOfficeUtils/GOCR/src/ocr1.c
Normal file
@@ -0,0 +1,85 @@
|
||||
// test routines - faster to compile
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "pgm2asc.h"
|
||||
#include "unicode.h"
|
||||
#include "amiga.h"
|
||||
#include "gocr.h"
|
||||
|
||||
// for learn_mode/analyze_mode high, with, yoffset, num of pattern_i,
|
||||
// - holes (center,radius in relative coordinates) etc. => cluster analyze
|
||||
// num_hole => min-volume, tolerance border
|
||||
// pattern: @@ @. @@
|
||||
// .@ @. ..
|
||||
// regular filter for large resolutions to make edges more smooth (on boxes)
|
||||
// extra-filter (only if not recognized?)
|
||||
// map + same color to (#==change)
|
||||
// - anti color
|
||||
// . not used
|
||||
// strongest neighbour pixels (3x3) => directions
|
||||
// second/third run with more and more tolerance!?
|
||||
|
||||
/* FIXME jb: following is unused */
|
||||
#if 0
|
||||
struct lobj { // line-object (for fitting to near lines)
|
||||
int x0,y0; // starting point (left up)
|
||||
int x1,y1; // end point (right down)
|
||||
int mt; // minimum thickness
|
||||
int q; // quality, overlapp
|
||||
};
|
||||
|
||||
/* FIXME jb global */
|
||||
struct lobj obj1;
|
||||
#endif
|
||||
|
||||
// that is the first draft of feature extraction
|
||||
// detect main lines and bows
|
||||
// seems badly implemented, looking for better algorithms (ToDo: use autotrace)
|
||||
#define MAXL 10
|
||||
// Brute-force search over all point pairs (x1,y1)-(x2,y2) of picture b
// with y2>y1. Pairs for which get_line2() reports a value above 99 are
// collected in a table of at most MAXL entries, which is printed to stdout.
// New entries go in front of the first shorter one; a longer candidate
// replaces a similar (nearby) stored entry.
void ocr2(pix *b,int cs){
  int x1,y1,x2,y2;                  // end points of the candidate line
  int len2;                         // squared length of the candidate
  int slot,k;                       // table indices
  int xa[MAXL],ya[MAXL],xb[MAXL],yb[MAXL],ll[MAXL];

  for(slot=0;slot<MAXL;slot++){     // start with an empty table
    xa[slot]=0; ya[slot]=0; xb[slot]=0; yb[slot]=0; ll[slot]=0;
  }

  for(x1=0;x1<b->x;x1++)   // very slowly, but simple to program
  for(y1=0;y1<b->y;y1++)   // brute force
  for(x2=0;x2<b->x;x2++)
  for(y2=y1+1;y2<b->y;y2++){
    if( get_line2(x1,y1,x2,y2,b,cs,100)<=99 ) continue;  // no line ???

    len2=(x2-x1)*(x2-x1)+(y2-y1)*(y2-y1);  // len
    for(slot=0;slot<MAXL;slot++){
      // remove similar lines (same middle point) IMPROVE IT !!!!!! ???
      if(   abs(x1+x2-xa[slot]-xb[slot])<1+b->x/2
         && abs(y1+y2-ya[slot]-yb[slot])<1+b->y/2
         && abs(y1-ya[slot])<1+b->y/4
         && abs(x1-xa[slot])<1+b->x/4 ){
        if( len2<=ll[slot] ) break;          // forget it if shorter
        for(k=slot;k<MAXL-1;k++){            // shift table: drop this entry
          xa[k]=xa[k+1]; ya[k]=ya[k+1];
          xb[k]=xb[k+1]; yb[k]=yb[k+1]; ll[k]=ll[k+1];
        }
        ll[MAXL-1]=0;
      }
      if( len2>ll[slot] ){                   // insert if larger
        for(k=MAXL-1;k>slot;k--){            // shift table: make room
          xa[k]=xa[k-1]; ya[k]=ya[k-1];
          xb[k]=xb[k-1]; yb[k]=yb[k-1]; ll[k]=ll[k-1];
        }
        xa[slot]=x1; ya[slot]=y1; xb[slot]=x2; yb[slot]=y2; ll[slot]=len2;
        break;
      }
    }
  }

  for(slot=0;slot<MAXL;slot++){
    printf(" %2d %2d %2d %2d %3d\n",xa[slot],ya[slot],xb[slot],yb[slot],ll[slot]);
  }
}
|
||||
|
||||
289
ActiveX/ASCOfficeUtils/GOCR/src/otsu.c
Normal file
289
ActiveX/ASCOfficeUtils/GOCR/src/otsu.c
Normal file
@@ -0,0 +1,289 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2009 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
the following code was send by Ryan Dibble <dibbler@umich.edu>
|
||||
|
||||
The algorithm is very simple but works good hopefully.
|
||||
|
||||
Compare the grayscale histogram with a mass density diagram:
|
||||
I think the algorithm is a kind of
|
||||
divide a body into two parts in a way that the mass
|
||||
centers have the largest distance from each other,
|
||||
the function is weighted in a way that same masses have a advantage
|
||||
|
||||
- otsu algorithm is failing on diskrete multi color images
|
||||
|
||||
TODO:
|
||||
RGB: do the same with all colors (CMYG?) seperately
|
||||
|
||||
test: hardest case = two colors
|
||||
bbg: test done, using a two color gray file. Output:
|
||||
# threshold: Value = 43 gmin=43 gmax=188
|
||||
|
||||
my changes:
|
||||
- float -> double
|
||||
- debug option added (vvv & 1..2)
|
||||
- **image => *image, &image[i][1] => &image[i*cols+1]
|
||||
- do only count pixels near contrast regions
|
||||
this makes otsu much better for shadowed fonts or multi colored text
|
||||
on white background
|
||||
|
||||
(m) Joerg Schulenburg (see README for email address)
|
||||
|
||||
ToDo:
|
||||
- measure contrast
|
||||
- detect low-contrast regions
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define Abs(x) ((x<0)?-(x):x)
|
||||
|
||||
/*======================================================================*
|
||||
* global thresholding routine *
|
||||
* takes a 2D unsigned char array pointer, number of rows, and *
|
||||
* number of cols in the array. returns the value of the threshold *
|
||||
* x0,y0,x0+dx,y0+dy are the edgepoints of the interesting region *
|
||||
* vvv is the verbosity for debugging purpose *
|
||||
*======================================================================*/
|
||||
int
|
||||
otsu (unsigned char *image, int rows, int cols,
|
||||
int x0, int y0, int dx, int dy, int vvv) {
|
||||
|
||||
unsigned char *np; // pointer to position in the image we are working with
|
||||
unsigned char op1, op2; // predecessor of pixel *np (start value)
|
||||
int maxc=0; // maximum contrast (start value)
|
||||
int thresholdValue=1; // value we will threshold at
|
||||
int ihist[256]; // image histogram
|
||||
int chist[256]; // contrast histogram
|
||||
|
||||
int i, j, k; // various counters
|
||||
int is, i1, i2, ns, n1, n2, gmin, gmax;
|
||||
double m1, m2, sum, csum, fmax, sb;
|
||||
|
||||
// zero out histogram ...
|
||||
memset(ihist, 0, sizeof(ihist));
|
||||
memset(chist, 0, sizeof(chist));
|
||||
op1=op2=0;
|
||||
|
||||
gmin=255; gmax=0; k=dy/512+1;
|
||||
// v0.43 first get max contrast, dont do it together with next step
|
||||
// because it failes if we have pattern as background (on top)
|
||||
for (i = 0; i < dy ; i+=k) {
|
||||
np = &image[(y0+i)*cols+x0];
|
||||
for (j = 0; j < dx ; j++) {
|
||||
ihist[*np]++;
|
||||
if(*np > gmax) gmax=*np;
|
||||
if(*np < gmin) gmin=*np;
|
||||
if (Abs(*np-op1)>maxc) maxc=Abs(*np-op1); /* new maximum contrast */
|
||||
if (Abs(*np-op2)>maxc) maxc=Abs(*np-op2); /* new maximum contrast */
|
||||
/* we hope that maxc will be find its maximum very fast */
|
||||
op2=op1; /* shift old pixel to next older */
|
||||
op1=*np; /* store old pixel for contrast check */
|
||||
np++; /* next pixel */
|
||||
}
|
||||
}
|
||||
|
||||
// generate the histogram
|
||||
// Aug06 images with large white or black homogeneous
|
||||
// areas give bad results, so we only add pixels on contrast edges
|
||||
for (i = 0; i < dy ; i+=k) {
|
||||
np = &image[(y0+i)*cols+x0];
|
||||
for (j = 0; j < dx ; j++) {
|
||||
if (Abs(*np-op1)>maxc/4
|
||||
|| Abs(*np-op2)>maxc/4)
|
||||
chist[*np]++; // count only relevant pixels
|
||||
op2=op1; /* shift old pixel to next older */
|
||||
op1=*np; /* store old pixel for contrast check */
|
||||
np++; /* next pixel */
|
||||
}
|
||||
}
|
||||
|
||||
// set up everything
|
||||
sum = csum = 0.0;
|
||||
ns = 0;
|
||||
is = 0;
|
||||
|
||||
for (k = 0; k <= 255; k++) {
|
||||
sum += (double) k * (double) chist[k]; /* x*f(x) cmass moment */
|
||||
ns += chist[k]; /* f(x) cmass */
|
||||
is += ihist[k]; /* f(x) imass */
|
||||
// Debug: output to out_hist.dat?
|
||||
// fprintf(stderr,"\chistogram %3d %6d (brightness weight)", k, ihist[k]);
|
||||
}
|
||||
|
||||
if (!ns) {
|
||||
// if n has no value we have problems...
|
||||
fprintf (stderr, "NOT NORMAL, thresholdValue = 160\n");
|
||||
return (160);
|
||||
}
|
||||
|
||||
// ToDo: only care about extremas in a 3 pixel environment
|
||||
// check if there are more than 2 mass centers (more colors)
|
||||
// return object colors and color radius instead of threshold value
|
||||
// also the reagion, where colored objects are found
|
||||
// what if more than one background color? no otsu at all?
|
||||
// whats background? box with lot of other boxes in it
|
||||
// threshold each box (examples/invers.png,colors.png)
|
||||
// get maximum white and minimum black pixel color (possible range)
|
||||
// check range between them for low..high contrast ???
|
||||
// typical scenes (which must be covered):
|
||||
// - white page with text of different colors (gray values)
|
||||
// - binear page: background (gray=1) + black text (gray=0)
|
||||
// - text mixed with big (dark) images
|
||||
// ToDo: recursive clustering for maximum multipol moments?
|
||||
// idea: normalize ihist to max=1024 before otsu?
|
||||
|
||||
// do the otsu global thresholding method
|
||||
|
||||
if ((vvv&1)) // Debug
|
||||
fprintf(stderr,"# threshold: value ihist chist mass_dipol_moment\n");
|
||||
fmax = -1.0;
|
||||
n1 = 0;
|
||||
for (k = 0; k < 255; k++) {
|
||||
n1 += chist[k]; // left mass (integration)
|
||||
if (!n1) continue; // we need at least one foreground pixel
|
||||
n2 = ns - n1; // right mass (num pixels - left mass)
|
||||
if (n2 == 0) break; // we need at least one background pixel
|
||||
csum += (double) k *chist[k]; // left mass moment
|
||||
m1 = csum / n1; // left mass center (black chars)
|
||||
m2 = (sum - csum) / n2; // right mass center (white background)
|
||||
// max. dipol moment?
|
||||
// orig: sb = (double) n1 *(double) n2 * (m1 - m2) * (m1 - m2);
|
||||
sb = (double) n1 *(double) n2 * (m2 - m1); // seems to be better Aug06
|
||||
/* bbg: note: can be optimized. */
|
||||
if (sb > fmax) {
|
||||
fmax = sb;
|
||||
thresholdValue = k + 1;
|
||||
// thresholdValue = (m1 + 3 * m2) / 4;
|
||||
}
|
||||
if ((vvv&1) && ihist[k]) // Debug
|
||||
fprintf(stderr,"# threshold: %3d %6d %6d %8.2f\n",
|
||||
k, ihist[k], chist[k],
|
||||
sb/(dx*dy)); /* normalized dipol moment */
|
||||
}
|
||||
// ToDo: error = left/right point where sb is 90% of maximum?
|
||||
// now we count all pixels for background detection
|
||||
i1 = 0;
|
||||
for (k = 0; k < thresholdValue; k++) {
|
||||
i1 += ihist[k]; // left mass (integration)
|
||||
}
|
||||
i2 = is - i1; // right mass (num pixels - left mass)
|
||||
|
||||
// at this point we have our thresholding value
|
||||
// black_char: value<cs, white_background: value>=cs
|
||||
|
||||
// can it happen? check for sureness
|
||||
if (thresholdValue > gmax) {
|
||||
fprintf(stderr,"# threshold: Value >gmax\n");
|
||||
thresholdValue = gmax;
|
||||
}
|
||||
if (thresholdValue <= gmin) {
|
||||
fprintf(stderr,"# threshold: Value<=gmin\n");
|
||||
thresholdValue = gmin+1;
|
||||
}
|
||||
|
||||
// debug code to display thresholding values
|
||||
if ( vvv & 1 )
|
||||
fprintf(stderr,"# threshold: Value = %d gmin=%d gmax=%d cmax=%d"
|
||||
" b/w= %d %d\n",
|
||||
thresholdValue, gmin, gmax, maxc, i1, i2);
|
||||
|
||||
// this is a primitive criteria for inversion and should be improved
|
||||
// old: i1 >= 4*i2, but 0811qemu1.png has a bit above 1/4
|
||||
if (2*i1 > 7*i2) { // more black than white, obviously black is background
|
||||
if ( vvv & 1 )
|
||||
fprintf(stderr,"# threshold: invert the image\n");
|
||||
// we do inversion here (no data lost)
|
||||
for (i = 0; i < dy ; i++) {
|
||||
np = &image[(y0+i)*cols+x0];
|
||||
for (j = 0; j < dx ; j++) {
|
||||
*np=255-*np;
|
||||
np++; /* next pixel */
|
||||
}
|
||||
}
|
||||
thresholdValue=255-thresholdValue+1;
|
||||
}
|
||||
|
||||
return(thresholdValue);
|
||||
/* range: 0 < thresholdValue <= 255, example: 1 on b/w images */
|
||||
/* 0..threshold-1 is foreground */
|
||||
/* threshold..255 is background */
|
||||
/* ToDo: min=blackmasscenter/2,thresh,max=(whitemasscenter+255)/2 */
|
||||
}
|
||||
|
||||
/*======================================================================*/
/* thresholding: rescale the gray values of a rectangular image region  */
/* in place so that the caller's threshold maps to the fixed value 160. */
/*                                                                      */
/*   image          8-bit gray image, one byte per pixel, row-major     */
/*   rows           not used by this function (kept for the interface)  */
/*   cols           number of bytes per image row                       */
/*   x0, y0, dx, dy region to process (left, top, width, height)        */
/*   thresholdValue threshold within the region; if it lies outside     */
/*                  gmin+1..gmax it is reset to the middle of the range */
/*                  and a message is written to stderr                  */
/*                                                                      */
/* Pixels >= threshold are mapped linearly into 175..255, pixels below  */
/* the threshold into 0..149.  Returns the new threshold (always 160):  */
/*   0..159 is foreground, 160..255 is background.                      */
/*======================================================================*/
int
thresholding (unsigned char *image, int rows, int cols,
              int x0, int y0, int dx, int dy, int thresholdValue) {

  unsigned char *np;   /* pointer to the pixel currently processed */
  int i, j;            /* row / column counters */
  int gmin = 255, gmax = 0;  /* gray value range found in the region */
  int v;               /* rescaled value before clamping */

  (void) rows;         /* unused, silences compiler warnings */

  /* gray value range of the region, ignoring a one pixel wide border */
  for (i = y0 + 1; i < y0 + dy - 1; i++) {
    np = &image[i * cols + x0 + 1];
    for (j = x0 + 1; j < x0 + dx - 1; j++, np++) {
      if (*np > gmax) gmax = *np;
      if (*np < gmin) gmin = *np;
    }
  }

  /* Regions that are at most 2 pixels wide or high have no interior;
   * gmin > gmax would give an inverted mapping below, so fall back to
   * scanning every pixel of the region. */
  if (gmin > gmax) {
    for (i = y0; i < y0 + dy; i++) {
      np = &image[i * cols + x0];
      for (j = x0; j < x0 + dx; j++, np++) {
        if (*np > gmax) gmax = *np;
        if (*np < gmin) gmin = *np;
      }
    }
  }

  /* allowed_threshold=gmin+1..gmax v0.43 */
  if (thresholdValue <= gmin || thresholdValue > gmax) {
    thresholdValue = (gmin + gmax + 1) / 2;  /* range=0..1 -> threshold=1 */
    fprintf(stderr,"# thresholdValue out of range %d..%d, reset to %d\n",
            gmin, gmax, thresholdValue);
  }

  /* b/w: min=0,tresh=1,max=1 v0.43 */
  /* Rescale every pixel of the region (border included).  Images without
   * contrast end up with thresholdValue == gmin and use the upper branch,
   * which avoids a division by zero in the lower one. */
  for (i = y0; i < y0 + dy; i++) {
    np = &image[i * cols + x0];
    for (j = x0; j < x0 + dx; j++, np++) {
      if (*np >= thresholdValue || thresholdValue == gmin)
        v = 255 - (gmax - *np) * 80 / (gmax - thresholdValue + 1);
      else
        v = (*np - gmin) * 150 / (thresholdValue - gmin);
      /* Border pixels may lie outside gmin..gmax; clamp instead of
       * letting the unsigned char conversion wrap around. */
      if (v < 0)   v = 0;
      if (v > 255) v = 255;
      *np = (unsigned char) v;
    }
  }

  return (128 + 32);  /* the new normalized threshold value */
}
|
||||
|
||||
289
ActiveX/ASCOfficeUtils/GOCR/src/output.c
Normal file
289
ActiveX/ASCOfficeUtils/GOCR/src/output.c
Normal file
@@ -0,0 +1,289 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2009 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL address
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "unicode.h"
|
||||
#include "output.h"
|
||||
#include "pcx.h"
|
||||
#include "gocr.h" /* extern job_t JOB; */
|
||||
|
||||
/* function is only for debugging and for developing
|
||||
it prints out a part of pixmap b at point x0,y0 to stderr
|
||||
using dots .,; if no pixel, and @xoO for pixels
|
||||
modify n_run and print out what would happen on 2nd, 3th loop!
|
||||
new: output original and copied pixmap in the same figure
|
||||
*/
|
||||
void out_b(struct box *px, pix *b, int x0, int y0, int dx, int dy, int cs ){
|
||||
int x,y,x2,y2,yy0,tx,ty,n1,i;
|
||||
char c1, c2;
|
||||
yy0=y0;
|
||||
if (px) { /* overwrite rest of arguments */
|
||||
if (!b) {
|
||||
b=px->p;
|
||||
x0=px->x0; dx=px->x1-px->x0+1;
|
||||
y0=px->y0; dy=px->y1-px->y0+1; yy0=y0;
|
||||
}
|
||||
if(cs==0) cs=JOB->cfg.cs;
|
||||
fprintf(stderr,"\n# list box x= %4d %4d d= %3d %3d r= %3d %3d"
|
||||
" nrun=%d p=%p", /* ToDo: r,nrun is obsolete */
|
||||
px->x0, px->y0, px->x1 - px->x0 + 1, px->y1 - px->y0 + 1,
|
||||
px->x - px->x0, px->y - px->y0, JOB->tmp.n_run, (void*)px);
|
||||
fprintf(stderr,"\n# dots=%d boxes=%d subboxes=%d c=%s mod=%s"
|
||||
" line=%d m= %d %d %d %d",
|
||||
px->dots, px->num_boxes, px->num_subboxes,
|
||||
decode(px->c,ASCII), decode(px->modifier,ASCII), px->line,
|
||||
px->m1 - px->y0, px->m2 - px->y0, px->m3 - px->y0, px->m4 - px->y0);
|
||||
if (px->num_frames) {
|
||||
int i,j,jo;
|
||||
fprintf(stderr,"\n# frames= %d (sumvects=%d)",px->num_frames,
|
||||
((px->num_frames)?px->num_frame_vectors[px->num_frames-1]:-1));
|
||||
for (jo=j=i=0; i<px->num_frames; i++, jo=j) {
|
||||
fprintf(stderr,"\n# frame %d (%+4d,%3d,%2d) ",
|
||||
i, px->frame_vol[i], px->frame_per[i],
|
||||
px->num_frame_vectors[i]-jo);
|
||||
/* print only the first vectors of each frame */
|
||||
for (;j<px->num_frame_vectors[i] && j<MaxFrameVectors; j++)
|
||||
fprintf(stderr," #%02d %2d %2d", j,
|
||||
px->frame_vector[j][0] - px->x0,
|
||||
px->frame_vector[j][1] - px->y0);
|
||||
}
|
||||
}
|
||||
if (px->num_ac){ /* output table of chars and its probabilities */
|
||||
fprintf(stderr,"\n# list box char: ");
|
||||
for(i=0;i<px->num_ac && i<NumAlt;i++)
|
||||
/* output the (xml-)string (picture position, barcodes, glyphs, ...) */
|
||||
if (px->tas[i])
|
||||
fprintf(stderr," %s(%d)", px->tas[i] ,px->wac[i]);
|
||||
else
|
||||
fprintf(stderr," %s(%d)",decode(px->tac[i],ASCII),px->wac[i]);
|
||||
}
|
||||
fprintf(stderr,"\n");
|
||||
if (px->m2 && px->m1<y0 && (px->dots || y0>px->m2)) {
|
||||
yy0=px->m1; dy=px->y1-yy0+1;
|
||||
}
|
||||
}
|
||||
tx=dx/80+1;
|
||||
ty=dy/40+1; /* step, usually 1, but greater on large maps */
|
||||
fprintf(stderr,"# list pattern x= %4d %4d d= %3d %3d t= %d %d yy0= %d\n",
|
||||
x0,y0,dx,dy,tx,ty,yy0);
|
||||
if (dx>0)
|
||||
for(y=yy0;y<yy0+dy;y+=ty) { /* reduce the output to max 78x40 */
|
||||
/* first image is the copied and modified bitmap of the box */
|
||||
if (px)
|
||||
for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
|
||||
n1=0; c1='.';
|
||||
for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
|
||||
for(x2=x;x2<x+tx && x2<x0+dx;x2++)
|
||||
{
|
||||
if((getpixel(px->p,x2-x0+px->x0,
|
||||
y2-y0+px->y0)<cs)) c1='@';
|
||||
}
|
||||
if (px->num_frames) { /* mark vectors */
|
||||
int i;
|
||||
if (c1!='$' && c1!='S') /* dont mark twice */
|
||||
for (i=0;i<px->num_frame_vectors[px->num_frames-1];i++)
|
||||
if ((px->frame_vector[i][0]-px->x0)/tx==(x-x0)/tx
|
||||
&& (px->frame_vector[i][1]-px->y0)/ty==(y-y0)/ty)
|
||||
{ c1=((c1=='@')?'$':'S'); break; }
|
||||
}
|
||||
fprintf(stderr,"%c", c1 );
|
||||
}
|
||||
|
||||
/* 2nd image is the boxframe in the original bitmap */
|
||||
if (dx<40) fprintf(stderr," ");
|
||||
if (dx<40) /* do it only, if we have enough place */
|
||||
for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
|
||||
c1='.';
|
||||
for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
|
||||
for(x2=x;x2<x+tx && x2<x0+dx;x2++)
|
||||
{ if((getpixel(b,x2,y2)<cs)) c1='@'; }
|
||||
fprintf(stderr,"%c", c1 );
|
||||
}
|
||||
|
||||
c1=c2=' ';
|
||||
/* mark lines with < */
|
||||
if (px) if (y-y0+px->y0==px->m1 || y-y0+px->y0==px->m2
|
||||
|| y-y0+px->y0==px->m3 || y-y0+px->y0==px->m4) c1='<';
|
||||
if (y==y0 || y==yy0+dy-1) c2='-'; /* boxmarks */
|
||||
|
||||
fprintf(stderr,"%c%c\n",c1,c2);
|
||||
}
|
||||
}
|
||||
|
||||
/* same as out_b, but for faster use, only a box as argument
|
||||
*/
|
||||
void out_x(struct box *px) {
|
||||
out_b(px,NULL,0, 0, 0, 0, JOB->cfg.cs);
|
||||
}
|
||||
|
||||
|
||||
/* print out two boxes side by side, for debugging comparision algos */
|
||||
void out_x2(struct box *box1, struct box *box2){
|
||||
int x,y,i,tx,ty,dy;
|
||||
/*FIXME jb static*/static char *c1="OXXXXxx@.,,,,,,,";
|
||||
pix *b=&JOB->src.p;
|
||||
dy=(box1->y1-box1->y0+1);
|
||||
if(dy<box2->y1-box2->y0+1)dy=box2->y1-box2->y0+1;
|
||||
tx=(box1->x1-box1->x0)/40+1;
|
||||
ty=(box1->y1-box1->y0)/40+1; /* step, usually 1, but greater on large maps */
|
||||
if(box2)fprintf(stderr,"\n# list 2 patterns");
|
||||
for(i=0;i<dy;i+=ty) { /* reduce the output to max 78x40??? */
|
||||
fprintf(stderr,"\n"); y=box1->y0+i;
|
||||
for(x=box1->x0;x<=box1->x1;x+=tx)
|
||||
fprintf(stderr,"%c", c1[ ((getpixel(b,x,y)<JOB->cfg.cs)?0:8)+marked(b,x,y) ] );
|
||||
if(!box2) continue;
|
||||
fprintf(stderr," "); y=box2->y0+i;
|
||||
for(x=box2->x0;x<=box2->x1;x+=tx)
|
||||
fprintf(stderr,"%c", c1[ ((getpixel(b,x,y)<JOB->cfg.cs)?0:8)+marked(b,x,y) ] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ---- list output ---- for debugging ---
|
||||
* list all boxes where the results can be found within the c-option
|
||||
*/
|
||||
int output_list(job_t *job) {
|
||||
int i = 0, j;
|
||||
struct box *box2;
|
||||
pix *pp = &job->src.p;
|
||||
char *lc = job->cfg.lc;
|
||||
|
||||
fprintf(stderr,"\n# list shape for charlist %s",lc);
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
box2 = (struct box *) list_get_current(&(JOB->res.boxlist));
|
||||
for (j=0; j<box2->num_ac; j++)
|
||||
if (!lc || (box2->tac[j] && strchr(lc, box2->tac[j]))
|
||||
|| (box2->tas[j] && strstr(lc, box2->tas[j]))) break;
|
||||
if (j<box2->num_ac)
|
||||
fprintf(stderr,"\n# box found in charlist");
|
||||
if (!lc || (strchr(lc, box2->c) && box2->c < 256 && box2->c)
|
||||
|| (strchr(lc, '_') && box2->c==UNKNOWN) /* for compability */
|
||||
|| j<box2->num_ac ){ /* also list alternative chars */
|
||||
if (!pp) pp=box2->p;
|
||||
fprintf(stderr,
|
||||
"\n# list shape %3d x=%4d %4d d= %3d %3d vf=%d ac=%d %04x %s",
|
||||
i, box2->x0, box2->y0,
|
||||
box2->x1 - box2->x0 + 1,
|
||||
box2->y1 - box2->y0 + 1,
|
||||
box2->num_frames, box2->num_ac,
|
||||
(int)box2->c, /* wchar_t -> char ???? */
|
||||
decode(box2->c,ASCII) );
|
||||
if (JOB->cfg.verbose & 4) out_x(box2);
|
||||
}
|
||||
i++;
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
fprintf(stderr,"\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* --- output of image incl. corored lines usefull for developers ---
|
||||
* debugging
|
||||
* bit 0+1 is used for color coding (optical marker)
|
||||
* color/gray: 0x01=red, 0x02=blue, 0x04=green???
|
||||
* opt: 1 - mark unknown boxes red (first pass)
|
||||
* 2 - mark unknown boxes more red (final pass)
|
||||
* 4 - mark lines blue
|
||||
* 8 - reset coloring (remove old marker)
|
||||
*/
|
||||
int debug_img(char *fname, struct job_s *job, int opt) {
|
||||
struct box *box2;
|
||||
int x, y, ic, dx, i, j, col;
|
||||
unsigned char *np;
|
||||
pix *pp = &job->tmp.ppo;
|
||||
|
||||
if ( opt & 8 ) { /* clear debug bits in image */
|
||||
for(y=0;y<pp->y;y++) {
|
||||
np=&pp->p[(pp->x)*y];
|
||||
for(x=0;x<pp->x;x++) {
|
||||
*np = *np & 0xF1;
|
||||
np++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* mark longest line which was used to estimate the rotation angle */
|
||||
if ((job->cfg.verbose&32) && job->res.lines.dx)
|
||||
for(i=0;i<pp->x;i++) {
|
||||
y=pp->y/2;
|
||||
if (job->res.lines.dx) y+=job->res.lines.dy*i/job->res.lines.dx;
|
||||
x=i;
|
||||
if (x<0 || x>=pp->x || y<0 || y>=pp->y) continue;
|
||||
np=&pp->p[x + (pp->x)*y];
|
||||
if (*np<160) continue;
|
||||
if((x&7)<5 && !(x&1)) /* dotted line */
|
||||
put(pp,x,y,255,8);
|
||||
}
|
||||
|
||||
ic = ((opt & 2) ? 1 : 2); /* obsolete */
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box2 = (struct box *) list_get_current(&(job->res.boxlist));
|
||||
/* mark boxes in 32=0x40=blue */
|
||||
if (box2->c == ' ' || box2->c == '\n') continue;
|
||||
/* mark chars with green left and under line */
|
||||
col = 4; /* green */
|
||||
if (box2->c == UNKNOWN && (opt & 3)) col=2; /* red */
|
||||
if (box2->x0>1)
|
||||
for (y = box2->y0; y <= box2->y1; y++) {
|
||||
np=&pp->p[box2->x0-1 + y * pp->x]; if (*np<160) continue; *np|=col; }
|
||||
if (box2->y1+1<pp->y)
|
||||
for (x = box2->x0; x <= box2->x1; x++) {
|
||||
np=&pp->p[x + (box2->y1+1) * pp->x]; if (*np<160) continue; *np|=col; }
|
||||
/* mark pictures by green cross */
|
||||
if (box2->c == PICTURE)
|
||||
for (x = 0; x < box2->x1-box2->x0+1; x++){
|
||||
y=(box2->y1-box2->y0+1)*x/(box2->x1-box2->x0+1);
|
||||
pp->p[(box2->x0+x) + (box2->y0+y) * pp->x] |= 4;
|
||||
pp->p[(box2->x1-x) + (box2->y0+y) * pp->x] |= 4;
|
||||
}
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
|
||||
if( opt & 4 )
|
||||
{
|
||||
struct tlines *lines = &job->res.lines;
|
||||
int yr;
|
||||
if (job->cfg.verbose)
|
||||
fprintf(stderr, "# mark lines for %s.ppm\n", fname);
|
||||
/* or read from outside??? */
|
||||
for (i = 0; i < lines->num; i++) { /* mark lines by 0x08 = blue */
|
||||
dx = lines->x1[i] - lines->x0[i] + 1;
|
||||
for (j = -1; j < dx+1; j++) {
|
||||
x = lines->x0[i] + j;
|
||||
if (x<0 || x>=pp->x) continue;
|
||||
for (y=lines->m1[i];y<=lines->m4[i];y++) {
|
||||
/* box arround m2-m3 */
|
||||
if (y>=lines->m2[i] && y<=lines->m3[i] && j>-1 && j<dx) continue;
|
||||
yr = y; /* y.rotated */
|
||||
if (lines->dx) yr += lines->dy * x / (lines->dx);
|
||||
if (yr<0 || yr>=pp->y) continue;
|
||||
np = &(pp->p[x + (pp->x)*yr]);
|
||||
if (*np<160) continue; /* do not touch dark pixels */
|
||||
if ((*np&6)!=0) continue; /* only change white pixels */
|
||||
put(pp, x, yr, 255, 6); /* UPN: 255 and 6 or */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (job->cfg.verbose&1)
|
||||
fprintf(stderr,"# writing %s.ppm\n", fname);
|
||||
writeppm(fname, pp);
|
||||
return 0;
|
||||
}
|
||||
153
ActiveX/ASCOfficeUtils/GOCR/src/pcx.c
Normal file
153
ActiveX/ASCOfficeUtils/GOCR/src/pcx.c
Normal file
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 1999 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
*/
|
||||
/* plan: use popen("ppm2pcx -packed ...","w"); for writing pcx */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
/* #include <assert.h> */
|
||||
|
||||
#include "pcx.h"
|
||||
|
||||
typedef unsigned char byte;
|
||||
|
||||
#define ERR(x) { fprintf(stderr,"ERROR "__FILE__" L%d: " x "\n",__LINE__);exit(1);}
|
||||
|
||||
int err;
|
||||
/* --- needed for reading PCX-files */
|
||||
unsigned char read_b(FILE *f1){
|
||||
unsigned char c=0; c=fgetc(f1); if(feof(f1) || ferror(f1))err=1; return c;
|
||||
}
|
||||
|
||||
/* something here is wrong! */
|
||||
void readpcx(char *name,pix *p,int vvv){ /* see pcx.format.txt */
|
||||
int page,pages,nx,ny,i,j,b,x,y,bpl,bits,pal[256][3];
|
||||
FILE *f1;
|
||||
unsigned char *pic,h[128],bb,b1,b2,b3;
|
||||
err=0;
|
||||
for(i=0;i<256;i++)for(j=0;j<3;j++)pal[i][j]=i;
|
||||
f1=fopen(name,"rb"); if(!f1) ERR("open");
|
||||
if(fread(h,1,128,f1)!=128)ERR("read PCX header"); /* 128 Byte lesen -> h[] */
|
||||
if(h[0]!=10)ERR("no ZSoft sign"); /* ZSoft sign */
|
||||
if(h[2]> 1)ERR("unknown coding"); /* run length encoding */
|
||||
bits = h[3]; /* 1 or 8 */
|
||||
if(bits!=1 && bits!=8)ERR("only 1 or 8 bits supported");
|
||||
nx = h[ 9]*256+h[ 8] - h[ 5]*256-h[ 4] +1; /* Xmax-Xmin */
|
||||
ny = h[11]*256+h[10] - h[ 7]*256-h[ 6] +1; /* Ymax-Ymin */
|
||||
pages=h[65]; bpl=h[66]+256*h[67]; /* bytes per line */
|
||||
if(vvv)
|
||||
fprintf(stderr,"# PCX version=%d bits=%d x=%d y=%d HRes=%d VRes=%d\n"
|
||||
"# NPlanes=%d BytesPerLine=%d Palette=%s",
|
||||
h[1],bits,nx,ny,h[12]+256*h[13],h[14]+256*h[15],
|
||||
pages,bpl,((h[68]==1)?"1=color/bw":"2=gray"));
|
||||
/* line1(NP=4): RRRRR...,GGGG....,BBBBB...,IIII...., line2: RRRR...,GGGG.... */
|
||||
/* C4 EF = (C4&3F)*EF = EF EF EF EF */
|
||||
fflush(stdout);
|
||||
/* palette: for(i=0;i<16;i++) for(j=0;j<3;j++) h[16+3*i+j] */
|
||||
if(pages>1)for(b=0;b<16;b++) for(i=0;i<16;i++)
|
||||
for(j=0;j< 3;j++) pal[b*16+i][j]=h[16+3*i+j]>>2;
|
||||
if(bits>7){
|
||||
fseek(f1,-3*256,2); if(fread(pal,3,256,f1)!=256)ERR("read palette");
|
||||
for(i=0;i<256;i++) for(j=0;j<3;j++) pal[i][j]>>=2;
|
||||
}
|
||||
fseek(f1,128,0);
|
||||
pic=(unsigned char *)malloc( nx*ny );
|
||||
if(pic==NULL)ERR("no memory"); /* no memory */
|
||||
x=y=0;
|
||||
do {
|
||||
for(page=0;page<pages;page++) /* 192 == 0xc0 => b1=counter */
|
||||
do {
|
||||
b1=1; bb=read_b(f1); b2=bb; if(b1==192)fprintf(stderr,"?");
|
||||
if((b2>=192) && (h[2]==1)){b1=b2&63;bb=read_b(f1);b2=bb;}
|
||||
if(err){fprintf(stderr,"\nread error x=%d y=%d\n",x,y);x=nx;y=ny;break;}
|
||||
for(b3=0;b3<b1;b3++)for(b=0;b<8;b+=bits,x++)if(x<nx){
|
||||
bb=(b2>>(8-bits-b)) & ~((~0)<<bits);
|
||||
if(bits==1 && bb==1) bb=240;
|
||||
if(page==0) pic[x+nx*y] =(byte)bb;
|
||||
else pic[x+nx*y]|=(byte)bb<<(page*bits);
|
||||
}
|
||||
} while(x<(9-bits)*bpl); x=0; y++;
|
||||
} while(y<ny);
|
||||
/* */
|
||||
fclose(f1);
|
||||
p->p=pic; p->x=nx; p->y=ny; p->bpp=1;
|
||||
if(vvv)fprintf(stderr,"\n");
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------------------
|
||||
// write bmp 8bit palette no RLE
|
||||
// bit 2+3 used for color coding (markers)
|
||||
// replaced by writeppm (ppm.gz) and is obsolate now, removed later
|
||||
*/
|
||||
void writebmp(char *name,pix p,int vvv){ /* see pcx.format.txt */
|
||||
int nx,ny,i,y,rest[4]={0,0,0,0};
|
||||
FILE *f1;
|
||||
/*FIXME jb static*/static unsigned char *pic, h[54+4*256];
|
||||
long fs,fo,hs,is; /* filesize, offset, headersize, imagesize */
|
||||
|
||||
nx=p.x; ny=p.y; pic=p.p;
|
||||
if (nx&3) nx+=4-(nx&3); /* must be mod4 ? */
|
||||
hs=40; /* bmi headersize fix */
|
||||
is=nx*ny; /* imagesize */
|
||||
fo=14+hs+4*256;
|
||||
fs=fo+is;
|
||||
for(i=0;i<54;i++){ h[i]=0; }
|
||||
/* BITMAPFILEHEADER */
|
||||
h[ 0]='B'; h[ 1]='M'; /* type of file BMP */
|
||||
h[ 2]= fs &255; h[ 3]=(fs>> 8)&255;
|
||||
h[ 4]=(fs>>16)&255; h[ 5]=(fs>>24)&255; /* size of file */
|
||||
h[10]= fo &255; h[11]=(fo>> 8)&255;
|
||||
h[12]=(fo>>16)&255; h[13]=(fo>>24)&255; /* offset to image data */
|
||||
/* BITMAPINFO (BITMAPCOREHEADER not used here) */
|
||||
/* 14 - HEADER */
|
||||
h[14]= hs &255; h[15]=(hs>> 8)&255;
|
||||
h[16]=(hs>>16)&255; h[17]=(hs>>24)&255; /* bmi-header size */
|
||||
h[18]= nx &255; h[19]=(nx>> 8)&255;
|
||||
h[20]=(0l>>16)&255; h[21]=(0l>>24)&255; /* WIDTH/pixel */
|
||||
h[22]= ny &255; h[23]=(ny>> 8)&255;
|
||||
h[24]=(0l>>16)&255; h[25]=(0l>>24)&255; /* HIGH/pixel */
|
||||
h[26]=1; /* planes */
|
||||
h[28]=8; /* bits/pixel 1,4,8,24 */
|
||||
h[30]=0; /* compression */
|
||||
h[34]= is &255; h[35]=(is>> 8)&255;
|
||||
h[36]=(is>>16)&255; h[37]=(is>>24)&255; /* sizeImage (can be 0 if ~RLE) */
|
||||
h[38]=0;h[39]=1; /* ca 100dpi, x/meter */
|
||||
h[42]=0;h[43]=1; /* y/meter */
|
||||
h[46]=0;h[47]=1; /* colorused (0=maximum) */
|
||||
h[50]=0;h[51]=1; /* colorimportand (0=all) */
|
||||
/* 54 - endofheader */
|
||||
for(i=0;i<256;i++){
|
||||
h[54+4*i+0]=((~((i & 2)*64)) & (i & (128+64)))|63;
|
||||
h[54+4*i+1]=((~((i & 2)*64)) & (~((i & 4)*32)) & (i & (128+64)))|63;
|
||||
h[54+4*i+2]=( ((i & 2)* 8) | ((~((i & 4)*32)) & (i & (128+64)))|63);
|
||||
} /* blue-green-red */
|
||||
f1=fopen(name,"wb"); if(!f1) fprintf(stderr," error opening file\n");
|
||||
if(!f1)ERR("open"); /* open-error */
|
||||
if(fwrite(h,1,54+4*256,f1)!=54+4*256)ERR("write head");
|
||||
if(vvv) fprintf(stderr,"# write BMP x=%d y=%d\n",nx,ny);
|
||||
for(y=ny-1;y>=0;y--){
|
||||
if(((int)fwrite(pic+p.x*y,1,p.x,f1))!=p.x)ERR("write");
|
||||
if(nx>p.x)
|
||||
if(((int)fwrite(rest,1,nx-p.x,f1))!=nx-p.x)ERR("write");
|
||||
}
|
||||
fclose(f1);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
2893
ActiveX/ASCOfficeUtils/GOCR/src/pgm2asc.c
Normal file
2893
ActiveX/ASCOfficeUtils/GOCR/src/pgm2asc.c
Normal file
File diff suppressed because it is too large
Load Diff
537
ActiveX/ASCOfficeUtils/GOCR/src/pixel.c
Normal file
537
ActiveX/ASCOfficeUtils/GOCR/src/pixel.c
Normal file
@@ -0,0 +1,537 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2006 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
Joerg.Schulenburg@physik.uni-magdeburg.de */
|
||||
|
||||
/* Filter by tree, filter by number methods added by
|
||||
* William Webber, william@williamwebber.com. */
|
||||
|
||||
#include "pgm2asc.h"
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Defining this causes assert() calls to be turned off runtime.
|
||||
*
|
||||
* This is normally taken care of by make.
|
||||
*/
|
||||
/* #define NDEBUG */
|
||||
|
||||
// ------------------ (&~7)-pixmap-functions ------------------------
|
||||
|
||||
/* test if pixel marked?
|
||||
* Returns: 0 if not marked, least 3 bits if marked.
|
||||
*/
|
||||
int marked (pix * p, int x, int y) {
|
||||
if (x < 0 || y < 0 || x >= p->x || y >= p->y)
|
||||
return 0;
|
||||
return (pixel_atp(p, x, y) & 7);
|
||||
}
|
||||
|
||||
#define Nfilt3 6 /* number of 3x3 filter */
|
||||
/*
|
||||
* Filters to correct possible scanning or image errors.
|
||||
*
|
||||
* Each of these filters represents a 3x3 pixel area.
|
||||
* 0 represents a white or background pixel, 1 a black or
|
||||
* foreground pixel, and 2 represents a pixel of either value.
|
||||
* Note that this differs from the meaning of pixel values in
|
||||
* the image, where a high value means "white" (background),
|
||||
* and a low value means "black" (foreground).
|
||||
*
|
||||
* These filters are applied to the 3x3 environment of a pixel
|
||||
* to be retrieved from the image, centered around that pixel
|
||||
* (that is, the to-be-retrieved pixel corresponds with the
|
||||
* the fifth position of the filter).
|
||||
* If the filter matches that pixel environment, then
|
||||
* the returned value of the pixel is inverted (black->white
|
||||
* or white->black).
|
||||
*
|
||||
* So, for instance, the second filter below matches this
|
||||
* pattern:
|
||||
*
|
||||
* 000
|
||||
* X0X
|
||||
* 000
|
||||
*
|
||||
* and "fills in" the middle (retrieved) pixel to rejoin a line
|
||||
* that may have been broken by a scanning or image error.
|
||||
*/
|
||||
/* one 3x3 pattern per row, listed top row, middle row, bottom row:
 * (-1,-1) (0,-1) (1,-1)  (-1,0) (0,0) (1,0)  (-1,1) (0,1) (1,1) */
const char filt3[Nfilt3][9] = {
  { 0,0,0,  0,0,1,  1,0,0 },
  { 0,0,0,  1,0,1,  0,0,0 },
  { 1,0,0,  0,0,1,  0,0,0 },
  { 1,1,0,  0,1,0,  2,1,1 },
  { 0,0,1,  0,0,0,  2,1,0 },
  { 0,1,0,  0,0,0,  1,2,0 }
};
|
||||
/* 2=ignore_pixel, 0=white_background, 1=black_pixel */
|
||||
|
||||
|
||||
/*
|
||||
* Filter by matrix uses the above matrix of filters directly. Pixel
|
||||
* environments to be filtered are compared pixel by pixel against
|
||||
* these filters.
|
||||
*
|
||||
* Filter by number converts these filters into integer representations
|
||||
* and stores them in a table. Pixel environments are similarly
|
||||
* converted to integers, and looked up in the table.
|
||||
*
|
||||
* Filter by tree converts these filters into a binary tree. Pixel
|
||||
* environments are matched by traversing the tree.
|
||||
*
|
||||
* A typical performance ratio for these three methods is 20:9:7
|
||||
* respectively (i.e., the tree method takes around 35% of the
|
||||
* time of the matrix method).
|
||||
*/
|
||||
#define FILTER_BY_MATRIX 0
|
||||
#define FILTER_BY_NUMBER 1
|
||||
#define FILTER_BY_TREE 2
|
||||
|
||||
#define FILTER_METHOD FILTER_BY_TREE
|
||||
|
||||
/*
|
||||
* Defining FILTER_CHECKED causes filter results from either the tree
|
||||
* or the number method to be checked against results of the other
|
||||
* two methods to ensure correctness. This is for bug checking purposes
|
||||
* only.
|
||||
*/
|
||||
/* #define FILTER_CHECKED */
|
||||
|
||||
/*
|
||||
* Defining FILTER_STATISTICS causes statistics to be kept on how many
|
||||
* times the filters are tried, how many times a filter matches, and
|
||||
* of these matches how many flip a black pixel to white, and how many
|
||||
* the reverse. These statistics are printed to stderr at the end of
|
||||
* the program run. Currently, statistics are only kept if the tree
|
||||
* filter method is being used.
|
||||
*/
|
||||
/* #define FILTER_STATISTICS */
|
||||
|
||||
#ifdef FILTER_STATISTICS
|
||||
static int filter_tries = 0;
|
||||
static int filter_matches = 0;
|
||||
static int filter_blackened = 0;
|
||||
static int filter_whitened = 0;
|
||||
#endif
|
||||
|
||||
#ifdef FILTER_STATISTICS
/* Report the collected error filter counters on stderr. */
void print_filter_stats() {
  fprintf(stderr,
          "\n# Error filter statistics: tries %d, matches %d, blackened %d, whitened %d\n",
          filter_tries,
          filter_matches,
          filter_blackened,
          filter_whitened);
}
#endif
|
||||
|
||||
#if FILTER_METHOD == FILTER_BY_MATRIX || defined(FILTER_CHECKED)
|
||||
/*
|
||||
* Filter the pixel at (x,y) by directly applying the matrix.
|
||||
*/
|
||||
int pixel_filter_by_matrix(pix * p, int x, int y) {
|
||||
int i;
|
||||
static char c33[9];
|
||||
memset(c33, 0, sizeof(c33));
|
||||
/* copy environment of a point (only highest bit)
|
||||
bbg: FASTER now. It has 4 ifs less at least, 8 at most. */
|
||||
if (x > 0) { c33[3] = pixel_atp(p,x-1, y )>>7;
|
||||
if (y > 0) c33[0] = pixel_atp(p,x-1,y-1)>>7;
|
||||
if (y+1 < p->y) c33[6] = pixel_atp(p,x-1,y+1)>>7;
|
||||
}
|
||||
if (x+1 < p->x) { c33[5] = pixel_atp(p,x+1, y )>>7;
|
||||
if (y > 0) c33[2] = pixel_atp(p,x+1,y-1)>>7;
|
||||
if (y+1 < p->y) c33[8] = pixel_atp(p,x+1,y+1)>>7;
|
||||
}
|
||||
if (y > 0) c33[1] = pixel_atp(p, x ,y-1)>>7;
|
||||
c33[4] = pixel_atp(p, x , y )>>7;
|
||||
if (y+1 < p->y) c33[7] = pixel_atp(p, x ,y+1)>>7;
|
||||
|
||||
/* do filtering */
|
||||
for (i = 0; i < Nfilt3; i++)
|
||||
if( ( (filt3[i][0]>>1) || c33[0]!=(1 & filt3[i][0]) )
|
||||
&& ( (filt3[i][1]>>1) || c33[1]!=(1 & filt3[i][1]) )
|
||||
&& ( (filt3[i][2]>>1) || c33[2]!=(1 & filt3[i][2]) )
|
||||
&& ( (filt3[i][3]>>1) || c33[3]!=(1 & filt3[i][3]) )
|
||||
&& ( (filt3[i][4]>>1) || c33[4]!=(1 & filt3[i][4]) )
|
||||
&& ( (filt3[i][5]>>1) || c33[5]!=(1 & filt3[i][5]) )
|
||||
&& ( (filt3[i][6]>>1) || c33[6]!=(1 & filt3[i][6]) )
|
||||
&& ( (filt3[i][7]>>1) || c33[7]!=(1 & filt3[i][7]) )
|
||||
&& ( (filt3[i][8]>>1) || c33[8]!=(1 & filt3[i][8]) ) ) {
|
||||
return ((filt3[i][4])?JOB->cfg.cs:0);
|
||||
}
|
||||
return pixel_atp(p, x, y) & ~7;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if FILTER_METHOD == FILTER_BY_NUMBER || defined(FILTER_CHECKED)
|
||||
|
||||
#define NUM_TABLE_SIZE 512 /* max value of 9-bit value */

/*
 * Recursively enters a matrix filter into the number table.
 *
 *   num_table     lookup table with NUM_TABLE_SIZE entries
 *   filter        the 9 filter values (0, 1 or 2 = either value)
 *   i             filter position handled by this call, 0 to start
 *   gen_num_filt  bits collected for the positions before i, 0 to start
 *
 * Position i of the filter corresponds to bit (8 - i) of the number.
 * A 2 in the filter branches into both alternatives, so such a filter
 * sets several table entries.
 */
void rec_generate_number_table(char * num_table, const char * filter,
                               int i, unsigned short gen_num_filt) {
  char want;
  unsigned short with_bit;

  if (i == 9) {
    /* All positions handled.  Invert the number, to reflect the fact
     * that "white" is 0 in the filter, but 1 (high) in the image. */
    unsigned short entry = (unsigned short) (~gen_num_filt & 0x01ff);
    assert(entry < NUM_TABLE_SIZE);
    num_table[entry] = 1;
    return;
  }

  want = filter[i];
  if (want == 0 || want == 2)
    rec_generate_number_table(num_table, filter, i + 1, gen_num_filt);
  if (want == 1 || want == 2) {
    with_bit = (unsigned short) (gen_num_filt | (1 << (8 - i)));
    rec_generate_number_table(num_table, filter, i + 1, with_bit);
  }
}
|
||||
|
||||
/*
|
||||
* Filter the pixel at (x, y) using a number table.
|
||||
*
|
||||
* Each filter can be converted into a 9-bit representation, where
|
||||
* filters containing 2 (either value) pixels are converted into
|
||||
* a separate numerical representation for each pixel, where position
|
||||
* i in the filter corresponds to bit i in the number. Each resulting
|
||||
* numerical representation N is represented as a 1 value in the Nth
|
||||
* position of a lookup table. A pixel's environment is converted in
|
||||
* the same way to a numeric representation P, and that environment
|
||||
* matches a filter if num_table[P] == 1.
|
||||
*/
|
||||
int pixel_filter_by_number(pix * p, int x, int y) {
  static char num_table[NUM_TABLE_SIZE];  /* 1 = environment matches a filter */
  static int num_table_generated = 0;
  unsigned short val = 0;
  int pos;

  /* Build the lookup table from all 3x3 filters on first use. */
  if (!num_table_generated) {
    int f = 0;

    memset(num_table, 0, sizeof(num_table));
    while (f < Nfilt3) {
      rec_generate_number_table(num_table, filt3[f], 0, 0);
      f++;
    }
    num_table_generated = 1;
  }

  /* Pack the 3x3 neighbourhood, row by row, into a 9-bit number:
   * neighbourhood cell pos (0 = top-left ... 8 = bottom-right) becomes
   * bit 8 - pos.  Cells outside the image leave their bit at 0. */
  for (pos = 0; pos < 9; pos++) {
    int dx = pos % 3 - 1;
    int dy = pos / 3 - 1;

    if ((dx < 0 && !(x > 0)) || (dx > 0 && !(x + 1 < p->x)) ||
        (dy < 0 && !(y > 0)) || (dy > 0 && !(y + 1 < p->y)))
      continue;
    val |= (pixel_atp(p, x + dx, y + dy) >> 7) << (8 - pos);
  }
  assert(val < NUM_TABLE_SIZE);

  /* No filter matches: hand back the pixel without its mark bits. */
  if (!num_table[val])
    return pixel_atp(p, x, y) & ~7;

  /* A filter matches: flip the centre pixel (bit 4 is the centre). */
  return (val & (1 << 4)) ? 0 : JOB->cfg.cs;
}
|
||||
#endif
|
||||
|
||||
#if FILTER_METHOD == FILTER_BY_TREE || defined(FILTER_CHECKED)
|
||||
|
||||
#define TREE_ARRAY_SIZE 1024
/* 1+ number of nodes in a complete binary tree of height 10 */

/*
 * Add one filter to the array-encoded binary tree.
 *
 * i is the filter cell being processed (0..9) and n the array index of the
 * node reached so far; the first call passes n == -1, a virtual root that
 * owns no slot.  From node n the left child lives at 2n + 2 (filter cell 0)
 * and the right child at 2n + 3 (filter cell 1); any other cell value
 * descends into both children.  Inner nodes are marked 1; the leaf reached
 * after nine cells stores 2 when the filter's centre cell is 0, else 1.
 */
void rec_generate_tree(char * tree, const char * filter, int i, int n) {
  int left, right;

  assert(i >= 0 && i <= 9);
  assert(n < TREE_ARRAY_SIZE);

  if (i == 9) {
    tree[n] = (filter[4] == 0) ? 2 : 1;
    return;
  }

  /* The virtual root only decides the direction; it has no slot to mark. */
  if (n != -1)
    tree[n] = 1;

  left = n * 2 + 2;
  right = n * 2 + 3;
  switch (filter[i]) {
  case 0:
    rec_generate_tree(tree, filter, i + 1, left);
    break;
  case 1:
    rec_generate_tree(tree, filter, i + 1, right);
    break;
  default:
    rec_generate_tree(tree, filter, i + 1, left);
    rec_generate_tree(tree, filter, i + 1, right);
    break;
  }
}
|
||||
|
||||
/*
|
||||
* Filter the pixel at (x, y) using the tree method.
|
||||
*
|
||||
* Each filter is represented by a single branch of a binary
|
||||
* tree, except for filters contain "either value" entries, which
|
||||
* bifurcate at that point in the branch. Each white pixel in the filter
|
||||
* is a left branch in the tree, each black pixel a right branch. The
|
||||
* final node of a branch indicates whether this filter turns a white
|
||||
* pixel black, or a black one white.
|
||||
*
|
||||
* We match a pixel's environment against this tree by similarly
|
||||
* using the pixels in that environment to traverse the tree. If
|
||||
* we run out of nodes before getting to the end of a branch, then
|
||||
* the environment doesn't match against any of the filters represented
|
||||
* by the tree. Otherwise, we return the value specified by the
|
||||
* final node.
|
||||
*
|
||||
* Since the total tree size, even including missing nodes, is small
|
||||
* (2 ^ 10), we can use a standard array representation of a binary
|
||||
* tree, where for the node tree[n], the left child is tree[2n + 2],
|
||||
* and the right tree[2n + 3]. The only information we want
|
||||
* from a non-leaf node is whether it exists (that is, is part of
|
||||
* a filter-representing branch). We represent this with the value
|
||||
* 1 at the node's slot in the array, the contrary by 0. For the
|
||||
* leaf node, 0 again represents non-existence, 1 that the filter
|
||||
* represented by this branch turns a black pixel white, and 2 a
|
||||
* white pixel black.
|
||||
*/
|
||||
int pixel_filter_by_tree(pix * p, int x, int y) {
  static char tree[TREE_ARRAY_SIZE];
  static int tree_generated = 0;
  int node = -1;   /* virtual root, see rec_generate_tree() */
  int step;
  int pixel_val = pixel_atp(p, x, y) & ~7;
#ifdef FILTER_STATISTICS
  static int registered_filter_stats = 0;
  if (!registered_filter_stats) {
    atexit(print_filter_stats);
    registered_filter_stats = 1;
  }
  filter_tries++;
#endif /* FILTER_STATISTICS */

  /* Build the tree from all 3x3 filters on first use. */
  if (!tree_generated) {
    int f;
    memset(tree, 0, sizeof(tree));
    for (f = 0; f < Nfilt3; f++)
      rec_generate_tree(tree, filt3[f], 0, -1);
    tree_generated = 1;
  }

  /* Walk the 3x3 environment row by row.  In the image low is black and
   * high is white; a neighbour beyond the image border counts as black.
   * White descends to the left child (2n + 2), black to the right (2n + 3).
   * A node is only ever set together with all of its ancestors, so hitting
   * an unset node at any depth means that no filter matches. */
  for (step = 0; step < 9; step++) {
    int dx = step % 3 - 1;
    int dy = step / 3 - 1;
    int black;

    if ((dx < 0 && x == 0) || (dx > 0 && x + 1 == p->x) ||
        (dy < 0 && y == 0) || (dy > 0 && y + 1 == p->y))
      black = 1;
    else
      black = !(pixel_atp(p, x + dx, y + dy) >> 7);

    node = node * 2 + (black ? 3 : 2);
    assert(node < TREE_ARRAY_SIZE);
    assert(tree[node] == 0 || tree[node] == 1 || tree[node] == 2);
    if (tree[node] == 0)
      return pixel_val;
  }

#ifdef FILTER_STATISTICS
  filter_matches++;
#endif
  /* Leaf value 1: the filter whitens the pixel; otherwise it blackens it. */
  if (tree[node] == 1) {
#ifdef FILTER_STATISTICS
    if (pixel_atp(p, x, y) < JOB->cfg.cs)
      filter_whitened++;
#endif
    return JOB->cfg.cs;
  }
#ifdef FILTER_STATISTICS
  if (pixel_atp(p, x, y) >= JOB->cfg.cs)
    filter_blackened++;
#endif
  return 0;
}
|
||||
#endif /* FILTER_METHOD == FILTER_BY_TREE */
|
||||
|
||||
/*
|
||||
* This simple filter attempts to correct "fax"-like scan errors.
|
||||
*/
|
||||
int pixel_faxfilter(pix *p, int x, int y) {
  /* Returns the pixel at (x, y) without its mark bits, or 64 when the pixel
   * is white but sits in one of the two diagonal "fax" patterns below.
   * Neighbours outside the image can never satisfy a pattern: each pattern
   * needs a black neighbour beside and below the centre, so the lookups are
   * guarded instead of reading beyond the pixel buffer at the borders. */
  int r = pixel_atp(p,x,y)&~7;

  /* Both patterns need a white centre and a black pixel directly below. */
  if ((r&128) && y+1 < p->y && (~pixel_atp(p, x ,y+1)&128)) {
    /* {2,2,2, 2,0,1, 2,1,0} */
    if (x+1 < p->x && (~pixel_atp(p,x+1, y )&128)
                   && ( pixel_atp(p,x+1,y+1)&128))
      r = 64; /* faxfilter */
    /* {2,2,2, 1,0,2, 0,1,2} */
    else if (x > 0 && (~pixel_atp(p,x-1, y )&128)
                   && ( pixel_atp(p,x-1,y+1)&128))
      r = 64; /* faxfilter */
  }
  return r & ~7;
}
|
||||
|
||||
#ifdef FILTER_CHECKED
|
||||
/*
|
||||
* Print out the 3x3 environment of a pixel as a 9-bit binary.
|
||||
*
|
||||
* For debugging purposes only.
|
||||
*/
|
||||
void print_pixel_env(FILE * out, pix * p, int x, int y) {
  /* Emits nine characters, row by row: '?' for a cell outside the image,
   * '0' for a high (white) pixel and '1' for a low (black) one. */
  int k;

  for (k = 0; k < 9; k++) {
    int cx = x - 1 + k % 3;
    int cy = y - 1 + k / 3;
    char mark;

    if (cx < 0 || cx >= p->x || cy < 0 || cy >= p->y)
      mark = '?';
    else
      mark = (pixel_atp(p, cx, cy) >> 7) ? '0' : '1';
    fputc(mark, out);
  }
}
|
||||
#endif
|
||||
|
||||
/* this function is heavily used
|
||||
* test if pixel was set, remove low bits (marks) --- later with error-correction
|
||||
* result depends on n_run, if n_run>0 filter are used
|
||||
* Returns: pixel-color (without marks)
|
||||
*/
|
||||
int getpixel(pix *p, int x, int y){
  /* Anything outside the image reads as white (255 without mark bits). */
  if ( !(x >= 0 && y >= 0 && x < p->x && y < p->y) )
    return 255 & ~7;

  /* filter will be used only once later, when vectorization replaces pixel
   * processing
   */
  if (JOB->tmp.n_run > 0) { /* use the filters (correction of errors) */
#if FILTER_METHOD == FILTER_BY_NUMBER
    int filtered = pixel_filter_by_number(p, x, y);
#ifdef FILTER_CHECKED
    /* cross-check against the reference matrix implementation */
    int by_matrix = pixel_filter_by_matrix(p, x, y);
    if (filtered != by_matrix) {
      fprintf(stderr,
              "# BUG: pixel_filter: by number: %d; by matrix: %d, "
              "by atp %d; env: ", filtered, by_matrix, pixel_atp(p, x, y) & ~7);
      print_pixel_env(stderr, p, x, y);
      fputc('\n', stderr);
    }
#endif /* FILTER_CHECKED */
    return filtered;
#elif FILTER_METHOD == FILTER_BY_MATRIX
    return pixel_filter_by_matrix(p, x, y);
#elif FILTER_METHOD == FILTER_BY_TREE
    int filtered = pixel_filter_by_tree(p, x, y);
#ifdef FILTER_CHECKED
    /* cross-check against both other implementations */
    int by_matrix = pixel_filter_by_matrix(p, x, y);
    int by_number = pixel_filter_by_number(p, x, y);
    if (filtered != by_matrix || filtered != by_number) {
      fprintf(stderr,
              "# BUG: pixel_filter: tree: %d; matrix: %d, "
              "number: %d, atp %d; env: ", filtered, by_matrix, by_number,
              pixel_atp(p, x, y) & ~7);
      print_pixel_env(stderr, p, x, y);
      fputc('\n', stderr);
    }
#endif /* FILTER_CHECKED */
    return filtered;
#else
#error FILTER_METHOD not defined
#endif /* FILTER_BY_NUMBER */
  }

  /* No filtering requested: strip the three mark bits and return. */
  return (pixel_atp(p,x,y) & ~7);
}
|
||||
|
||||
/* modify pixel, test if out of range */
|
||||
void put(pix * p, int x, int y, int ia, int io) {
  /* Coordinates outside the image are ignored. */
  if (x < 0 || y < 0 || x >= p->x || y >= p->y)
    return;
  /* keep the bits selected by ia, then set the bits given in io */
  pixel_atp(p, x, y) = (pixel_atp(p, x, y) & ia) | io;
}
|
||||
733
ActiveX/ASCOfficeUtils/GOCR/src/pnm.c
Normal file
733
ActiveX/ASCOfficeUtils/GOCR/src/pnm.c
Normal file
@@ -0,0 +1,733 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2009 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
|
||||
v0.1.0 initial version (stdin added)
|
||||
v0.2.0 popen added
|
||||
v0.2.7 review by Bruno Barberi Gnecco
|
||||
v0.39 autoconf
|
||||
v0.41 fix integer and heap overflow, change color output
|
||||
v0.46 fix blank spaces problem in filenames
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
/* #include <unistd.h> */
|
||||
#endif
|
||||
|
||||
/* Windows needs extra code to work fine, ^Z in BMP's will stop input else.
|
||||
* I do not have any idea when this text mode will be an advantage
|
||||
* but the MS community seems to like to do simple things in a complex way. */
|
||||
#if defined(O_BINARY) && (defined(__WIN32) || defined(__WIN32__)\
|
||||
|| defined(__WIN64) || defined(__WIN64__) || defined(__MSDOS__))
|
||||
# include <fcntl.h>
|
||||
# define SET_BINARY(_f) do {if (!isatty(_f)) setmode (_f, O_BINARY);} while (0)
|
||||
#else
|
||||
# define SET_BINARY(f) (void)0
|
||||
#endif
|
||||
|
||||
#include "pnm.h"
|
||||
#ifdef HAVE_PAM_H
|
||||
# include <pam.h>
|
||||
# include <sys/types.h>
|
||||
# include <sys/stat.h>
|
||||
# include <fcntl.h>
|
||||
#else
|
||||
# include <ctype.h>
|
||||
#endif
|
||||
|
||||
#define EE() fprintf(stderr,"\nERROR "__FILE__" L%d: ",__LINE__)
|
||||
#define E0(x0) {EE();fprintf(stderr,x0 "\n"); }
|
||||
#define F0(x0) {EE();fprintf(stderr,x0 "\n"); exit(1);}
|
||||
#define F1(x0,x1) {EE();fprintf(stderr,x0 "\n",x1); exit(1);}
|
||||
|
||||
/*
|
||||
* Weights to use for the different colours when converting a ppm
|
||||
* to greyscale. These weights should sum to 1.0
|
||||
*
|
||||
* The below values have been chosen to reflect the fact that paper
|
||||
* goes a reddish-yellow as it ages.
|
||||
*
|
||||
* v0.41: for better performance, we use integer instead of double
|
||||
* this integer value divided by 1024 (2^10) gives the factor
|
||||
*/
|
||||
#define PPM_RED_WEIGHT 511 /* .499 */
|
||||
#define PPM_GREEN_WEIGHT 396 /* .387 */
|
||||
#define PPM_BLUE_WEIGHT 117 /* .114 */
|
||||
|
||||
/*
|
||||
feel free to expand this list of usable converting programs
|
||||
Note 1: the last field must be NULL.
|
||||
Note 2: "smaller" extensions must come later: ".pnm.gz" must come
|
||||
before ".pnm".
|
||||
calling external programs is a security risk
|
||||
ToDo: for better security replace gzip by /usr/bin/gzip !
|
||||
*/
|
||||
char *xlist[]={
  ".pnm.gz",	"gzip -cd",  /* compressed pnm-files, gzip package */
  ".pbm.gz",	"gzip -cd",
  ".pgm.gz",	"gzip -cd",
  ".ppm.gz",	"gzip -cd",
  ".pnm.bz2",	"bzip2 -cd",
  ".pbm.bz2",	"bzip2 -cd",
  ".pgm.bz2",	"bzip2 -cd",
  ".ppm.bz2",	"bzip2 -cd",
  ".jpg", 	"djpeg -gray -pnm",  /* JPG/JPEG, jpeg package */
  ".jpeg",	"djpeg -gray -pnm",
  ".gif",	"giftopnm -image=all",  /* GIF, netpbm package */
  ".bmp",	"bmptoppm",
  ".tiff",	"tifftopnm",
  ".png",	"pngtopnm", /* Portable Network Graphics (PNG) format */
  ".ps",	"pstopnm -stdout -portrait -pgm", /* postscript */
  ".eps",	"pstopnm -stdout -portrait -pgm", /* encapsulated postscript */
  /* gs -sDEVICE=pgmraw -sOutputFile=- -g609x235 -r141x141 -q -dNOPAUSE */
  ".fig",	"fig2dev -L ppm -m 3", /* xfig files, transfig package */
  NULL
};

/*
 * Return a pointer to the command converting file to pnm, or NULL.
 *
 * name: file name to inspect (input)
 * return: the command stored in xlist for the extension that name ends
 *         with, NULL if name ends with none of the listed extensions
 *
 * Only the tail of name is compared.  Searching for the first occurrence
 * of an extension instead would miss names that contain the extension
 * twice (for example "scan.png.png").
 */
char *testsuffix(char *name){
  size_t name_len = strlen(name);
  int i;

  for (i = 0; xlist[i] != NULL; i += 2) {
    size_t ext_len = strlen(xlist[i]);

    /* handle *.eps.pbm correct: only a match at the very end counts */
    if (ext_len <= name_len
        && strcmp(name + (name_len - ext_len), xlist[i]) == 0)
      return xlist[i+1];
  }
  return NULL;
}
|
||||
|
||||
|
||||
/*
 * Read the next character from f1, skipping #-comments.
 *
 * A '#' starts a comment that lasts up to and including the next newline.
 * At end of file the error is reported and the value delivered by fgetc()
 * (EOF narrowed to char) is returned, also when the file ends inside a
 * comment; without that return an unterminated comment would loop forever.
 * A read error terminates the program through F0().
 */
char read_char(FILE *f1){ // filter #-comments
  int in_comment = 0;
  for(;;){
    char c = fgetc(f1);
    if( feof(f1)   ) { E0("read feof"); return c; }
    if( ferror(f1) ) F0("read ferror");
    if( c == '#'   ) { in_comment = 1; continue; }
    if( !in_comment ) return c;
    if( c == '\n'  ) in_comment = 0;
  }
}
|
||||
|
||||
/*
|
||||
* read char from buffer
|
||||
* buf: pointer to buffer
|
||||
* pos: pointer to current pos in buffer
|
||||
* size: size of buffer
|
||||
*
|
||||
*/
|
||||
int fgetc2(char* buf, long* pos, long size)
{
  /* Returns the byte at *pos as a non-negative value (like fgetc) and
   * advances *pos.  Valid indices are 0 .. size-1; reading at *pos == size
   * would touch memory behind the buffer.  In that case *pos is still moved
   * one step past the end, so that feof2(), which tests pos > size, reports
   * end of buffer after the failed read just as feof() does after fgetc(). */
  if (*pos >= size) {
    if (*pos == size)
      ++*pos;
    return EOF;
  }
  /* unsigned char: a 0xFF data byte must not be mistaken for EOF */
  return (unsigned char)buf[(*pos)++];
}
|
||||
|
||||
/*
|
||||
* get end of buffer
|
||||
* pos: current pos in buffer
|
||||
* size: size of buffer
|
||||
*
|
||||
*/
|
||||
int feof2(long pos, long size)
{
  /* non-zero once the position has moved beyond size */
  if (size < pos)
    return 1;
  return 0;
}
|
||||
|
||||
|
||||
/*
 * Read the next character from a memory buffer, skipping #-comments.
 *
 * buf:  pointer to buffer
 * pos:  pointer to current pos in buffer, advanced by the read
 * size: size of buffer
 *
 * A '#' starts a comment that lasts up to and including the next newline.
 * When the end of the buffer is reached the error is reported and the
 * value delivered by fgetc2() is returned, also inside a comment; without
 * that return an unterminated trailing comment would loop forever.
 */
char read_char2(char *buf, long* pos, long size){ // filter #-comments
  int in_comment = 0;
  for(;;){
    char c = fgetc2(buf, pos, size);
    if( feof2(*pos, size) ) { E0("read feof"); return c; }
    if( c == '#'   ) { in_comment = 1; continue; }
    if( !in_comment ) return c;
    if( c == '\n'  ) in_comment = 0;
  }
}
|
||||
|
||||
|
||||
/*
|
||||
for simplicity only PAM of netpbm is used, the older formats
|
||||
PBM, PGM and PPM can be handled implicitly by PAM routines (js05)
|
||||
v0.43: return 1 if multiple file (hold it open), 0 otherwise
|
||||
*/
|
||||
#ifdef HAVE_PAM_H
|
||||
/*
 * Read one image through the netpbm PAM routines and store it as 8-bit
 * grey values in p.
 *
 * name: filename of image, "-" for stdin (input)
 * p:    pointer where to store the loaded image (input)
 * vvv:  verbose mode (input)
 * return: 1 if a further image follows (the stream stays open for the next
 *         call), 0 otherwise
 *
 * The stream and the converter command are kept in static variables
 * between calls, so the function is not reentrant.
 */
int readpgm(char *name, pix * p, int vvv) {
  static FILE *fp=NULL;
  static char *pip;
  char magic1, magic2;
  int i, j, sample, minv = 0, maxv = 0, eofP=0;
  struct pam inpam;
  tuple *tuplerow;

  assert(p);

  if (!fp) { // fp!=0 for multi-pnm and idx>0
    /* Forget the converter of a previously read file; otherwise reading
     * stdin after a piped file would end in pclose() on stdin. */
    pip = NULL;
    /* open file; test if conversion is needed. */
    if (name[0] == '-' && name[1] == '\0') {
      fp = stdin;
      SET_BINARY (fileno(fp)); // Windows needs it for correct work
    }
    else {
      pip = testsuffix(name);
      if (!pip) {
        fp = fopen(name, "rb");
        if (!fp)
          F1("opening file %s", name);
      }
      else {
        /* +4: one blank, two quotes and the terminating NUL */
        char *buf = (char *)malloc((strlen(pip)+strlen(name)+4));
        if (!buf)
          F0("Error at malloc: popen command");
        sprintf(buf, "%s \"%s\"", pip, name); /* allow spaces in filename */
        if (vvv) {
          fprintf(stderr, "# popen( %s )\n", buf);
        }
#ifdef HAVE_POPEN
        /* potential security vulnerability, if name contains tricks */
        /* example: gunzip -c dummy | rm -rf * */
        /* windows needs "rb" for correct work, linux not, cygwin? */
        /* ToDo: do you have better code to go arround this? */
#if defined(__WIN32) || defined(__WIN32__) || defined(__WIN64) || defined(__WIN64__)
        fp = popen(buf, "rb");  /* ToDo: may fail, please report */
        if (!fp) fp = popen(buf, "r"); /* 2nd try, the gnu way */
#else
        fp = popen(buf, "r");
#endif
#else
        F0("sorry, compile with HAVE_POPEN to use pipes");
#endif
        if (!fp)
          F1("opening pipe %s", buf);
        free(buf);
      }
    }
  }

  /* netpbm 0.10.36 tries to write a comment to nonzero char** comment_p */
  /* patch by C.P.Schmidt 21Nov06 */
  memset (&inpam, 0, sizeof(inpam));

  /* read pgm-header */
  /* struct pam may change between netpbm-versions, causing problems? */
#ifdef PAM_STRUCT_SIZE /* ok for netpbm-10.35 */
  /* new-and-better? but PAM_STRUCT_SIZE is not defined in netpbm-10.18 */
  pnm_readpaminit(fp, &inpam, PAM_STRUCT_SIZE(tuple_type));
#else /* ok for netpbm-10.18 old-and-bad for new netpbms */
  pnm_readpaminit(fp, &inpam, sizeof(inpam));
#endif

  p->x = inpam.width;
  p->y = inpam.height;
  magic1=(inpam.format >> 8) & 255; /* 'P' for PNM,PAM */
  magic2=(inpam.format     ) & 255; /* '7' for PAM */
  minv=inpam.maxval;
  if (vvv) {
    fprintf(stderr, "# readpam: format=0x%04x=%c%c h*w(d*b)=%d*%d(%d*%d)\n",
      inpam.format,   /* magic1*256+magic2 */
      ((magic1>31 && magic1<127)?magic1:'.'),
      ((magic2>31 && magic2<127)?magic2:'.'),
      inpam.height,
      inpam.width,
      inpam.depth,
      inpam.bytes_per_sample);
  }
  if ( (1.*(p->x*p->y))!=((1.*p->x)*p->y) )
    F0("Error integer overflow");
  if ( !(p->p = (unsigned char *)malloc(p->x*p->y)) )
    F1("Error at malloc: p->p: %d bytes", p->x*p->y);
  tuplerow = pnm_allocpamrow(&inpam);
  for ( i=0; i < inpam.height; i++ ) {
    pnm_readpamrow(&inpam, tuplerow);   /* exit on error */
    for ( j = 0; j < inpam.width; j++ ) {
      if (inpam.depth>=3)
        /* tuplerow is unsigned long (see pam.h sample) */
        /* we expect 8bit or 16bit integers,
           no overflow up to 32-10-2=20 bits */
        sample
          = ((PPM_RED_WEIGHT   * tuplerow[j][0] + 511)>>10)
          + ((PPM_GREEN_WEIGHT * tuplerow[j][1] + 511)>>10)
          + ((PPM_BLUE_WEIGHT  * tuplerow[j][2] + 511)>>10);
      else
        sample = tuplerow[j][0];
      sample = 255 * sample / inpam.maxval; /* normalize to 8 bit */
      p->p[i*inpam.width+j] = sample;
      if (maxv<sample) maxv=sample;
      if (minv>sample) minv=sample;
    }
  }
  pnm_freepamrow(tuplerow);
  pnm_nextimage(fp,&eofP);
  if (vvv)
    fprintf(stderr,"# readpam: min=%d max=%d eof=%d\n", minv, maxv, eofP);
  p->bpp = 1;
  if (eofP) {
    if (!pip) fclose(fp);
#ifdef HAVE_POPEN
    else      pclose(fp);       /* close pipe (v0.43) */
#endif
    fp=NULL; return 0;
  }
  return 1; /* multiple image = concatenated pnm */
}
|
||||
|
||||
#else
|
||||
/*
|
||||
if PAM not installed, here is the fallback routine,
|
||||
which is not so powerful but needs no dependencies from other libs
|
||||
*/
|
||||
/*
 * Read one unsigned decimal number in ASCII notation from f1.
 *
 * buf: receives the value as bps bytes, least significant byte in buf[0]
 *      (digits that do not fit into bps bytes are dropped)
 * bps: number of bytes of the value
 * f1:  input stream, #-comments are skipped by read_char()
 * return: always 0
 *
 * Leading white space is skipped; the number ends at the first white space
 * after a digit or at end of file.  Any other character terminates the
 * program through F0().
 */
static int fread_num(char *buf, int bps, FILE *f1) {
  int in_digits = 0;  /* 0 = skipping leading spaces, 1 = scanning digits */
  int k;

  for (k = 0; k < bps; k++) buf[k] = 0;  // initialize value to zero
  while (!feof(f1)) {
    char c1 = read_char(f1);
    int carry;

    if (feof(f1)) break;  /* a number may be ended by the end of the file */
    if (isspace((unsigned char)c1)) {
      if (in_digits) break;
      continue;
    }
    in_digits = 1;
    if (!isdigit((unsigned char)c1)) F0("unexpected char");
    /* value = value * 10 + digit, done bytewise.  The digit enters as the
     * initial carry so an overflow of the lowest byte propagates upwards;
     * the bytes are read as unsigned so values >= 128 stay positive. */
    carry = c1 - '0';
    for (k = 0; k < bps; k++) {
      carry += (unsigned char)buf[k] * 10;
      buf[k] = (char)(carry & 255);
      carry >>= 8;
    }
  }
  return 0;
}
|
||||
|
||||
/*
|
||||
if PAM not installed, here is the fallback routine,
|
||||
which is not so powerful but needs no dependencies from other libs
|
||||
*/
|
||||
/*
 * Read one unsigned decimal number in ASCII notation from a memory buffer.
 *
 * buf:    receives the value as bps bytes, least significant byte in buf[0]
 *         (digits that do not fit into bps bytes are dropped)
 * bps:    number of bytes of the value
 * buffer: input data, #-comments are skipped by read_char2()
 * pos:    pointer to current pos in buffer, advanced while reading
 * size:   size of buffer
 * return: always 0
 *
 * Leading white space is skipped; the number ends at the first white space
 * after a digit or at the end of the buffer.  Any other character
 * terminates the program through F0().
 */
static int fread_num2(char *buf, int bps, char *buffer, long *pos, long size) {
  int in_digits = 0;  /* 0 = skipping leading spaces, 1 = scanning digits */
  int k;

  for (k = 0; k < bps; k++) buf[k] = 0;  // initialize value to zero
  while (!feof2(*pos, size)) {
    char c1 = read_char2(buffer, pos, size);
    int carry;

    if (feof2(*pos, size)) break;  /* end of buffer also ends the number */
    if (isspace((unsigned char)c1)) {
      if (in_digits) break;
      continue;
    }
    in_digits = 1;
    if (!isdigit((unsigned char)c1)) F0("unexpected char");
    /* value = value * 10 + digit, done bytewise.  The digit enters as the
     * initial carry so an overflow of the lowest byte propagates upwards;
     * the bytes are read as unsigned so values >= 128 stay positive. */
    carry = c1 - '0';
    for (k = 0; k < bps; k++) {
      carry += (unsigned char)buf[k] * 10;
      buf[k] = (char)(carry & 255);
      carry >>= 8;
    }
  }
  return 0;
}
|
||||
|
||||
/*
|
||||
* read image file, used to read the OCR-image and database images,
|
||||
* image file can be PBM/PGM/PPM in RAW or TEXT
|
||||
* name: filename of image (input)
|
||||
* p: pointer where to store the loaded image (input)
|
||||
* vvv: verbose mode (input)
|
||||
* return: 0=ok, 1=further image follows (multiple image), -1 on error
|
||||
* this is the fall back routine if libpnm cant be used
|
||||
*/
|
||||
/*
 * Fallback PNM reader (used when libpnm is not available), see the
 * description of name, p, vvv and the return value directly above.
 *
 * The open stream, the converter command and the first magic byte are kept
 * in static variables between calls so that concatenated images can be read
 * one per call; the function is therefore not reentrant.
 */
int readpgm(char *name, pix *p, int vvv){
  static char c1, c2;           /* magic bytes, file type */
  static char *pip;             // static to survive multiple calls
  int  nx,ny,nc,mod,i,j;        // buffer
  static FILE *f1=NULL;         // trigger read new file or multi image file
  unsigned char *pic;
  char buf[512];
  int lx, ly, dx;
  int bps=1; /* bytes per sample (0..255..65535...) */

  if (!f1) {  /* first of multiple image, on MultipleImageFiles c1 was read */
    pip=NULL;
    if (name[0]=='-' && name[1]==0) {
      f1=stdin;  /* is this correct ??? */
      SET_BINARY (fileno(f1)); // Windows needs it for correct work
    } else {
      pip=testsuffix(name);
      if (!pip) {
        f1=fopen(name,"rb"); if (!f1) F1("opening file %s",name);
      } else {
        /* command + blank + two quotes + NUL must fit into buf */
        if (strlen(pip)+strlen(name)+4 > sizeof(buf))
          F1("file name too long: %s",name);
        sprintf(buf,"%s \"%s\"",pip,name);
        if (vvv) { fprintf(stderr,"# popen( %s )\n",buf); }
#ifdef HAVE_POPEN
#if defined(__WIN32) || defined(__WIN32__) || defined(__WIN64) || defined(__WIN64__)
        f1 = popen(buf, "rb");  /* ToDo: may fail, please report */
        if (!f1) f1 = popen(buf, "r"); /* 2nd try, the gnu way */
#else
        f1=popen(buf,"r");
#endif
#else
        F0("only PNM files supported (compiled without HAVE_POPEN)");
#endif
        if (!f1) F1("opening pipe %s",buf);
      }
    }
    c1=fgetc(f1); if (feof(f1)) { E0("unexpected EOF"); return -1; }
  }
  c2=fgetc(f1);   if (feof(f1)) { E0("unexpected EOF"); return -1; }
  // check the first two bytes of the PNM file
  //         PBM PGM PPM
  //    TXT   P1  P2  P3
  //    RAW   P4  P5  P6
  if (c1!='P' || c2 <'1' || c2 >'6') {
    fprintf(stderr,"\nread-PNM-error: file number is %2d,"
                   " position %ld", fileno(f1), ftell(f1));
    fprintf(stderr,"\nread-PNM-error: bad magic bytes, expect 0x50 0x3[1-6]"
                   " but got 0x%02x 0x%02x", 255&c1, 255&c2);
    if (f1) fclose(f1); f1=NULL; return(-1);
  }
  nx=ny=nc=0; if (c2=='4' || c2=='1') nc=1;
  /* header: width, height and (not for PBM) the maximum value */
  for(mod=0;((c2=='5' || c2=='2') && (mod&7)<6)
         || ((c2=='6' || c2=='3') && (mod&7)<6)
         || ((c2=='4' || c2=='1') && (mod&7)<4);)
  {                                             // mode: 0,2,4=[ |\t|\r|\n]
                                                //      1=nx 3=ny 5=nc 8-13=#rem
    c1=read_char(f1);                           // former: # mod|=8
    if( (mod & 1)==0 )                          // whitespaces
      if( !isspace(c1) ) mod++;
    if( (mod & 1)==1 ) {
      if( !isdigit(c1) ) {
        if( !isspace(c1) )F0("unexpected character");
        mod++; }
      else if(mod==1) nx=nx*10+c1-'0';
      else if(mod==3) ny=ny*10+c1-'0';
      else if(mod==5) nc=nc*10+c1-'0';
    }
  }
  if(vvv)
    fprintf(stderr,"# PNM P%c h*w=%d*%d c=%d head=%ld",c2,ny,nx,nc,ftell(f1));
  if( c2=='4' && (nx&7)!=0 ){
    /* nx=(nx+7)&~7;*/ if(vvv)fprintf(stderr," PBM2PGM nx %d",(nx+7)&~7);
  }
  if (nc>> 8) bps=2; // bytes per color and pixel
  if (nc>>16) bps=3;
  if (nc>>24) bps=4;
  fflush(stdout);
  if ( (1.*(nx*ny))!=((1.*nx)*ny) )
    F0("Error integer overflow");
  pic=(unsigned char *)malloc( nx*ny );
  if(pic==NULL)F0("memory failed");             // no memory
  for (i=0;i<nx*ny;i++) pic[i]=255;             // init to white if reading fails
  /* this is a slow but short routine for P1 to P6 formats */
  if( c2=='5' || c2=='2' ) /* slow PGM-RAW/ASC read pixelwise */
    for (i=0;i<nx*ny;i++) {
      if (c2=='5') { if(bps!=(int)fread(buf,1,bps,f1)) {
        fprintf(stderr," ERROR reading at head+%d*%d\n", bps, i); break; } }
      /* a grey map carries exactly one number per pixel */
      else fread_num(buf, bps, f1);
      /* NOTE(review): fread_num() stores the low byte first, so buf[bps-1]
       * is the high byte for ASCII data; for raw data with bps > 1 the
       * byte order of the file decides - confirm against the PGM spec. */
      pic[i]=buf[bps-1]; /* store the most significant byte */
    }
  // we want to normalize brightness to 0..255
  if (c2=='6' || c2=='3') {     // PPM-RAW/ASC
    for (i=0;i<nx*ny;i++) {
      if (c2=='6') { if (3*bps!=(int)fread(buf,1,3*bps,f1)){
        fprintf(stderr," ERROR reading at head+3*%d*%d\n", bps, i); break; } }
      else for (j=0;j<3;j++) fread_num(buf+j*bps, bps, f1);
      pic[i]
          = ((PPM_RED_WEIGHT   * (unsigned char)buf[  bps-1] + 511)>>10)
          + ((PPM_GREEN_WEIGHT * (unsigned char)buf[2*bps-1] + 511)>>10)
          + ((PPM_BLUE_WEIGHT  * (unsigned char)buf[3*bps-1] + 511)>>10);
      /* normalized to 0..255 */
    }
  }
  if( c2=='1' )
    for(mod=j=i=0,nc=255;i<nx*ny && !feof(f1);){ // PBM-ASCII 0001100
      c1=read_char(f1);
      if( isdigit(c1) ) { pic[i]=((c1=='0')?255:0); i++; }
      else if( !isspace(c1) )F0("unexpected char");
    }
  if( c2=='4' ){ // PBM-RAW
    dx=(nx+7)&~7;                               // dx (mod 8)
    if(ny!=(int)fread(pic,dx>>3,ny,f1))F0("read"); // read all bytes
    /* expand the packed bits in place, back to front, so that no packed
     * byte is overwritten before it has been read */
    for(ly=ny-1;ly>=0;ly--)
    for(lx=nx-1;lx>=0;lx--)
      pic[lx+ly*nx]=( (128 & (pic[(lx+ly*dx)>>3]<<(lx & 7))) ? 0 : 255 );
    nc=255;
  }
  {
    int minc=255, maxc=0;
    for (i=0;i<nx*ny;i++) {
      if (pic[i]>maxc) maxc=pic[i];
      if (pic[i]<minc) minc=pic[i];
    }
    if (vvv) fprintf(stderr," min=%d max=%d", minc, maxc);
  }
  p->p=pic;  p->x=nx;  p->y=ny; p->bpp=1;
  if (vvv) fprintf(stderr,"\n");
  c1=0; c1=fgetc(f1); /* needed to trigger feof() */
  if (feof(f1) || c1!='P') {  /* EOF ^Z or not 'P' -> single image */
    if (vvv) fprintf(stderr,"# PNM EOF\n");
    if(name[0]!='-' || name[1]!=0){ /* do not close stdin */
      if(!pip) fclose(f1);
#ifdef HAVE_POPEN
      else     pclose(f1);      /* close pipe (Jul00) */
#endif
    }
    f1=NULL; /* set file is closed flag */
    return 0;
  }
  return 1; /* multiple image = concatenated pnm's */
}
|
||||
|
||||
#endif /* HAVE_PAM_H */
|
||||
|
||||
/*
|
||||
* read path of buffer
|
||||
* buf: pointer to out buffer
|
||||
* size: size objects to read
|
||||
* count: count objects to read
|
||||
* buf: pointer to in buffer
|
||||
* pos: pointer to current pos in buffer
|
||||
* sizeb : size of buffer
|
||||
*
|
||||
*/
|
||||
size_t fread2(void *bufOut, size_t size, size_t count, char* bufIn, long* pos, long sizeb)
{
    /* Memory-buffer counterpart of fread(): copies up to count objects of
     * size bytes from bufIn, starting at *pos, and advances *pos.
     * Returns the number of complete objects copied, so callers can detect
     * a short read.  The bytes of a trailing incomplete object are still
     * copied and *pos then stops at sizeb.
     * For size == 0 nothing is copied and count is returned. */
    char*  dst  = bufOut;
    size_t done = 0;

    if (0 == size)
        return count;

    while (done < count)
    {
        /* bytes still available; 0 if *pos already lies behind the end */
        long avail = (*pos < sizeb) ? sizeb - *pos : 0;

        if ((size_t)avail < size)
        {
            memcpy(dst, bufIn + *pos, (size_t)avail);
            *pos += avail;
            break;
        }

        memcpy(dst, bufIn + *pos, size);
        dst  += size;
        *pos += (long)size;
        ++done;
    }

    return done;
}
|
||||
|
||||
|
||||
/*
|
||||
* read image file, used to read the OCR-image and database images,
|
||||
* image file can be PBM/PGM/PPM in RAW or TEXT
|
||||
* buffer: pointer to buffer of image (input)
|
||||
* size: size buffer of image (input)
|
||||
* p: pointer where to store the loaded image (input)
|
||||
* return: 0=ok, 1=further image follows (multiple image), -1 on error
|
||||
* this is the fall back routine if libpnm cant be used
|
||||
*/
|
||||
/*
 * Memory-buffer variant of readpgm(), see the description of buffer, size,
 * p and the return value directly above.  Decoding always starts at the
 * beginning of buffer.
 */
int readpgmFromBuffer(char* buffer, long size, pix *p){
  char c1, c2;                  /* magic bytes, file type */
  int  nx,ny,nc,mod,i,j;        // buffer
  unsigned char *pic;
  char buf[512];
  int lx, ly, dx;
  int bps=1; /* bytes per sample (0..255..65535...) */

  long pos = 0;

  c1=fgetc2(buffer, &pos, size); if (feof2(pos, size)) { E0("unexpected EOF"); return -1; }
  c2=fgetc2(buffer, &pos, size); if (feof2(pos, size)) { E0("unexpected EOF"); return -1; }
  // check the first two bytes of the PNM file
  //         PBM PGM PPM
  //    TXT   P1  P2  P3
  //    RAW   P4  P5  P6
  if (c1!='P' || c2 <'1' || c2 >'6') {
    return(-1);
  }
  nx=ny=nc=0; if (c2=='4' || c2=='1') nc=1;
  /* header: width, height and (not for PBM) the maximum value */
  for(mod=0;((c2=='5' || c2=='2') && (mod&7)<6)
         || ((c2=='6' || c2=='3') && (mod&7)<6)
         || ((c2=='4' || c2=='1') && (mod&7)<4);)
  {                                             // mode: 0,2,4=[ |\t|\r|\n]
                                                //      1=nx 3=ny 5=nc 8-13=#rem
    c1=read_char2(buffer, &pos, size);          // former: # mod|=8
    if( (mod & 1)==0 )                          // whitespaces
      if( !isspace(c1) ) mod++;
    if( (mod & 1)==1 ) {
      if( !isdigit(c1) ) {
        if( !isspace(c1) )F0("unexpected character");
        mod++; }
      else if(mod==1) nx=nx*10+c1-'0';
      else if(mod==3) ny=ny*10+c1-'0';
      else if(mod==5) nc=nc*10+c1-'0';
    }
  }
  if (nc>> 8) bps=2; // bytes per color and pixel
  if (nc>>16) bps=3;
  if (nc>>24) bps=4;
  fflush(stdout);
  if ( (1.*(nx*ny))!=((1.*nx)*ny) )
    F0("Error integer overflow");
  pic=(unsigned char *)malloc( nx*ny );
  if(pic==NULL)F0("memory failed");             // no memory
  for (i=0;i<nx*ny;i++)pic[i]=255;              // init to white if reading fails
  /* this is a slow but short routine for P1 to P6 formats */
  if( c2=='5' || c2=='2' ) /* slow PGM-RAW/ASC read pixelwise */
    for (i=0;i<nx*ny;i++) {
      if (c2=='5') { if(bps!=(int)fread2(buf,1,bps,buffer,&pos,size)) {
        fprintf(stderr," ERROR reading at head+%d*%d\n", bps, i); break; } }
      /* a grey map carries exactly one number per pixel */
      else fread_num2(buf, bps, buffer, &pos, size);
      /* NOTE(review): fread_num2() stores the low byte first, so buf[bps-1]
       * is the high byte for ASCII data; for raw data with bps > 1 the
       * byte order of the input decides - confirm against the PGM spec. */
      pic[i]=buf[bps-1]; /* store the most significant byte */
    }
  // we want to normalize brightness to 0..255
  if (c2=='6' || c2=='3') {     // PPM-RAW/ASC
    for (i=0;i<nx*ny;i++) {
      if (c2=='6') { if (3*bps!=(int)fread2(buf,1,3*bps,buffer,&pos,size)){
        fprintf(stderr," ERROR reading at head+3*%d*%d\n", bps, i); break; } }
      else for (j=0;j<3;j++) fread_num2(buf+j*bps, bps, buffer, &pos, size);
      pic[i]
          = ((PPM_RED_WEIGHT   * (unsigned char)buf[  bps-1] + 511)>>10)
          + ((PPM_GREEN_WEIGHT * (unsigned char)buf[2*bps-1] + 511)>>10)
          + ((PPM_BLUE_WEIGHT  * (unsigned char)buf[3*bps-1] + 511)>>10);
      /* normalized to 0..255 */
    }
  }
  if( c2=='1' )
    for(mod=j=i=0,nc=255;i<nx*ny && !feof2(pos, size);){ // PBM-ASCII 0001100
      c1=read_char2(buffer, &pos, size);
      if( isdigit(c1) ) { pic[i]=((c1=='0')?255:0); i++; }
      else if( !isspace(c1) )F0("unexpected char");
    }
  if( c2=='4' ){ // PBM-RAW
    dx=(nx+7)&~7;                               // dx (mod 8)
    if(ny!=(int)fread2(pic,dx>>3,ny,buffer,&pos,size))F0("read"); // read all bytes
    /* expand the packed bits in place, back to front, so that no packed
     * byte is overwritten before it has been read */
    for(ly=ny-1;ly>=0;ly--)
    for(lx=nx-1;lx>=0;lx--)
      pic[lx+ly*nx]=( (128 & (pic[(lx+ly*dx)>>3]<<(lx & 7))) ? 0 : 255 );
    nc=255;
  }
  p->p=pic;  p->x=nx;  p->y=ny; p->bpp=1;
  c1=0; c1=fgetc2(buffer, &pos, size); /* needed to trigger feof() */
  if (feof2(pos, size) || c1!='P') {  /* EOF ^Z or not 'P' -> single image */
    return 0;
  }
  return 1; /* multiple image = concatenated pnm's */
}
|
||||
|
||||
/*
 * writepgm - write picture p to file nam as a raw PGM (P5, maxval 255).
 *
 * nam: path of the output file, opened with mode "wb".
 * p:   picture; when p->bpp is 3 the buffer is first reduced IN PLACE to one
 *      byte per pixel (plain average of the three channel bytes), so the
 *      caller's pixel data is modified while p->bpp itself is left untouched.
 *
 * Returns 0. Open and write failures are handed to the F0() macro.
 */
int writepgm(char *nam, pix *p)
{
  FILE *out;
  int row, col;

  out = fopen(nam, "wb");
  if (!out) F0("open");                        /* open-error */
  fprintf(out, "P5\n%d %d\n255\n", p->x, p->y);

  if (p->bpp == 3) {
    for (row = 0; row < p->y; row++) {
      for (col = 0; col < p->x; col++) {
        int idx = col + row * p->x;            /* destination gray index */
        const unsigned char *src = &p->p[3 * idx];
        /* the write position never overtakes the read position (idx <= 3*idx) */
        p->p[idx] = (src[0] + src[1] + src[2]) / 3;
      }
    }
  }

  /* one fwrite item per image row; all rows must be written */
  if (p->y != (int)fwrite(p->p, p->x, p->y, out)) F0("write");
  fclose(out);
  return 0;
}
|
||||
|
||||
/* adding colours, care about range */
|
||||
/*
 * addrgb - tint one RGB pixel, saturating every channel to 0..255.
 *
 * rgb:        pixel to modify in place (three bytes, R G B).
 * sr, sg, sb: requested strength of the red, green and blue tint.
 *
 * If the channel sum is below 3*160 the tint is added (twice the requested
 * strength per channel). Otherwise the complementary channels are reduced
 * instead, because a bright pixel has no headroom left for adding colour.
 */
void addrgb(unsigned char rgb[3], int sr, int sg, int sb)
{
  int delta[3];
  int ch;
  int brightness = (int)rgb[0] + (int)rgb[1] + (int)rgb[2];

  if (brightness >= 3*160) {
    /* bright pixel: subtract from the two other channels */
    delta[0] = (-sg-sb);
    delta[1] = (-sr-sb);
    delta[2] = (-sr-sg);
  } else {
    /* dark pixel: add the colour itself */
    delta[0] = 2*sr;
    delta[1] = 2*sg;
    delta[2] = 2*sb;
  }

  for (ch = 0; ch < 3; ch++) {
    int d = delta[ch];
    if (d < 0) {
      int room = rgb[ch];              /* distance down to 0 */
      rgb[ch] -= (room < -d) ? room : -d;
    } else {
      int room = 255 - rgb[ch];        /* distance up to 255 */
      rgb[ch] += (room < d) ? room : d;
    }
  }
}
|
||||
/*
|
||||
* pgmtoppm or pnmtopng, use last 3 bits for farbcoding
|
||||
* replaces old writebmp variant
|
||||
*/
|
||||
/*
 * writeppm - write picture p as a raw PPM (P6, maxval 255).
 *
 * nam: output name. If it contains ".ppm" it is opened directly. Otherwise,
 *      when HAVE_POPEN is defined, the data is piped to
 *      "pnmtopng > <nam>.png" and, failing that, to "gzip -c > <nam>.ppm.gz".
 *      The last resort is the plain file "<nam>.ppm".
 *      Names containing '|' are rejected.
 * p:   picture; for bpp==1 the low four bits of every gray byte are treated
 *      as marker bits and rendered as colour tints via addrgb(); for bpp==3
 *      the buffer is written unchanged.
 *
 * Returns 0 on completion (write errors are only reported through E0) and
 * -1 if nam contains '|'. A failure to open any target goes to F0().
 *
 * NOTE(review): in the HAVE_POPEN branches nam is pasted into a shell
 * command line. Only '|' is filtered; other shell metacharacters
 * (';', '&', '`', '$', ...) are not. Do not pass untrusted names here.
 */
int writeppm(char *nam, pix *p){ /* P6 raw-ppm */
  FILE *f1=NULL; int x,y,f1t=0; unsigned char rgb[3], gray, bits;
  char buf[128];

  if (strchr(nam,'|')) return -1; /* no nasty code */
  if (strstr(nam,".ppm")) { f1=fopen(nam,"wb"); }
#ifdef HAVE_POPEN
  /* be sure that nam contains NO hacker code like "dummy | rm -rf *" */
  if (!f1) {
    strncpy(buf,"pnmtopng > ",12); /* no spaces within filenames allowed! */
    /* strncpy does not terminate when nam has >= 111 chars; the copy fills
     * buf[11..121], so the terminator belongs at index 122 (was 123, which
     * left buf[122] uninitialised inside the string) */
    strncpy(buf+11,nam,111); buf[122]=0;
    strncpy(buf+strlen(buf),".png",5);
    /* we dont care about win "wb" here, never debug on win systems */
    f1 = popen(buf, "w"); if(f1) f1t=1; else E0("popen pnmtopng");
  }
  if (!f1) {
    strncpy(buf,"gzip -c > ",11);
    /* copy fills buf[10..118]; terminate at 119 (was 120) */
    strncpy(buf+10,nam,109); buf[119]=0;
    strncpy(buf+strlen(buf),".ppm.gz",8);
    /* we dont care about win "wb" here, never debug on win systems */
    f1 = popen(buf, "w"); if(f1) f1t=1; else E0("popen gzip -c");
  }
#endif
  if (!f1) {
    /* copy fills buf[0..112]; terminate at 113 (was 114) */
    strncpy(buf,nam,113); buf[113]=0;
    strncpy(buf+strlen(buf),".ppm",5);
    f1=fopen(buf,"wb");
  }
  if (!f1) F0("open"); /* open-error */

  fprintf(f1,"P6\n%d %d\n255\n",p->x,p->y);
  if ( p->bpp==1 )
    for (y=0;y<p->y;y++)
      for (x=0;x<p->x;x++){
        gray=p->p[x+y*p->x];
        bits=(gray&0x0F); /* save marker bits */
        /* replace used bits to get max. contrast, 160=0xA0 */
        gray = ((gray<160) ? (gray&~0x0F)>>1 : 0xC3|(gray>>1) );
        rgb[0] = rgb[1] = rgb[2] = gray;
        if ((bits & 1)==1) { addrgb(rgb,0,0,8+8*((x+y)&1)); } /* dark blue */
        if ((bits & 8)==8) { addrgb(rgb,0,0, 16); } /* blue (low priority) */
        if ((bits & 6)==6) { addrgb(rgb,0,0, 32); } /* blue */
        if ((bits & 6)==4) { addrgb(rgb,0,48,0); } /* green */
        if ((bits & 6)==2) { addrgb(rgb,32,0,0); } /* red */
        /* on a write error push y to the end so the outer loop stops too */
        if ( 1!=(int)fwrite(rgb,3,1,f1) ) { E0("write"); y=p->y; break; }
      }
  if ( p->bpp==3 )
    if ( p->y!=(int)fwrite(p->p,3*p->x,p->y,f1) ) E0("write");
#ifdef HAVE_POPEN
  if (f1t) { pclose (f1); f1=NULL; } /* pipes must be closed with pclose */
#endif
  if (f1) fclose(f1);
  return 0;
}
|
||||
|
||||
// high bit = first,
|
||||
/*
 * writepbm - write picture p to file nam as a raw PBM (P4), high bit first.
 *
 * nam: path of the output file, opened with mode "wb".
 * p:   picture with bpp 1 (gray) or 3 (RGB, averaged). A pixel value above
 *      127 becomes bit 0, anything else bit 1. The bit rows are packed IN
 *      PLACE into the start of p->p, so the caller's pixel data is destroyed.
 *
 * Returns 0. Open and write failures are handed to the F0() macro.
 */
int writepbm(char *nam,pix *p){// P4 raw-pbm
  FILE *f1;
  int x, y, byte_idx, bit, dx, v;

  dx=(p->x+7)&~7; // enlarge to a factor of 8
  for(y=0;y<p->y;y++)
    for(x=0;x<p->x;x++){ // set bit
      byte_idx=(x+y*dx)>>3; bit=7-(x&7); // address and bit number
      v=x+y*p->x;
      if(p->bpp==3) v=(p->p[3*v+0]+p->p[3*v+1]+p->p[3*v+2])/3;
      else          v= p->p[ v ];
      v=((v>127)?0:1);
      /* Keep only the bits above `bit`, then set `bit` itself. Bits within a
       * byte are produced from 7 downwards, so clearing everything below is
       * harmless and leaves the row padding bits zero. The former expression
       * `~1<<b` gave the same mask on common compilers but left-shifts a
       * negative int, which is undefined behaviour in C. */
      p->p[byte_idx]=(unsigned char)
          ((p->p[byte_idx] & (0xFFu<<(bit+1))) | ((unsigned)v<<bit));
    }
  f1=fopen(nam,"wb");if(!f1)F0("open"); // open-error
  fprintf(f1,"P4\n%d %d\n",p->x,p->y);
  if(p->y!=(int)fwrite(p->p,dx>>3,p->y,f1))F0("write"); // write all lines
  fclose(f1);
  return 0;
}
|
||||
// ------------------------------------------------------------------------
|
||||
87
ActiveX/ASCOfficeUtils/GOCR/src/progress.c
Normal file
87
ActiveX/ASCOfficeUtils/GOCR/src/progress.c
Normal file
@@ -0,0 +1,87 @@
|
||||
/* ---------------------------- progress output ---------------------- */
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "progress.h"
|
||||
|
||||
FILE *fp=NULL; /* output stream for progress info */
|
||||
time_t printinterval = 10; /* approx. seconds between printouts, 1.. */
|
||||
|
||||
/* initialization of progress output, fname="<fileID>","<filename>","-" */
|
||||
int ini_progress(char *fname){
|
||||
int fd;
|
||||
if (fp) { fclose(fp); fp=NULL; }
|
||||
if (fname) if (fname[0]) {
|
||||
fd=atoi(fname);
|
||||
if(fd>255 || fname[((fd>99)?3:((fd>9)?2:1))]) fd=-1; /* be sure */
|
||||
if (fname[0]=='-' && fname[1]==0) { fp=stdout; }
|
||||
#ifdef __USE_POSIX
|
||||
else if (fd>0) { fp=fdopen(fd,"w"); } /* not sure that "w" is ok ???? */
|
||||
#endif
|
||||
else { fp=fopen(fname,"w");if(!fp)fp=fopen(fname,"a"); }
|
||||
if (!fp) {
|
||||
fprintf(stderr,"could not open %s for progress output\n",fname);
|
||||
return -1; /* no success */
|
||||
}
|
||||
}
|
||||
/* fprintf(stderr,"# progress: fd=%d\n",fileno(fp)); */
|
||||
return 0; /* no error */
|
||||
}
|
||||
|
||||
progress_counter_t *open_progress(int maxcount, const char *name){
|
||||
progress_counter_t *pc;
|
||||
pc = (progress_counter_t*) malloc( sizeof(progress_counter_t) );
|
||||
if (!pc) return 0; /* nonfatal */
|
||||
pc->starttime = time(NULL);
|
||||
pc->maxcount = maxcount;
|
||||
pc->numskip = 0;
|
||||
pc->lastprintcount = -1;
|
||||
pc->name = name;
|
||||
pc->lastprinttime = pc->starttime;
|
||||
return pc;
|
||||
}
|
||||
/* free counter */
|
||||
/*
 * close_progress - release a counter obtained from open_progress().
 * A NULL counter is accepted. Always returns 0.
 */
int close_progress(progress_counter_t *counter)
{
  free(counter);   /* free(NULL) is defined to do nothing */
  return 0;
}
|
||||
/* progress meter output
|
||||
* only 1output/10s, + estimated endtime (test on pixelfields)
|
||||
* ToDo: to stderr by default? remove subprogress, ini_progress? rm_progress?
|
||||
* test on tcl
|
||||
*/
|
||||
/*
 * progress - rate-limited progress report.
 *
 * counter: current position (compared against pc->maxcount).
 * pc:      counter from open_progress(); NULL is tolerated and ignored,
 *          because open_progress() may return NULL on allocation failure.
 *
 * Nothing is printed unless ini_progress() selected a stream. To stay cheap
 * the function returns early for the next pc->numskip calls after a report;
 * numskip grows while calls arrive much faster than printinterval and is
 * halved when a report comes late. Always returns 0.
 */
int progress(int counter, progress_counter_t *pc){
  /* we try to save computing time, so we skip early */
  if ((!fp) || (!pc) || counter - pc->lastprintcount <= pc->numskip) return 0;
  {
    char cr='\n';
    time_t now = time(NULL);
#if 0 /* debugging */
    if (counter)
      fprintf(fp," progress %s %3d / %d time %d skip %d\n",
        pc->name,counter,pc->maxcount,(int)(now - pc->starttime),
        pc->numskip); fflush(fp);
#endif
    if (5*(now - pc->lastprinttime) < 2*printinterval
     && counter - pc->lastprintcount >= pc->numskip) { /* save for tests */
      if (pc->numskip < 1024) pc->numskip += pc->numskip+1;
    }
    if (3*(now - pc->lastprinttime) < 2*printinterval ) {
      return 0; /* too early for printing */
    }
    if (2*(now - pc->lastprinttime) > 3*printinterval ) {
      pc->numskip >>= 1; /* too late for printing */
    }
    if (fileno(fp)<3) cr='\r'; /* may be chosen in ini? */
    if (counter) /* also guards the division below */
      fprintf(fp," progress %s %5d / %d time[s] %5d / %5d (skip=%d)%c",
        pc->name,counter,pc->maxcount,
        (int)(now - pc->starttime), /* time gone since start */
        (int)(now - pc->starttime)*pc->maxcount/(counter), /* estimated */
        pc->numskip, cr);
    fflush(fp);
    pc->lastprintcount=counter;
    pc->lastprinttime=now;
  }
  return 0; /* no error */
}
|
||||
/* --------------------- end of progress output ---------------------- */
|
||||
703
ActiveX/ASCOfficeUtils/GOCR/src/remove.c
Normal file
703
ActiveX/ASCOfficeUtils/GOCR/src/remove.c
Normal file
@@ -0,0 +1,703 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 2000-2009 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "pgm2asc.h"
|
||||
#include "gocr.h"
|
||||
#include "progress.h"
|
||||
|
||||
/* measure mean thickness as a criterion for big chars */
|
||||
int mean_thickness( struct box *box2 ){
|
||||
int mt=0, i, y, dx=box2->x1-box2->x0+1, dy;
|
||||
for (y=box2->y0+1; y<box2->y1; y++) {
|
||||
i=loop(box2->p,box2->x0+0,y,dx,JOB->cfg.cs,0,RI);
|
||||
i=loop(box2->p,box2->x0+i,y,dx,JOB->cfg.cs,1,RI);
|
||||
mt+=i;
|
||||
}
|
||||
dy = box2->y1 - box2->y0 - 1;
|
||||
if (dy) mt=(mt+dy/2)/dy;
|
||||
return mt;
|
||||
}
|
||||
|
||||
/* ---- remove dust ---------------------------------
|
||||
What is dust? I think, this is a very small pixel cluster without
|
||||
neighbours. Of course not all dust clusters can be detected correct.
|
||||
This feature should be possible to switch off via option.
|
||||
-> may be, all clusters should be stored here?
|
||||
speed is very slow, I know, but I am happy that it is working well
|
||||
*/
|
||||
/*
 * remove_dust - delete tiny pixel clusters from job->res.boxlist and from
 * the source image job->src.p.
 *
 * Steps, in order:
 *  1. Build a histogram of abs(frame_vol[0]) over all boxes that have frames
 *     and a non-negative frame_vol[0]; boxes higher than 3 pixels also feed
 *     the sums nC, sX, sY, sP.
 *  2. If job->cfg.dust_size < 0 (and nC > 0) derive dust_size from the mean
 *     box size and the histogram and store it back into job->cfg.dust_size.
 *  3. If dust_size is non-zero, every box with abs(frame_vol[0]) <= dust_size
 *     is overwritten in the image via put(), taken out of the list, freed,
 *     and subtracted from job->res.numC/sumX/sumY.
 *  4. In boxes wider than 16 and higher than 30, single pixels >= cs whose
 *     four direct neighbours are all < cs are overwritten via put(pp,x,y,0,0).
 *
 * Diagnostics go to stderr when job->cfg.verbose is non-zero.
 * Always returns 0.
 */
int remove_dust( job_t *job ){
|
||||
/* new dust removing */
|
||||
/* FIXME jb:remove pp */
|
||||
pix *pp = &job->src.p;
|
||||
int i1,i,j,x,y,x0,x1,y0,y1,nC,sX,sY,sP, cs,vvv=job->cfg.verbose;
|
||||
struct box *box2;
|
||||
#define HISTSIZE 220 /* histogramm size */
|
||||
int histo[HISTSIZE];
|
||||
cs=job->cfg.cs; sP=sX=sY=nC=0;
|
||||
/*
|
||||
* count number of black pixels within a box and store it in .dots
|
||||
* later .dots is re-used for number of objects belonging to the character
|
||||
* should be done in the flood-fill algorithm
|
||||
* volume of white pixels is estimated to big here (left/right rot)
|
||||
* ToDo: mean thickness of char lines?
|
||||
* or interval nesting (minP..maxP) to remove outriders
|
||||
*/
|
||||
j=0;
|
||||
for (i1=0;i1<HISTSIZE;i1++) histo[i1]=0;
|
||||
/* mean value over every black object which is big enough */
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box2 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
if (!box2->num_frames) continue;
|
||||
if (box2->frame_vol[0]<0) continue; /* don't count inner holes */
|
||||
j = abs(box2->frame_vol[0]);
|
||||
if ((box2->y1-box2->y0+1)>3) {
|
||||
nC++; /* only count potential chars v0.42 */
|
||||
sX+=box2->x1 - box2->x0 + 1;
|
||||
sY+=box2->y1 - box2->y0 + 1;
|
||||
sP+=j;
|
||||
}
|
||||
/* volumes of HISTSIZE or more are not recorded in the histogram */
if (j<HISTSIZE) histo[j]++;
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
|
||||
if (job->cfg.dust_size < 0 && nC > 0) { /* auto detection */
|
||||
/* this formula is empirically, high resolution scans have bigger dust */
|
||||
/* maximum allowed dustsize (min=4*7 ca. 32)
|
||||
* does not work for background pattern!
|
||||
*/
|
||||
job->cfg.dust_size = ( ( sX/nC ) * ( sY/nC ) + 16) / 32;
|
||||
if (vvv) fprintf(stderr, "# remove.c remove_dust(): ");
|
||||
if (vvv) fprintf(stderr, "\n# dust size detection, vol num"
|
||||
" #obj=%d maxDust=%d mpixel= %3d mxy= %2d %2d",
|
||||
nC, job->cfg.dust_size, sP/nC, sX/nC, sY/nC);
|
||||
/* we assume that for random dust applies histo[i+1]<histo[i] */
|
||||
/* the loop bound i+3<HISTSIZE keeps histo[i+3] below inside the array */
for (i=1;i+3<HISTSIZE;i++){
|
||||
if (vvv) fprintf(stderr,"\n# dust size histogram %3d %5d",i,histo[i]);
|
||||
if (histo[i]>=nC) continue; /* v0.42 lot of pixels -> bg pattern < 3 */
|
||||
if (i>=job->cfg.dust_size) break; /* maximum = mean size / 32 */
|
||||
if (histo[i/*+1*/]==0) break; /* bad statistic */
|
||||
if ((histo[i+2]+histo[i+3])
|
||||
>=(histo[i] +histo[i+1])) break; /* no noise, but to late? */
|
||||
if ( histo[i-1] > 1024*histo[i] &&
|
||||
2*histo[i+1] >=histo[i]) break; /* bg pattern */
|
||||
}
|
||||
if (vvv) fprintf(stderr," break");
|
||||
if (vvv) for (i1=0,j=i+1;j<HISTSIZE;j++) {
|
||||
/* compressed, output only if something is changing */
|
||||
/* j==HISTSIZE-1 is tested first so histo[j+1] is never read out of range */
if (j==HISTSIZE-1 || histo[j]!=histo[j-1] || histo[j]!=histo[j+1]) {
|
||||
fprintf(stderr,"\n# dust size histogram %3d %5d",j,histo[j]);
|
||||
if (++i1>20) break; /* dont do excessive output */
|
||||
}
|
||||
}
|
||||
/* i is the histogram index at which the scan above stopped */
job->cfg.dust_size=i-1;
|
||||
/* what is the statistic of random dust?
|
||||
* if we have p pixels on a x*y image we should have
|
||||
* (p/(x*y))^1 * (x*y) = p singlets
|
||||
* (p/(x*y))^2 * (x*y) = p^2/(x*y) doublets and
|
||||
* (p/(x*y))^3 * (x*y) = p^3/(x*y)^2 triplets
|
||||
*/
|
||||
if (vvv) fprintf(stderr,"\n# auto dust size = %d nC= %3d .. %3d"
|
||||
" avD= %2d %2d .. %2d %2d\n",
|
||||
job->cfg.dust_size, nC, job->res.numC,
|
||||
(job->res.sumX+job->res.numC/2)/job->res.numC,
|
||||
(job->res.sumY+job->res.numC/2)/job->res.numC, sX/nC, sY/nC);
|
||||
}
|
||||
if (job->cfg.dust_size)
|
||||
{ i=0;
|
||||
if(vvv){
|
||||
fprintf(stderr,"# remove dust of size %2d",job->cfg.dust_size);
|
||||
/* Warning: better use (1/(x*y))^2 as 1/((x*y)^2),
|
||||
* because (x*y)^2 may overflow */
|
||||
fprintf(stderr," histo=%d,%d(?=%d),%d(?=%d),...\n# ...",
|
||||
histo[1],histo[2],histo[1]*histo[1]/(pp->x*pp->y),
|
||||
histo[3], histo[1]*histo[1]/(pp->x*pp->y)
|
||||
*histo[1]/(pp->x*pp->y));
|
||||
}
|
||||
i = 0;
|
||||
/* i counts the clusters removed in this pass */
for_each_data(&(job->res.boxlist)) {
|
||||
box2 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
x0=box2->x0;x1=box2->x1;y0=box2->y0;y1=box2->y1; /* box */
|
||||
j=abs(box2->frame_vol[0]);
|
||||
if(j<=job->cfg.dust_size) /* remove this tiny object */
|
||||
{ /* here we should distinguish dust and i-dots,
|
||||
* may be we should sort out dots to a seperate dot list and
|
||||
* after line detection decide, which is dust and which not
|
||||
* dust should be removed to make recognition easier (ToDo)
|
||||
*/
|
||||
#if 0
|
||||
if(get_bw((3*x0+x1)/4,(x0+3*x1)/4,y1+y1-y0+1,y1+8*(y1-y0+1),pp,cs,1))
|
||||
continue; /* this idea was to simple, see kscan003.jpg sample */
|
||||
#endif
|
||||
/* remove from average */
|
||||
job->res.numC--;
|
||||
job->res.sumX-=x1-x0+1;
|
||||
job->res.sumY-=y1-y0+1;
|
||||
/* remove pixels (should only be done with dust) */
|
||||
for(x=x0;x<=x1;x++)
|
||||
for(y=y0;y<=y1;y++){ put(pp,x,y,0,255&~7); }
|
||||
/* remove from list */
|
||||
list_del(&(job->res.boxlist),box2);
|
||||
/* free memory */
|
||||
free_box(box2);
|
||||
i++; /* count as dust particle */
|
||||
continue;
|
||||
}
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
if(vvv)fprintf(stderr," %3d cluster removed, nC= %3d\n",i,job->res.numC);
|
||||
}
|
||||
/* reset dots to 0 and remove white pixels (new) */
|
||||
i=0;
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box2 = ((struct box *)list_get_current(&(job->res.boxlist)));
|
||||
if (box2->frame_vol[0]<0) continue; /* for black areas only */
|
||||
x0=box2->x0;x1=box2->x1;y0=box2->y0;y1=box2->y1; /* box */
|
||||
if (x1-x0>16 && y1-y0>30) /* only on large enough chars */
|
||||
for(x=x0+1;x<=x1-1;x++)
|
||||
for(y=y0+1;y<=y1-1;y++){
|
||||
if( pixel_atp(pp,x ,y )>=cs
|
||||
&& pixel_atp(pp,x-1,y ) <cs
|
||||
&& pixel_atp(pp,x+1,y ) <cs
|
||||
&& pixel_atp(pp,x ,y-1) <cs
|
||||
&& pixel_atp(pp,x ,y+1) <cs ) /* remove it */
|
||||
{
|
||||
put(pp,x,y,0,0); i++; /* (x and 0) or 0 */
|
||||
}
|
||||
}
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
if (vvv) fprintf(stderr,"# ... %3d white pixels removed, cs=%d nC= %3d\n",
|
||||
i,cs,job->res.numC);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---- smooth big chars ---------------------------------
|
||||
* Big chars often do not have smooth borders, which let fail
|
||||
* the engine. Here we smooth the borders of big chars (>7x16).
|
||||
* Smoothing is important for b/w scans, where we often have
|
||||
* comb like pattern on a vertical border. I also received
|
||||
* samples with lot of white pixels (sample: 04/02/25).
|
||||
* ToDo: obsolete if vector code is complete
|
||||
*/
|
||||
int smooth_borders( job_t *job ){
|
||||
pix *pp = &job->src.p;
|
||||
int ii=0,x,y,x0,x1,y0,y1,dx,dy,cs,i0,i1,i2,i3,i4,n1,n2,
|
||||
cn[8],cm,vvv=job->cfg.verbose; /* dust found */
|
||||
struct box *box2;
|
||||
cs=job->cfg.cs; n1=n2=0;
|
||||
if(vvv){ fprintf(stderr,"# smooth big chars 7x16 cs=%d",cs); }
|
||||
/* filter for each big box */
|
||||
for_each_data(&(job->res.boxlist)) { n2++; /* count boxes */
|
||||
box2 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
/* do not touch small characters! but how we define small characters? */
|
||||
if (box2->x1-box2->x0+1<7 || box2->y1-box2->y0+1<16 ) continue;
|
||||
if (box2->c==PICTURE) continue;
|
||||
if (mean_thickness(box2)<3) continue;
|
||||
n1++; /* count boxes matching big-char criteria */
|
||||
x0=box2->x0; y0=box2->y0;
|
||||
x1=box2->x1; y1=box2->y1;
|
||||
dx=x1-x0+1; dy=y1-y0-1;
|
||||
/* out_x(box2);
|
||||
* dont change to much! only change if absolutely sure!
|
||||
* ....... 1 2 3
|
||||
* ex: .?##### 0 * 4
|
||||
* ....... 7 6 5
|
||||
* we should also avoid removing lines by sytematic remove
|
||||
* from left end to the right, so we concern also about distance>1
|
||||
*/
|
||||
for(x=box2->x0;x<=box2->x1;x++)
|
||||
for(y=box2->y0;y<=box2->y1;y++){ /* filter out high frequencies */
|
||||
/* this is a very primitive solution, only for learning */
|
||||
cn[0]=getpixel(pp,x-1,y);
|
||||
cn[4]=getpixel(pp,x+1,y); /* horizontal */
|
||||
cn[2]=getpixel(pp,x,y-1);
|
||||
cn[6]=getpixel(pp,x,y+1); /* vertical */
|
||||
cn[1]=getpixel(pp,x-1,y-1);
|
||||
cn[3]=getpixel(pp,x+1,y-1); /* diagonal */
|
||||
cn[7]=getpixel(pp,x-1,y+1);
|
||||
cn[5]=getpixel(pp,x+1,y+1);
|
||||
cm=getpixel(pp,x,y);
|
||||
/* check for 5 other and 3 same surrounding pixels */
|
||||
for (i0=0;i0<8;i0++)
|
||||
if ((cn[i0 ]<cs)==(cm<cs)
|
||||
&& (cn[(i0+7) & 7]<cs)!=(cm<cs)) break; /* first same */
|
||||
for (i1=0;i1<8;i1++)
|
||||
if ((cn[(i0+i1) & 7]<cs)!=(cm<cs)) break; /* num same */
|
||||
for (i2=0;i2<8;i2++)
|
||||
if ((cn[(i0+i1+i2) & 7]<cs)==(cm<cs)) break; /* num other */
|
||||
cn[0]=getpixel(pp,x-2,y);
|
||||
cn[4]=getpixel(pp,x+2,y); /* horizontal */
|
||||
cn[2]=getpixel(pp,x,y-2);
|
||||
cn[6]=getpixel(pp,x,y+2); /* vertical */
|
||||
cn[1]=getpixel(pp,x-2,y-2);
|
||||
cn[3]=getpixel(pp,x+2,y-2); /* diagonal */
|
||||
cn[7]=getpixel(pp,x-2,y+2);
|
||||
cn[5]=getpixel(pp,x+2,y+2);
|
||||
/* check for 5 other and 3 same surrounding pixels */
|
||||
for (i0=0;i0<8;i0++)
|
||||
if ((cn[i0 ]<cs)==(cm<cs)
|
||||
&& (cn[(i0+7) & 7]<cs)!=(cm<cs)) break; /* first same */
|
||||
for (i3=0;i3<8;i3++)
|
||||
if ((cn[(i0+i3) & 7]<cs)!=(cm<cs)) break; /* num same */
|
||||
for (i4=0;i4<8;i4++)
|
||||
if ((cn[(i0+i3+i4) & 7]<cs)==(cm<cs)) break; /* num other */
|
||||
if (i1<=3 && i2>=5 && i3>=3 && i4>=3) { /* change only on borders */
|
||||
ii++; /* white : black */
|
||||
put(pp,x,y,7,((cm<cs)?(cs|32):cs/2)&~7);
|
||||
#if 0
|
||||
printf(" x y i0 i1 i2 i3 i4 cm new cs %3d %3d"
|
||||
" %3d %3d %3d %3d %3d %3d %3d %3d\n",
|
||||
x-box2->x0,y-box2->y0,i0,i1,i2,i3,i3,cm,getpixel(pp,x,y),cs);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if 0 /* debugging */
|
||||
out_x(box2);
|
||||
#endif
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
if(vvv)fprintf(stderr," ... %3d changes in %d of %d\n",ii,n1,n2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* test if a corner of box1 is within box2 */
|
||||
int box_nested( struct box *box1, struct box *box2){
|
||||
/* box1 in box2, +1..-1 frame for pixel-patterns */
|
||||
if ( ( ( box1->x0>=box2->x0-1 && box1->x0<=box2->x1+1 )
|
||||
|| ( box1->x1>=box2->x0-1 && box1->x1<=box2->x1+1 ) )
|
||||
&& ( ( box1->y0>=box2->y0-1 && box1->y0<=box2->y1+1 )
|
||||
|| ( box1->y1>=box2->y0-1 && box1->y1<=box2->y1+1 ) ) )
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* test if box1 is within box2 */
|
||||
int box_covered( struct box *box1, struct box *box2){
|
||||
/* box1 in box2, +1..-1 frame for pixel-patterns */
|
||||
if ( ( box1->x0>=box2->x0-1 && box1->x1<=box2->x1+1 )
|
||||
&& ( box1->y0>=box2->y0-1 && box1->y1<=box2->y1+1 ) )
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---- remove pictures ------------------------------------------
|
||||
* may be, not deleting or moving to another list is much better!
|
||||
* should be renamed to remove_pictures and border boxes
|
||||
*/
|
||||
/*
 * remove_pictures - drop or merge boxes marked PICTURE in job->res.boxlist.
 *
 * Passes, in order:
 *  1. (only if job->res.numC > 8) a PICTURE box without alternative chars
 *     (num_ac==0) that is larger than half the image in both directions is
 *     deleted when more than 8 other boxes satisfy box_nested(other, box):
 *     it is taken to be a table frame.
 *  2. (only if job->res.numC > 1) a PICTURE box that is not larger than half
 *     the image in both directions, touches more than 2 image borders and has
 *     more than 2 of its 4 corner pixels below cfg.cs is deleted.
 *  3. For every PICTURE box with num_ac==0, other such PICTURE boxes that are
 *     nested with it and are either of unusual size (compared with
 *     job->res.avX/avY) or completely covered are merged into it: the box is
 *     grown to the union and the other box is deleted. This repeats until
 *     nothing more is merged.
 *
 * Counts and the final picture list go to stderr when job->cfg.verbose is
 * set. job->res.numC is only read here, never updated. Always returns 0.
 */
int remove_pictures( job_t *job){
|
||||
struct box *box4,*box2;
|
||||
int j=0, j2=0, num_del=0;
|
||||
|
||||
if (job->cfg.verbose)
|
||||
fprintf(stderr, "# "__FILE__" L%d: remove pictures\n# ...",
|
||||
__LINE__);
|
||||
|
||||
/* ToDo: output a list for picture handle scripts */
|
||||
j=0; j2=0;
|
||||
if(job->cfg.verbose)
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box4 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
if (box4->c==PICTURE) j++; else j2++;
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
if (job->cfg.verbose)
|
||||
fprintf(stderr," status: pictures= %d other= %d nC= %d\n# ...",
|
||||
j, j2, job->res.numC);
|
||||
|
||||
/* remove table frames */
|
||||
if (job->res.numC > 8)
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box2 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
if (box2->c==PICTURE
|
||||
&& box2->num_ac==0 /* dont remove barcodes */
|
||||
&& box2->x1-box2->x0+1>box2->p->x/2 /* big table? */
|
||||
&& box2->y1-box2->y0+1>box2->p->y/2 ){ j=0;
|
||||
/* count boxes nested with the picture */
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box4 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
if( box4 != box2 ) /* not count itself */
|
||||
if (box_nested(box4,box2)) j++; /* box4 in box2 */
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
if( j>8 ){ /* remove box if more than 8 chars are within box */
|
||||
list_del(&(job->res.boxlist), box2); /* does not work proper ?! */
|
||||
free_box(box2); num_del++;
|
||||
}
|
||||
}
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
if (job->cfg.verbose)
|
||||
fprintf(stderr, " deleted= %d pictures (table frames)\n# ...",
|
||||
num_del);
|
||||
num_del=0;
|
||||
|
||||
/* remove dark-border-boxes (typical for hard copy of book site,
|
||||
* or spam random border) */
|
||||
if (job->res.numC > 1) /* dont remove the only char */
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box2 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
if (box2->c!=PICTURE) continue; // ToDo: PICTUREs set already?
|
||||
if ( box2->x1-box2->x0+1 > box2->p->x/2
|
||||
&& box2->y1-box2->y0+1 > box2->p->y/2 ) continue;
|
||||
/* j first counts how many image borders the box touches ... */
j=0;
|
||||
if (box2->x0==0) j++;
|
||||
if (box2->y0==0) j++; /* on border? */
|
||||
if (box2->x1==box2->p->x-1) j++;
|
||||
if (box2->y1==box2->p->y-1) j++;
|
||||
if (j>2){ /* ToDo: check corner pixel */
|
||||
int cs=job->cfg.cs;
|
||||
/* ... and is then reused to count corner pixels below the threshold */
j=0;
|
||||
if (getpixel(box2->p,box2->x0,box2->y0)<cs) j++;
|
||||
if (getpixel(box2->p,box2->x1,box2->y0)<cs) j++;
|
||||
if (getpixel(box2->p,box2->x0,box2->y1)<cs) j++;
|
||||
if (getpixel(box2->p,box2->x1,box2->y1)<cs) j++;
|
||||
if (j>2) {
|
||||
list_del(&(job->res.boxlist), box2);
|
||||
free_box(box2); num_del++;
|
||||
}
|
||||
}
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
if (job->cfg.verbose)
|
||||
fprintf(stderr, " deleted= %d pictures (on border)\n# ...",
|
||||
num_del);
|
||||
num_del=0;
|
||||
|
||||
j=0; j2=0;
|
||||
if(job->cfg.verbose)
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box4 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
if( box4->c==PICTURE ) j++; else j2++;
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
if (job->cfg.verbose)
|
||||
fprintf(stderr," status: pictures= %d other= %d nC= %d\n# ...",
|
||||
j, j2, job->res.numC);
|
||||
|
||||
/* NOTE(review): the inner for(j=1;j;) always leaves j==0, so this outer
 * loop body appears to run at most once per traversal - confirm intent */
for(j=1;j;){ j=0; /* this is only because list_del does not work */
|
||||
/* can be slow on gray images */
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box2 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
if( box2->c==PICTURE && box2->num_ac==0)
|
||||
for(j=1;j;){ /* let it grow to max before leave */
|
||||
j=0; box4=NULL;
|
||||
/* find boxes nested with the picture and remove */
|
||||
/* its for pictures build by compounds */
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box4 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
if( box4!=box2 /* not destroy self */
|
||||
&& (box4->num_ac==0) /* dont remove barcodes etc. */
|
||||
&& (/* box4->c==UNKNOWN || */
|
||||
box4->c==PICTURE) ) /* dont remove valid chars */
|
||||
if(
|
||||
/* box4 in box2, +1..-1 frame for pixel-patterns */
|
||||
box_nested(box4,box2)
|
||||
/* or box2 in box4 */
|
||||
|| box_nested(box2,box4) /* same? */
|
||||
)
|
||||
if ( box4->x1-box4->x0+1>2*job->res.avX
|
||||
|| box4->x1-box4->x0+1<job->res.avX/2
|
||||
|| box4->y1-box4->y0+1>2*job->res.avY
|
||||
|| box4->y1-box4->y0+1<job->res.avY/2
|
||||
|| box_covered(box4,box2) ) /* box4 completely within box2 */
|
||||
/* dont remove chars! see rotate45.fig */
|
||||
{
|
||||
/* do not remove boxes in inner loop (bug?) ToDo: check why! */
|
||||
/* instead we leave inner loop and mark box4 as valid */
|
||||
/* grow box2 to the union of box2 and box4 */
if( box4->x0<box2->x0 ) box2->x0=box4->x0;
|
||||
if( box4->x1>box2->x1 ) box2->x1=box4->x1;
|
||||
if( box4->y0<box2->y0 ) box2->y0=box4->y0;
|
||||
if( box4->y1>box2->y1 ) box2->y1=box4->y1;
|
||||
j=1; /* mark box4 as valid */
|
||||
break; /* and leave inner loop */
|
||||
}
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
/* j!=0 means the traversal above stopped at a box4 that was merged */
if (j!=0 && box4!=NULL) { /* check for valid box4 */
|
||||
/* ToDo: melt */
|
||||
list_del(&(job->res.boxlist), box4); /* does not work proper ?! */
|
||||
free_box(box4); /* break; ToDo: necessary to leave after del??? */
|
||||
num_del++;
|
||||
}
|
||||
|
||||
}
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
}
|
||||
|
||||
if (job->cfg.verbose)
|
||||
fprintf(stderr, " deleted= %d nested pictures\n# ...", num_del);
|
||||
|
||||
/* output a list for picture handle scripts */
|
||||
j=0; j2=0;
|
||||
if(job->cfg.verbose)
|
||||
for_each_data(&(job->res.boxlist)) {
|
||||
box4 = (struct box *)list_get_current(&(job->res.boxlist));
|
||||
if( box4->c==PICTURE ) {
|
||||
fprintf(stderr," found picture at %4d %4d size %4d %4d\n# ...",
|
||||
box4->x0, box4->y0, box4->x1-box4->x0+1, box4->y1-box4->y0+1 );
|
||||
j++;
|
||||
} else j2++;
|
||||
} end_for_each(&(job->res.boxlist));
|
||||
if (job->cfg.verbose)
|
||||
fprintf(stderr," status: pictures= %d other= %d nC= %d\n",
|
||||
j, j2, job->res.numC);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ---- remove melted serifs --------------------------------- v0.2.5
|
||||
>>v<<
|
||||
##########.######## <-y0
|
||||
################### like X VW etc.
|
||||
...###.......###... <-y
|
||||
...###......###....
|
||||
j1 j2 j3
|
||||
- can generate new boxes if two characters were glued
|
||||
*/
|
||||
int remove_melted_serifs( pix *pp ){
|
||||
int x,y,j1,j2,j3,j4,i2,i3,i,ii,ni,cs,x0,x1,xa,xb,y0,y1,vvv=JOB->cfg.verbose;
|
||||
struct box *box2, *box3;
|
||||
progress_counter_t *pc = NULL;
|
||||
|
||||
cs=JOB->cfg.cs; i=0; ii=0; ni=0;
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
ni++;
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
pc = open_progress(ni,"remove_melted_serifs");
|
||||
ni = 0;
|
||||
|
||||
if(vvv){ fprintf(stderr,"# searching melted serifs ..."); }
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
if (box2->c != UNKNOWN) continue; /* dont try on pictures */
|
||||
x0=box2->x0; x1=box2->x1;
|
||||
y0=box2->y0; y1=box2->y1; /* box */
|
||||
/* upper serifs */
|
||||
for(j1=x0;j1+4<x1;){
|
||||
j1+=loop(pp,j1,y0 ,x1-x0,cs,0,RI);
|
||||
x =loop(pp,j1,y0 ,x1-x0,cs,1,RI); if(j1+x>x1+1) break;
|
||||
y =loop(pp,j1,y0+1,x1-x0,cs,1,RI); if(y>x) x=y; if(j1+x>x1+1) break;
|
||||
/* measure mean thickness of serif pos: (j1,y0)-(j1+x,y0) */
|
||||
for(j2=j3=j4=0,i2=j1;i2<j1+x;i2++){
|
||||
/* 2009-07: bug, j1 used instead of i2 */
|
||||
i3 =loop(pp,i2,y0 ,y1-y0,cs,0,DO); if(8*i3>y1-y0) break;
|
||||
i3+=loop(pp,i2,y0+i3,y1-y0,cs,1,DO); if(8*i3>y1-y0) continue;
|
||||
if(8*i3<y1-y0){ j2+=i3; j3++; } /* sum vert. thickness */
|
||||
} if(j3==0){ j1+=x; continue; } /* no serif, skip this object */
|
||||
y = y0+(j2+j3-1)/j3+(y1-y0+1)/32; /* y0 + mean thickness + dy/32 + 1 */
|
||||
if (vvv&1)
|
||||
fprintf(stderr, "\n# upper serif x0,y0,j1-x0+x,y-y0 %4d %4d %2d+%2d %2d",
|
||||
x0,y0,j1-x0,x,y-y0);
|
||||
|
||||
/* check if really melted serifs */
|
||||
if (loop(pp,j1,y,x1-x0,cs,0,RI)<1) { j1+=x; continue; }
|
||||
if(num_cross(j1 ,j1+x,y,y,pp,cs) < 2 ){ j1+=x;continue; }
|
||||
if (vvv&1)
|
||||
fprintf(stderr, " ok1");
|
||||
j2 = j1 + loop(pp,j1,y,x1-x0,cs,0,RI);
|
||||
j2 = j2 + loop(pp,j2,y,x1-x0,cs,1,RI);
|
||||
i3 = loop(pp,j2,y,x1-x0,cs,0,RI); if(i3<2){j1+=x;continue;}
|
||||
j2 += i3/2;
|
||||
j3 = j2 + loop(pp,j2,y ,x1-j2,cs,0,RI);
|
||||
i3 = j2 + loop(pp,j2,y+1,x1-j2,cs,0,RI); if(i3>j3)j3=i3;
|
||||
j3 = j3 + loop(pp,j3,y ,x1-j3,cs,1,RI);
|
||||
i3 = loop(pp,j3,y ,x1-j3,cs,0,RI);
|
||||
if(i3<2 || j3>=j1+x){j1+=x;continue;}
|
||||
j3 += i3/2;
|
||||
|
||||
if(x>5)
|
||||
{
|
||||
i++; /* snip! */
|
||||
for(y=0;y<(y1-y0+1+4)/8;y++)put(pp,j2,y0+y,255,128+64); /* clear highest bit */
|
||||
if(vvv&4){
|
||||
fprintf(stderr,"\n");
|
||||
out_x(box2);
|
||||
fprintf(stderr,"# melted serifs corrected on %d %d j1=%d j3=%d",
|
||||
j2-x0, y, j1-x0, j3-x0);
|
||||
// ToDo: vector cut with line from xa,ya to xb,yb
|
||||
// two frames of double melted MN become one frame if cut one
|
||||
// of the melted serifs (new function cut_frames_at_line())
|
||||
}
|
||||
for(xb=0,xa=0;xa<(x1-x0+4)/8;xa++){ /* detect vertical gap */
|
||||
i3=y1;
|
||||
if(box2->m3>y0 && 2*y1>box2->m3+box2->m4) i3=box2->m3; /* some IJ */
|
||||
if( loop(pp,j2-xa,i3,i3-y0,cs,0,UP) > (y1-y0+1)/2
|
||||
&& loop(pp,j2,(y0+y1)/2,xa+1,cs,0,LE) >=xa ){ xb=-xa; break; }
|
||||
if( loop(pp,j2+xa,i3,i3-y0,cs,0,UP) > (y1-y0+1)/2
|
||||
&& loop(pp,j2,(y0+y1)/2,xa+1,cs,0,RI) >=xa ){ xb= xa; break; }
|
||||
}
|
||||
if( get_bw(j2 ,j2 ,y0,(y0+y1)/2,pp,cs,1) == 0
|
||||
&& get_bw(j2+xb,j2+xb,(y0+y1)/2,i3,pp,cs,1) == 0 )
|
||||
{ /* divide */
|
||||
box3=malloc_box(box2);
|
||||
box3->x1=j2-1;
|
||||
box2->x0=j2+1; x1=box2->x1;
|
||||
cut_box(box2); /* cut vectors outside the box, see box.c */
|
||||
cut_box(box3);
|
||||
box3->num=JOB->res.numC;
|
||||
list_ins(&(JOB->res.boxlist),box2,box3); JOB->res.numC++; ii++; /* insert box3 before box2 */
|
||||
if(vvv&4) fprintf(stderr," => splitted");
|
||||
j1=x0=box2->x0; x=0; /* hopefully ok, UVW */
|
||||
}
|
||||
}
|
||||
j1+=x;
|
||||
}
|
||||
/* same on lower serifs -- change this later to better function
|
||||
// #### ###
|
||||
// #### v ### # <-y
|
||||
// #################### <-y1
|
||||
// j1 j2 j3
|
||||
*/
|
||||
for(j1=x0;j1<x1;){
|
||||
j1+=loop(pp,j1,y1 ,x1-x0,cs,0,RI);
|
||||
x =loop(pp,j1,y1 ,x1-x0,cs,1,RI); if(j1+x>x1+1) break;
|
||||
y =loop(pp,j1,y1-1,x1-x0,cs,1,RI); if(y>x) x=y; if(j1+x>x1+1) break;
|
||||
/* measure mean thickness of serif */
|
||||
for(j2=j3=j4=0,i2=j1;i2<j1+x;i2++){
|
||||
/* 2009-07: bug, j1 used instead of i2 */
|
||||
i3 =loop(pp,i2,y1 ,y1-y0,cs,0,UP); if(8*i3>y1-y0) break;
|
||||
i3+=loop(pp,i2,y1-i3,y1-y0,cs,1,UP); if(8*i3>y1-y0) continue;
|
||||
if(8*i3<y1-y0){ j2+=i3; j3++; }
|
||||
} if(j3==0){ j1+=x; continue; }
|
||||
y = y1-(j2+j3-1)/j3-(y1-y0+1)/32;
|
||||
if (vvv&1)
|
||||
fprintf(stderr, "\n# lower serif x0,y0,j1-x0+x,y1-y %4d %4d %2d+%2d %2d",
|
||||
x0,y0,j1-x0,x,y1-y);
|
||||
|
||||
/* check if really melted serifs */
|
||||
if( loop(pp,j1,y,x1-x0,cs,0,RI)<1 ) { j1+=x; continue; }
|
||||
if(num_cross(j1 ,j1+x,y,y,pp,cs) < 2 ){ j1+=x;continue; }
|
||||
if (vvv&1) fprintf(stderr, " ok1");
|
||||
j2 = j1 + loop(pp,j1,y,x1-x0,cs,0,RI);
|
||||
j2 = j2 + loop(pp,j2,y,x1-x0,cs,1,RI);
|
||||
i3 = loop(pp,j2,y,x1-x0,cs,0,RI); if(i3<2){j1+=x;continue;}
|
||||
j2 += i3/2;
|
||||
j3 = j2 + loop(pp,j2,y ,x1-j2,cs,0,RI);
|
||||
i3 = j2 + loop(pp,j2,y-1,x1-j2,cs,0,RI); if(i3>j3)j3=i3;
|
||||
j3 = j3 + loop(pp,j3,y ,x1-j3,cs,1,RI);
|
||||
i3 = loop(pp,j3,y,x1-j3,cs,0,RI);
|
||||
if(i3<2 || j3>=j1+x){j1+=x;continue;}
|
||||
j3 += i3/2;
|
||||
|
||||
/* y =y1-(y1-y0+1+4)/8; */
|
||||
if(x>5)
|
||||
{
|
||||
i++; /* snip! */
|
||||
for(i3=0;i3<(y1-y0+1+4)/8;i3++)
|
||||
put(pp,j2,y1-i3,255,128+64); /* clear highest bit */
|
||||
if(vvv&4){
|
||||
fprintf(stderr,"\n");
|
||||
out_x(box2);
|
||||
fprintf(stderr,"# melted serifs corrected on %d %d j1=%d j3=%d",j2-x0,y-y0,j1-x0,j3-x0);
|
||||
}
|
||||
for(xb=0,xa=0;xa<(x1-x0+4)/8;xa++){ /* detect vertical gap */
|
||||
if( loop(pp,j2-xa,y0,y1-y0,cs,0,DO) > (y1-y0+1)/2
|
||||
&& loop(pp,j2,(y0+y1)/2,xa+1,cs,0,LE) >=xa ){ xb=-xa; break; }
|
||||
if( loop(pp,j2+xa,y0,y1-y0,cs,0,DO) > (y1-y0+1)/2
|
||||
&& loop(pp,j2,(y0+y1)/2,xa+1,cs,0,RI) >=xa ){ xb= xa; break; }
|
||||
}
|
||||
if( get_bw(j2 ,j2 ,(y0+y1)/2,y1,pp,cs,1) == 0
|
||||
&& get_bw(j2+xb,j2+xb,y0,(y0+y1)/2,pp,cs,1) == 0 )
|
||||
{ /* divide */
|
||||
box3=malloc_box(box2);
|
||||
box3->x1=j2-1;
|
||||
box2->x0=j2; x1=box2->x1;
|
||||
cut_box(box2); /* cut vectors outside the box */
|
||||
cut_box(box3);
|
||||
box3->num=JOB->res.numC;
|
||||
list_ins(&(JOB->res.boxlist),box2,box3); JOB->res.numC++; ii++;
|
||||
/* box3,box2 in correct order??? */
|
||||
if(vvv&4) fprintf(stderr," => splitted");
|
||||
j1=x0=box2->x0; x=0; /* hopefully ok, NMK */
|
||||
}
|
||||
}
|
||||
j1+=x;
|
||||
}
|
||||
progress(ni++,pc);
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
close_progress(pc);
|
||||
if(vvv)fprintf(stderr," %3d cluster corrected, %d new boxes\n",i,ii);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* remove black borders often seen on bad scanned copies of books
|
||||
- dust around the border
|
||||
*/
|
||||
int remove_rest_of_dust() {
|
||||
int i1, i2, vvv = JOB->cfg.verbose, x0, x1, y0, y1, cnt=0;
|
||||
struct box *box2, *box4;
|
||||
progress_counter_t *pc = NULL;
|
||||
|
||||
i1 = i2 = 0; /* counter for removed boxes */
|
||||
if (vvv)
|
||||
fprintf(stderr, "# detect dust (avX,nC), ... ");
|
||||
/* remove fragments from border */
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
if (box2->c == UNKNOWN) {
|
||||
x0 = box2->x0; x1 = box2->x1;
|
||||
y0 = box2->y0; y1 = box2->y1; /* box */
|
||||
/* box in char ??? */
|
||||
if ( 2 * JOB->res.numC * (y1 - y0 + 1) < 3 * JOB->res.sumY
|
||||
&& ( y1 < box2->p->y/4 || y0 > 3*box2->p->y/4 ) /* not single line */
|
||||
&& JOB->res.numC > 1 /* do not remove everything */
|
||||
&& ( box2->m4 == 0 ) ) /* remove this */
|
||||
{
|
||||
JOB->res.numC--; /* ToDo: dont count tiny pixels */
|
||||
/* ToDo: res.sumX,Y must also be corrected */
|
||||
i1++;
|
||||
list_del(&(JOB->res.boxlist), box2);
|
||||
free_box(box2);
|
||||
}
|
||||
}
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
|
||||
pc = open_progress(JOB->res.boxlist.n,"remove_dust2");
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
progress(cnt++,pc);
|
||||
if (box2->c == PICTURE) continue;
|
||||
x0 = box2->x0; x1 = box2->x1;
|
||||
y0 = box2->y0; y1 = box2->y1; /* box */
|
||||
/* remove tiny box2 if to far away from bigger boxes */
|
||||
/* ToDo: remove clouds of tiny pixels (count near small, compare with num bigger) */
|
||||
/* 0.42: remove far away pixel? ToDo: do it at earlier? */
|
||||
if (x1-x0+1<3 && y1-y0+1<3){
|
||||
int xn, yn, xs, ys;
|
||||
int found=0; /* nearest bigger box */
|
||||
/* search near bigger box */
|
||||
for_each_data(&(JOB->res.boxlist)) {
|
||||
box4 = (struct box *)list_get_current(&(JOB->res.boxlist));
|
||||
if (found || box4 == box2) continue;
|
||||
if (box4->x1-box4->x0+1<3 && box4->y1-box4->y0+1<3) continue;
|
||||
xs = box4->x1-box4->x0+1;
|
||||
ys = box4->y1-box4->y0+1;
|
||||
xn = abs((box4->x0+box4->x1)/2 - box2->x0);
|
||||
yn = abs((box4->y0+box4->y1)/2 - box2->y0);
|
||||
if (2*xn < 3*xs && 2*yn < 3*ys) { found=1; }
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
if (!found) { /* found nothing, box2 to far from big boxes */
|
||||
i2++;
|
||||
list_del(&(JOB->res.boxlist), box2);
|
||||
free_box(box2);
|
||||
}
|
||||
}
|
||||
} end_for_each(&(JOB->res.boxlist));
|
||||
close_progress(pc);
|
||||
if (vvv)
|
||||
fprintf(stderr, " %3d + %3d boxes deleted, nC= %d ?\n",
|
||||
i1, i2, JOB->res.numC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
87
ActiveX/ASCOfficeUtils/GOCR/src/tga.c
Normal file
87
ActiveX/ASCOfficeUtils/GOCR/src/tga.c
Normal file
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
This is a Optical-Character-Recognition program
|
||||
Copyright (C) 1999 Joerg Schulenburg
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
see README for EMAIL-address
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "tga.h"
|
||||
|
||||
typedef unsigned char byte;
|
||||
|
||||
// --- needed for reading TGA-files
|
||||
#if 0
|
||||
/* Read the next byte from f1 and return it as a char.
 * No filtering is done here; end-of-file and stream errors are only
 * detected through assert().
 * Note: this helper is compiled out by the surrounding #if 0. */
char read_b(FILE *f1){
  const int got = fgetc(f1);

  assert(!feof(f1));
  assert(!ferror(f1));
  return (char)got;
}
|
||||
#endif
|
||||
|
||||
//byte tga[18]={ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,24,32};
|
||||
/* header_hex= 00 00 02 00 00 00 00 00 00 00 00 00 xl xh yl yh
|
||||
* 18 20 -- -- -- -- -- -- -- -- -- -- -- -- -- -- */
|
||||
|
||||
/* Read an uncompressed 24 bit TGA file into *p (see pcx.format.txt).
 *
 *   name  path of the file, opened with mode "rb"
 *   p     receives the pixel buffer (allocated with malloc, ownership goes
 *         to the caller), the dimensions x,y and bpp = 1+2*mode
 *   mode  0 = gray (mean of the three channels), 1 = RGB
 *
 * Expected 18 byte header:
 *   00 00 02 00 00 00 00 00 00 00 00 00 xl xh yl yh 18 20
 * The header layout and the mode are checked with assert(), as before.
 * Conditions the function can not continue from (open error, short read,
 * out of memory, size overflow) print a message and call abort(), so they
 * are caught even if the file is compiled with NDEBUG.
 *
 * In gray mode the buffer keeps its size of 3*x*y bytes, only the first
 * x*y bytes hold the result.
 */
void readtga(char *name,pix *p,int mode){
  int nx,ny;
  size_t npix,i,nread;
  FILE *f1;
  unsigned char *pic,h[18];

  f1=fopen(name,"rb");
  if(!f1){ fprintf(stderr," error opening file\n"); abort(); }

  /* the read must not live inside assert(): with NDEBUG the whole
     expression would vanish and h[] would stay uninitialized */
  nread=fread(h,1,18,f1);               /* read 18 bytes -> h[] */
  if(nread!=18){ fprintf(stderr," error reading TGA header\n"); abort(); }

  assert(h[ 0]== 0);    // TGA0
  assert(h[ 1]== 0);    // TGA1
  assert(h[ 2]== 2);    // TGA2 no run length encoding
  for(i=3;i<12;i++)
    assert(h[ i]== 0);  // ???
  assert(h[16]==0x18);  // TGA16, 0x18 = 24, 3 bytes per pixel are read below
  assert(h[17]==0x20);  // TGA17

  nx = h[12] + (h[13]<<8);      /* x-dimension low high */
  ny = h[14] + (h[15]<<8);      /* y-dimension low high */
  fprintf(stderr,"# TGA version=%d x=%d y=%d", h[2],nx,ny );
  fflush(stdout);

  /* nx,ny are up to 65535 each, 3*nx*ny does not fit into an int and
     may not even fit into a 32 bit size_t, so compute and check in size_t */
  npix=(size_t)nx*(size_t)ny;
  if(npix>((size_t)-1)/3){ fprintf(stderr," TGA image too large\n"); abort(); }
  pic=(unsigned char *)malloc( 3*npix );
  if(pic==NULL){ fprintf(stderr," out of memory\n"); abort(); }

  /* read all lines BGR, one item per line */
  nread=fread(pic,3*(size_t)nx,(size_t)ny,f1);
  if(nread!=(size_t)ny){ fprintf(stderr," error reading TGA data\n"); abort(); }
  fclose(f1);

  if(mode==0)
  {
    /* BGR => gray, in place; byte i is written after bytes 3*i..3*i+2
       have been read, so no unread input is overwritten */
    for(i=0;i<npix;i++)
      pic[i]=(unsigned char)((pic[i*3+0]+pic[i*3+1]+pic[i*3+2])/3);
  }
  else
  if(mode==1)
  {
    unsigned char b;
    for(i=0;i<npix;i++)         /* BGR => RGB, swap first and third byte */
    { b=pic[i*3+0]; pic[i*3+0]=pic[i*3+2]; pic[i*3+2]=b; }
  }
  else assert(0);       // wrong mode

  p->p=pic; p->x=nx; p->y=ny; p->bpp=1+2*mode;
  fprintf(stderr," mode=%d\n",mode);
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
1314
ActiveX/ASCOfficeUtils/GOCR/src/unicode.c
Normal file
1314
ActiveX/ASCOfficeUtils/GOCR/src/unicode.c
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user