init repo

This commit is contained in:
nikolay ivanov
2014-07-05 18:22:49 +00:00
commit a8be6b9e72
17348 changed files with 9229832 additions and 0 deletions

View File

@@ -0,0 +1,36 @@

Microsoft Visual Studio Solution File, Format Version 10.00
# Visual Studio 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ASCOfficeUtils", "ASCOfficeUtils\ASCOfficeUtils.vcproj", "{6215E3BF-2D42-40FB-B951-B8C448A596D2}"
ProjectSection(ProjectDependencies) = postProject
{DD328E05-26BE-4C81-A13E-489D15321212} = {DD328E05-26BE-4C81-A13E-489D15321212}
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60} = {56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GOCR", "GOCR\GOCR.vcproj", "{DD328E05-26BE-4C81-A13E-489D15321212}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PNM", "PNM\PNM.vcproj", "{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.Debug|Win32.ActiveCfg = Debug|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.Debug|Win32.Build.0 = Debug|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.Release|Win32.ActiveCfg = Release|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.Release|Win32.Build.0 = Release|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.Debug|Win32.ActiveCfg = Debug|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.Debug|Win32.Build.0 = Debug|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.Release|Win32.ActiveCfg = Release|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.Release|Win32.Build.0 = Release|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.Debug|Win32.ActiveCfg = Debug|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.Debug|Win32.Build.0 = Debug|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.Release|Win32.ActiveCfg = Release|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,59 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#pragma once
// Owning wrapper around a Win32 CRITICAL_SECTION.
//
// The section is initialized in the constructor and deleted in the
// destructor. Enter() and Leave() forward directly to EnterCriticalSection
// and LeaveCriticalSection on the wrapped object.
class AVSOfficeCriticalSection
{
private:
	CRITICAL_SECTION CriticalSection;

	// An initialized CRITICAL_SECTION must not be copied, and a copied wrapper
	// would call DeleteCriticalSection a second time on the same data.
	// Copying is therefore disabled: both members are declared private and
	// are intentionally never defined.
	AVSOfficeCriticalSection( const AVSOfficeCriticalSection& );
	AVSOfficeCriticalSection& operator=( const AVSOfficeCriticalSection& );

public:
	// Initializes the wrapped critical section.
	AVSOfficeCriticalSection()
	{
		InitializeCriticalSection( &(this->CriticalSection) );
	}

	// Deletes the wrapped critical section.
	~AVSOfficeCriticalSection()
	{
		DeleteCriticalSection( &(this->CriticalSection) );
	}

	// Acquires the critical section (forwards to EnterCriticalSection).
	void Enter()
	{
		EnterCriticalSection( &(this->CriticalSection) );
	}

	// Releases the critical section (forwards to LeaveCriticalSection).
	void Leave()
	{
		LeaveCriticalSection( &(this->CriticalSection) );
	}
};

View File

@@ -0,0 +1,49 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "stdafx.h"
#include "resource.h"
// Attributed ATL module declaration for the ASCOfficeUtils DLL.
// The `module` attribute supplies the type library UUID, name and help
// string, and names the registry-script resource ("IDR_ASCOFFICEUTILS",
// declared as a REGISTRY resource in ASCOfficeUtils.rc).
// The class body is intentionally empty; it only carries the attribute.
[ module(dll, uuid = "{92F87FA9-D3C2-4820-82F6-DEAEC0A3539D}",
name = "ASCOfficeUtils",
helpstring = "ASCOfficeUtils 1.0 Type Library",
resource_name = "IDR_ASCOFFICEUTILS") ]
class CAVSOfficeUtilsModule
{
public:
};
#include "OfficeUtils.h"
#include "OfficeOCR.h"

View File

@@ -0,0 +1,103 @@
//Microsoft Visual C++ generated resource script.
//
#include "resource.h"
#include "version.h"
#define COMPONENT_NAME "OfficeUtils"
#include "../../Common/FileInfo.h"
#define APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 2 resource.
//
#include "winres.h"
/////////////////////////////////////////////////////////////////////////////
#undef APSTUDIO_READONLY_SYMBOLS
#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
LANGUAGE 9, 1
#pragma code_page(1251)
#ifdef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// TEXTINCLUDE
//
1 TEXTINCLUDE
BEGIN
"resource.h\0"
END
2 TEXTINCLUDE
BEGIN
"#include ""winres.h""\r\n"
"\0"
END
#endif // APSTUDIO_INVOKED
#ifndef _MAC
/////////////////////////////////////////////////////////////////////////////
//
// Version
//
// Version resource embedded in the DLL.
// INTVER / STRVER and the COMPANY_NAME, FILE_DESCRIPTION_ACTIVEX,
// LEGAL_COPYRIGHT and COMPONENT_FILE_NAME_DLL macros are not defined in this
// script; presumably they come from version.h and ../../Common/FileInfo.h
// (TODO confirm).
VS_VERSION_INFO VERSIONINFO
FILEVERSION INTVER
PRODUCTVERSION INTVER
FILEFLAGSMASK 0x3fL // VS_FFI_FILEFLAGSMASK
#ifdef _DEBUG
FILEFLAGS 0x1L // VS_FF_DEBUG
#else
FILEFLAGS 0x0L // no flags in non-debug builds
#endif
FILEOS 0x4L // VOS__WINDOWS32
FILETYPE 0x2L // VFT_DLL
FILESUBTYPE 0x0L // VFT2_UNKNOWN
BEGIN
BLOCK "StringFileInfo"
BEGIN
// "040904B0": language 0x0409 (U.S. English), code page 0x04B0 (1200, Unicode)
BLOCK "040904B0"
BEGIN
VALUE "CompanyName", COMPANY_NAME
VALUE "FileDescription", FILE_DESCRIPTION_ACTIVEX
VALUE "FileVersion", STRVER
VALUE "LegalCopyright", LEGAL_COPYRIGHT
VALUE "InternalName", COMPONENT_FILE_NAME_DLL
VALUE "OriginalFilename", COMPONENT_FILE_NAME_DLL
VALUE "ProductName", FILE_DESCRIPTION_ACTIVEX
VALUE "ProductVersion", STRVER
// Presence of this (empty) value marks the DLL as self-registering.
VALUE "OLESelfRegister", ""
END
END
BLOCK "VarFileInfo"
BEGIN
// Must match the StringFileInfo block name above: language 0x0409, code page 0x04B0.
VALUE "Translation", 0x0409, 0x04B0
END
END
#endif // !_MAC
/////////////////////////////////////////////////////////////////////////////
//
// String Table
//
STRINGTABLE
BEGIN
IDS_PROJNAME "ASCOfficeUtils"
END
IDR_ASCOFFICEUTILS REGISTRY "ASCOfficeUtils.rgs"
////////////////////////////////////////////////////////////////////////////
#endif
#ifndef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
#endif // not APSTUDIO_INVOKED

View File

@@ -0,0 +1,11 @@
HKCR
{
NoRemove AppID
{
'%APPID%' = s 'ASCOfficeUtils'
'ASCOfficeUtils.DLL'
{
val AppID = s '%APPID%'
}
}
}

View File

@@ -0,0 +1,688 @@
<?xml version="1.0" encoding="windows-1251"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9,00"
Name="ASCOfficeUtils"
ProjectGUID="{6215E3BF-2D42-40FB-B951-B8C448A596D2}"
RootNamespace="ASCOfficeUtils"
Keyword="AtlProj"
TargetFrameworkVersion="131072"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="2"
UseOfATL="1"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="_DEBUG"
MkTypLibCompatible="false"
TargetEnvironment="1"
GenerateStublessProxies="true"
TypeLibraryName="$(IntDir)/ASCOfficeUtils.tlb"
HeaderFileName="ASCOfficeUtils.h"
DLLDataFileName=""
InterfaceIdentifierFileName="ASCOfficeUtils_i.c"
ProxyFileName="ASCOfficeUtils_p.c"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="&quot;..\ZLIB\zlib-1.2.3\contrib\minizip&quot;;&quot;..\ZLIB\zlib-1.2.3&quot;;&quot;..\GOCR\headers&quot;;&quot;..\GOCR\include&quot;;&quot;..\PNM\headers&quot;"
PreprocessorDefinitions="WIN32;_WINDOWS;_DEBUG;_USRDLL;_ATL_ATTRIBUTES;ZLIB_WINAPI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="2"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1033"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
IgnoreImportLibrary="true"
OutputFile="$(OutDir)/ASCOfficeUtils.dll"
LinkIncremental="2"
AdditionalLibraryDirectories="..\ZLIB\zlib123dll\static32"
IgnoreDefaultLibraryNames="LIBC.LIB"
MergedIDLBaseFileName="_ASCOfficeUtils.idl"
GenerateDebugInformation="true"
SubSystem="2"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
ImportLibrary="$(OutDir)/ASCOfficeUtils.lib"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
Description="Performing registration"
CommandLine="regsvr32 /s /c &quot;$(TargetPath)&quot;"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="2"
UseOfATL="1"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
CommandLine="..\..\..\..\Redist\VersionControl.exe $(ProjectDir)version.h"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="NDEBUG"
MkTypLibCompatible="false"
TargetEnvironment="1"
GenerateStublessProxies="true"
TypeLibraryName="$(IntDir)/ASCOfficeUtils.tlb"
HeaderFileName="ASCOfficeUtils.h"
DLLDataFileName=""
InterfaceIdentifierFileName="ASCOfficeUtils_i.c"
ProxyFileName="ASCOfficeUtils_p.c"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="&quot;..\ZLIB\zlib-1.2.3\contrib\minizip&quot;;&quot;..\ZLIB\zlib-1.2.3&quot;;&quot;..\GOCR\headers&quot;;&quot;..\GOCR\include&quot;;&quot;..\PNM\headers&quot;"
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES;ZLIB_WINAPI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_DEPRECATE"
RuntimeLibrary="2"
UsePrecompiledHeader="2"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1033"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
RegisterOutput="true"
IgnoreImportLibrary="true"
OutputFile="..\$(ProjectName).dll"
LinkIncremental="1"
AdditionalLibraryDirectories="..\ZLIB\zlib123dll\static32"
IgnoreDefaultLibraryNames="LIBC.LIB"
MergedIDLBaseFileName="_ASCOfficeUtils.idl"
GenerateDebugInformation="true"
ProgramDatabaseFile="$(OutDir)/$(TargetName).pdb"
SubSystem="2"
OptimizeReferences="2"
EnableCOMDATFolding="2"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
ImportLibrary="$(OutDir)/ASCOfficeUtils.lib"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
Description="Performing registration"
CommandLine="regsvr32 /s /c &quot;$(TargetPath)&quot;"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\ASCOfficeUtils.cpp"
>
</File>
<File
RelativePath=".\CallbackHelpers.cpp"
>
</File>
<File
RelativePath=".\OfficeOCR.cpp"
>
</File>
<File
RelativePath=".\OfficeUtils.cpp"
>
</File>
<File
RelativePath=".\stdafx.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="1"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="1"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\UniversalString.cpp"
>
</File>
<File
RelativePath=".\ZipUtils.cpp"
>
</File>
<Filter
Name="zlib"
>
<File
RelativePath="..\ZLIB\zlib-1.2.3\adler32.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\compress.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\crc32.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\deflate.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\contrib\masmx86\gvmat32c.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\gzio.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\infback.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\contrib\masmx64\inffas8664.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\inffast.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\inflate.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\inftrees.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\contrib\minizip\ioapi.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\trees.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\uncompr.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\contrib\minizip\unzip.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\contrib\minizip\zip.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\ZLIB\zlib-1.2.3\zutil.c"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
</Filter>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\ASCOfficeCriticalSection.h"
>
</File>
<File
RelativePath=".\CallbackHelpers.h"
>
</File>
<File
RelativePath=".\CSLocker.h"
>
</File>
<File
RelativePath=".\OfficeOCR.h"
>
</File>
<File
RelativePath=".\OfficeUtils.h"
>
</File>
<File
RelativePath=".\Resource.h"
>
</File>
<File
RelativePath=".\stdafx.h"
>
</File>
<File
RelativePath=".\UniversalString.h"
>
</File>
<File
RelativePath=".\version.h"
>
</File>
<File
RelativePath=".\ZipUtils.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
<File
RelativePath=".\ASCOfficeUtils.rc"
>
</File>
<File
RelativePath=".\ASCOfficeUtils.rgs"
>
</File>
</Filter>
<Filter
Name="Interfaces"
>
<File
RelativePath=".\IOfficeOCR.h"
>
</File>
</Filter>
<File
RelativePath=".\ReadMe.txt"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,52 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#pragma once
#include "ASCOfficeCriticalSection.h"
class CSLocker
{
public:
CSLocker(AVSOfficeCriticalSection &critical_section) : cs(critical_section)
{
cs.Enter();
}
~CSLocker(void)
{
cs.Leave();
}
private:
AVSOfficeCriticalSection &cs;
};

View File

@@ -0,0 +1,64 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "StdAfx.h"
#include "CallbackHelpers.h"
#include "OfficeUtils.h"
// Remembers the event receiver; the raw pointer is stored as-is (may be NULL).
ExtractedFileCallback::ExtractedFileCallback(IExtractedFileEvent* data_receiver)
	: invokable_object(data_receiver)
{
}
// Forwards an extracted file (name plus data array) to the receiver's
// ExtractedFile method. Does nothing when no receiver was supplied.
void ExtractedFileCallback::Invoke(_bstr_t file_name, SAFEARRAY** arr)
{
	if(!invokable_object)
	{
		return;
	}

	invokable_object->ExtractedFile(file_name, arr);
}
// Remembers the event receiver; the raw pointer is stored as-is (may be NULL).
RequestFileCallback::RequestFileCallback(IRequestFileEvent* data_receiver)
	: invokable_object(data_receiver)
{
}
// Asks the receiver for the next file via its RequestFile method.
//
// file_name - passed through to the receiver (in/out BSTR pointer).
// arr       - passed through to the receiver (in/out SAFEARRAY pointer).
//
// Returns true only when the receiver reported VARIANT_TRUE through its
// "data attached" out-parameter. Returns false when no receiver was supplied;
// previously that path fell off the end of the function without returning a
// value, which is undefined behaviour.
bool RequestFileCallback::Invoke(BSTR* file_name, SAFEARRAY** arr)
{
	if(!invokable_object)
	{
		return false;
	}

	// Start from VARIANT_FALSE so that a receiver which leaves the
	// out-parameter untouched (e.g. on failure) is read as "no data attached"
	// instead of as an uninitialized value.
	VARIANT_BOOL is_data_attached = VARIANT_FALSE;
	invokable_object->RequestFile(file_name, arr, &is_data_attached);
	return VARIANT_TRUE == is_data_attached;
}

View File

@@ -0,0 +1,59 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#pragma once
// Forward declaration only; the interface is defined in another header.
__interface IExtractedFileEvent;

// Small adapter that hands extracted files to an IExtractedFileEvent receiver.
class ExtractedFileCallback
{
	// Receiver to notify; stored as a raw pointer and may be NULL.
	IExtractedFileEvent* invokable_object;

public:
	// Stores `data_receiver` for later use by Invoke().
	ExtractedFileCallback(IExtractedFileEvent* data_receiver);

	// Calls the receiver's ExtractedFile(file_name, arr) when a receiver is set.
	void Invoke(_bstr_t file_name, SAFEARRAY** arr);
};
// Forward declaration only; the interface is defined in another header.
__interface IRequestFileEvent;

// Small adapter that requests files from an IRequestFileEvent receiver.
class RequestFileCallback
{
	// Receiver to query; stored as a raw pointer and may be NULL.
	IRequestFileEvent* invokable_object;

public:
	// Stores `data_receiver` for later use by Invoke().
	RequestFileCallback(IRequestFileEvent* data_receiver);

	// When a receiver is set, calls its RequestFile(file_name, arr, ...) and
	// returns whether the receiver reported VARIANT_TRUE (data attached).
	bool Invoke(BSTR* file_name, SAFEARRAY** arr);
};

View File

@@ -0,0 +1,62 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#pragma once
// Dual (IDispatch-based) COM interface of the OCR component.
// Method order and dispatch ids are part of the binary/type-library contract;
// do not reorder or renumber.
[ object, uuid("804C1FF6-7020-477E-AD9F-980E9201C237"), dual, pointer_default(unique) ]
__interface IOfficeOCR : IDispatch
{
// Recognizes text in Image and returns it in Text.
// NOTE(review): COfficeOCR::Recognize queries Image for
// MediaCore::IAVSUncompressedVideoFrame — callers presumably must pass such a frame; confirm.
[id(1)] HRESULT Recognize([in] IUnknown *Image, [out, retval] BSTR* Text);
// Output format selector (COfficeOCR defaults to OUTPUT_FORMAT_TYPE_XML).
[id(2), propput]HRESULT OutputFormat([in] LONG Type);
[id(2), propget]HRESULT OutputFormat([ out, retval ] LONG *Type);
// Gray level setting (COfficeOCR defaults to 0).
[id(3), propput]HRESULT GrayLevel([in] LONG Type);
[id(3), propget]HRESULT GrayLevel([ out, retval ] LONG *Type);
// Dust size setting (COfficeOCR defaults to -1).
[id(4), propput]HRESULT DustSize([in] LONG Type);
[id(4), propget]HRESULT DustSize([ out, retval ] LONG *Type);
// Space width in dots (COfficeOCR defaults to 0).
[id(5), propput]HRESULT SpaceWidthDots([in] LONG Type);
[id(5), propget]HRESULT SpaceWidthDots([ out, retval ] LONG *Type);
// Certainty setting (COfficeOCR defaults to 95).
[id(6), propput]HRESULT Certainty([in] LONG Type);
[id(6), propget]HRESULT Certainty([ out, retval ] LONG *Type);
// Generic named-parameter accessors; the supported names are not visible here.
[id(100000)] HRESULT SetAdditionalParam([in] BSTR ParamName, [in] VARIANT ParamValue);
[id(100001)] HRESULT GetAdditionalParam([in] BSTR ParamName, [out, retval] VARIANT* ParamValue);
};

View File

@@ -0,0 +1,212 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "stdafx.h"
#include "../../Common/ASCUtils.h"
#include "../../Common/MediaFormatDefine.h"
#include "OfficeOCR.h"
#include "ImageToPNM.h"
#include "gocr.h"
// Constructs the OCR object with its default recognition settings and passes
// the class / interface identifiers to the CAVSATLError base.
COfficeOCR::COfficeOCR ()
	: CAVSATLError ( __uuidof ( COfficeOCR ), __uuidof ( IOfficeOCR ) )
{
	// Output defaults to XML.
	m_lOutputFormatType	= OUTPUT_FORMAT_TYPE_XML;

	// Default recognition parameters (later handed to PNMToText by Recognize).
	m_lCertainty		= 95;
	m_lDustSize			= -1;
	m_lGrayLevel		= 0;
	m_lSpaceWidthDots	= 0;
}
// Post-construction hook; performs no additional initialization and always
// reports success.
HRESULT COfficeOCR::FinalConstruct()
{
return S_OK;
}
// Pre-destruction hook; there is nothing to release here.
void COfficeOCR::FinalRelease()
{
}
// Runs OCR over an uncompressed video frame and returns the recognized text.
//
// Image - object that must expose MediaCore::IAVSUncompressedVideoFrame.
// Text  - receives a newly allocated BSTR with the result. When the output
//         format is OUTPUT_FORMAT_TYPE_XML the text is wrapped in an XML
//         declaration and a <Text> element.
//
// Returns S_OK on success, or the HRESULT produced by CAVSATLError::Error
// (MEMORY for a NULL Text pointer or a failed PNM conversion, FILEFORMAT for
// a NULL/unsupported Image or a frame without pixel data).
//
// When BUILD_CONFIG_FULL_VERSION is not defined the method does nothing,
// leaves *Text untouched and returns S_OK.
STDMETHODIMP COfficeOCR::Recognize ( IUnknown *Image, BSTR* Text )
{
#ifdef BUILD_CONFIG_FULL_VERSION
    if ( NULL == Text )
        return CAVSATLError::Error ( MEMORY );
    *Text = NULL;
    if ( NULL == Image )
        return CAVSATLError::Error ( FILEFORMAT );

    MediaCore::IAVSUncompressedVideoFrame *pUncompressedVideoFrame = NULL;
    Image->QueryInterface ( &pUncompressedVideoFrame );
    if ( NULL == pUncompressedVideoFrame )
        return CAVSATLError::Error ( FILEFORMAT );

    LPBYTE pBuffer = NULL;   pUncompressedVideoFrame->get_Buffer ( &pBuffer );
    LONG lWidth = 0;         pUncompressedVideoFrame->get_Width ( &lWidth );
    LONG lHeight = 0;        pUncompressedVideoFrame->get_Height ( &lHeight );
    LONG lColorSpace = 0;    pUncompressedVideoFrame->get_ColorSpace ( &lColorSpace );

    // A frame without pixel data cannot be converted.
    if ( NULL == pBuffer )
    {
        RELEASEINTERFACE ( pUncompressedVideoFrame );
        return CAVSATLError::Error ( FILEFORMAT );
    }

    // Compare only the colour bits: the frame value may carry extra flags
    // (e.g. CSP_VFLIP) that must not influence the bits-per-pixel choice.
    const LONG lColorBits = lColorSpace & CSP_COLOR_MASK;

    // 32 bpp covers BGRA/ABGR/RGBA/ARGB and is also the fallback for any
    // colour space not listed below.
    LONG lBitCount = 32;
    if ( ( CSP_BGR & CSP_COLOR_MASK ) == lColorBits )
        lBitCount = 24;
    else if ( ( CSP_RGB555 & CSP_COLOR_MASK ) == lColorBits ||
              ( CSP_RGB565 & CSP_COLOR_MASK ) == lColorBits )
        lBitCount = 16;

    long lCount = 0;
    CHAR *pPBMImage = convertToPNM ( pBuffer, lWidth, lHeight, lBitCount, true, true, lCount );
    if ( NULL == pPBMImage )
    {
        RELEASEINTERFACE ( pUncompressedVideoFrame );
        return CAVSATLError::Error ( MEMORY );
    }

    CStringA sOutputFormat = "";
    switch ( m_lOutputFormatType )
    {
    case OUTPUT_FORMAT_TYPE_XML :
        sOutputFormat = "XML";
        break;
    case OUTPUT_FORMAT_TYPE_TEXT :
        sOutputFormat = "UTF8";
        break;
    }

    CHAR *pText = PNMToText ( pPBMImage, lCount, sOutputFormat.GetBuffer(), m_lGrayLevel, m_lDustSize, m_lSpaceWidthDots, m_lCertainty );
    sOutputFormat.ReleaseBuffer();   // pair the GetBuffer() above

    free ( pPBMImage );
    RELEASEINTERFACE ( pUncompressedVideoFrame );

    // NOTE(review): the "UTF8" format name suggests pText is UTF-8, while this
    // CString constructor converts with the ANSI code page - confirm whether
    // an explicit CP_UTF8 conversion is needed for non-ASCII text.
    CString sText ( pText );   // a NULL pText yields an empty string
    free ( pText );

    const bool bWrapInXml = ( OUTPUT_FORMAT_TYPE_XML == m_lOutputFormatType );

    CString sResult = _T("");
    if ( bWrapInXml )
    {
        sResult += _T("<?xml version=\"1.0\" encoding=\"utf-8\" ?>");
        sResult += _T ("<Text>");
    }
    sResult += sText;
    if ( bWrapInXml )
        sResult += _T ("</Text>");

    *Text = sResult.AllocSysString();
#endif
    return S_OK;
}
// Selects the output format. Only the values 0 and 1 are accepted
// (OUTPUT_FORMAT_TYPE_XML / OUTPUT_FORMAT_TYPE_TEXT); any other value is
// silently ignored. Always returns S_OK.
STDMETHODIMP COfficeOCR::put_OutputFormat ( LONG Type )
{
    const bool bOutOfRange = ( Type < 0 ) || ( Type >= 2 );
    if ( bOutOfRange )
        return S_OK;

    m_lOutputFormatType = Type;
    return S_OK;
}
// Returns the currently selected output format in *Type.
// Returns E_POINTER when Type is NULL, S_OK otherwise.
STDMETHODIMP COfficeOCR::get_OutputFormat ( LONG *Type )
{
    if ( NULL == Type )
        return E_POINTER;

    *Type = m_lOutputFormatType;
    return S_OK;
}
// Stores the gray level, clamped to the range [0, 255]. Always returns S_OK.
STDMETHODIMP COfficeOCR::put_GrayLevel ( LONG Type )
{
    LONG lClamped = Type;
    if ( lClamped < 0 )
        lClamped = 0;
    else if ( lClamped > 255 )
        lClamped = 255;

    m_lGrayLevel = lClamped;
    return S_OK;
}
// Returns the stored gray level in *Type.
// Returns E_POINTER when Type is NULL, S_OK otherwise.
STDMETHODIMP COfficeOCR::get_GrayLevel ( LONG *Type )
{
    if ( NULL == Type )
        return E_POINTER;

    // Must read the gray level member (the value written by put_GrayLevel),
    // not the dust size.
    *Type = m_lGrayLevel;
    return S_OK;
}
// Stores the dust size exactly as given (no range check). Always returns S_OK.
STDMETHODIMP COfficeOCR::put_DustSize ( LONG Type )
{
    const LONG lNewDustSize = Type;
    m_lDustSize = lNewDustSize;
    return S_OK;
}
// Returns the stored dust size in *Type.
// Returns E_POINTER when Type is NULL, S_OK otherwise.
STDMETHODIMP COfficeOCR::get_DustSize ( LONG *Type )
{
    if ( NULL == Type )
        return E_POINTER;

    // Must read the dust size member (the value written by put_DustSize),
    // not the gray level.
    *Type = m_lDustSize;
    return S_OK;
}
// Stores the space width in dots; negative values are replaced by 0.
// Always returns S_OK.
STDMETHODIMP COfficeOCR::put_SpaceWidthDots ( LONG Type )
{
    m_lSpaceWidthDots = ( Type < 0 ) ? 0 : Type;
    return S_OK;
}
// Returns the stored space width (in dots) in *Type.
// Returns E_POINTER when Type is NULL, S_OK otherwise.
STDMETHODIMP COfficeOCR::get_SpaceWidthDots ( LONG *Type )
{
    if ( NULL == Type )
        return E_POINTER;

    *Type = m_lSpaceWidthDots;
    return S_OK;
}
// Stores the certainty value, clamped to the range [0, 100].
// Always returns S_OK.
STDMETHODIMP COfficeOCR::put_Certainty ( LONG Type )
{
    m_lCertainty = Type;
    // Clamp the certainty itself; the space-width setting must not be
    // touched by this property.
    if ( 0 > m_lCertainty )
        m_lCertainty = 0;
    if ( 100 < m_lCertainty )
        m_lCertainty = 100;
    return S_OK;
}
// Returns the stored certainty value in *Type.
// Returns E_POINTER when Type is NULL, S_OK otherwise.
STDMETHODIMP COfficeOCR::get_Certainty ( LONG *Type )
{
    if ( NULL == Type )
        return E_POINTER;

    *Type = m_lCertainty;
    return S_OK;
}
// Placeholder: no additional parameters are supported. Both arguments are
// ignored and S_OK is returned.
STDMETHODIMP COfficeOCR::SetAdditionalParam ( BSTR ParamName, VARIANT ParamValue )
{
    (void) ParamName;
    (void) ParamValue;
    return S_OK;
}
// Placeholder: no additional parameters are supported. Both arguments are
// ignored (ParamValue is not written) and S_OK is returned.
STDMETHODIMP COfficeOCR::GetAdditionalParam ( BSTR ParamName, VARIANT *ParamValue )
{
    (void) ParamName;
    (void) ParamValue;
    return S_OK;
}

View File

@@ -0,0 +1,81 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#pragma once
#include "resource.h"
#include "IOfficeOCR.h"
#include "../../Common/ASCATLError.h"
// Values accepted by COfficeOCR::put_OutputFormat.
// XML: the recognized text is returned wrapped in an XML document.
#define OUTPUT_FORMAT_TYPE_XML 0
// TEXT: the recognized text is returned without any wrapper.
#define OUTPUT_FORMAT_TYPE_TEXT 1
// Apartment-threaded COM coclass implementing IOfficeOCR: recognizes text in
// an image frame and exposes the recognition settings as properties.
[ coclass, uuid("5E4BC6BB-26B4-4237-894E-2C872842A8EE"), threading(apartment), vi_progid("AVSOfficeUtils.OfficeOCR"), progid("AVSOfficeUtils.OfficeOCR.1"), version(1.0), support_error_info(IOfficeOCR) ]
class ATL_NO_VTABLE COfficeOCR : public IOfficeOCR, public CAVSATLError
{
public :
COfficeOCR();
DECLARE_PROTECT_FINAL_CONSTRUCT()
// ATL lifetime hooks.
HRESULT FinalConstruct();
void FinalRelease();
public :
// Performs OCR on Image and returns the result as a BSTR in *Text.
STDMETHOD ( Recognize )( IUnknown *Image, BSTR* Text );
// Output format: OUTPUT_FORMAT_TYPE_XML or OUTPUT_FORMAT_TYPE_TEXT.
STDMETHOD ( put_OutputFormat )( LONG Type );
STDMETHOD ( get_OutputFormat )( LONG *Type );
// Gray level; the setter clamps the value to [0, 255].
STDMETHOD ( put_GrayLevel )( LONG Type );
STDMETHOD ( get_GrayLevel )( LONG *Type );
// Dust size; stored as given.
STDMETHOD ( put_DustSize )( LONG Type );
STDMETHOD ( get_DustSize )( LONG *Type );
// Space width in dots; the setter replaces negative values with 0.
STDMETHOD ( put_SpaceWidthDots )( LONG Type );
STDMETHOD ( get_SpaceWidthDots )( LONG *Type );
// Certainty; the setter limits the value to at most 100.
STDMETHOD ( put_Certainty )( LONG Type );
STDMETHOD ( get_Certainty )( LONG *Type );
// Extension points; the current implementation ignores its arguments.
STDMETHOD ( SetAdditionalParam )( BSTR ParamName, VARIANT ParamValue );
STDMETHOD ( GetAdditionalParam )( BSTR ParamName, VARIANT *ParamValue );
private :
// Recognition settings; all are passed to the OCR engine by Recognize.
LONG m_lOutputFormatType;
LONG m_lGrayLevel;
LONG m_lDustSize;
LONG m_lSpaceWidthDots;
LONG m_lCertainty;
};

View File

@@ -0,0 +1,196 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "stdafx.h"
#include "OfficeUtils.h"
// Extracts the archive zipFile into the directory unzipDir, reporting
// progress through this object's OnProgress event.
//
// password             - forwarded to the unzip routine.
// extract_without_path - a value greater than 0 requests extraction
//                        without the stored paths.
//
// Returns S_OK when ZLibZipUtils::UnzipToDir reports 0, S_FALSE otherwise.
STDMETHODIMP COfficeUtils::ExtractToDirectory(BSTR zipFile, BSTR unzipDir, BSTR password, SHORT extract_without_path)
{
    ProgressCallback progress;
    progress.OnProgress = OnProgressFunc;
    progress.caller = this;

    const bool bWithoutPath = ( extract_without_path > 0 );

    return ( ZLibZipUtils::UnzipToDir( zipFile, unzipDir, &progress, password, bWithoutPath ) == 0 )
        ? S_OK
        : S_FALSE;
}
// Compresses a single file or a whole directory into outputFile.
//
// name  - path of the file or directory; it is looked up with FindFirstFile
//         to decide which of the two it is.
// level - compression level forwarded to the zip routine.
//
// Returns S_OK when the zip routine reports 0; S_FALSE when the path cannot
// be found or compression fails. Directory compression reports progress
// through this object's OnProgress event.
STDMETHODIMP COfficeUtils::CompressFileOrDirectory(BSTR name, BSTR outputFile, SHORT level)
{
    WIN32_FIND_DATA findData;
    HANDLE hFind = FindFirstFile( name, &findData );
    if ( INVALID_HANDLE_VALUE == hFind )
        return S_FALSE;

    bool bZipped = false;
    if ( 0 != ( findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) )
    {
        ProgressCallback progress;
        progress.OnProgress = OnProgressFunc;
        progress.caller = this;
        bZipped = ( ZLibZipUtils::ZipDir( name, outputFile, &progress, level ) == 0 );
    }
    else
    {
        bZipped = ( ZLibZipUtils::ZipFile( name, outputFile, level ) == 0 );
    }

    FindClose( hFind );
    return bZipped ? S_OK : S_FALSE;
}
// Decompresses sourceSize bytes from sourceBuf into destBuf by delegating to
// ZLibZipUtils::UncompressBytes; destSize is passed through as an in/out
// size. Returns S_OK when the helper reports Z_OK, S_FALSE otherwise.
STDMETHODIMP COfficeUtils::Uncompress(BYTE* destBuf, ULONG* destSize, BYTE* sourceBuf, ULONG sourceSize)
{
    const bool bOk = ( ZLibZipUtils::UncompressBytes( destBuf, destSize, sourceBuf, sourceSize ) == Z_OK );
    return bOk ? S_OK : S_FALSE;
}
// Compresses sourceSize bytes from sourceBuf into destBuf at the given level
// by delegating to ZLibZipUtils::CompressBytes; destSize is passed through
// as an in/out size. Returns S_OK when the helper reports Z_OK, S_FALSE
// otherwise.
STDMETHODIMP COfficeUtils::Compress(BYTE* destBuf, ULONG* destSize, BYTE* sourceBuf, ULONG sourceSize, SHORT level)
{
    const bool bOk = ( ZLibZipUtils::CompressBytes( destBuf, destSize, sourceBuf, sourceSize, level ) == Z_OK );
    return bOk ? S_OK : S_FALSE;
}
// Returns S_OK when ZLibZipUtils::IsArchive accepts filename, S_FALSE
// otherwise.
STDMETHODIMP COfficeUtils::IsArchive(BSTR filename)
{
    return ZLibZipUtils::IsArchive(filename) ? S_OK : S_FALSE;
}
// Returns S_OK when ZLibZipUtils::IsFileExistInArchive finds filePath inside
// the archive zipFile, S_FALSE otherwise.
STDMETHODIMP COfficeUtils::IsFileExistInArchive(BSTR zipFile, BSTR filePath)
{
    return ZLibZipUtils::IsFileExistInArchive( zipFile, filePath) ? S_OK : S_FALSE;
}
// Loads the entry filePath of the archive zipFile into memory by delegating
// to ZLibZipUtils::LoadFileFromArchive, which fills *fileInBytes.
// Returns S_OK when the helper succeeds, S_FALSE otherwise.
// NOTE(review): ownership of the returned buffer is defined by the helper -
// confirm how callers are expected to free it.
STDMETHODIMP COfficeUtils::LoadFileFromArchive(BSTR zipFile, BSTR filePath, BYTE** fileInBytes)
{
    return ZLibZipUtils::LoadFileFromArchive( zipFile, filePath, fileInBytes) ? S_OK : S_FALSE;
}
// Extracts the entries of zipFile and hands them to data_receiver through an
// ExtractedFileCallback. *result is set to VARIANT_TRUE when
// ZLibZipUtils::ExtractFiles succeeds and VARIANT_FALSE otherwise; the
// method itself always returns S_OK.
STDMETHODIMP COfficeUtils::ExtractFilesToMemory(BSTR zipFile, IExtractedFileEvent* data_receiver, VARIANT_BOOL* result)
{
    if ( ZLibZipUtils::ExtractFiles(_bstr_t(zipFile), ExtractedFileCallback (data_receiver)) )
    {
        *result = VARIANT_TRUE;
    }
    else
    {
        *result = VARIANT_FALSE;
    }
    return S_OK;
}
// Builds the archive zipFile from data supplied by data_source through a
// RequestFileCallback, using compression_level. *result is set to
// VARIANT_TRUE when ZLibZipUtils::CompressFiles succeeds and VARIANT_FALSE
// otherwise; the method itself always returns S_OK.
STDMETHODIMP COfficeUtils::CompressFilesFromMemory(BSTR zipFile, IRequestFileEvent* data_source, SHORT compression_level, VARIANT_BOOL* result)
{
    if ( ZLibZipUtils::CompressFiles(_bstr_t(zipFile), RequestFileCallback (data_source), compression_level) )
    {
        *result = VARIANT_TRUE;
    }
    else
    {
        *result = VARIANT_FALSE;
    }
    return S_OK;
}
// Static trampoline used as ProgressCallback::OnProgress: lpParam carries the
// COfficeUtils instance (set as ProgressCallback::caller), and the call is
// forwarded to that instance's OnProgress event. A NULL lpParam is ignored.
void COfficeUtils::OnProgressFunc( LPVOID lpParam, long nID, long nPercent, short* Cancel )
{
    COfficeUtils* pSelf = reinterpret_cast<COfficeUtils*>( lpParam );
    if ( NULL == pSelf )
        return;

    pSelf->OnProgress( nID, nPercent, Cancel );
}

View File

@@ -0,0 +1,163 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#pragma once
#include "resource.h"
#include "ZipUtils.h"
#if defined(_WIN32_WCE) && !defined(_CE_DCOM) && !defined(_CE_ALLOW_SINGLE_THREADED_OBJECTS_IN_MTA)
#error "Single-threaded COM objects are not properly supported on Windows CE platform, such as the Windows Mobile platforms that do not include full DCOM support. Define _CE_ALLOW_SINGLE_THREADED_OBJECTS_IN_MTA to force ATL to support creating single-thread COM object's and allow use of it's single-threaded COM object implementations. The threading model in your rgs file was set to 'Free' as that is the only threading model supported in non DCOM Windows CE platforms."
#endif
// Dual COM interface with the basic archive helpers: extracting an archive
// to a directory, compressing a file or directory, and compressing /
// uncompressing raw memory buffers.
[
object,
uuid("D1E2A35D-AD68-4E0E-9FF2-859155151328"),
dual, helpstring("IOfficeUtils Interface"),
pointer_default(unique)
]
__interface IOfficeUtils : IDispatch
{
// extract_without_path: a value > 0 extracts entries without their stored paths.
[id(1), helpstring("method ExtractToDirectory")] HRESULT ExtractToDirectory([in] BSTR zipFile, [in] BSTR unzipDir, [in] BSTR password, [in] SHORT extract_without_path);
// name may refer to either a file or a directory.
[id(2), helpstring("method CompressFileOrDirectory")] HRESULT CompressFileOrDirectory([in] BSTR name, [in] BSTR outputFile, [in] SHORT level);
// destSize is in/out (see the IDL attributes below).
[id(3), helpstring("method Uncompress")] HRESULT Uncompress([out] BYTE* destBuf, [in,out] ULONG* destSize, [in] BYTE* sourceBuf, [in] ULONG sourceSize);
[id(4), helpstring("method Compress")] HRESULT Compress([out] BYTE* destBuf, [in,out] ULONG* destSize, [in] BYTE* sourceBuf, [in] ULONG sourceSize, [in] SHORT level);
};
// Callback interface implemented by the caller of
// IOfficeUtils2::ExtractFilesToMemory; it receives a file name together with
// a byte SAFEARRAY for an extracted entry.
[
object,
uuid("6BA9C2ED-263A-456d-882F-646DA4CE1FEA"),
dual, helpstring("IExtractedFileEvent Interface"),
pointer_default(unique)
]
__interface IExtractedFileEvent : IDispatch
{
[id(9), helpstring("method ExtractedFile")] HRESULT ExtractedFile([in] BSTR file_name, [in, satype("unsigned char")] SAFEARRAY** arr);
};
// Callback interface implemented by the caller of
// IOfficeUtils2::CompressFilesFromMemory; it supplies a file name and a byte
// SAFEARRAY, and reports through is_data_attached whether data was provided.
[
object,
uuid("6013A180-406F-48fc-94BD-B0AC8B72CC0E"),
dual, helpstring("IRequestFileEvent Interface"),
pointer_default(unique)
]
__interface IRequestFileEvent : IDispatch
{
[id(11), helpstring("method RequestFile")] HRESULT RequestFile([out] BSTR* file_name, [out, satype("unsigned char")] SAFEARRAY** arr, [out, retval] VARIANT_BOOL* is_data_attached);
};
// Extension of IOfficeUtils adding archive inspection (IsArchive,
// IsFileExistInArchive), loading a single entry into memory, and
// callback-driven extraction / compression that works on in-memory data.
[
object,
uuid("F9C00AE2-7B59-4210-B348-5E34B8F495D7"),
dual, helpstring("IOfficeUtils2 Interface"),
pointer_default(unique)
]
__interface IOfficeUtils2 : IOfficeUtils
{
[id(5), helpstring("method IsArchive")] HRESULT IsArchive([in] BSTR filename);
[id(6), helpstring("method IsFileExistInArchive")] HRESULT IsFileExistInArchive([in] BSTR zipFile, [in] BSTR filePath);
[id(7), helpstring("method LoadFileFromArchive")] HRESULT LoadFileFromArchive([in] BSTR zipFile, [in] BSTR filePath, [out] BYTE** fileInBytes);
// ids 9 and 11 are used by the callback interfaces IExtractedFileEvent / IRequestFileEvent.
[id(8), helpstring("method ExtractFilesToMemory")] HRESULT ExtractFilesToMemory([in] BSTR zipFile, [in] IExtractedFileEvent* data_receiver, [out, retval] VARIANT_BOOL* result);
[id(10), helpstring("method CompressFilesFromMemory")] HRESULT CompressFilesFromMemory([in] BSTR zipFile, [in] IRequestFileEvent* data_source, [in] SHORT compression_level, [out, retval] VARIANT_BOOL* result);
};
// Outgoing event dispinterface of COfficeUtils: OnProgress reports an
// operation id and a percentage, and passes an in/out Cancel value.
// NOTE(review): presumably a handler sets *Cancel to non-zero to abort the
// operation - confirm against ZLibZipUtils.
[
dispinterface,
uuid("FB7DE28F-2E10-4dc8-813E-943701B9FB81"),
helpstring("_IAVSOfficeUtilsEvents Interface")
]
__interface _IAVSOfficeUtilsEvents
{
[id(1), helpstring("method OnProgress")] HRESULT OnProgress([in] LONG nID, [in] LONG nPercent, [in, out, ref] SHORT* Cancel);
};
// Apartment-threaded COM coclass implementing IOfficeUtils2 on top of the
// ZLibZipUtils helpers and raising _IAVSOfficeUtilsEvents::OnProgress.
[
coclass,
default(IOfficeUtils, _IAVSOfficeUtilsEvents),
threading(apartment),
event_source(com),
vi_progid("AVSOfficeUtils.OfficeUtils"),
progid("AVSOfficeUtils.OfficeUtils.1"),
version(1.0),
uuid("27AC89C1-0995-46FA-90A5-01EE850A09AC"),
helpstring("OfficeUtils Class")
]
class ATL_NO_VTABLE COfficeUtils :
public IOfficeUtils2
{
public:
COfficeUtils()
{
}
// Declares the outgoing event interface (provides OnProgress).
__event __interface _IAVSOfficeUtilsEvents;
DECLARE_PROTECT_FINAL_CONSTRUCT()
// ATL lifetime hooks; nothing to initialize or release.
HRESULT FinalConstruct()
{
return S_OK;
}
void FinalRelease()
{
}
public:
// IOfficeUtils
STDMETHOD(ExtractToDirectory)(BSTR zipFile, BSTR unzipDir, BSTR password, SHORT extract_without_path);
STDMETHOD(CompressFileOrDirectory)(BSTR name, BSTR outputFile, SHORT level);
STDMETHOD(Uncompress)(BYTE* destBuf, ULONG* destSize, BYTE* sourceBuf, ULONG sourceSize);
STDMETHOD(Compress)(BYTE* destBuf, ULONG* destSize, BYTE* sourceBuf, ULONG sourceSize, SHORT level);
// IOfficeUtils2
STDMETHOD(IsArchive)(BSTR filename);
STDMETHOD(IsFileExistInArchive)(BSTR zipFile, BSTR filePath);
STDMETHOD(LoadFileFromArchive)(BSTR zipFile, BSTR filePath, BYTE** fileInBytes);
STDMETHOD(ExtractFilesToMemory)(BSTR zipFile, IExtractedFileEvent* data_receiver, VARIANT_BOOL* result);
STDMETHOD(CompressFilesFromMemory)(BSTR zipFile, IRequestFileEvent* data_source, SHORT compression_level, VARIANT_BOOL* result);
protected:
// Static trampoline handed to ZLibZipUtils as the progress callback;
// lpParam is the COfficeUtils instance whose OnProgress event is raised.
static void OnProgressFunc( LPVOID lpParam, long nID, long nPercent, short* Cancel );
};

View File

@@ -0,0 +1,68 @@
========================================================================
ACTIVE TEMPLATE LIBRARY : AVSOfficeUtils Project Overview
========================================================================
AppWizard has created this AVSOfficeUtils project for you to use as the starting point for
writing your Dynamic Link Library (DLL).
This project is implemented with Visual C++ attributes.
This file contains a summary of what you will find in each of the files that
make up your project.
AVSOfficeUtils.vcproj
This is the main project file for VC++ projects generated using an Application Wizard.
It contains information about the version of Visual C++ that generated the file, and
information about the platforms, configurations, and project features selected with the
Application Wizard.
_AVSOfficeUtils.idl
This file will be generated by the compiler when the project is built. It will contain the IDL
definitions of the type library, the interfaces and co-classes defined in your project.
This file will be processed by the MIDL compiler to generate:
C++ interface definitions and GUID declarations (_AVSOfficeUtils.h)
GUID definitions (_AVSOfficeUtils_i.c)
A type library (_AVSOfficeUtils.tlb)
Marshaling code (_AVSOfficeUtils_p.c and dlldata.c)
AVSOfficeUtils.cpp
This file contains the object map and the implementation of your DLL's exports.
AVSOfficeUtils.rc
This is a listing of all of the Microsoft Windows resources that the
program uses.
AVSOfficeUtils.def
This module-definition file provides the linker with information about the exports
required by your DLL. It contains exports for:
DllGetClassObject
DllCanUnloadNow
GetProxyDllInfo
DllRegisterServer
DllUnregisterServer
/////////////////////////////////////////////////////////////////////////////
Other standard files:
StdAfx.h, StdAfx.cpp
These files are used to build a precompiled header (PCH) file
named AVSOfficeUtils.pch and a precompiled types file named StdAfx.obj.
Resource.h
This is the standard header file that defines resource IDs.
/////////////////////////////////////////////////////////////////////////////
Proxy/stub DLL project and module definition file:
AVSOfficeUtilsps.vcproj
This file is the project file for building a proxy/stub DLL if necessary.
The IDL file in the main project must contain at least one interface and you must
first compile the IDL file before building the proxy/stub DLL. This process generates
dlldata.c, AVSOfficeUtils_i.c and AVSOfficeUtils_p.c which are required
to build the proxy/stub DLL.
AVSOfficeUtilsps.def
This module definition file provides the linker with information about the exports
required by the proxy/stub.
/////////////////////////////////////////////////////////////////////////////

View File

@@ -0,0 +1,18 @@
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by AVSOfficeUtils.rc
//
#define IDS_PROJNAME 100
#define IDR_ASCOFFICEUTILS 101
// Next default values for new objects
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 201
#define _APS_NEXT_COMMAND_VALUE 32768
#define _APS_NEXT_CONTROL_VALUE 201
#define _APS_NEXT_SYMED_VALUE 102
#endif
#endif

View File

@@ -0,0 +1,222 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "stdafx.h"
#include "UniversalString.h"
// Frees both string buffers and resets the object to the "no data" state:
// NULL pointers and zero length / sizes.
void UniversalString::ClearUniversalString()
{
    // delete[] on a NULL pointer is a no-op, so no guard is required.
    delete [] this->str;
    this->str = NULL;

    delete [] this->wstr;
    this->wstr = NULL;

    this->length = 0;
    this->charSize = 0;
    this->wcharSize = 0;
}
// Creates an empty string: both the narrow and the wide buffer hold just a
// terminating zero (size 1, length 0).
UniversalString::UniversalString():
str(NULL), wstr(NULL), length(0), charSize(1), wcharSize(1)
{
    char* narrowBuf = new char[this->charSize];
    *narrowBuf = '\0';
    this->str = narrowBuf;

    wchar_t* wideBuf = new wchar_t[this->wcharSize];
    *wideBuf = '\0';
    this->wstr = wideBuf;
}
// Copy constructor: duplicates both buffers of _ustr. The buffers are only
// allocated when both source sizes are non-zero; otherwise the pointers stay
// NULL while length and sizes are still taken from _ustr.
UniversalString::UniversalString( const UniversalString& _ustr ):
str(NULL), wstr(NULL), length(_ustr.length), charSize(_ustr.charSize), wcharSize(_ustr.wcharSize)
{
    if ( ( this->charSize == 0 ) || ( this->wcharSize == 0 ) )
        return;

    this->str = new char[this->charSize];
    this->wstr = new wchar_t[this->wcharSize];
    if ( ( this->str == NULL ) || ( this->wstr == NULL ) )
        return;

    const size_t narrowBytes = sizeof(char) * this->charSize;
    const size_t wideBytes = sizeof(wchar_t) * this->wcharSize;

    memset( this->str, 0, narrowBytes );
    memset( this->wstr, 0, wideBytes );
    memcpy( this->str, _ustr.str, narrowBytes );
    memcpy( this->wstr, _ustr.wstr, wideBytes );
}
// Builds the string from a narrow (multi-byte) C string.
//
// _str     - zero-terminated source; NULL leaves the object without buffers
//            (NULL pointers, zero sizes).
// CodePage - code page used to convert _str to the wide representation.
//
// length is the byte length of _str (strlen). The narrow buffer is an exact
// copy of _str; the wide buffer is produced with MultiByteToWideChar. If the
// conversion fails the wide buffer is an empty, zero-terminated string
// instead of an unterminated zero-length array.
UniversalString::UniversalString( const char* _str, unsigned int CodePage ):
str(NULL), wstr(NULL), length(0), charSize(0), wcharSize(0)
{
    if ( _str == NULL )
        return;

    this->length = (unsigned int)strlen( _str );
    this->charSize = ( this->length + 1 );
    this->str = new char[this->charSize];
    memcpy( this->str, _str, ( sizeof(char) * this->charSize ) );

    // First call only measures: the result is the required size in wide
    // characters including the terminator, or 0 on failure.
    const int wideNeeded = MultiByteToWideChar( CodePage, 0, this->str, -1, NULL, 0 );

    // Always keep room for at least the terminator so that the wide buffer
    // is a valid C string even when the conversion is not possible.
    this->wcharSize = ( wideNeeded > 0 ) ? (unsigned int)wideNeeded : 1;
    this->wstr = new wchar_t[this->wcharSize];
    memset( this->wstr, 0, ( sizeof(wchar_t) * this->wcharSize ) );

    if ( wideNeeded > 0 )
    {
        MultiByteToWideChar( CodePage, 0, this->str, -1, this->wstr, wideNeeded );
    }
}
// Builds the string from a wide C string.
//
// _wstr    - zero-terminated source; NULL leaves the object without buffers
//            (NULL pointers, zero sizes).
// CodePage - code page used to convert _wstr to the narrow representation.
//
// length is the character count of _wstr (wcslen). The wide buffer is an
// exact copy of _wstr; the narrow buffer is produced with
// WideCharToMultiByte. If the conversion fails the narrow buffer is an
// empty, zero-terminated string instead of an unterminated zero-length array.
UniversalString::UniversalString( const wchar_t* _wstr, unsigned int CodePage ):
str(NULL), wstr(NULL), length(0), charSize(0), wcharSize(0)
{
    if ( _wstr == NULL )
        return;

    this->length = (unsigned int)wcslen( _wstr );
    this->wcharSize = ( this->length + 1 );
    this->wstr = new wchar_t[this->wcharSize];
    memcpy( this->wstr, _wstr, ( sizeof(wchar_t) * this->wcharSize ) );

    // First call only measures: the result is the required size in bytes
    // including the terminator, or 0 on failure.
    const int narrowNeeded = WideCharToMultiByte( CodePage, 0, this->wstr, -1, NULL, 0, NULL, NULL );

    // Always keep room for at least the terminator so that the narrow buffer
    // is a valid C string even when the conversion is not possible.
    this->charSize = ( narrowNeeded > 0 ) ? (unsigned int)narrowNeeded : 1;
    this->str = new char[this->charSize];
    memset( this->str, 0, ( sizeof(char) * this->charSize ) );

    if ( narrowNeeded > 0 )
    {
        WideCharToMultiByte( CodePage, 0, this->wstr, -1, this->str, narrowNeeded, NULL, NULL );
    }
}
// Releases both buffers.
UniversalString::~UniversalString()
{
    ClearUniversalString();
}
// Gives direct access to the internal narrow buffer (may be NULL). The
// pointer stays owned by this object.
UniversalString::operator char* ()
{
    char* narrowBuf = this->str;
    return narrowBuf;
}
// Gives direct access to the internal wide buffer (may be NULL). The
// pointer stays owned by this object.
UniversalString::operator wchar_t* ()
{
    wchar_t* wideBuf = this->wstr;
    return wideBuf;
}
// Two strings are equal when length, both buffer sizes and the contents of
// both the narrow and the wide buffer match. The checks run in that order
// and stop at the first mismatch.
bool UniversalString::operator == ( const UniversalString& _ustr )
{
    if ( this->length != _ustr.length )
        return false;
    if ( this->charSize != _ustr.charSize )
        return false;
    if ( this->wcharSize != _ustr.wcharSize )
        return false;
    if ( strncmp( this->str, _ustr.str, _ustr.charSize ) != 0 )
        return false;

    return ( wcsncmp( this->wstr, _ustr.wstr, _ustr.wcharSize ) == 0 );
}
// Negation of operator ==.
bool UniversalString::operator != ( const UniversalString& _ustr )
{
    const bool bEqual = ( *this == _ustr );
    return !bEqual;
}
// Copy assignment: discards the current contents and duplicates both buffers
// of _ustr. Self-assignment is a no-op. The buffers are only allocated when
// both source sizes are non-zero; otherwise the pointers stay NULL while
// length and sizes are still taken from _ustr.
UniversalString& UniversalString::operator = ( const UniversalString& _ustr )
{
    if ( this == &_ustr )
        return *this;

    this->ClearUniversalString();

    this->length = _ustr.length;
    this->charSize = _ustr.charSize;
    this->wcharSize = _ustr.wcharSize;

    if ( ( this->charSize == 0 ) || ( this->wcharSize == 0 ) )
        return *this;

    this->str = new char[this->charSize];
    this->wstr = new wchar_t[this->wcharSize];
    if ( ( this->str == NULL ) || ( this->wstr == NULL ) )
        return *this;

    const size_t narrowBytes = sizeof(char) * this->charSize;
    const size_t wideBytes = sizeof(wchar_t) * this->wcharSize;

    memset( this->str, 0, narrowBytes );
    memset( this->wstr, 0, wideBytes );
    memcpy( this->str, _ustr.str, narrowBytes );
    memcpy( this->wstr, _ustr.wstr, wideBytes );

    return *this;
}
// Returns the stored length: the element count (without terminator) of the
// string the object was constructed from.
unsigned int UniversalString::GetLength() const
{
    const unsigned int storedLength = this->length;
    return storedLength;
}

View File

@@ -0,0 +1,60 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#pragma once
#include <string.h>
#include <stdlib.h>
// Owns the same text in two encodings at once: a narrow (multi-byte) buffer
// and a wide buffer. Whichever form is given to the constructor is copied
// and the other one is produced with the Win32 conversion functions.
// NOTE(review): CP_ACP comes from the Windows headers, which this header does
// not include itself - presumably supplied via the precompiled header.
class UniversalString
{
private:
// Narrow (multi-byte) representation; NULL when constructed from NULL.
char *str;
// Wide representation; NULL when constructed from NULL.
wchar_t *wstr;
// strlen / wcslen of the string passed to the constructor.
unsigned int length;
// Element count of str, including the terminator.
unsigned int charSize;
// Element count of wstr, including the terminator.
unsigned int wcharSize;
// Frees both buffers and zeroes all fields.
void ClearUniversalString();
public:
UniversalString();
UniversalString( const UniversalString& _ustr );
// CodePage selects the multi-byte encoding used for the conversion.
UniversalString( const char* _str, unsigned int CodePage = CP_ACP);
UniversalString( const wchar_t* _wstr, unsigned int CodePage = CP_ACP);
~UniversalString();
// Direct access to the internal buffers; ownership stays with the object.
operator char* ();
operator wchar_t* ();
bool operator == ( const UniversalString& _ustr );
bool operator != ( const UniversalString& _ustr );
UniversalString& operator = ( const UniversalString& _ustr );
unsigned int GetLength() const;
};

View File

@@ -0,0 +1,996 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#include "stdafx.h"
#include "ZipUtils.h"
#include <memory>
#include "atlsafe.h"
// Size in bytes of the scratch buffer used while extracting a file.
#define WRITEBUFFERSIZE 8192
// NOTE(review): presumably the matching buffer size for reading input files
// during compression - its use is outside this part of the file.
#define READBUFFERSIZE 8192
namespace ZLibZipUtils
{
// Namespace-wide critical section object.
// NOTE(review): what it guards is not visible in this part of the file.
AVSOfficeCriticalSection criticalSection;
// Forward declarations of the file-local helpers.
// Applies the DOS date stored in the archive to an extracted file.
static void change_file_date( const char *filename, uLong dosdate, tm_unz tmu_date );
// Thin wrapper around _mkdir.
static int mymkdir( const char* dirname );
// Creates a directory including missing parent directories.
static int makedir( const char *newdir );
// Extraction helpers.
static int do_extract_currentfile( unzFile uf, const int* popt_extract_without_path, int* popt_overwrite, const char* password );
static int do_extract( unzFile uf, int opt_extract_without_path, int opt_overwrite, const char* password, const ProgressCallback* progress );
// Archive lookup / in-memory loading helpers.
static bool is_file_in_archive(unzFile uf, const wchar_t *filename);
static bool current_file_is_find(unzFile uf, const wchar_t *filename);
static bool get_file_in_archive(unzFile uf, const wchar_t *filePathInZip, BYTE** fileInBytes );
static bool get_file(unzFile uf, BYTE** fileInBytes );
static unsigned int get_files_count( const WCHAR* dirname );
static void change_file_date( const char *filename, uLong dosdate, tm_unz tmu_date )
{
HANDLE hFile;
FILETIME ftm,ftLocal,ftCreate,ftLastAcc,ftLastWrite;
hFile = CreateFileA(filename,GENERIC_READ | GENERIC_WRITE,
0,NULL,OPEN_EXISTING,0,NULL);
GetFileTime(hFile,&ftCreate,&ftLastAcc,&ftLastWrite);
DosDateTimeToFileTime((WORD)(dosdate>>16),(WORD)dosdate,&ftLocal);
LocalFileTimeToFileTime(&ftLocal,&ftm);
SetFileTime(hFile,&ftm,&ftLastAcc,&ftm);
CloseHandle(hFile);
}
// Creates a single directory level; returns the result of _mkdir
// (0 on success, -1 on failure with errno set).
static int mymkdir( const char* dirname )
{
    return _mkdir(dirname);
}
static int makedir( const char *newdir )
{
char *buffer ;
char *p;
int len = (int)strlen(newdir);
if (len <= 0)
return 0;
buffer = (char*)malloc(len+1);
strcpy_s(buffer, (len+1), newdir);
if (buffer[len-1] == '/') {
buffer[len-1] = '\0';
}
if (mymkdir(buffer) == 0)
{
free(buffer);
return 1;
}
p = buffer+1;
while (1)
{
char hold;
while(*p && *p != '\\' && *p != '/')
p++;
hold = *p;
*p = 0;
if ((mymkdir(buffer) == -1) && (errno == ENOENT))
{
free(buffer);
return 0;
}
if (hold == 0)
break;
*p++ = hold;
}
free(buffer);
return 1;
}
// Returns true when an archive-supplied entry name stays below the directory it
// is extracted into: no leading separator, no drive/stream colon, no ".." component.
static bool entry_name_is_confined( const char* name )
{
    if ( name[0] == '/' || name[0] == '\\' )
        return false;
    if ( strchr( name, ':' ) != NULL )
        return false;

    const char* component = name;
    for ( const char* p = name; ; ++p )
    {
        if ( *p == '/' || *p == '\\' || *p == '\0' )
        {
            if ( ( p - component ) == 2 && component[0] == '.' && component[1] == '.' )
                return false;
            if ( *p == '\0' )
                break;
            component = p + 1;
        }
    }
    return true;
}

// Extracts the entry the archive cursor currently points at, relative to the
// current working directory.
//
// uf                        - open minizip handle positioned on the entry.
// popt_extract_without_path - non-zero: drop the stored directory part and write
//                             only the base name; directory entries are skipped.
// popt_overwrite            - accepted for the existing signature; existing files
//                             are always overwritten.
// password                  - password handed to minizip, may be NULL.
//
// Returns UNZ_OK on success or a minizip error code. Entries whose name does not
// fit the local buffer or would escape the extraction directory are rejected with
// UNZ_BADZIPFILE. A file that cannot be created on disk is skipped without an
// error, which matches the previous behaviour.
static int do_extract_currentfile( unzFile uf, const int* popt_extract_without_path, int* popt_overwrite, const char* password )
{
    (void)popt_overwrite;

    // Zero-filled, and one byte is held back in the call below: minizip does not
    // terminate the name when it is as long as the buffer it is given.
    char filename_inzip[256] = { 0 };
    unz_file_info file_info;

    int err = unzGetCurrentFileInfo( uf, &file_info, filename_inzip, sizeof(filename_inzip) - 1, NULL, 0, NULL, 0 );
    if ( err != UNZ_OK )
    {
        return err;
    }
    if ( file_info.size_filename > sizeof(filename_inzip) - 1 )
    {
        // The stored name was truncated; extracting under a cut-off name would be wrong.
        return UNZ_BADZIPFILE;
    }

#ifdef CODEPAGE_ISSUE_FIX
    {
        // Re-encode the stored OEM name through UniversalString.
        // NOTE(review): the target code page is whatever UniversalString's char* conversion uses — confirm.
        UniversalString us( filename_inzip, CP_OEMCP );
        us = UniversalString( (wchar_t*)us );
        memset( filename_inzip, 0, ( 256 * sizeof(char) ) );
        strcpy_s( filename_inzip, 256, (char*)us );
    }
#endif

    // Locate the base name: everything after the last path separator.
    char* filename_withoutpath = filename_inzip;
    for ( char* p = filename_inzip; *p != '\0'; p++ )
    {
        if ( ( *p == '/' ) || ( *p == '\\' ) )
            filename_withoutpath = p + 1;
    }

    const bool keepPaths = ( ( *popt_extract_without_path ) == 0 );
    const char* write_filename = keepPaths ? filename_inzip : filename_withoutpath;

    // The name comes from the archive, i.e. from untrusted input.
    if ( !entry_name_is_confined( write_filename ) )
    {
        return UNZ_BADZIPFILE;
    }

    if ( ( *filename_withoutpath ) == '\0' )
    {
        // Directory entry (name ends with a separator).
        if ( keepPaths )
        {
            mymkdir( filename_inzip );
        }
        return UNZ_OK;
    }

    void* buf = malloc( WRITEBUFFERSIZE );
    if ( buf == NULL )
    {
        return UNZ_INTERNALERROR;
    }

    err = unzOpenCurrentFilePassword( uf, password );

    FILE* fout = NULL;
    if ( err == UNZ_OK )
    {
        fopen_s( &fout, write_filename, "wb" );
        if ( ( fout == NULL ) && keepPaths && ( filename_withoutpath != (char*)filename_inzip ) )
        {
            // The parent directory is probably missing: cut the name at the last
            // separator, create the directory chain, restore the name and retry.
            const char c = *( filename_withoutpath - 1 );
            *( filename_withoutpath - 1 ) = '\0';
            makedir( write_filename );
            *( filename_withoutpath - 1 ) = c;
            fopen_s( &fout, write_filename, "wb" );
        }
    }

    if ( fout != NULL )
    {
        do
        {
            err = unzReadCurrentFile( uf, buf, WRITEBUFFERSIZE );
            if ( ( err > 0 ) && ( fwrite( buf, err, 1, fout ) != 1 ) )
            {
                err = UNZ_ERRNO;
            }
        }
        while ( err > 0 );

        fclose( fout );

        if ( err == 0 )
        {
            change_file_date( write_filename, file_info.dosDate, file_info.tmu_date );
        }
    }

    if ( err == UNZ_OK )
    {
        err = unzCloseCurrentFile( uf );
    }
    else
    {
        // Keep the first error; the close result is irrelevant at this point.
        unzCloseCurrentFile( uf );
    }

    free( buf );
    return err;
}
// Extracts every entry of an open archive into the current working directory.
//
// uf                       - open minizip handle positioned on the first entry.
// opt_extract_without_path - forwarded to do_extract_currentfile().
// opt_overwrite            - forwarded to do_extract_currentfile().
// password                 - archive password, may be NULL.
// progress                 - optional callback; receives a value that grows
//                            towards 1000000 after each entry and may request
//                            cancellation through its Cancel flag.
//
// Returns the minizip error when the archive summary cannot be read, otherwise 0.
// NOTE(review): a failing entry stops the loop but is still reported as 0 to the
// caller; changing that needs UnzipToDir to be reviewed at the same time.
static int do_extract( unzFile uf, int opt_extract_without_path, int opt_overwrite, const char* password, const ProgressCallback* progress )
{
    unz_global_info gi;
    int err = unzGetGlobalInfo( uf, &gi );
    if ( err != UNZ_OK )
    {
        // gi is not filled in; looping over it would read an indeterminate count.
        return err;
    }

    for ( uLong i = 0; i < gi.number_entry; i++ )
    {
        if ( do_extract_currentfile( uf, &opt_extract_without_path, &opt_overwrite, password ) != UNZ_OK )
            break;

        if ( progress != NULL )
        {
            short cancel = 0;
            long progressValue = ( 1000000 / gi.number_entry * i );
            progress->OnProgress( progress->caller, UTILS_ONPROGRESSEVENT_ID, progressValue, &cancel );
            if ( cancel != 0 )
            {
                // Cancelled by the listener: stop quietly, without the final notification.
                return UNZ_OK;
            }
        }

        if ( ( i + 1 ) < gi.number_entry )
        {
            err = unzGoToNextFile( uf );
            if ( err != UNZ_OK )
            {
                break;
            }
        }
    }

    if ( progress != NULL )
    {
        short cancel = 0;
        long progressValue = 1000000;
        progress->OnProgress( progress->caller, UTILS_ONPROGRESSEVENT_ID, progressValue, &cancel );
    }
    return 0;
}
// Scans the archive forward from its current entry and reports whether any
// entry name matches `filename` (comparison done by current_file_is_find()).
static bool is_file_in_archive(unzFile uf, const wchar_t *filename)
{
    unz_global_info summary;
    unzGetGlobalInfo( uf, &summary );

    uLong visited = 0;
    while ( visited < summary.number_entry )
    {
        if ( current_file_is_find( uf, filename ) )
            return true;

        ++visited;
        // Advance only while entries remain; a failed step ends the search.
        if ( ( visited < summary.number_entry ) && ( unzGoToNextFile( uf ) != UNZ_OK ) )
            return false;
    }
    return false;
}
// Tells whether the entry under the archive cursor is named `filename`.
//
// Both names are routed through UniversalString with the OEM code page before
// they are compared. Returns false when the entry information cannot be read.
static bool current_file_is_find(unzFile uf, const wchar_t *filename)
{
    // Zero-filled, and one byte is held back so the name is always terminated:
    // minizip does not terminate names that fill the buffer completely.
    char filename_inzip[256] = { 0 };
    unz_file_info file_info;

    if ( unzGetCurrentFileInfo( uf, &file_info, filename_inzip, sizeof(filename_inzip) - 1, NULL, 0, NULL, 0 ) != UNZ_OK )
        return false;

    UniversalString us( filename_inzip, CP_OEMCP );
    us = UniversalString( (wchar_t*)us );
    UniversalString findFile( filename, CP_OEMCP );

    if ( us == findFile )
        return true;
    return false;
}
// Returns the name of the entry under the archive cursor, converted from the OEM
// code page by OemToChar; an empty string when the entry information cannot be read.
static const _bstr_t get_filename_from_unzfile(unzFile unzip_file_handle)
{
    // Local, zero-filled buffers. One byte is held back in the call below because
    // minizip leaves a name unterminated when it fills the buffer completely.
    char filename_OEM[MAX_PATH] = { 0 };
    wchar_t filename_converted[MAX_PATH] = { 0 };

    if ( UNZ_OK == unzGetCurrentFileInfo( unzip_file_handle, NULL, filename_OEM, sizeof(filename_OEM) - 1, NULL, 0, NULL, 0 ) )
    {
        OemToChar( filename_OEM, filename_converted );
        return _bstr_t( filename_converted );
    }
    return _bstr_t( L"" );
}
// Finds the entry named `filePathInZip`, scanning forward from the current entry,
// and loads its content through get_file().
//
// Returns true only when the entry was found AND its bytes were stored in
// *fileInBytes; false when the entry is missing, the archive summary cannot be
// read, fileInBytes is NULL, or loading failed.
static bool get_file_in_archive(unzFile uf, const wchar_t *filePathInZip, BYTE** fileInBytes)
{
    if ( fileInBytes == NULL )
        return false;

    unz_global_info gi;
    if ( unzGetGlobalInfo( uf, &gi ) != UNZ_OK )
        return false;

    for ( uLong i = 0; i < gi.number_entry; i++ )
    {
        if ( current_file_is_find( uf, filePathInZip ) == true )
        {
            // Report what the load really did instead of an unconditional success.
            return get_file( uf, fileInBytes );
        }
        if ( ( i + 1 ) < gi.number_entry )
        {
            if ( unzGoToNextFile( uf ) != UNZ_OK )
                break;
        }
    }
    return false;
}
// Loads the complete content of the entry under the archive cursor.
//
// uf          - open minizip handle positioned on the entry.
// fileInBytes - receives a buffer allocated with HeapAlloc on the process heap;
//               it is written only on success.
//
// Returns true when the whole entry was read. Empty entries, unreadable entries
// and allocation failures give false and leave *fileInBytes untouched.
static bool get_file(unzFile uf, BYTE** fileInBytes)
{
    if ( fileInBytes == NULL )
        return false;

    // The buffer is sized from the entry header so entries larger than one
    // WRITEBUFFERSIZE chunk are no longer cut short.
    unz_file_info file_info;
    if ( unzGetCurrentFileInfo( uf, &file_info, NULL, 0, NULL, 0, NULL, 0 ) != UNZ_OK )
        return false;

    const uLong total = file_info.uncompressed_size;
    if ( total == 0 )
        return false;

    if ( unzOpenCurrentFilePassword( uf, NULL ) != UNZ_OK )
        return false;

    BYTE* data = (BYTE*)::HeapAlloc( GetProcessHeap(), 0, total );
    bool complete = ( data != NULL );

    uLong done = 0;
    while ( complete && ( done < total ) )
    {
        const uLong remaining = total - done;
        const unsigned chunk = ( remaining > WRITEBUFFERSIZE ) ? WRITEBUFFERSIZE : (unsigned)remaining;
        const int got = unzReadCurrentFile( uf, data + done, chunk );
        if ( got <= 0 )
        {
            // Error, or the stream ended before the announced size was reached.
            complete = false;
        }
        else
        {
            done += (uLong)got;
        }
    }

    // The close result is ignored, as before.
    unzCloseCurrentFile( uf );

    if ( !complete )
    {
        if ( data != NULL )
            ::HeapFree( GetProcessHeap(), 0, data );
        return false;
    }

    *fileInBytes = data;
    return true;
}
// Reads the entry under the archive cursor straight into the data area of `arr`.
// Succeeds only when exactly `array_size` bytes were delivered.
static bool get_file(unzFile unzip_file_handle, SAFEARRAY* arr, uInt array_size)
{
    if ( unzOpenCurrentFile( unzip_file_handle ) != UNZ_OK )
        return false;

    const int bytesDelivered = unzReadCurrentFile( unzip_file_handle, arr->pvData, array_size );
    unzCloseCurrentFile( unzip_file_handle );

    // Same unsigned comparison the implicit conversion performed before.
    return static_cast<uInt>( bytesDelivered ) == array_size;
}
int ZipDir( const WCHAR* dir, const WCHAR* outputFile, const ProgressCallback* progress, int compressionLevel )
{
criticalSection.Enter();
int err = -1;
if ( ( dir != NULL ) && ( outputFile != NULL ) )
{
WIN32_FIND_DATA ffd;
HANDLE hFind = INVALID_HANDLE_VALUE;
BOOL bNextFile = FALSE;
deque<wstring> StringDeque;
deque<wstring> zipDeque;
StringDeque.push_back( wstring( dir ) );
wstring zipDir;
wstring file;
wstring zipFileName;
wstring szText;
wstring szCurText;
zipFile zf = zipOpen( UniversalString( outputFile ), APPEND_STATUS_CREATE );
zip_fileinfo zi;
zi.tmz_date.tm_sec = zi.tmz_date.tm_min = zi.tmz_date.tm_hour =
zi.tmz_date.tm_mday = zi.tmz_date.tm_mon = zi.tmz_date.tm_year = 0;
zi.dosDate = 0;
zi.internal_fa = 0;
zi.external_fa = 0;
SYSTEMTIME currTime;
GetLocalTime( &currTime );
zi.tmz_date.tm_sec = currTime.wSecond;
zi.tmz_date.tm_min = currTime.wMinute;
zi.tmz_date.tm_hour = currTime.wHour;
zi.tmz_date.tm_mday = currTime.wDay;
zi.tmz_date.tm_mon = currTime.wMonth;
zi.tmz_date.tm_year = currTime.wYear;
unsigned int filesCount = get_files_count( dir );
unsigned int currentFileIndex = 0;
while ( ( !StringDeque.empty() ) || ( bNextFile ) )
{
if ( ( !bNextFile ) && ( !StringDeque.empty() ) )
{
szText = StringDeque.front() + wstring( _T( "\\" ) );
szCurText = szText + wstring( _T( "*" ) );
if ( hFind != INVALID_HANDLE_VALUE )
{
FindClose( hFind );
zipDir = zipDeque.front() + wstring( _T( "/" ) );
zipDeque.pop_front();
}
StringDeque.pop_front();
hFind = FindFirstFile( szCurText.c_str(), &ffd );
}
if ( ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY )
{
if ( ( wcscmp( ffd.cFileName, _T( "." ) ) != 0 ) && ( wcscmp( ffd.cFileName, _T( ".." ) ) != 0 ) )
{
StringDeque.push_back( szText + wstring( ffd.cFileName ) );
zipDeque.push_back( zipDir + wstring( ffd.cFileName ) );
}
}
else
{
file = szText + wstring( ffd.cFileName );
zipFileName = zipDir + wstring( ffd.cFileName );
string xstr;
ifstream xfile( file.c_str(), ios::binary );
xfile.seekg( 0, ios_base::end );
xstr.resize( xfile.tellg() );
xfile.seekg( 0, ios_base::beg );
xfile.read( const_cast<char*>( xstr.data() ), (streamsize)xstr.size() );
err = zipOpenNewFileInZip( zf, UniversalString( zipFileName.c_str(), CP_OEMCP ), &zi, NULL, 0, NULL, 0, NULL, Z_DEFLATED, compressionLevel );
err = zipWriteInFileInZip( zf, xstr.data(), (unsigned int)xstr.size() );
err = zipCloseFileInZip( zf );
xfile.close();
if ( progress != NULL )
{
short cancel = 0;
long progressValue = ( 1000000 / filesCount * currentFileIndex );
progress->OnProgress( progress->caller, UTILS_ONPROGRESSEVENT_ID, progressValue, &cancel );
if ( cancel != 0 )
{
FindClose( hFind );
err = zipClose( zf, NULL );
return err;
}
}
currentFileIndex++;
}
bNextFile = FindNextFile( hFind, &ffd );
}
FindClose( hFind );
err = zipClose( zf, NULL );
if ( progress != NULL )
{
short cancel = 0;
long progressValue = 1000000;
progress->OnProgress( progress->caller, UTILS_ONPROGRESSEVENT_ID, progressValue, &cancel );
}
}
criticalSection.Leave();
return err;
}
int ZipFile( const WCHAR* inputFile, const WCHAR* outputFile, int compressionLevel )
{
criticalSection.Enter();
int err = -1;
if ( ( inputFile != NULL ) && ( outputFile != NULL ) )
{
string istr;
ifstream ifile( inputFile, std::ios::binary );
ifile.seekg( 0, ios_base::end );
istr.resize( ifile.tellg() );
ifile.seekg( 0, ios_base::beg );
ifile.read( const_cast<char*>( istr.data() ), (streamsize)istr.size() );
ifile.close();
zipFile zf = zipOpen( UniversalString( outputFile ), APPEND_STATUS_CREATE );
zip_fileinfo zi;
zi.tmz_date.tm_sec = zi.tmz_date.tm_min = zi.tmz_date.tm_hour =
zi.tmz_date.tm_mday = zi.tmz_date.tm_mon = zi.tmz_date.tm_year = 0;
zi.dosDate = 0;
zi.internal_fa = 0;
zi.external_fa = 0;
SYSTEMTIME currTime;
GetLocalTime( &currTime );
zi.tmz_date.tm_sec = currTime.wSecond;
zi.tmz_date.tm_min = currTime.wMinute;
zi.tmz_date.tm_hour = currTime.wHour;
zi.tmz_date.tm_mday = currTime.wDay;
zi.tmz_date.tm_mon = currTime.wMonth;
zi.tmz_date.tm_year = currTime.wYear;
wstring inputFileName( inputFile );
wstring::size_type pos = 0;
static const wstring::size_type npos = -1;
pos = inputFileName.find_last_of( _T( '\\' ) );
wstring zipFileName;
if ( pos != npos )
{
zipFileName = wstring( ( inputFileName.begin() + pos + 1 ), inputFileName.end() );
}
else
{
zipFileName = wstring( inputFileName.begin(), inputFileName.end() );
}
err = zipOpenNewFileInZip( zf, UniversalString( zipFileName.c_str(), CP_OEMCP ), &zi, NULL, 0, NULL, 0, NULL, Z_DEFLATED, compressionLevel );
err = zipWriteInFileInZip( zf, istr.data(), (unsigned int)istr.size() );
err = zipCloseFileInZip( zf );
err = zipClose( zf, NULL );
}
criticalSection.Leave();
return false;
}
bool ClearDirectory( const WCHAR* dir, bool delDir )
{
criticalSection.Enter();
bool result = false;
if ( dir != NULL )
{
unsigned int size = (unsigned int)wcslen( dir );
WCHAR* _dir = new WCHAR[size + 2];
wcsncpy_s( _dir, size + 2, dir, size );
_dir[size++] = L'\0';
_dir[size] = L'\0';
SHFILEOPSTRUCT lpFileOp;
lpFileOp.hwnd = NULL;
lpFileOp.wFunc = FO_DELETE;
lpFileOp.pFrom = _dir;
lpFileOp.pTo = NULL;
lpFileOp.fFlags = FOF_NOERRORUI | FOF_NOCONFIRMATION | FOF_SILENT;
lpFileOp.fAnyOperationsAborted = FALSE;
lpFileOp.hNameMappings = NULL;
lpFileOp.lpszProgressTitle = NULL;
int err = SHFileOperation( &lpFileOp );
if ( !delDir )
{
CreateDirectory( dir, NULL );
}
if ( _dir != NULL )
{
delete []_dir;
_dir = NULL;
}
result = true;
}
else
{
result = false;
}
criticalSection.Leave();
return result;
}
// Extracts an archive into a directory.
//
// zipFile                  - archive to read.
// unzipDir                 - destination directory; it becomes the working
//                            directory for the duration of the extraction and
//                            the previous one is restored afterwards.
// progress                 - optional progress/cancel callback.
// password                 - optional archive password.
// opt_extract_without_path - true: ignore directories stored in the archive.
// clearOutputDirectory     - true: empty unzipDir first via ClearDirectory().
//
// Returns 0 on success; -1 when an argument is NULL or the archive cannot be
// opened; otherwise the first failure among entering unzipDir, extracting,
// closing the archive and restoring the working directory.
int UnzipToDir( const WCHAR* zipFile, const WCHAR* unzipDir, const ProgressCallback* progress, const WCHAR* password, bool opt_extract_without_path, bool clearOutputDirectory )
{
    criticalSection.Enter();
    unzFile uf = NULL;
    int err = -1;

    if ( ( zipFile != NULL ) && ( unzipDir != NULL ) )
    {
        uf = unzOpen( zipFile );
    }

    if ( uf != NULL )
    {
        if ( clearOutputDirectory )
        {
            ClearDirectory( unzipDir );
        }

        // _getcwd( NULL, 0 ) allocates the buffer; it is released with free() below.
        char* previousDir = _getcwd( NULL, 0 );

        err = _wchdir( unzipDir );
        if ( err == 0 )
        {
            err = do_extract( uf, opt_extract_without_path, 1, UniversalString( password ), progress );
        }

        // Always release the archive handle, whatever happened above.
        const int closeErr = unzClose( uf );
        if ( err == UNZ_OK )
        {
            err = closeErr;
        }

        if ( previousDir != NULL )
        {
            // Restoring the working directory must not hide an earlier failure.
            const int restoreErr = _chdir( previousDir );
            if ( err == 0 )
            {
                err = restoreErr;
            }
            free( previousDir );
        }
    }

    criticalSection.Leave();
    return err;
}
// Serialised wrapper around zlib uncompress(): inflates sourceBuf into destBuf
// and returns zlib's status code unchanged.
int UncompressBytes( BYTE* destBuf, ULONG* destSize, const BYTE* sourceBuf, ULONG sourceSize )
{
    criticalSection.Enter();
    const int status = uncompress( destBuf, destSize, sourceBuf, sourceSize );
    criticalSection.Leave();
    return status;
}
// Serialised wrapper around zlib compress2(): deflates sourceBuf into destBuf
// at the requested level and returns zlib's status code unchanged.
int CompressBytes( BYTE* destBuf, ULONG* destSize, const BYTE* sourceBuf, ULONG sourceSize, SHORT level )
{
    criticalSection.Enter();
    const int status = compress2( destBuf, destSize, sourceBuf, sourceSize, level );
    criticalSection.Leave();
    return status;
}
bool IsArchive(const WCHAR* filename)
{
criticalSection.Enter();
unzFile uf = NULL;
bool isZIP = false;
if (( filename != NULL ))
uf = unzOpen( UniversalString( filename ) );
if ( uf != NULL )
{
isZIP = true;
unzClose( uf );
}
criticalSection.Leave();
return isZIP;
}
bool IsFileExistInArchive(const WCHAR* zipFile, const WCHAR* filePathInZip)
{
criticalSection.Enter();
unzFile uf = NULL;
bool isIn = false;
if ( ( zipFile != NULL ) && ( filePathInZip != NULL ) )
uf = unzOpen( UniversalString( zipFile ) );
if ( uf != NULL )
{
isIn = is_file_in_archive( uf, filePathInZip );
unzClose( uf );
}
criticalSection.Leave();
return isIn;
}
// Loads the entry `filePathInZip` of the archive `zipFile` into memory.
//
// fileInBytes - receives the buffer produced by get_file_in_archive(); it must
//               not be NULL.
//
// Returns the result of get_file_in_archive(); false when an argument is NULL
// or the archive cannot be opened.
bool LoadFileFromArchive(const WCHAR* zipFile, const WCHAR* filePathInZip, BYTE** fileInBytes)
{
    criticalSection.Enter();
    unzFile uf = NULL;
    bool isIn = false;

    // fileInBytes is written through further down, so it is validated together
    // with the other arguments.
    if ( ( zipFile != NULL ) && ( filePathInZip != NULL ) && ( fileInBytes != NULL ) )
        uf = unzOpen( UniversalString( zipFile ) );

    if ( uf != NULL )
    {
        isIn = get_file_in_archive( uf, filePathInZip, fileInBytes );
        unzClose( uf );
    }

    criticalSection.Leave();
    return isIn;
}
// Walks all entries of an archive and hands each one (name plus content as a
// BYTE SAFEARRAY) to `callback`.
//
// Entries whose information or content cannot be read are skipped. Returns
// false only when the archive cannot be opened.
bool ExtractFiles(const _bstr_t zip_file_path, ExtractedFileCallback& callback)
{
    CSLocker locker(criticalSection);

    unzFile unzip_file_handle = unzOpen(static_cast<wchar_t*>(zip_file_path));
    if ( unzip_file_handle == NULL )
        return false;

    do
    {
        unz_file_info file_info;
        if ( UNZ_OK != unzGetCurrentFileInfo(unzip_file_handle, &file_info, NULL, 0, NULL, 0, NULL, 0) )
        {
            // file_info is not valid (e.g. an archive without entries): do not
            // size an array from it, just try to move on.
            continue;
        }

        CComSafeArray<BYTE> arr(file_info.uncompressed_size);
        if ( file_info.uncompressed_size == 0 || get_file(unzip_file_handle, arr.m_psa, file_info.uncompressed_size) )
        {
            callback.Invoke(get_filename_from_unzfile(unzip_file_handle), arr.GetSafeArrayPtr());
        }
    } while ( UNZ_OK == unzGoToNextFile(unzip_file_handle) );

    unzClose( unzip_file_handle );
    return true;
}
// Builds a new archive from entries supplied by `callback`.
//
// The callback is invoked repeatedly; each successful call delivers an entry
// name (BSTR) and its content (SAFEARRAY of bytes), both of which are owned and
// released by this function. The loop ends when the callback returns false.
//
// Returns true when every delivered entry was written; false when the archive
// cannot be created, an entry is delivered without a name or data, or minizip
// reports an error.
bool CompressFiles(_bstr_t zip_file_path, RequestFileCallback& callback, int compression_level)
{
    CSLocker locker(criticalSection);

    zipFile zip_file_handle = zipOpen(zip_file_path, APPEND_STATUS_CREATE);
    if ( NULL == zip_file_handle )
        return false;

    zip_fileinfo zi = {0};
    SYSTEMTIME currTime;
    GetLocalTime( &currTime );
    zi.tmz_date.tm_sec = currTime.wSecond;
    zi.tmz_date.tm_min = currTime.wMinute;
    zi.tmz_date.tm_hour = currTime.wHour;
    zi.tmz_date.tm_mday = currTime.wDay;
    // SYSTEMTIME months are 1..12, minizip expects months since January (0..11).
    zi.tmz_date.tm_mon = currTime.wMonth - 1;
    zi.tmz_date.tm_year = currTime.wYear;

    for ( ;; )
    {
        SAFEARRAY* arr = NULL;
        BSTR in_zip_filename = NULL;
        if ( !callback.Invoke(&in_zip_filename, &arr) )
            break;

        // Take ownership first so both objects are released on every path.
        _bstr_t in_zip_filename_wrapper(in_zip_filename, false);
        CComSafeArray<BYTE> arr_wrapper;
        if ( arr != NULL )
            arr_wrapper.Attach(arr);

        const wchar_t* wide_name = static_cast<wchar_t*>(in_zip_filename_wrapper);
        bool written = false;
        if ( ( wide_name != NULL ) && ( arr_wrapper.m_psa != NULL ) )
        {
            // Sized from the name itself (two bytes per character covers
            // double-byte OEM code pages), so long names cannot overrun it.
            string in_zip_filename_OEM( wcslen(wide_name) * 2 + 1, '\0' );
            CharToOemW(wide_name, &in_zip_filename_OEM[0]);

            written =
                ZIP_OK == zipOpenNewFileInZip( zip_file_handle, in_zip_filename_OEM.c_str(), &zi, NULL, 0, NULL, 0, NULL, Z_DEFLATED, compression_level ) &&
                ZIP_OK == zipWriteInFileInZip(zip_file_handle, arr_wrapper.m_psa->pvData, arr_wrapper.GetCount()) &&
                ZIP_OK == zipCloseFileInZip(zip_file_handle);
        }

        if ( !written )
        {
            zipClose(zip_file_handle, NULL);
            return false;
        }
    }

    zipClose(zip_file_handle, NULL);
    return true;
}
static unsigned int get_files_count( const WCHAR* dirname )
{
unsigned int filescount = 0;
if ( dirname != NULL )
{
WIN32_FIND_DATA ffd;
HANDLE hFind = INVALID_HANDLE_VALUE;
BOOL bNextFile = FALSE;
deque<wstring> StringDeque;
StringDeque.push_back( dirname );
wstring file;
wstring szText;
wstring szCurText;
while ( ( !StringDeque.empty() ) || ( bNextFile ) )
{
if ( ( !bNextFile ) && ( !StringDeque.empty() ) )
{
szText = StringDeque.front() + wstring( _T( "\\" ) );
szCurText = szText + wstring( _T( "*" ) );
if ( hFind != INVALID_HANDLE_VALUE )
{
FindClose( hFind );
}
StringDeque.pop_front();
hFind = FindFirstFile( szCurText.c_str(), &ffd );
}
if ( ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY )
{
if ( ( wcscmp( ffd.cFileName, _T( "." ) ) != 0 ) && ( wcscmp( ffd.cFileName, _T( ".." ) ) != 0 ) )
{
StringDeque.push_back( szText + wstring( ffd.cFileName ) );
}
}
else
{
filescount++;
}
bNextFile = FindNextFile( hFind, &ffd );
}
FindClose( hFind );
}
return filescount;
}
}

View File

@@ -0,0 +1,69 @@
/*
* (c) Copyright Ascensio System SIA 2010-2014
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
* EU, LV-1021.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#pragma once
#include "UniversalString.h"
#include <string>
#include <deque>
#include <fstream>
#include <vector>
#include <list>
#include <direct.h>
#include "CallbackHelpers.h"
using namespace std;
#define ZLIB_WINAPI
#define CODEPAGE_ISSUE_FIX
#include "unzip.h"
#include "zip.h"
// Zip/zlib helpers built on minizip. Every function below serialises itself on
// a lock shared by the whole namespace.
namespace ZLibZipUtils
{
// Recursively packs the files below `dir` into a new archive `outputFile`.
// `progress` may be NULL. Returns -1 when `dir` or `outputFile` is NULL,
// otherwise a minizip status code.
int ZipDir( const WCHAR* dir, const WCHAR* outputFile, const ProgressCallback* progress, int compressionLevel = -1 );
// Packs the single file `inputFile` into a new archive `outputFile`, stored
// under its base name.
int ZipFile( const WCHAR* inputFile, const WCHAR* outputFile, int compressionLevel = -1 );
// Deletes `dir` and its content through the shell; unless `delDir` is true the
// directory is created again empty. Returns false only for a NULL `dir`.
bool ClearDirectory( const WCHAR* dir, bool delDir = false );
// Extracts `zipFile` into `unzipDir`. The process working directory is switched
// to `unzipDir` while extracting and restored afterwards.
int UnzipToDir( const WCHAR* zipFile, const WCHAR* unzipDir, const ProgressCallback* progress, const WCHAR* password = NULL, bool opt_extract_without_path = false, bool clearOutputDirectory = false );
// Wrapper around zlib uncompress(); returns its status code.
int UncompressBytes( BYTE* destBuf, ULONG* destSize, const BYTE* sourceBuf, ULONG sourceSize );
// Wrapper around zlib compress2(); returns its status code.
int CompressBytes( BYTE* destBuf, ULONG* destSize, const BYTE* sourceBuf, ULONG sourceSize, SHORT level );
// True when `filename` can be opened as a zip archive.
bool IsArchive(const WCHAR* filename);
// True when the archive `zipFile` contains an entry named `filePathInZip`.
bool IsFileExistInArchive(const WCHAR* zipFile, const WCHAR* filePathInZip);
// Loads the entry `filePathInZip` into a buffer allocated with HeapAlloc on the
// process heap and stores it in *fileInBytes. No length is returned alongside it.
bool LoadFileFromArchive(const WCHAR* zipFile, const WCHAR* filePathInZip, BYTE** fileInBytes);
// Invokes `callback` with the name and content of each entry of the archive.
// Returns false only when the archive cannot be opened.
bool ExtractFiles(const _bstr_t zip_file_path, ExtractedFileCallback& callback);
// Creates an archive from the (name, content) pairs that `callback` supplies
// until it returns false.
bool CompressFiles(_bstr_t zip_file_path, RequestFileCallback& callback, int compression_level);
}

View File

@@ -0,0 +1,5 @@
// stdafx.cpp : source file that includes just the standard includes
// AVSOfficeUtils.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information
#include "stdafx.h"

View File

@@ -0,0 +1,77 @@
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently,
// but are changed infrequently
#pragma once
#ifndef STRICT
#define STRICT
#endif
// Modify the following defines if you have to target a platform prior to the ones specified below.
// Refer to MSDN for the latest info on corresponding values for different platforms.
#ifndef WINVER // Allow use of features specific to Windows 95 and Windows NT 4 or later.
#define WINVER 0x0400 // Change this to the appropriate value to target Windows 98 and Windows 2000 or later.
#endif
#ifndef _WIN32_WINNT // Allow use of features specific to Windows NT 4 or later.
#define _WIN32_WINNT 0x0400 // Change this to the appropriate value to target Windows 2000 or later.
#endif
#ifndef _WIN32_WINDOWS // Allow use of features specific to Windows 98 or later.
#define _WIN32_WINDOWS 0x0410 // Change this to the appropriate value to target Windows Me or later.
#endif
#ifndef _WIN32_IE // Allow use of features specific to IE 4.0 or later.
#define _WIN32_IE 0x0400 // Change this to the appropriate value to target IE 5.0 or later.
#endif
#define _ATL_APARTMENT_THREADED
#define _ATL_NO_AUTOMATIC_NAMESPACE
#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be explicit
// turns off ATL's hiding of some common and often safely ignored warning messages
#define _ATL_ALL_WARNINGS
#include <atlbase.h>
#include <atlcom.h>
#include <atlwin.h>
#include <atltypes.h>
#include <atlctl.h>
#include <atlhost.h>
#include "CSLocker.h"
using namespace ATL;
#include "../../Common/Config.h"
#ifdef BUILD_CONFIG_FULL_VERSION
#import "../../Redist/ASCMediaCore3.dll" named_guids raw_interfaces_only rename_namespace("MediaCore"), exclude("tagRECT")
#else
#import "../../Redist/OfficeCore.dll" named_guids raw_interfaces_only rename_namespace("OfficeCore")
#endif
#ifdef _DEBUG
#pragma comment(lib, "..\\GOCR\\Debug\\GOCR.lib")
#pragma comment(lib, "..\\PNM\\Debug\\PNM.lib")
#else
#ifdef BUILD_CONFIG_FULL_VERSION
#pragma comment(lib, "..\\GOCR\\Release\\GOCR.lib")
#pragma comment(lib, "..\\PNM\\Release\\PNM.lib")
#else
#pragma comment(lib, "..\\GOCR\\ReleaseOpenSource\\GOCR.lib")
#pragma comment(lib, "..\\PNM\\ReleaseOpenSource\\PNM.lib")
#endif
#endif
// Event id passed as nID with every progress notification raised by the zip helpers.
static const long UTILS_ONPROGRESSEVENT_ID = 0;
// Progress notification signature.
//   lpParam  - the `caller` value stored in the ProgressCallback.
//   nID      - event id (UTILS_ONPROGRESSEVENT_ID).
//   nPercent - progress value; despite the name the zip helpers report it on a
//              0..1000000 scale.
//   Cancel   - out flag; the helpers stop when the callee sets it to non-zero.
typedef void (*OnProgressCallback)( LPVOID lpParam, long nID, long nPercent, short* Cancel );
// Pairs a notification function with the opaque value handed back to it.
struct ProgressCallback
{
OnProgressCallback OnProgress;
LPVOID caller;
};

View File

@@ -0,0 +1,7 @@
#pragma once
//1
//0
//0
//18
#define INTVER 1,0,0,18
#define STRVER "1,0,0,18\0"

View File

@@ -0,0 +1,50 @@

Microsoft Visual Studio Solution File, Format Version 9.00
# Visual C++ Express 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ASCOfficeUtils", "ASCOfficeUtils\ASCOfficeUtils2005.vcproj", "{6215E3BF-2D42-40FB-B951-B8C448A596D2}"
ProjectSection(ProjectDependencies) = postProject
{DD328E05-26BE-4C81-A13E-489D15321212} = {DD328E05-26BE-4C81-A13E-489D15321212}
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60} = {56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GOCR", "GOCR\GOCR2005.vcproj", "{DD328E05-26BE-4C81-A13E-489D15321212}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PNM", "PNM\PNM2005.vcproj", "{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
ReleaseASC|Win32 = ReleaseASC|Win32
ReleaseOpenSource|Win32 = ReleaseOpenSource|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.Debug|Win32.ActiveCfg = Debug|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.Debug|Win32.Build.0 = Debug|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.Release|Win32.ActiveCfg = Release|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.Release|Win32.Build.0 = Release|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.ReleaseASC|Win32.ActiveCfg = ReleaseASC|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.ReleaseASC|Win32.Build.0 = ReleaseASC|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.ReleaseOpenSource|Win32.ActiveCfg = ReleaseOpenSource|Win32
{6215E3BF-2D42-40FB-B951-B8C448A596D2}.ReleaseOpenSource|Win32.Build.0 = ReleaseOpenSource|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.Debug|Win32.ActiveCfg = Debug|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.Debug|Win32.Build.0 = Debug|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.Release|Win32.ActiveCfg = Release|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.Release|Win32.Build.0 = Release|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.ReleaseASC|Win32.ActiveCfg = ReleaseASC|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.ReleaseASC|Win32.Build.0 = ReleaseASC|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.ReleaseOpenSource|Win32.ActiveCfg = ReleaseOpenSource|Win32
{DD328E05-26BE-4C81-A13E-489D15321212}.ReleaseOpenSource|Win32.Build.0 = ReleaseOpenSource|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.Debug|Win32.ActiveCfg = Debug|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.Debug|Win32.Build.0 = Debug|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.Release|Win32.ActiveCfg = Release|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.Release|Win32.Build.0 = Release|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.ReleaseASC|Win32.ActiveCfg = ReleaseASC|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.ReleaseASC|Win32.Build.0 = ReleaseASC|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.ReleaseOpenSource|Win32.ActiveCfg = ReleaseOpenSource|Win32
{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}.ReleaseOpenSource|Win32.Build.0 = ReleaseOpenSource|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,333 @@
<?xml version="1.0" encoding="windows-1251"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9,00"
Name="GOCR"
ProjectGUID="{DD328E05-26BE-4C81-A13E-489D15321212}"
Keyword="AtlProj"
TargetFrameworkVersion="196613"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="4"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC70.vsprops"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="headers; include"
PreprocessorDefinitions="WIN32;_DEBUG"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
EnableFunctionLevelLinking="false"
RuntimeTypeInfo="false"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Debug/gocr.pch"
WarningLevel="3"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
OutputFile="$(OutDir)\$(ProjectName).lib"
SuppressStartupBanner="true"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="headers; include"
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Release/gocr.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\src\barcode.c"
>
</File>
<File
RelativePath=".\src\box.c"
>
</File>
<File
RelativePath=".\src\database.c"
>
</File>
<File
RelativePath=".\src\detect.c"
>
</File>
<File
RelativePath=".\src\gocr.c"
>
</File>
<File
RelativePath=".\src\jconv.c"
>
</File>
<File
RelativePath=".\src\job.c"
>
</File>
<File
RelativePath=".\src\lines.c"
>
</File>
<File
RelativePath=".\src\list.c"
>
</File>
<File
RelativePath=".\src\ocr0.c"
>
</File>
<File
RelativePath=".\src\ocr0n.c"
>
</File>
<File
RelativePath=".\src\ocr1.c"
>
</File>
<File
RelativePath=".\src\otsu.c"
>
</File>
<File
RelativePath=".\src\output.c"
>
</File>
<File
RelativePath=".\src\pcx.c"
>
</File>
<File
RelativePath=".\src\pgm2asc.c"
>
</File>
<File
RelativePath=".\src\pixel.c"
>
</File>
<File
RelativePath=".\src\pnm.c"
>
</File>
<File
RelativePath=".\src\progress.c"
>
</File>
<File
RelativePath=".\src\remove.c"
>
</File>
<File
RelativePath=".\src\tga.c"
>
</File>
<File
RelativePath=".\src\unicode.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\headers\amiga.h"
>
</File>
<File
RelativePath=".\headers\barcode.h"
>
</File>
<File
RelativePath=".\include\config.h"
>
</File>
<File
RelativePath=".\headers\gocr.h"
>
</File>
<File
RelativePath=".\headers\list.h"
>
</File>
<File
RelativePath=".\headers\ocr0.h"
>
</File>
<File
RelativePath=".\headers\ocr1.h"
>
</File>
<File
RelativePath=".\headers\otsu.h"
>
</File>
<File
RelativePath=".\headers\output.h"
>
</File>
<File
RelativePath=".\headers\pcx.h"
>
</File>
<File
RelativePath=".\headers\pgm2asc.h"
>
</File>
<File
RelativePath=".\headers\pnm.h"
>
</File>
<File
RelativePath=".\headers\progress.h"
>
</File>
<File
RelativePath=".\Resource.h"
>
</File>
<File
RelativePath=".\headers\tga.h"
>
</File>
<File
RelativePath=".\headers\unicode.h"
>
</File>
<File
RelativePath=".\include\version.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -0,0 +1,464 @@
<?xml version="1.0" encoding="windows-1251"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8,00"
Name="GOCR"
ProjectGUID="{DD328E05-26BE-4C81-A13E-489D15321212}"
Keyword="AtlProj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="4"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC70.vsprops"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="headers; include"
PreprocessorDefinitions="WIN32;_DEBUG"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
EnableFunctionLevelLinking="false"
RuntimeTypeInfo="false"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Debug/gocr.pch"
WarningLevel="3"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
OutputFile="$(OutDir)\$(ProjectName).lib"
SuppressStartupBanner="true"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="headers; include"
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Release/gocr.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="ReleaseASC|Win32"
OutputDirectory="ReleaseASC"
IntermediateDirectory="ReleaseASC"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<!-- Compiler settings for ReleaseASC. The PCH path stays inside this
     configuration's own intermediate directory (it used to point into
     .\Release, copied from the Release configuration). -->
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="headers; include"
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\ReleaseASC/gocr.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG; ASCBUILD"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="ReleaseOpenSource|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="headers; include"
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES;BUILD_CONFIG_OPENSOURCE_VERSION"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\$(ConfigurationName)/gocr.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\src\barcode.c"
>
</File>
<File
RelativePath=".\src\box.c"
>
</File>
<File
RelativePath=".\src\database.c"
>
</File>
<File
RelativePath=".\src\detect.c"
>
</File>
<File
RelativePath=".\src\gocr.c"
>
</File>
<File
RelativePath=".\src\jconv.c"
>
</File>
<File
RelativePath=".\src\job.c"
>
</File>
<File
RelativePath=".\src\lines.c"
>
</File>
<File
RelativePath=".\src\list.c"
>
</File>
<File
RelativePath=".\src\ocr0.c"
>
</File>
<File
RelativePath=".\src\ocr0n.c"
>
</File>
<File
RelativePath=".\src\ocr1.c"
>
</File>
<File
RelativePath=".\src\otsu.c"
>
</File>
<File
RelativePath=".\src\output.c"
>
</File>
<File
RelativePath=".\src\pcx.c"
>
</File>
<File
RelativePath=".\src\pgm2asc.c"
>
</File>
<File
RelativePath=".\src\pixel.c"
>
</File>
<File
RelativePath=".\src\pnm.c"
>
</File>
<File
RelativePath=".\src\progress.c"
>
</File>
<File
RelativePath=".\src\remove.c"
>
</File>
<File
RelativePath=".\src\tga.c"
>
</File>
<File
RelativePath=".\src\unicode.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\headers\amiga.h"
>
</File>
<File
RelativePath=".\headers\barcode.h"
>
</File>
<File
RelativePath=".\include\config.h"
>
</File>
<File
RelativePath=".\headers\gocr.h"
>
</File>
<File
RelativePath=".\headers\list.h"
>
</File>
<File
RelativePath=".\headers\ocr0.h"
>
</File>
<File
RelativePath=".\headers\ocr1.h"
>
</File>
<File
RelativePath=".\headers\otsu.h"
>
</File>
<File
RelativePath=".\headers\output.h"
>
</File>
<File
RelativePath=".\headers\pcx.h"
>
</File>
<File
RelativePath=".\headers\pgm2asc.h"
>
</File>
<File
RelativePath=".\headers\pnm.h"
>
</File>
<File
RelativePath=".\headers\progress.h"
>
</File>
<File
RelativePath=".\Resource.h"
>
</File>
<File
RelativePath=".\headers\tga.h"
>
</File>
<File
RelativePath=".\headers\unicode.h"
>
</File>
<File
RelativePath=".\include\version.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -0,0 +1,31 @@
/*
This file was suggested by Uffe Holst (Jun 05, 2000, uhc@post6.tele.dk)
to compile gocr using SAS/C under AmigaOS.
SAS/C probably does not support ANSI C++, hence these changes.
I am a little bit confused about using both a declaration and
a macro definition of abs(). I think that should not be necessary.
Tell me if you have an Amiga and can answer
the questions marked below.
Joerg Schulenburg, see README for EMAIL-address
*/
/*
 * Active only when both _AMIGA and __SASC are defined: provide an abs()
 * macro unless one already exists. The includes and the extern declaration
 * below remain compiled out.
 */
#if defined(_AMIGA) && defined(__SASC)
#if 0
#include <string.h> /* may be this can be removed ??? */
#include <stdlib.h> /* may be this can be removed ??? */
extern int abs(int); /* may be this can be removed ??? */
#endif
#if !defined(abs)
#define abs(i) ((i) < 0 ? -(i) : (i))
#endif
#endif /* _AMIGA && __SASC */

View File

@@ -0,0 +1,11 @@
#ifndef _BARCODE_H
#define _BARCODE_H
#include "pnm.h"
#include "gocr.h" /* job_t is declared in gocr.h, not in pnm.h */
/*
 * Detect barcodes for the given job and add a string to the box
 * (obj-pointer).
 * NOTE(review): the meaning of the int return value is not visible in this
 * header -- confirm in barcode.c.
 */
int detect_barcode(job_t *job);
#endif /* _BARCODE_H */

View File

@@ -0,0 +1,295 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2006 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
sometimes I have written comments in german language, sorry for that
- look for ??? for preliminary code
*/
/* General headerfile with gocr-definitions */
#ifndef __GOCR_H__
#define __GOCR_H__
#include "pnm.h"
#include "unicode.h"
#include "list.h"
#include <stddef.h>
#ifdef HAVE_GETTIMEOFDAY
#include <sys/time.h>
#endif
/*
* wchar_t should always exist (ANSI), but WCHAR.H is sometimes missing
* USE_UNICODE should be removed or replaced by HAVE_WCHAR_H in future
*/
#ifdef HAVE_WCHAR_H
#define USE_UNICODE 1
#endif
#ifdef __cplusplus
extern "C"{
#endif
/* ------------------------ feature extraction ----------------- */
/* NOTE(review): AT and ST share the value 7; how they are used is not
 * visible in this header. */
#define AT 7 /* mark */
#define M1 1 /* mark */
/* Direction codes. The enumerators get the values UP=1, DO=2, RI=3, LE=4
 * (presumably up, down, right, left -- confirm against the users of
 * DIRECTION). */
enum direction {
UP=1, DO, RI, LE
};
typedef enum direction DIRECTION;
#define ST 7 /* stop */
/* ------------------------------------------------------------- */
/* detect maxima of line overlaps (returned in %) and line coordinates */
#define HOR 1 /* horizontal */
#define VER 2 /* vertical */
#define RIS 3 /* rising */
#define FAL 4 /* falling */
/* capacity of each per-line array in struct tlines */
#define MAXlines 1024
/* ToDo: if we have a tree instead of a list, a line could be a node object */
/* Geometry and statistics of the detected text lines. Every array has room
 * for MAXlines entries, one entry per text line. */
struct tlines {
int num; /* NOTE(review): presumably the number of lines in use -- confirm */
int dx, dy; /* direction of text lines (straight/skew) */
int m1[MAXlines], /* start of line = upper bound of 'A' */
m2[MAXlines], /* upper bound of 'e' */
m3[MAXlines], /* lower bound of 'e' = baseline */
m4[MAXlines]; /* stop of line = lower bound of 'q' */
/* ToDo: add sureness per m1,m2 etc? */
int x0[MAXlines],
x1[MAXlines]; /* left and right border */
int wt[MAXlines]; /* weight: how sure the line is correct, in percent (v0.41) */
int pitch[MAXlines]; /* word pitch (later per box?), v0.41 */
int mono[MAXlines]; /* spacing type, 0=proportional, 1=monospaced */
};
#define NumAlt 10 /* maximal number of alternative chars (table length) */
#define MaxNumFrames 8 /* maximum number of frames per char/box */
/* Size of box.frame_vector; that single array is shared by all frames of a
 * box (128 entries * 2 ints, hence the "1KB/box" of the original note). */
#define MaxFrameVectors 128 /* maximum vectors per frame (*8=1KB/box) */
/* ToDo: use only malloc_box(),free_box(),copybox() for creation, destroy etc.
* adding reference_counter to avoid pointer pointing to freed box
*/
/* One detected object (normally one character) together with its bounding
 * rectangle, recognition alternatives and frame vectors. */
struct box { /* this structure should contain all pixel infos of a letter */
int x0,x1,y0,y1,x,y,dots; /* xmin,xmax,ymin,ymax,reference-pixel,i-dots */
int num_boxes, /* 1 "abc", 2 "!i?", 3 "&auml;" (composed objects) 0.41 */
num_subboxes; /* 1 for "abdegopqADOPQR", 2 for "B" (holes) 0.41 */
wchar_t c; /* detected char (same as tac[0], obsolete?) */
wchar_t modifier; /* default=0, see compose() in unicode.c */
int num; /* same number = same char */
int line; /* line number (points to struct tlines lines) */
int m1,m2,m3,m4; /* m2 = upper boundary, m3 = baseline */
/* planned: sizeof hole_1, hole_2, certainty (run1=100%,run2=90%,etc.) */
pix *p; /* pointer to pixmap (v0.2.5) */
/* tac, wac are used together with setac() to manage very similar chars */
int num_ac; /* length of table (alternative chars), default=0 */
wchar_t tac[NumAlt]; /* alternative chars, only used by setac(),getac() */
int wac[NumAlt]; /* weight of alternative chars */
char *tas[NumAlt]; /* alternative UTF8-strings or XML codes if tac[]=0 */
/* replacing old obj */
/* ToDo: (*obj)[NumAlt] + olen[NumAlt] ??? */
/* ToDo: bitmap for possible Picture|Object|Char ??? */
/* char *obj; */ /* pointer to text-object ... -> replaced by tas[] */
/* ... (melted chars, barcode, picture coords, ...) */
/* must be freed before box is freed! */
/* do _not_ copy only the pointer to object */
/* --------------------------------------------------------
* extension since v0.41 js05: store frame vectors,
* which is a table of vectors surrounding the char and its
* inner white holes. The advantage is the independence from
* resolution, handling of holes, overlap and rotation.
* --------------------------------------------------------- */
int num_frames; /* number of frames: 1 for cfhklmnrstuvwxyz */
/* 2 for abdegijopq */
int frame_vol[MaxNumFrames]; /* volume inside frame +/- (black/white) */
int frame_per[MaxNumFrames]; /* periphery, summed length of vectors */
int num_frame_vectors[MaxNumFrames]; /* index to next frame */
/* biggest frame should be stored first (outer frame) */
/* biggest has the maximum pair distance */
/* num vector loops */
int frame_vector[MaxFrameVectors][2]; /* may be 16*int=fixpoint_number */
};
typedef struct box Box;
/*
 * True (non-zero) if the coordinate pair (a,b) lies outside image p, i.e.
 * a or b is negative, a >= (p)->x or b >= (p)->y.
 * Every argument is parenthesized so that expression arguments such as
 * `c ? u : v` expand as intended. a and b are still evaluated more than
 * once, so do not pass expressions with side effects.
 */
#define outbounds(p, a, b) ((a) < 0 || (b) < 0 || (a) >= (p)->x || (b) >= (p)->y)
/* ToDo: this structure seems to be obsolete, remove it */
/* Record of the colour transitions met along a path; filled through the
 * path argument of follow_path(). */
typedef struct path {
int start; /* color at the beginning of the path, (0=white, 1=black) */
int *x; /* x coordinates of transitions */
int *y; /* y coordinates of transitions */
int num; /* current number of entries in x or y */
int max; /* maximum number of entries in x or y */
/* (if more values need to be stored, the arrays are enlarged) */
} path_t;
/* job_t contains all information needed for an OCR task.
 * It is grouped into four anonymous sub-structures: src (input), tmp
 * (scratch data), res (results) and cfg (configuration).
 * Note: tmp.init_time exists only when HAVE_GETTIMEOFDAY is defined, so the
 * macro must be set identically in every translation unit that uses job_t,
 * otherwise the struct layouts disagree. */
typedef struct job_s {
struct { /* source data */
char *fname; /* input filename; default value: "-" */
pix p; /* source pixel data, pixelmap 8bit gray */
} src;
struct { /* temporary stuff, e.g. buffers */
#ifdef HAVE_GETTIMEOFDAY
struct timeval init_time; /* starting time of this job */
#endif
pix ppo; /* pixmap for visual debugging output, obsolete */
/* sometimes the recognition function is called again and again if the
result was 0; n_run tells the pixel function to return alternative results */
int n_run; /* num of run, if run_2 critical pattern get other results */
/* used for 2nd try, pixel uses slower filter function etc. */
List dblist; /* list of boxes loaded from the character database */
} tmp;
struct { /* results */
List boxlist; /* store every object in a box, which contains */
/* the characteristics of the object (see struct box) */
List linelist; /* recognized text lines after recognition */
struct tlines lines; /* used to access line data (statistics) */
/* here the positions (frames) of lines are */
/* stored for further use */
int avX,avY; /* average X,Y (avX=sumX/numC) */
int sumX,sumY,numC; /* sum of all X,Y; num chars */
} res;
struct { /* configuration */
int cs; /* critical grey value (pixel<cs => black pixel) */
/* range: 0..255, 0 means autodetection */
int spc; /* spacewidth/dots (0 = autodetect); default value: 0 */
int mode; /* operation modes; default value: 0 */
/* operation mode (see --help) */
int dust_size; /* dust size; default value: 10 */
int only_numbers; /* numbers only; default value: 0 */
int verbose; /* verbose mode; default value: 0 */
/* verbose option (see --help) */
FORMAT out_format; /* output format; default value: ISO8859_1*/
char *lc; /* debuglist of chars (_ = not recognized chars) */
/* default value: "_" */
char *db_path; /* pathname for database; default value: NULL */
char *cfilter; /* char filter; default value: NULL, ex: "A-Za-z" */
/* limit of certainty where chars are accepted as identified */
int certainty; /* in units of 100 (percent); 0..100; default 95 */
char *unrec_marker; /* output this string for every unrecognized char */
} cfg;
} job_t;
/* initialize job structure */
void job_init(job_t *job);
/* free job structure */
void job_free(job_t *job);
/* Global pointer to a job; only declared here, defined elsewhere. */
/*FIXME jb: remove JOB; */
extern job_t *JOB;
/* calculate the overlap of the line (0-1) with black points
* by recursive bisection
* (possibly add error tolerance by searching pixels in the dx,dy
* neighbourhood) (switchable) ???
* MidPoint Line Algorithm (Bresenham) Foley: ComputerGraphics better?
* will be replaced by vector functions
*/
/* straight line y=dy/dx*x+b, implicit form d=F(x,y)=dy*x-dx*y+b*dx=0
* incremental: y(i+1)=m*(x(i)+1)+b, F(x+1,y+1)=f(F(x,y)) */
/* NOTE: get_line/get_line2 take their coordinates as (x0, y0, x1, y1),
* whereas get_bw, num_cross and follow_path below take (x0, x1, y0, y1). */
int get_line(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
int get_line2(int x0, int y0, int x1, int y1, pix *p, int cs, int ret);
/* look for white 0x02 or black 0x01 dots (0x03 = white+black) */
char get_bw(int x0, int x1, int y0, int y1,
pix *p, int cs,int mask);
/* look for black crossing a line x0,y0,x1,y1
* follow line and count crossings ([white]-black-transitions)
*/
int num_cross(int x0, int x1, int y0, int y1,
pix *p, int cs);
/* memory allocation with error checking */
void *xrealloc(void *ptr, size_t size);
/* follow a line x0,y0,x1,y1 recording locations of transitions,
* return count of transitions
*/
int follow_path(int x0, int x1, int y0, int y1, pix *p, int cs, path_t *path);
/* -------------------------------------------------------------
* mark edge-points
* - first move forward until b/w-edge
* - more than 2 pixel?
* - loop around
* - if forward pixel : go up, rotate right
* - if forward no pixel : rotate left
* - stop if found first 2 pixel in same order
* using the "walk along the right-hand wall" strategy
* --------------------------------------------------------------
* turmite game: inp: start-x,y, rule r_black=UP,r_white=RIght until border
* out: last-position
* while doing so, count steps, dead ends, xmax, ymax and the
* upper-right, lower-right, upper-left and lower-left corners
* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*
* is this the right place for declaration?
*/
void turmite(pix *p, int *x, int *y,
int x0, int x1, int y0, int y1, int cs, int rw, int rb);
/* test if points are connected via t-pixel (recursive!) */
int joined(pix *p, int x0, int y0, int x1, int y1, int cs);
/* move from x,y to direction r until pixel or l steps
* return number of steps
*/
int loop(pix *p, int x, int y, int l, int cs, int col, DIRECTION r);
/* number of entries in holes_t.hole */
#define MAX_HOLES 3
/* List of holes; passed to num_hole() through its holes argument. */
typedef struct list_holes {
int num; /* numbers of holes, initialize with 0 */
struct hole_s {
int size,x,y,x0,y0,x1,y1; /* size, start point, outer rectangle */
} hole[MAX_HOLES];
} holes_t;
/* look for white holes surrounded by black points
* at the moment: a white point with black in all four directions
*/
int num_hole(int x0, int x1, int y0, int y1, pix *p, int cs, holes_t *holes);
/* count black non-connected objects --- used for i, a-umlaut, o-umlaut, etc. */
int num_obj(int x0, int x1, int y0, int y1, pix *p, int cs);
int distance( pix *p1, struct box *box1, /* box-frame */
pix *p2, struct box *box2, int cs);
/* call the OCR engine ;) */
/* char whatletter(struct box *box1,int cs); */
/* declared in pixel.c */
/* getpixel() was pixel() but it may collide with netpnm pixel declaration */
int getpixel(pix *p, int x, int y);
int marked(pix *p, int x, int y);
void put(pix * p, int x, int y, int ia, int io);
/* NOTE(review): presumably runs OCR on a PNM image held in buf (size bytes)
* and returns the text; who owns the returned buffer is not visible in this
* header -- confirm in the implementation. */
char* PNMToText(char* buf, long size, char *outputformat, long graylevel, long dustsize, long spacewidthdots, long certainty);
#ifdef __cplusplus
} /* extern C */
#endif
#endif /* __GOCR_H__ */

View File

@@ -0,0 +1,90 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
*/
#ifndef GOCR_LIST_H
#define GOCR_LIST_H
/* g_debug(a) expands to its argument when DEBUG is defined and to nothing
 * otherwise. */
#ifndef DEBUG
#define g_debug(a)
#else
#define g_debug(a) a
#endif
/*
* Structures
*/
/* One node of the doubly linked list; data is the caller's payload. */
struct element {
struct element *next, *previous;
void *data;
};
typedef struct element Element;
/* List head. start and stop are sentinel nodes embedded in the list itself:
 * the macros below treat start.next as the first element, stop.previous as
 * the last one, and start.next == &stop as "empty". */
struct list {
Element start; /* simplifies for(each_element) { ... */
Element stop; /* ... list_del() ... } v0.41 */
Element **current; /* for(each_element) */
int n; /* number of elements */
int level; /* level of nested fors */
};
typedef struct list List;
/*
* Functions
* NOTE(review): the semantics and return codes are defined in list.c, which
* is not visible here; the only fact used in this header is that
* list_higher_level() returns 0 when for_each_data may iterate.
*/
void list_init ( List *l );
int list_app ( List *l, void *data );
int list_ins ( List *l, void *data_after, void *data);
Element*list_element_from_data ( List *l, void *data );
int list_del ( List *l, void *data );
void list_free ( List *l );
int list_and_data_free ( List *l, void (*free_data)(void *data));
int list_higher_level ( List *l );
void list_lower_level ( List *l );
void * list_next ( List *l, void *data );
void * list_prev ( List *l, void *data );
void list_sort ( List *l, int (*compare)(const void *, const void *) );
/* 1 if the start sentinel links directly to the stop sentinel, else 0 */
#define list_empty(l) ((l)->start.next == &(l)->stop)
/* payload of the first / last element (no check for an empty list) */
#define list_get_header(l) ((l)->start.next->data)
#define list_get_tail(l) ((l)->stop.previous->data)
/* payload of the element the current nesting level points at */
#define list_get_current(l) ((l)->current[(l)->level]->data)
/* payload of the neighbour of the current element, NULL if the link is NULL */
#define list_get_cur_prev(l) ((l)->current[(l)->level]->previous != NULL ? \
(l)->current[(l)->level]->previous->data : NULL )
#define list_get_cur_next(l) ((l)->current[(l)->level]->next != NULL ? \
(l)->current[(l)->level]->next->data : NULL )
/* number of elements stored in the list */
#define list_total(l) ((l)->n)
/*
 * Iteration macros; they must always be used as a pair:
 *
 *   for_each_data(l) { ... list_get_current(l) ... } end_for_each(l);
 *
 * for_each_data opens an `if` block and a `for` block and leaves both
 * unclosed; end_for_each closes the `for`, calls list_lower_level() and
 * closes the `if`. The loop body runs only when list_higher_level(l)
 * returns 0. The loop advances current[level] along ->next until it is
 * NULL or reaches the stop sentinel.
 * NOTE(review): where current[level] is initialised is not visible here --
 * presumably in list_higher_level(); confirm in list.c.
 */
#define for_each_data(l) \
if (list_higher_level(l) == 0) { \
for ( ; (l)->current[(l)->level] \
&& (l)->current[(l)->level]!=&(l)->stop; (l)->current[(l)->level] = \
(l)->current[(l)->level]->next ) {
#define end_for_each(l) \
} \
list_lower_level(l); \
}
#endif

View File

@@ -0,0 +1,63 @@
#ifndef _OCR0_H
#define _OCR0_H
#include "pgm2asc.h"
/* ----------------------------------------------------------------
- functions with thousands of lines make the compilation very slow,
therefore the ocr0 function is split into subfunctions
- shared data often used in the ocr0 subroutines is stored
in the ocr0_shared structure.
* ------------------------------------------------------------ */
typedef struct ocr0_shared { /* shared variables and properties */
struct box *box1; /* box in whole image */
pix *bp; /* temporarily extracted box, cleaned */
int cs; /* global threshold value (gray level) */
/* ToDo: or MACROS: X0 = box1->x0 */
int x0, x1, y0, y1; /* box coordinates related to box1 */
int dx, dy; /* size of box */
int hchar, gchar; /* relation to m1..m4 */
int aa[4][4]; /* corner points, see xX (x,y,dist^2,vector_idx) v0.41 */
holes_t holes; /* list of holes (max MAX_HOLES) */
} ocr0_shared_t;
/* tests for umlaut */
int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier);
/* detect chars */
wchar_t ocr0(struct box *box1, pix *b, int cs);
/* detect numbers; takes the shared ocr0 data instead of a box */
wchar_t ocr0n(ocr0_shared_t *sdata);
/* square of x; being static, each file including this header gets its own copy */
static int sq(int x) {
  const int value = x;
  return value * value;
}
/*
* go from vector j1 to vector j2 and measure maximum deviation of
* the steps from the line connecting j1 and j2
* return the squared maximum distance
* in units of the box size times 1024
*/
int line_deviation( struct box *box1, int j1, int j2 );
/*
* search vectors between j1 and j2 for nearest point a to point r
* example:
*
* r-> $$...$$ $ - mark vectors
* @@$..@@ @ - black pixels
* @@$..@@ . - white pixels
* @@@@.$@
* a-> @@$@$@@
* @$.@@@@
* @@..$@@
* @@..$@@
* j1 --> $$...$$ <-- j2
*
* ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} instead of rx,ry?
* j1 and j2 must be in the same frame
* return aa?
* NOTE(review): presumably returns the index of the nearest vector --
* confirm in the implementation.
*/
int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry);
#endif

View File

@@ -0,0 +1,3 @@
/* #include "pgm2asc.h" */
#include "pnm.h"
/* wchar_t ocr1(struct box *box1, pix *b, int cs); */

View File

@@ -0,0 +1,23 @@
/*
see README for EMAIL-address
*/
/* Include guard, named like the GOCR_*_H guards of the other gocr headers. */
#ifndef GOCR_OTSU_H
#define GOCR_OTSU_H 1

/*======================================================================*/
/* OTSU global thresholding routine */
/* takes a 2D unsigned char array pointer, number of rows, and */
/* number of cols in the array. returns the value of the threshold */
/* NOTE(review): x0,y0,dx,dy presumably select the region to analyse */
/* and vvv presumably is a verbosity level -- confirm in otsu.c */
/*======================================================================*/
int
otsu (unsigned char *image, int rows, int cols, int x0, int y0, int dx, int dy, int vvv);

/*======================================================================*/
/* thresholding the image (set threshold to 128+32=160=0xA0) */
/* now we have a fixed thresholdValue good to recognize on gray image */
/* - so lower bits can be used for other things (bad design?) */
/*======================================================================*/
int
thresholding (unsigned char *image, int rows, int cols, int x0, int y0, int dx, int dy, int thresholdValue);

#endif /* GOCR_OTSU_H */

View File

@@ -0,0 +1,37 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address */
#ifndef OUTPUT_H
#define OUTPUT_H
#include <stdlib.h>
#include <stdio.h>
#include "pnm.h"
#include "gocr.h"
#include "list.h"
/* Output and debugging helpers.
 * NOTE(review): their behaviour is defined in output.c, which is not visible
 * here; only the signatures are documented. */
/* px: box, b: pixmap, x0,y0,dx,dy: rectangle, cs: threshold (gray level) */
void out_b(struct box *px, pix *b, int x0, int y0, int dx, int dy, int cs );
void out_x(struct box *px);
void out_x2(struct box *box1,struct box *box2);
int output_list(job_t *job);
int debug_img(char *fname, struct job_s *job, int opt);
#endif

View File

@@ -0,0 +1,9 @@
/* Include guard, named like the GOCR_*_H guards of the other gocr headers. */
#ifndef GOCR_PCX_H
#define GOCR_PCX_H 1
#include "pnm.h"
/* read the file `name` into pixmap p
 * NOTE(review): vvv presumably is a verbosity level -- confirm in pcx.c */
void readpcx(char *name,pix *p,int vvv);
/* write 8bit palette no RLE, ToDo: obsolete? (p is passed by value) */
void writebmp(char *name,pix p,int vvv);
/* ------------------------------------------------------------------------ */
#endif /* GOCR_PCX_H */

View File

@@ -0,0 +1,110 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2006 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
*/
#ifndef PGM2ASC_H
#define PGM2ASC_H 1
#include "pnm.h"
#include "output.h"
#include "list.h"
#include "unicode.h"
/* Row-major pixel access: element (xx,yy) of buffer p with row length x.
 * pixel_at takes the pixmap itself, pixel_atp a pointer to it. Both expand
 * to an lvalue and do no bounds checking. */
#define pixel_at(pic, xx, yy) (pic).p[((yy)*((pic).x))+(xx)]
#define pixel_atp(pic, xx, yy) (pic)->p[((yy)*((pic)->x))+(xx)]
/* Wide-character functions declared here when HAVE_WCHAR_H is not defined. */
#ifndef HAVE_WCHAR_H
wchar_t *wcschr (const wchar_t *wcs, wchar_t wc);
wchar_t *wcscpy (wchar_t *dest, const wchar_t *src);
size_t wcslen (const wchar_t *s);
#endif
/* wcsdup is declared here unless HAVE_WCSDUP is defined */
#ifndef HAVE_WCSDUP
wchar_t * wcsdup (const wchar_t *WS); /* it's a GNU extension */
#endif
/* declared in pgm2asc.c */
/* set alternate chars and their weight, called from the engine
if a char is recognized to (weight) percent */
int setas(struct box *b, char *as, int weight); /* string + xml */
int setac(struct box *b, wchar_t ac, int weight); /* wchar */
/* for qsort() call */
int intcompare (const void *vr, const void *vs);
/* declared in box.c */
int box_gt(struct box *box1, struct box *box2);
int reset_box_ac(struct box *box); /* reset and free char table */
struct box *malloc_box( struct box *inibox ); /* alloc memory for a box */
int free_box( struct box *box ); /* free memory of a box */
int copybox( pix *p, int x0, int y0, int dx, int dy, pix *b, int len);
int reduce_vectors ( struct box *box1, int mode );
int merge_boxes( struct box *box1, struct box *box2 );
int cut_box( struct box *box1);
/* declared in database.c */
int load_db(void);
wchar_t ocr_db(struct box *box1);
/* declared in detect.c */
int detect_lines1(pix * p, int x0, int y0, int dx, int dy);
int detect_lines2(pix *p,int x0,int y0,int dx,int dy,int r);
int detect_rotation_angle(job_t *job);
int detect_text_lines(pix * pp, int mo);
int adjust_text_lines(pix * pp, int mo);
int detect_pictures(job_t *job);
/* declared in lines.c */
void store_boxtree_lines( int mo );
/* free memory for internally stored text lines.
* Needs to be called _after_ having retrieved the text.
* After freeing, no call to getTextLine is possible any
* more
*/
void free_textlines( void );
/* get result of ocr for a given line number.
* If the line is out of range, the function returns 0,
* otherwise a pointer to a complete line.
*/
const char *getTextLine( int );
/* append a string (s1) to the string buffer (buffer) of length (len)
* if buffer is too small or len==0 realloc buffer, len+=512
*/
char *append_to_line(char *buffer, const char *s1, int *len);
/* declared in remove.c */
int remove_dust( job_t *job );
int remove_pictures( job_t *job);
int remove_melted_serifs( pix *pp );
/* (void) makes this a real prototype: in C an empty parameter list leaves
 * the arguments unspecified, so calls would not be checked. */
int remove_rest_of_dust(void);
int smooth_borders( job_t *job );
/* declared in pixel.c */
int marked(pix * p, int x, int y);
/* NOTE(review): gocr.h declares getpixel() and says it replaced pixel();
 * this declaration may be stale -- confirm that pixel.c still defines
 * pixel(). */
int pixel(pix *p, int x, int y);
void put(pix * p, int x, int y, int ia, int io);
/* start ocr on an image in job.src.p */
int pgm2asc(job_t *job);
#endif

View File

@@ -0,0 +1,37 @@
/* Handle PNM-files Dez98 JS
* 0,0 = left up
* PAM-formats
* PAM any P7
* PNM-formats
* PGM gray ASCII=P2 RAW=P5 dx dy col gray
* PPM RGB ASCII=P3 RAW=P6 dx dy col RGB
* PBM B/W ASCII=P1 RAW=P4 dx dy bitmap
*/
#ifndef GOCR_PNM_H
#define GOCR_PNM_H 1
#include "config.h"
/* Image buffer descriptor, used throughout gocr under the typedef name pix. */
struct pixmap {
unsigned char *p; /* pointer of image buffer (pixmap) */
int x; /* xsize */
int y; /* ysize */
int bpp; /* bytes per pixel: 1=gray 3=rgb */
};
typedef struct pixmap pix;
/* return 1 on multiple images (holding file open), 0 else */
int readpgm(char *name, pix *p, int vvv);
/* return 1 on multiple images (holding file open), 0 else */
/* NOTE(review): the line above is identical to the readpgm() comment and
 * mentions a file although this variant takes a memory buffer of `size`
 * bytes -- confirm the actual return values in pnm.c. */
int readpgmFromBuffer(char* buffer, long size, pix *p);
/* write pgm-map to pnm-file */
int writepgm(char *nam, pix *p);
int writepbm(char *nam, pix *p);
int writeppm(char *nam, pix *p); /* use lowest 3 bits for color coding */
/* ----- count colors ------ create histogram ------- */
void makehisto(pix p, unsigned col[256], int vvv);
#endif

View File

@@ -0,0 +1,42 @@
/*
---------------------- progress output ----------------------
output progress for GUIs to a pipe
format: "counter_name" counter maxcounter time estimated_time \r|\n
*/
#ifndef GOCR_PROGRESS_H
#define GOCR_PROGRESS_H "Oct06"
#include <time.h>
/* initialization of progress output, fname="<fileID>","<filename>","-" */
int ini_progress(char *fname);
/* ToDo: add by open_* and close_* */
/* place to store values for progress calculation; it is used often, so the
* system time should not be queried on every call
*/
typedef struct progress_counter {
const char *name; /* name of counter */
int lastprintcount; /* last counter printed for extrapolation */
int maxcount; /* max counter */
int numskip; /* num of counts to skip before timecall 0..maxcount */
time_t starttime; /* start time of this counter */
time_t lastprinttime; /* last time printed in seconds */
} progress_counter_t;
/* progress output p1=main_progress_0..100% p2=sub_progress_0..100% */
/* ToDo: improved_progress: counter, maxcount(ini), counter_name(ini),
* printinterval=10 # time before printing out progressmeter
* *numskip=1 # if (counter-lastprintcounter<numskip) return; gettime() ...
* *startutime, *lastprintutime, *lastprintcounter # numskip*=2 or /=2
* only 1output/10s, + estimated endtime (test on pixelfields)
* to stderr by default? remove subprogress, ini_progress? rm_progress?
* test on tcl
*/
/* create a counter named `name` that is expected to run up to maxcount;
 * the returned pointer is handed to progress() and close_progress() */
progress_counter_t *open_progress(int maxcount, const char *name);
/* free counter */
int close_progress(progress_counter_t *counter);
/* output progress for pc; `counter` is the current count
 * (presumably 0..maxcount of that counter -- confirm in the implementation) */
int progress(int counter, progress_counter_t *pc);
/* --------------------- end of progress output ---------------------- */
#endif

View File

@@ -0,0 +1,6 @@
#include "pnm.h"
// read the TGA file `name` into *p (pix is declared in pnm.h)
void readtga(char *name,pix *p,int mode); // mode: 0=gray 1=RGB
// ------------------------------------------------------------------------

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,36 @@
/* include/config.h.in. Generated automatically from configure.in by autoheader. */
/* NOTE(review): in this copy only HAVE_WCSDUP and HAVE_WCHAR_H are defined;
 * every other feature macro below is explicitly #undef'ed. */
/* Define to empty if the keyword does not work. */
#undef const
/* Define if the setvbuf function takes the buffering type as its second
argument and the buffer pointer as the third, as on System V
before release 3. */
#undef SETVBUF_REVERSED
/* Define if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define if you have the gettimeofday function. */
#undef HAVE_GETTIMEOFDAY
/* Define if you have the popen function. */
#undef HAVE_POPEN
/* Define if you have the wcschr function. */
#undef HAVE_WCSCHR
/* Define if you have the wcsdup function. */
#define HAVE_WCSDUP
/* Define if you have the <pam.h> header file. */
#undef HAVE_PAM_H
/* Define if you have the <pnm.h> header file. */
#undef HAVE_PNM_H
/* Define if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define if you have the <wchar.h> header file. */
#define HAVE_WCHAR_H

View File

@@ -0,0 +1,2 @@
/* version identification as string literals;
 * release_string looks like a YYYYMMDD date stamp */
#define version_string "0.48"
#define release_string "20090802"

View File

@@ -0,0 +1,846 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for email address
ToDo:
- transform special xml bar code symbols (<>&) to xml symbols (&lt;&gt;&amp;)
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* #include <math.h> -- we do not want unnecessary dependencies */
#include "pgm2asc.h"
#include "gocr.h"
#include "pnm.h"
/* Compile-time switch for the diagnostic output of this file. */
#ifndef DO_DEBUG /* can be defined outside (configure --with-debug) */
#define DO_DEBUG 0 /* 0 is the default */
#endif
/* g_debug(a): run the statement(s) `a` only in debug builds and only when
 * bit 0 of JOB->cfg.verbose is set.
 * Usage convention in this file: `a` carries its own terminating ';' and
 * the macro call itself is NOT followed by a ';', e.g.
 *     g_debug(fprintf(stderr,"...");)
 * With DO_DEBUG set the macro expands to a bare `if (...) { ... }`, so do
 * not use it as the unbraced body of an if that has an else branch.
 */
#undef g_debug
#if DO_DEBUG
# define g_debug(a) if (JOB->cfg.verbose&1) { a }
#else
# define g_debug(a)
#endif
/*
detect barcode and add a XML-string to the box (obj-pointer, ToDo)
ToDo: barcode-output stderr->stdout
*/
/* square of x, used for the least-squares matching of bar widths */
double sqr(double x)
{
  const double squared = x * x;
  return squared;
}
/* ----------------------------- code128 ---------------------------- *
* "BSBSBS", B=Bar, S=Space, better using 2*6=12bit-integer? */
/* Width patterns of the Code 128 symbols, indexed by symbol value.
 * Each string holds the widths (in modules, as digits) of the alternating
 * bars and spaces of one symbol, starting with a bar.  Entries 0..105 have
 * six elements; entry 106 (stop) has seven, of which decode_code128 only
 * compares the first six.  Entry Num128 ("???") is a placeholder for
 * "no match" and is never used as a match candidate.
 */
#define Num128 107
const char *code128[Num128+1]={ /* can be generated by an algorithm? */
/* 00 */"212222","222122","222221","121223","121322","131222","122213","122312",
/* 08 */"132212","221213","221312","231212","112232","122132","122231","113222",
/* 16 */"123122","123221","223211","221132","221231","213212","223112","312131",
/* 24 */"311222","321122","321221","312212","322112","322211","212123","212321",
/* 32 */"232121","111323","131123","131321","112313","132113","132311","211313",
/* 40 */"231113","231311","112133","112331","132131","113123","113321","133121",
/* 48 */"313121","211331","231131","213113","213311","213131","311123","311321",
/* 56 */"331121","312113","312311","332111","314111","221411","431111","111224",
/* 64 */"111422","121124","121421","141122","141221","112214","112412","122114",
/* 72 */"122411","142112","142211","241211","221114","413111","241112","134111",
/* 80 */"111242","121142","121241","114212","124112","124211","411212","421112",
/* 88 */"421211","212141","214121","412121","111143","111341","131141","114113",
/* 96 */"114311","411113","411311","113141","114131","311141","411131","211412",
/*104 */"211214","211232","2331112","???"};
/*
code128: see code128.tex by Petr Olsak (108 codes)
quiet_zone: size=10 (before and after code128)
num_bars=3*(start+chars[N]+crc+stop)+1
B=bar S=space char=BSBSBS (size=11), stop=BSBSBSB (size=11+2)
Width: Bar,Space=1,[2,3,4] char=11 code=11*(N+3)+2 sumB=even,sumS=odd
startA="211412"=103 startB="211214"=104 startC="211232"=105(2dec_digits)
mode/code 0..95 96 97 98 99 100 101 102 103 104 105 106
1=A x20-x5f,0-x20 F3 F2 uB mC mB F4 F1 ^A ^B ^C $
2=B x20-x7f F3 F2 uA mC F4 mA F1 ^A ^B ^C $
3=C "00"-"95" "96" "97" "98" "99" mB mA F1 ^A ^B ^C $
uA,uB: switch mode for next char, mA,mB: switch mode permanently
crc=(start+1*char1+2*char2+3*char3+...+N*charN) mod 103
$=stop="2331112"=106 (4bars, width=13) => start/end==211
return num of chars or string
size B+W even-variants odd-variants num_codes
11 = 8+3 = (1+3+4,2+2+4,2+3+3)+(1+1+1) => (6+3+3)*(1) = 12
= 6+5 = (1+1+4,1+2+3,2+2+2)+(1+1+3,1+2+2) => (3+6+1)*(3+3) = 60
= 4+7 = (1+1+2)+(1+2+4,1+3+3,2+2+3) => (3)*(6+3+3) = 36
sum = 108
*/
/* example: barcode -E -e 128c -b 1434600120000884 >a.eps */
/* example: barcode -E -e 128b -b 14Test41 >a.eps */
/* example: barcode -E -e 128raw -b 105 17 14 30 >a.eps */
/*
 * Decode a Code 128 symbol from a run of measured widths.
 *
 *   wb        widths in scan order: wb[0] is the first bar, then space,
 *             bar, ... (2*num_bars-1 entries are read)
 *   num_bars  number of bars; must be 3*symbols+1 and at least 10
 *
 * Each group of six widths is matched against code128[] by least squared
 * error after scaling with the estimated module width dw.  The first
 * symbol selects the code set (1=A, 2=B, 3=C), the symbol in front of the
 * stop symbol is the check symbol, everything in between is data.
 *
 * Returns a malloc'ed "<barcode type="128" ... />" string which the caller
 * has to free, or NULL if the widths do not look like Code 128 or if no
 * memory is available.  The crc attribute is 0 if the weighted mod-103
 * checksum matches the check symbol.
 *
 * Fixed compared to the previous revision:
 *  - CODE A / CODE B switches were tested with swapped values inside sets
 *    A and B (100 is "switch to B" in set A, 101 is "switch to A" in
 *    set B, see the table above), so they never took effect
 *  - SHIFT (98) was cleared again at the end of the very iteration that
 *    set it, so the following symbol was never decoded in the other set
 *  - values 100..102 are not digit pairs in set C and are no longer
 *    emitted as such
 *  - function codes (values >= 96 in sets A and B) are never emitted as
 *    raw bytes, independent of the signedness of char
 *  - buf is terminated before it is printed in the diagnostic branch
 */
char *decode_code128(int *wb, int num_bars){
  int i, w, i1, i2, i3=0, i4, i5=0, crc, mode=1;
  int cc;                 /* character to emit for this symbol, -1 = none */
  double dww, dw, err, min_err; char *buf;
  char *result=NULL; /* malloc and store the result */

  for(w=i=0;i<2*num_bars-1;i++) w+=wb[i]; /* sum of all bars and spaces */
  /* test code128 characteristics, ToDo: look for correct start/stop 211 seq. */
  if ((num_bars-1)%3!=0 || num_bars<10 || w<11*(num_bars-1)/3+2)
    return 0;
  g_debug(fprintf(stderr," code128 b%d s%d b%d\n",wb[0],wb[1],wb[2]);)
  /* all three start symbols begin with the widths 2,1,1 */
  if (3*wb[0]<4* wb[1]
   || 3*wb[0]<4* wb[2]
   || 4*wb[0]<3*(wb[1]+wb[2])
   || 3*wb[0]>4*(wb[1]+wb[2])) return 0; /* 211 */
  dw=3.0*w/((num_bars-1)*11+6); /* estimated width of one module */
  /* get enough memory for all digits in longest mode C */
  buf =(char *) malloc( (num_bars-7)/3*2+1); if (!buf) return result;
  result=(char *) malloc(256+(num_bars-7)/3*2+1);
  dww=crc=0;
  for(i4=i1=0;i1<(num_bars-1)/3;i1++) {
    for(min_err=1e8,i3=Num128,i5=0;i5<Num128;i5++){ /* get best fit */
      for(err=i2=0;i2<6;i2++) err+=sqr(code128[i5][i2]-'0'-wb[i1*6+i2]/dw);
      if (err<min_err) { min_err=err; i3=i5; }
    } dww+=min_err;
    g_debug(fprintf(stderr,"\n %7s %3d err=%.3f ",code128[i3],i3,min_err);)
    if(i3<Num128){ /* valid symbol */
      if(i1==0){ if (i3>102 && i3<106) mode=i3-103+1; crc=i3; } /* start */
      if(i1>0 && i1<(num_bars-1)/3-2){ /* data symbol */
        crc+=i3*(i1); /* first * 1 + second * 2 + third * 3 ... */
        cc=-1;
        /* bits 2..3 of mode hold a one-symbol shift set by the previous
         * symbol; use it for this symbol only, then fall back */
        i5=((mode>3)?mode>>2:mode&3); mode&=3; /* mode can be modified now */
        switch (i5) { /* mode=1..3=modeA..modeC */
          case 1: /* modeA: x20-x5f,0-x1f */
            if (i3<64) cc=i3+32;
            else if (i3<96) cc=i3-64;
            if (i3==100) mode=2;     /* switch to mode B */
            if (i3== 99) mode=3;     /* switch to mode C */
            if (i3== 98) mode|=2<<2; /* shift to mode B */
            break;
          case 2: /* modeB: x20-x7f */
            if (i3<96) cc=i3+32;
            if (i3==101) mode=1;     /* switch to mode A */
            if (i3== 99) mode=3;     /* switch to mode C */
            if (i3== 98) mode|=1<<2; /* shift to mode A */
            break;
          case 3: /* modeC: two digits per symbol */
            if (i3<100) { buf[i4]='0'+i3/10; i4++;
                          buf[i4]='0'+i3%10; i4++; }
            if (i3==101) mode=1;     /* switch to mode A */
            if (i3==100) mode=2;     /* switch to mode B */
            break;
        }
        if (cc>=0x20) { buf[i4]=(char)cc; i4++; }   /* modeA+B: one char */
        else if (cc>=0) { buf[i4]='^'; i4++;        /* control char as ^X */
                          buf[i4]=(char)(cc+'@'); i4++; }
      }
      if(i1==(num_bars-1)/3-2){ crc=(crc+103-i3)%103; } /* check symbol */
      /* NOTE(review): the outcome of the stop test is not used afterwards */
      if(i1==(num_bars-1)/3-1){ if(i3!=106) i3=-1; } /* stop code */
    }
    else {
      buf[i4]=0; /* terminate what was decoded so far before printing it */
      fprintf(stderr," %s=%02d? ",buf,i5);
    }
  }
  buf[i4]=0; /* end of string */
  if (result)
    sprintf(result,"<barcode type=\"128\" chars=\"%d\" code=\"%s\" "
      "crc=\"%d\" error=\"%.3f\" />",
      i4,buf,crc,dww/((num_bars-1)));
  free(buf);
  return result;
}
/* -------------------------------------------------------------------- UPC
EAN 13 (UPC,(1+6+1+6+1)*2bars,size=3+6*7+5+6*7+3=95)
EAN 8 (UPC,(1+4+1+4+1)*2bars,size=3+4*7+5+4*7+3=67)
UPC: (10 codes)
BSB SBSB^n SBSBS BSBS^n BSB
bsb ...... sbsbs ...... bsb
111 ...... 11111 ...... 111
num_bars=2*(2*6+3) middle=SBSBS=11111 right/left=BSB=111="101"
char: left=SBSB right=BSBS (size=7) only_dec_digits
SS+BB = (S+S) + (B+B) => BB:SS = 5:2 or 3:4
size ev+odd even + odd => variants
7 = 2 + 5 = (1+1) + (1+4,2+3) => (1)*(2+2) = 4 codes
= 4 + 3 = (1+3,2+2) + (1+3) => (2+1)*(2) = 6 codes += 10 codes
ToDo: make it more robust
- return error as mean deviation
* -------------------------------------------------------------------- */
/* example: barcode -E -e upc -b 12345678901 >a.eps # ok */
/* example: barcode -E -e ean -b 123456789012 >a.eps # ok */
/* Width patterns of the UPC/EAN digits: four element widths in modules
 * (7 modules per digit).  Entries 0..9 are the normal patterns, entries
 * 10..19 the mirrored ones; the decoders derive the digit as index%10 and
 * the mirror flag as index/10.  Entry NumUPC ("????") is a placeholder
 * which is never used as a match candidate.
 */
#define NumUPC 20
const char *codeUPC[NumUPC+1]={ /* 0..9, first n = SBSB, last n = BSBS */
"3211","2221","2122","1411","1132", /* 0,1,2,3,4 normal (+0bit) */
"1231","1114","1312","1213","3112", /* 5,6,7,8,9 */
"1123","1222","2212","1141","2311", /* 0,1,2,3,4 mirrored (+1bit) */
"1321","4111","2131","3121","2113", /* 5,6,7,8,9 */
"????"}; /* not found */
/*
 * Try to read the widths in wb[] (bar, space, bar, ...) as UPC/EAN.
 * Returns a malloc'ed "<barcode type="UPC" ... />" string (to be freed by
 * the caller) or NULL if the layout does not fit or memory is short.
 */
char *decode_UPC(int *wb, int num_bars){ /* ToDo: char *dest, int len */
  /* parity[d] = mirror mask of the six left digits which stands for the
   * additional leading EAN digit d (index 0 is unused) */
  static const int parity[10]={0,11,13,14,19,25,28,21,22,26};
  int k, total, pos, cand, el, ndigits, crc, mirrored, ean, lead;
  double err, min_err, dw, dww=0.0; char digit;
  char *result=NULL, *buf=NULL; /* malloc and store the result */

  total=0;
  for (k=0;k<2*num_bars-1;k++) total+=wb[k];
  dw=2.0*total/((num_bars-6)*7+2*11); /* or min(wb[]) */
  crc=0;
  if ((num_bars)%2!=0) return 0;
  if (num_bars<10) return 0;
  if (total<7*(num_bars-6)/2+11) return 0;
  if (((num_bars-6)/2)%2!=0) return 0; /* should be balanced */
  /* the guard patterns consist of single-module elements only */
  for (k=0;k<3;k++) { /* front BSB */
    dww=sqr(wb[k]/dw-1);
    if (dww>0.4) return 0;
  }
  for (k=0;k<5;k++) { /* middle SBSBS */
    dww=sqr(wb[k+ num_bars-3]/dw-1);
    if (dww>0.4) return 0;
  }
  for (k=0;k<3;k++) { /* end BSB */
    dww=sqr(wb[k+2*num_bars-4]/dw-1);
    if (dww>0.4) return 0;
  }
  buf=(char *)malloc((num_bars-6)/2+1);
  if (!buf) return result;
  result=(char *)malloc(256+(num_bars-6)/2+1);
  ean=0; ndigits=0;
  for (pos=3;pos<2*num_bars-4;pos+=4) { /* each digit (2bars+2spaces) */
    if (pos==num_bars-3) { pos++; continue; } /* skip middle sync SBSBS */
    mirrored=0; digit='?'; min_err=16e8;
    for (cand=0;cand<NumUPC;cand++) { /* least squares fit */
      err=0;
      for (el=0;el<4;el++) err+=sqr(codeUPC[cand][el]-'0'-wb[pos+el]/dw);
      if (err<min_err) {
        min_err=err;
        digit='0'+cand%10;
        mirrored=cand/10;
      }
    }
    dww+=min_err;
    crc+=(digit-'0')*((ndigits&1)?1:3); /* even*3+odd, last char is even */
    buf[ndigits++]=digit;
    if (ndigits<7) ean=(ean<<1)|mirrored; /* collect the left 6 mirror bits */
    /* ToDo: error as deviation wb from ideal */
    g_debug(fprintf(stderr,"\nDBG: UPC digit=%c mirrored=%d err=%.3f err_m=%.3f ",
      digit,mirrored,min_err/4,dww/(ndigits*4));)
  }
  /* EAN has a 13th leading digit built by 3 of 6 mirrored digits */
  if (ean & 0x20) ean^=0x3f; /* UPC-E mirrored 1xxxxx => 0yyyyy */
  lead=0; /* no or invalid EAN digit or UPC-extension */
  for (k=1;k<10;k++) if (parity[k]==ean) lead=k;
  ean=lead;
  crc+=ean*1;
  /* ToDo: fix possible buffer OVL, complement crc */
  buf[ndigits]=0;
  if (result)
    sprintf(result,"<barcode type=\"UPC\" chars=\"%d\" code=\"%d%s\" "
      "crc=\"%d\" error=\"%.3f\" />",
      ndigits+1,ean,buf,(10-crc%10)%10,dww/((num_bars-6)*2));
  free(buf);
  return result;
}
/* EAN/UPC add-on is either 2 or 5 digits. It always starts with a
* guard bar BSB, followed by ([digit + SB] * (N-1)) + digit. Digit is
* SBSB. Two digit add-on's have 7 bars, and 5 digit add ons have 16.
*/
/*
 * Try to read the widths in wb[] as a 2 or 5 digit EAN/UPC add-on
 * (7 or 16 bars).  Returns a malloc'ed "<barcode type="UPC_addon" ... />"
 * string (to be freed by the caller) or NULL.
 */
char *decode_UPC_addon(int *wb, int num_bars){ /* ToDo: char *dest, int len */
  int k, sep, total, pos, cand, el, nout;
  const int digits=num_bars/3;
  double err, min_err, dw, dww=0.0; char digit;
  char *result=NULL, *buf=NULL; /* malloc and store the result */

  if (num_bars!=7 && num_bars!=16)
    return 0;
  total=0;
  for (k=0;k<2*num_bars-1;k++) total+=wb[k];
  dw=1.0*total/(digits*7+4 + (digits-1)*2);
  /* front guard: two single-width elements followed by a double one */
  for (k=0;k<2;k++) {
    dww=sqr(wb[k]/dw-1);
    if (dww>0.4) return 0;
  }
  dww=sqr(wb[2]*0.5/dw-1);
  if (dww>0.4) return 0;
  /* delineators SB between the digits */
  for (sep=1;sep<digits;sep++) {
    for (k=0;k<2;k++) {
      dww=sqr(wb[sep*6 + 1 + k]/dw-1);
      if (dww>0.4) return 0;
    }
  }
  buf=(char *)malloc(digits+1);
  if (!buf) return result;
  result=(char *)malloc(256+digits+1);
  nout=0;
  for (pos=3;pos<2*num_bars-1;pos+=6) { /* each digit (2bars+2spaces) */
    digit='?'; min_err=16e8;
    for (cand=0;cand<NumUPC;cand++) { /* least squares fit */
      err=0;
      for (el=0;el<4;el++) err+=sqr(codeUPC[cand][el]-'0'-wb[pos+el]/dw);
      if (err<min_err) { min_err=err; digit='0'+cand%10; }
    }
    dww+=min_err;
    buf[nout++]=digit;
    /* ToDo: error as deviation wb from ideal */
    g_debug(fprintf(stderr,"\nDBG: UPC digit=%c err=%.3f err_m=%.3f ",
      digit, min_err/4, dww/(nout*4));)
  }
  buf[nout]=0;
  if (result)
    sprintf(result, "<barcode type=\"UPC_addon\" chars=\"%d\" code=\"%s\" "
      "error=\"%.3f\" />",
      nout, buf, dww/((num_bars-6)*2));
  free(buf);
  return result;
}
/* --------------------------------------------------------- *
* code 3 of 9, 3 thick of 9 bars
* BSBSBSBSB<S> size=7+3*aw aw=2(3), sumS/sumB=2/1?
* two widths: size=1 or size=2or3, digit_width=13(16)
* 5 bars and 4(+1) spaces per digit, last space is not used
* with 2 (or 0) wide bars, 1 (or 3) wide spaces per digit
* => 3 of 9 => max=9*8*7=504
* evenBB=(0of5)+(2of5) oddSS=(1of4)+(3of4) max=44
* ToDo: better code -...-.-.. as 046 or 083 (even,even,odd)
*/
/* Code 39 symbol table, stored as one string of 10-character records:
 * record[0] is the decoded character, record[1..5] describe the five bars
 * and record[6..9] the four spaces ('-' = wide, '.' = narrow).
 * decode_39 maps element e of a symbol (0..8, bars at even positions) to
 * record[1+(e%2)*5+e/2].  There are Num39 symbol records followed by one
 * '?' record which is picked up when no symbol matches.
 */
#define Num39 (40+4) /* (3of9)=(2of5)(1of4)+(0of5)(3of4), (2of5)(.-..)=0..9 */
const char *code39= /* rearranged to BBBBBSSSS<S> (bars,spaces) */
"0..--..-.."
"1-...-.-..""2.-..-.-..""3--....-..""4..-.-.-..""5-.-...-.."
"6.--...-..""7...--.-..""8-..-..-..""9.-.-..-.."
"A-...-..-.""B.-..-..-.""C--.....-.""D..-.-..-.""E-.-....-."
"F.--....-.""G...--..-.""H-..-...-.""I.-.-...-.""J..--...-."
"K-...-...-""L.-..-...-""M--......-""N..-.-...-""O-.-.....-"
"P.--.....-""Q...--...-""R-..-....-""S.-.-....-""T..--....-"
"U-...--...""V.-..--...""W--...-...""X..-.--...""Y-.-..-..."
"Z.--..-...""-...---..."".-..-.-..."" .-.-.-...""*..--.-..."
/* (0of5)(3of4)=(.....)(3of4) store only 3of4? */
"$.....---.""/.....--.-""+.....-.--""%......---"
"?xxxxxxxxx";
/* example: barcode -E -e 39 -b 123abc | gs -sDEVICE=pnggray -r100 */
/* return index[] according to sorted values[], big first */
/* Fill idx[0..len-1] with the positions of value[] ordered by descending
 * value; value[] itself is left untouched and equal values keep their
 * original relative order. */
void sort(int *value, int *idx, int len){
  int pos, tmp, swapped;
  for (pos=0;pos<len;pos++) idx[pos]=pos; /* identity permutation */
  do { /* bubble sort, len...len^2 steps */
    swapped=0;
    for (pos=0;pos<len-1;pos++) {
      if (value[idx[pos]]<value[idx[pos+1]]) {
        tmp=idx[pos];
        idx[pos]=idx[pos+1];
        idx[pos+1]=tmp;
        swapped=1;
      }
    }
  } while (swapped);
}
char *decode_39(int *wb, int num_bars){ /* ToDo: char *dest, int len */
int i, w, i1, i3, i5, crc, idx[10];
double dw,dww,err; char *buf;
char *result=NULL; /* malloc and store the result */
/* check for multiple of 5 bars and minimum start+1char+stop=15 bars */
if ((num_bars)%5!=0 || num_bars<15) return 0;
for(w=i=0; i<2*num_bars-1;i++ ) w+=wb[i]; /* summ width to w */
dw=w*1.0/(16*(num_bars/5)); /* threshold = 1.5..2 */
/* whats best and most rigorosely for dw=threshold_width?
* - (1.5..2)*mean_width of every 5th space
* - (1.5..2)*summ(5bars+5spaces)/(13..16)
* - 3/4*summ(three thickest)/3
*/
dww=crc=0; /* error and checksum (not supported yet) */
#if 0 /* should we exclude any non-standard code39? */
/* check for correct start and end symbol * or NwNnWnWnN Narrow+Wide */
i=2*num_bars-2;
if (wb[ 0]>dw*2 || wb[ 1]<=dw*2 || wb[ 2]> dw*2) return 0;
if (wb[i-0]>dw*2 || wb[i-1]> dw*2 || wb[i-2]<=dw*2) return 0;
#endif
g_debug(fprintf(stderr," code39 base=%.3f chars=%2d\n ",dw,(num_bars)/5);)
buf =(char *)malloc( 1+(num_bars)/5); if (!buf) return result;
result=(char *)malloc(256+(num_bars)/5);
for(i5=i1=0;i1<2*num_bars-3;i1+=10) {
/* ToDo: looking for three widest bars/gaps, 0 or 2 bars, 1 or 3 spaces */
sort(wb+i1,idx,9);
for(err=0,i3=3;i3<9;i3++) // estimate error ??
err+=sqr(wb[i1+idx[i3]]/dw-1.0); /* narrow=1, wide=2..3 */
dww+=err;
for(i3=0;i3<Num39;i3++)
if (code39[10*i3+1+(idx[0]%2)*5+idx[0]/2]=='-'
&& code39[10*i3+1+(idx[1]%2)*5+idx[1]/2]=='-'
&& code39[10*i3+1+(idx[2]%2)*5+idx[2]/2]=='-') break;
if (i5>0 && i5<num_bars/5-2) crc+=i3;
buf[i5++]=code39[10*i3];
/* ToDo: check if wee have even number of black bars within 3 biggest */
g_debug(for(i3=0;i3<9;i3++)fprintf(stderr,"%02d ",wb[i1+i3]);
fprintf(stderr," posWide=%d,%d,%d %c err=%.3f\n ",
idx[0],idx[1],idx[2],buf[i5-1],err/6);)
{ int num_ws=0; // Jul09 ~codabar
if (idx[0]&1) num_ws++;
if (idx[1]&1) num_ws++;
if (idx[2]&1) num_ws++;
if ((num_ws&1)==0) { // num wide spaces must be 1 or 3
free (buf); free(result);
g_debug(fprintf(stderr," num wide spaces = %d, abort code39\n", num_ws);)
return 0;
}
}
}
buf[i5]=0;
if (result)
sprintf(result,"<barcode type=\"39\" chars=\"%d\" code=\"%s\" "
"crc=\"%c\" error=\"%.3f\" />",
i5,buf,code39[(crc%44)*10],dww/((num_bars/5)*6));
free(buf);
return result;
}
/* code interleaved 2 of 5 numbers-only (10 bars+spaces = 2 decimal digits)
B B B B B <= digit1 bars=1,3,5,...
S S S S S <= digit2 spaces=2,4,6,...
1122447700 <= weight digit=sum(bit*weight)%11 (except 7+4=11 means 0)
N = narrow bar, W = wide bar, n = narrow space, w = wide space
"NnNn"+interleaved+"WnN" (odd num W, even num w)
18 digits/inch
see http://www.barcode-1.net/i25code.html (Jun 2009)
minN=0.19mm=0.0075inch
sizeN>0.02inch: sizeW=2..3*sizeN (but constant)
sizeN<0.02inch: sizeW=2.2..3*sizeN
quiet zones 10*sizeN or 0.25inch
height=max(0.15*symbol_length,0.25inch)
Len = (numChars*(2*(sizeW/sizeN)+3) + 6 + (sizeW/sizeN)) * sizeN
*/
/* Interleaved 2 of 5 digit table, stored as one string of 6-character
 * records: record[0] is the digit, record[1..5] describe its five
 * elements ('-' = wide, '.' = narrow).  There are Num25 records and,
 * unlike code39/code27, no trailing "not found" record.
 */
#define Num25 10
const char *code25= /* is the code sorted randomly? */
"1-...-2.-..-3--...4..-.-5-.-..6.--..7...--8-..-.9.-.-.0..--.";
/* example: barcode -E -e i25 -b 123456 >a.eps */
/*
add i25, patch by: Chris Lee, 13 Jul 2009
ToDo: check correctness
*/
char *decode_i25(int *wb, int num_bars){ /* ToDo: char *dest, int len */
int i, w, i1, i3, i5, crc, idx[7], pos;
double dw, dww, err; char *buf;
char *result=NULL; /* malloc and store the result */
int *wb_temp;
int *wb_check;
int code_chars;
if ((num_bars)%5!=4) return 0; /* chars*5bars + 4 start/stop bars */
code_chars = ((num_bars - 4) / 5) * 2;
// dw=w*1.0/(9*(num_bars/3)); /* threshold = 1.5..2 */
wb_temp = (int *)malloc((code_chars * 5)*sizeof(int)); if (!wb_temp) { return NULL; }
wb_check = (int *)malloc( 7 *sizeof(int)); if (!wb_check) { return NULL; }
for (i=0; i<(code_chars * 5)+7; i++) {
if (i<4) { wb_check[i] = wb[i]; } /* start sequence NnNn... */
else if (i > ((code_chars*5)+3)) { /* end sequence ...WnN */
wb_check[(int)(i-(code_chars*5))] = wb[i]; }
else {
pos = i - 4;
/* reinterleave 0,5,1,6,2,7,3,8,4,9,... to 0,1,2,3,4,5,6,7,8,9,... */
// pos = (int)(10*(int)(pos/10) + 1.0*(pos%10)/2.0 + 4.5*(pos%2));
pos = 10*(pos/10) + (pos%10)/2 + 5*(pos&1);
wb_temp[pos] = wb[i];
}
}
wb = wb_temp;
/* check start / finish codes */
sort(wb_check,idx,7);
if (idx[0] != 4 /* widest bar W must be the 4th = 1st of end */
|| wb_check[idx[0]]==wb_check[idx[1]]) { /* exact 1 widest */
free(wb_temp);
free(wb_check);
g_debug(fprintf(stderr," need exact 1 widest at start of end, abort\n");)
return 0;
}
for(w=i=0; i<5*code_chars;i++ ) w+=wb[i]; /* summ width */
dw=w*1.0/(16*(num_bars/5)); /* threshold = 1.5..2 */
/* whats best and most rigorosely for dw=threshold_width?
* - (1.5..2)*mean_width of every 5th space
* - (1.5..2)*summ(5bars+5spaces)/(13..16)
* - 3/4*summ(three thickest)/3
*/
dww=crc=0; /* error and checksum (not supported yet) */
#if 0 /* should we exclude any non-standard code39? */
/* check for correct start and end symbol * or NwNnWnWnN Narrow+Wide */
i=2*num_bars-2;
if (wb[ 0]>dw*2 || wb[ 1]<=dw*2 || wb[ 2]> dw*2) return 0;
if (wb[i-0]>dw*2 || wb[i-1]> dw*2 || wb[i-2]<=dw*2) return 0;
#endif
g_debug(fprintf(stderr," code25 base=%.3f chars=%2d\n ",dw,code_chars);)
buf =malloc( code_chars); if (!buf) return result;
result=malloc(256+code_chars);
for(i5=i1=0;i1<5*code_chars;i1+=5) {
/* ToDo: looking for three widest bars/gaps */
sort(wb+i1,idx,5);
for(err=0,i3=2;i3<5;i3++)
err+=sqr(wb[i1+idx[i3]]/dw-1.0); /* narrow=1, wide=2..3 */
dww+=err;
for(i3=0;i3<Num25;i3++)
if (code25[6*i3+1+idx[0]]=='-'
&& code25[6*i3+1+idx[1]]=='-') break;
//if (i5>0 && i5<num_bars/3-2) crc+=i3;
buf[i5++]=code25[6*i3];
/* ToDo: check if we have even number of black bars within 3 biggest */
g_debug(for(i3=0;i3<5;i3++)fprintf(stderr,"%02d ",wb[i1+i3]);
fprintf(stderr," posWide=%d,%d %c err=%.3f\n ",
idx[0], idx[1], buf[i5-1], err/6);)
{
/* check that we have exact 2 widest bars, 2nd widest > 3th widest */
if (wb[i1+idx[1]]==wb[i1+idx[2]]) {
free(buf); free(result);
g_debug(fprintf(stderr," need exact 2 widest, abort\n");)
return 0;
}
}
}
buf[i5]=0;
if (result) // ToDo: fix CRC (not defined?)
sprintf(result,"<barcode type=\"i25\" chars=\"%d\" code=\"%s\" crc=\"%c\""
" error=\"%.3f\" />", i5,buf,code25[(crc%10)*10],dww/((num_bars/5)*6));
free(wb_temp);
free(wb_check);
free(buf);
return result;
}
/* code codabar, numbers only, 4 bars per char (1*wide bar, 1*wide space)
robust code (dot-matrix printer)
characters have same length (distance between 1st bar to 4th space)
??? codeproject: 6*n+2*w=12 or 5*n+3*w=14
??? suchymips.de:
variant 1: 18 different bar widths (Monarch code)
variant 2: 2 different bar widths (Codabar Matrix or ABC-Codabar)
9..11 digits/inch, N=narrow bar, W=wide bar, n=narrow space, w=wide space
see http://www.barcodeman.com/info/codabar.php (Jul 2009)
minN=0.17mm=0.0065inch, 11 digits/inch = 0.0909 = 14*minN
2of7 + extensions
extensions: 1 wide bar + 2 wide spaces (no narrow space between 2 wide)
4 start/stop sets = a/t, b/m, c/ *, d/e
- mean wide = thickest of 4 bars
- mean narrow = thinnest of 4 bars, thinnest of 3 (4) spaces or every 4th
wiki-sample: a31117013206375b (wide spaces between chars) skewed!
barcode: t1234567t n=N=1 w=W=3 c=12,14 (not const.)
*/
/* Codabar symbol table, stored as one string of 8-character records:
 * record[0] is the decoded character, record[1..7] describe the seven
 * elements in scan order bar,space,bar,space,bar,space,bar
 * ('-' = wide, '.' = narrow).  24 symbol records are followed by one
 * '?' record.
 */
const char *code27= /* 4bars+3spaces, 12+12 chars */
// 0..11: 3 nbar + 1 wbar + 2 nspace + 1 wspace
"0.....--1....--.2...-..-3--.....4..-..-."
"5-....-.6.-....-7.-..-..8.--....9-..-...-...--..$..--..."
// 12..15: 1 nbar + 3 wbar + 3 nspace + 0 wspace
":-...-.-/-.-...-.-.-.-..+..-.-.-"
// 16..23: 3 nbar + 1 wbar + 1 nspace + 2 wspace
"a..--.-.b.-.-..-c...-.--d...---.t..--.-.n.-.-..-*...-.--e...---."
// EOS
"????????";
/* example: barcode -E -e cbr -b 123456 >a.eps */
/*
 * Try to read the widths in wb[] (bar, space, bar, ...) as Codabar.
 * Every character uses 8 entries: 4 bars, 3 spaces and the gap behind it.
 * For each character the wide elements are found by looking for the
 * largest step in the sorted widths; the resulting 7-element pattern is
 * looked up in code27.
 *
 *   wb        measured widths, 2*num_bars-1 entries are read
 *   num_bars  number of bars, must be a positive multiple of 4
 *
 * Returns a malloc'ed "<barcode type="codabar" ... />" string (to be freed
 * by the caller) or NULL if any character is inconsistent.
 *
 * Fixed compared to the previous revision:
 *  - in the "3 wide bars" case the wide bars were marked using the sort
 *    order of the spaces (s_idx), which always selected bars 0,1,2, so
 *    only '.' of the characters ": / . +" could be recognised
 *  - buf was one byte too small for the terminating 0
 *  - num_bars < 4 is rejected up front (the caller never passes it)
 */
char *decode_27(int *wb, int num_bars){ /* ToDo: char *dest, int len */
  int i, i1, i2, i3, i4, i5, b_idx[4], s_idx[3], b_w[4], s_w[3],
      max_wdiff, err=0;
  char *buf, char27[8]="......";
  char *result=NULL; /* malloc and store the result */
  int code_chars;

  /* ToDo: verifications max(narrow bar) < min(wide bar) etc. */
  if ((num_bars)%4!=0 || num_bars<4) return 0; /* chars*4bars */
  code_chars = num_bars / 4;
  g_debug(fprintf(stderr," codabar chars= %d\n ", code_chars);)
  buf =(char *)malloc(code_chars+1); /* chars + terminating 0 */
  if (!buf) return result;
  result=(char *)malloc(256+code_chars);
  i5=0; // index output string
  for (i=0; i< code_chars; i++) {
    b_w[0]=wb[i*8+0]; // 1st bar
    b_w[1]=wb[i*8+2]; // 2nd bar
    b_w[2]=wb[i*8+4]; // 3th bar
    b_w[3]=wb[i*8+6]; // 4th bar
    s_w[0]=wb[i*8+1]; // 1st space
    s_w[1]=wb[i*8+3]; // 2nd space
    s_w[2]=wb[i*8+5]; // 3th space
    sort(b_w,b_idx,4); /* idx[0] points to widest bar */
    sort(s_w,s_idx,3); /* idx[0] points to widest space */
    g_debug(for(i3=0;i3<7;i3++)fprintf(stderr,"%02d ",wb[8*i+i3]);)
    if (b_w[b_idx[0]]==b_w[b_idx[3]]) { err=__LINE__; break; } // min. 1 wide + narrow
    // search max. diff between sorted widths, i1 = number of wide bars
    i2=b_w[b_idx[0]]-b_w[b_idx[1]]; i1=1; max_wdiff=i2; // diff widest - 2nd widest
    i2=b_w[b_idx[1]]-b_w[b_idx[2]]; if (i2>max_wdiff) { i1=2; max_wdiff=i2; }
    i2=b_w[b_idx[2]]-b_w[b_idx[3]]; if (i2>max_wdiff) { i1=3; max_wdiff=i2; }
    if (i1==2) { err=__LINE__; break; } // 2 wide + 2 narrow bars not allowed
    for (i3=0;i3<7;i3++) char27[i3]='.'; // reset char
    /* bars go to the even, spaces to the odd positions of char27 */
    if (i1==1) { // 1 wide bar (1 or 2 wspaces)
      if (s_w[s_idx[0]]-s_w[s_idx[1]]
         >s_w[s_idx[1]]-s_w[s_idx[2]]) { // 1 wspace
        char27[2*b_idx[0]+0]='-';
        char27[2*s_idx[0]+1]='-';
      } else { // assume 2 wspaces
        if (s_w[s_idx[2]]==s_w[s_idx[1]]) { err=__LINE__; break; }
        char27[2*b_idx[0]+0]='-';
        char27[2*s_idx[0]+1]='-';
        char27[2*s_idx[1]+1]='-';
      }
    } else { // assume 3 wbars + 0 wspaces: mark the three widest bars
      char27[2*b_idx[0]+0]='-';
      char27[2*b_idx[1]+0]='-';
      char27[2*b_idx[2]+0]='-';
    }
    for(i4=24,i3=0;i3<24;i3++) {
      if (code27[8*i3+1]==char27[0]
       && code27[8*i3+2]==char27[1]
       && code27[8*i3+3]==char27[2]
       && code27[8*i3+4]==char27[3]
       && code27[8*i3+5]==char27[4]
       && code27[8*i3+6]==char27[5]
       && code27[8*i3+7]==char27[6]) {
        i4=i3; buf[i5++]=code27[8*i3]; break; }
    }
    g_debug(fprintf(stderr," %s c27= %c\n ", char27, ((i5)?buf[i5-1]:'?'));)
    if (i4==24) { err=__LINE__; break; } /* pattern not in table */
  } // each char
  if (i>=code_chars) { // else: inconsistent char
    g_debug(fprintf(stderr," code27 base=%.3f chars=%2d\n ",0.0,code_chars);)
  } else {
    g_debug(fprintf(stderr," error %d at char %d, abort\n", err, i);)
    free(result); result=0;
  }
  buf[i5]=0;
  if (result) // ToDo: fix CRC (not defined?)
    sprintf(result,"<barcode type=\"codabar\" chars=\"%d\" code=\"%s\""
      " crc=\"%c\" error=\"%.3f\" />", i5,buf,'?',0.0);
  free(buf);
  return result; // free at parent!
}
/*
decode barcode
- check main characteristics (num bars, min+max width, etc.)
- detect code type
- the current algorithm measures the width of bars and spaces
called by detect_barcode()
ToDo: - like storing sequence of widths for 1D code
store array of bits for 2D matrix code and decode later
*/
/*
 * Measure the bars crossing the horizontal middle line of box bb and try
 * the available decoders one after the other (code128, UPC/EAN, UPC
 * add-on, code39, i25, codabar); the first one that accepts the widths
 * wins.
 *
 * Returns the malloc'ed XML string of the successful decoder (to be freed
 * by the caller) or NULL if no bar was found, no decoder matched or no
 * memory was available.
 *
 * Fixed compared to the previous revision: with num_bars==0 the statement
 * wb[2*num_bars-1]=0 wrote in front of the buffer; such boxes are now
 * rejected before anything is allocated (no decoder accepts them anyway).
 */
char *decode_barcode(struct box *bb){ /* ToDo: char *dest, int len */
  int i, num_bars, yy, w, ww, dx, xx, cs=JOB->cfg.cs, *wb;
  char *result=NULL; /* store the result */

  yy=(bb->y0+bb->y1)/2;
  w=ww=bb->x1-bb->x0+1;
  num_bars = num_cross(bb->x0,bb->x1,yy,yy,bb->p,JOB->cfg.cs);
  if(JOB->cfg.verbose)
    fprintf(stderr,"\n# ... detect bars=%3d w=%4d",num_bars,ww);
  if (num_bars<1) return NULL; /* nothing to measure */
  /* store width of bars and spaces to buffer wb */
  wb=(int *)malloc(2*num_bars*sizeof(int)); if(!wb) return NULL;
  xx=bb->x0;
  xx-=loop(bb->p,xx,yy, 8,cs,1,LE);
  xx+=loop(bb->p,xx,yy,ww,cs,0,RI); /* start with a bar! */
  for (i=0;i<2*num_bars;i++) { /* even i = bar, odd i = space */
    dx=loop(bb->p,xx,yy,w,cs,1^(i&1),RI);
    xx+=dx;
    w-=dx;
    wb[i]=dx;
  }
  wb[2*num_bars-1]=0; /* the space behind the last bar is not part of the code */
  /* ToDo: what about probability? if not unique
   *  - add argument char *result which can be modified or not,
   *  - or add box2? (would reuse of this code more difficult)
   */
  /* test code128 characteristics, ToDo: look for correct start/stop 211 seq. */
  if ((num_bars-1)%3==0 && num_bars>=10 && ww>=11*(num_bars-1)/3+2){
    if (!result) result=decode_code128(wb,num_bars);
  }
  /* test UPC/EAN characteristics */
  if ((num_bars)%2==0 && num_bars>=8 && ww>=7*(num_bars-6)/2+11
    && ((num_bars-6)/2)%2==0){ /* should be balanced */
    if (!result) result=decode_UPC(wb,num_bars);
  }
  /* test UPC_addon by Michael van Rooyen, often on books */
  if (num_bars==7 || num_bars==16) {
    if (!result) result=decode_UPC_addon(wb,num_bars);
  }
  /* test code39 characteristics */
  if ((num_bars)%5==0 && num_bars>14){
    if (!result) result=decode_39(wb,num_bars);
  }
  /* test i2of5 characteristics */
  if ((num_bars)%5==4 && num_bars>3) {
    if (!result) result=decode_i25(wb,num_bars);
  }
  /* test codabar characteristics */
  if ((num_bars)%4==0 && num_bars>3) {
    if (!result) result=decode_27(wb,num_bars);
  }
  free(wb);
  return result;
}
/*
* taking the list of boxes and search for groups of bars (1D-barcodes)
*/
int detect_barcode(job_t *job)
{
int j=0, j2=0, bx0, by0, bx1, by1, bdx, bdy, bbx, rm,
x0, y0, dx, dy, cs, y, yl0, yl1, yr0, yr1;
struct box *box2, *box3;
if(JOB->cfg.verbose)
fprintf(stderr,"# barcode.c detect_barcode ");
x0=y0=0; rm=0; dx=job->src.p.x; dy=job->src.p.y; cs=JOB->cfg.cs;
for_each_data(&(JOB->res.boxlist)) {
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
/* detect width (bdx) and height (bdy) of possible bar */
/* ToDo: better check for a line */
bdx=box2->x1-box2->x0+1 /* substract correction for skewed bars */
-loop(box2->p,box2->x1,(box2->y0+box2->y1)/2,box2->x1-box2->x0,cs,0,LE)
-loop(box2->p,box2->x0,(box2->y0+box2->y1)/2,box2->x1-box2->x0,cs,0,RI);
bdy=box2->y1-box2->y0+1;
if (box2->c == PICTURE || box2->c == UNKNOWN)
if (box2->y0 >= y0 && box2->y1 <= y0 + dy /* within frame? */
&& box2->x0 >= x0 && box2->x1 <= x0 + dx
&& box2->y1 - box2->y0 > 19 /* min. length */
&& box2->y1 - box2->y0 > 8 * bdx
) { /* a bar? */
j=1; /* number of bars */
bx0=box2->x0; bx1=box2->x1; /* initial values for barcode frame */
by0=box2->y0; by1=box2->y1;
bbx=bx1-bx0+2; /* width of bar */
/* this is for scans which are not exactly horizontal */
yl0=yr0=by0; /* left and right upper bound */
yl1=yr1=by1; /* left and right lower bound */
/* --- iteratively take into account next nearest bar ---
* this is important, because bar-boxes are not in right order */
for (j2=1;j2;) {
j2=0;
/* expand a frame around the potential barcode (bx0,by0,bx1,by1) */
for_each_data(&(JOB->res.boxlist)) {
box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
/* bdy=box3->y1-box3->y0+1; */
if (box2!=box3)
if (box3->c == PICTURE || box3->c == UNKNOWN)
if (box3->y0 >= y0 && box3->y1 <= y0 + dy /* within image */
&& box3->x0 >= x0 && box3->x1 <= x0 + dx
&& box3->y1 - box3->y0 > 19 /* min. length */
&& box3->y1 - box3->y0 > 4 * (bdx) /* height > 4*width2 */
&& box3->x1 - box3->x0 < 4 * (bdy) /* width < height/4 = bar */
&& (( abs(box3->y0-by0)<bdy/16+4 /* within bar-box ? */
&& abs(box3->y1-by1)<bdy/2 ) /* lower ends of UPC could be longer */
||( abs(box3->y0-yl0)<bdy/16+4 /* left side of frame */
&& abs(box3->y1-yl1)<bdy/2
&& box3->x0 <= bx0 )
||( abs(box3->y0-yr0)<bdy/16+4 /* right side of frame */
&& abs(box3->y1-yr1)<bdy/2
&& box3->x0 >= bx1 ) )
&& box3->x0 > bx0 - 12*bbx /* startspace=5...10 */
&& box3->x1 < bx1 + 12*bbx
&& box3->x0 > bx0 - bdy/2 /* dont glue two barcodes together */
&& box3->x1 < bx1 + bdy/2 /* ex: ean13a.jpg */
/* dont check bars which already within the frame twice ? */
&& ( box3->x1 > bx1 || box3->x0 < bx0 )
) { /* a bar? -> extend barcode frame only in x direction */
/* take minimum of y to have valid barcode for all y */
if (box3->x0<bx0) { bx0=box3->x0; yl0=box3->y0; yl1=box3->y1; }
if (box3->x1>bx1) { bx1=box3->x1; yr0=box3->y0; yr1=box3->y1; }
if (4*(box3->y1-box3->y0)>3*(by1-by0)) { /* carefull reduce */
if (box3->y0>by0) by0=box3->y0; /* ToDo: fix for non-horizontal */
if (box3->y1<by1) by1=box3->y1;
}
j++; /* found a near bar and count to num bars */
j2=1; /* continue searching (endless loop?) */
}
} end_for_each(&(JOB->res.boxlist));
}
/* j is the num of bars found above, some inner bars are not counted */
/* ToDo: better iterative add next nearest bars from sorted list near bars? */
if (j>5) {
char *code=0;
box2->c=PICTURE; /* BARCODE */
box2->x0=bx0; box2->y0=by0;
box2->x1=bx1; box2->y1=by1;
/* ToDo: add pointer to decoded text */
y=(box2->y0+box2->y1)/2;
if (JOB->cfg.verbose){
int nbars;
nbars=num_cross(box2->x0,box2->x1,y,y,box2->p,JOB->cfg.cs);
fprintf(stderr,"\n# barcode at %3d %3d size %3d %3d nbars %d (%d)",
bx0,by0,bx1-bx0+1,by1-by0+1,nbars,j);
if (j!=nbars)
fprintf(stderr,"\n# ... trouble: num_found_bars != num_cross");
/* this is because some far bars are detected before near bars */
}
/* transport the info to the gocr-output (development) */
/* ToDo: decode and print/store barcode bars=j */
code=decode_barcode(box2); /* ToDo: char *dest, int len */
if (!code) { /* failed */
code=(char *)malloc(128);
/* ToDo: analyze and output num_bars, width of bars etc. */
if(code) strncpy(code,"<barcode type=\"unknown\" />",128);
}
if (JOB->cfg.verbose)
fprintf(stderr,"\n# ... decoded as: %s", code);
setas(box2,code,99); /* ToDo: set a better weight */
free(code);
/* remove inner boxes, only if sure!? (ToDo: use cfg.certainty) */
for_each_data(&(JOB->res.boxlist)) {
box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
/* bdy=box3->y1-box3->y0+1; */
if (box2!=box3)
if (box3->c == PICTURE || box3->c == UNKNOWN)
if ( abs(box3->y0-by0)<bdy/16+4 /* within bar-box ? */
&& abs(box3->y1-by1)<bdy/2 /* lower ends of UPC could be longer */
&& box3->x1 <= bx1
&& box3->x0 >= bx0
) {
rm++; /* count removed boxes */
list_del(&(JOB->res.boxlist),box3);
free_box(box3);
}
} end_for_each(&(JOB->res.boxlist));
if (JOB->cfg.verbose)
fprintf(stderr,"\n# ... removed boxes: %d", rm);
rm=0;
}
}
} end_for_each(&(JOB->res.boxlist));
/* recalculate averages without bars */
JOB->res.numC=JOB->res.sumX=JOB->res.sumY=j2=0;
for_each_data(&(JOB->res.boxlist)) {
j2++;
box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
if (box3->c==PICTURE) continue;
JOB->res.numC++; /* count remaining boxes */
JOB->res.sumX+=box3->x1-box3->x0+1;
JOB->res.sumY+=box3->y1-box3->y0+1;
} end_for_each(&(JOB->res.boxlist));
if(JOB->cfg.verbose)
fprintf(stderr,"\n# ... boxes %d nC %d\n",
j2, JOB->res.numC);
/* ToDo: detect DataMatrix = iec16022
* search square of 2 lines and 2 dottet lines (first no rotation)
* output characteristics pixel size, bytes, code type, etc.
*/
return 0;
}

View File

@@ -0,0 +1,372 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL address
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
/* do we need #include <math.h>? conflicts with INFINITY in unicode.h */
#include "gocr.h"
#include "pgm2asc.h"
/* Ordering predicate used for sorting letters by their position on the image.
 * Returns 1 if box1 comes after box2, 0 otherwise.
 * Boxes are ordered by text line first; inside the same line they are
 * ordered horizontally.
 * ToDo: - use function same line like this or include lines.m1 etc. */
int box_gt(struct box *box1, struct box *box2) {
  /* different text lines: the line number alone decides */
  if (box1->line != box2->line)
    return (box1->line > box2->line) ? 1 : 0;
  /* same line, no horizontal overlap */
  if (box1->x0 > box2->x1)  /* box1 lies completely right of box2 */
    return 1;
  if (box1->x1 < box2->x0)  /* box1 lies completely left of box2 */
    return 0;
  /* overlapping boxes: compare the left edges */
  return (box1->x0 > box2->x0) ? 1 : 0;
}
/* --- copy part of pix p into pix b ----
 * Copies the rectangle of dx * dy pixels with upper left corner (x0,y0)
 * from p into the already allocated buffer b->p and sets b->x, b->y and
 * b->bpp (=1) accordingly.
 *   len - capacity of b->p in pixels
 * Returns: 0 on success, 1 on error (NULL argument, negative size or
 *          rectangle larger than len); a message is written to stderr.
 * naming it as copybox isnt very clever, because it dont have to do with the
 * char boxes (struct box)
 */
int copybox (pix * p, int x0, int y0, int dx, int dy, pix * b, int len) {
  int x, y;
  /* test boundaries
   * The area test is written as a division, so that dx * dy is never
   * computed and can not overflow for very large boxes (an overflowed
   * product could pass the test and overrun b->p).
   * For dx > 0:  dy > len / dx  <=>  dx * dy > len                     */
  if (p == NULL || b == NULL || b->p == NULL
      || dx < 0 || dy < 0 || len < 0
      || (dx > 0 && dy > len / dx)) {
    fprintf(stderr, " error-copybox x=%5d %5d d=%5d %5d\n", x0, y0, dx, dy);
    return 1;
  }
  b->x = dx;
  b->y = dy;
  b->bpp = 1;
#ifdef FASTER_INCOMPLETE
  /* row wise copy, does not unmark pixels (therefore "incomplete") */
  for (y = 0; y < dy; y++)
    memcpy(&pixel_atp(b, 0, y), &pixel_atp(p, x0, y + y0 ), dx);
  // and unmark pixels
#else
  /* pixel wise copy through getpixel() */
  for (y = 0; y < dy; y++)
    for (x = 0; x < dx; x++)
      pixel_atp(b, x, y) = getpixel(p, x + x0, y + y0);
#endif
  return 0;
}
/* Clear the table of alternative chars of a box.
 * Every string stored in box->tas[0..num_ac-1] is freed and its slot is
 * zeroed, afterwards num_ac is set to 0.  Always returns 0.
 */
int reset_box_ac(struct box *box){
  int idx = 0;
  while (idx < box->num_ac) {
    if (box->tas[idx] != NULL) {
      /* fprintf(stderr,"DBG free_s[%d] %p %s\n",idx,box->tas[idx],box->tas[idx]); */
      free(box->tas[idx]);
      box->tas[idx] = 0;  /* a second reset must not free it again */
    }
    idx++;
  }
  box->num_ac = 0;        /* table is empty now */
  return 0;
}
/* ini or copy a box: get memory for box and initialize the memory
 *   inibox != NULL - the new box is a deep copy of inibox: the struct is
 *                    copied and every string in tas[0..num_ac-1] is
 *                    duplicated, so both boxes can be freed independently
 *   inibox == NULL - the new box is completely zeroed
 *                    (num_ac = 0, num_frames = 0, all tas[] slots 0)
 * Returns the new box or NULL if memory is exhausted (nothing is leaked
 * in that case).  The box has to be released with free_box().
 */
struct box *malloc_box (struct box *inibox) {
  struct box *buf;
  size_t len;
  int i;
  buf = (struct box *) malloc(sizeof(struct box));
  if (!buf)
    return NULL;
  if (inibox) {
    memcpy(buf, inibox, sizeof(struct box));
    /* only pointer are copied, we want to copy the contents too */
    for (i=0;i<inibox->num_ac;i++) {
      if (!inibox->tas[i]) continue;
      len = strlen(inibox->tas[i])+1;
      buf->tas[i]=(char *)malloc(len);
      if (!buf->tas[i]) {
        /* out of memory: slots below i hold our own duplicates (or 0),
         * slots above i still alias the strings of inibox and must not
         * be freed here */
        while (--i >= 0)
          if (buf->tas[i]) free(buf->tas[i]);
        free(buf);
        return NULL;
      }
      memcpy(buf->tas[i], inibox->tas[i], len);
    }
  }
  else {
    /* start from a defined state instead of uninitialized heap memory */
    memset(buf, 0, sizeof(struct box));
  }
  /* fprintf(stderr,"\nDBG ini_box %p",buf); */
  return buf;
}
/* Release a box: first the strings of its alternative char table,
 * then the box itself.  A NULL pointer is accepted and ignored.
 * Always returns 0.
 */
int free_box (struct box *box) {
  if (box != NULL) {
    /* fprintf(stderr,"DBG free_box %p\n",box); out_x(box); */
    reset_box_ac(box);  /* strings owned by the box */
    free(box);          /* the box memory */
  }
  return 0;
}
/* simplify the vectorgraph,
 * but what is the best way?
 * a) melting two neighbouring vectors with nearly same direction?
 * (nearest angle to pi)
 * b) melting three neigbours with smallest area?
 * ToDo:
 * mode = 0 - only lossless
 * mode = 1 - reduce one vector, smallest possible loss
 * mode = 2 - remove jitter (todo, or somewhere else)
 * ToDo: include also loop around (last - first element)
 * ToDo: reduce by 10..50%
 *
 * What the code does: it scans the points in box1->frame_vector, rates
 * every examined point i2 by the two edges meeting there (n = edge from
 * i2-1 to i2, m = edge from i2 to i2+1), removes the point with the
 * smallest rating and decrements the end indices num_frame_vectors[] of
 * frame besti1 and of all following frames.
 * num_frame_vectors[] is used as cumulative end index per frame here.
 * Any mode other than 1 only prints an error message, the reduction is
 * done nevertheless.  Always returns 0.
 */
int reduce_vectors ( struct box *box1, int mode ) {
int i1, i2, nx, ny, mx, my, len,
minlen=1024, /* minlength of to neighbouring vectors */
besti1=0, /* frame for best reduction */
besti2=2; /* vector replacing its predecessor */
double sprod, maxsprod=-1; /* maxsprod<0 means: no candidate stored yet */
if (mode!=1) fprintf(stderr,"ERR not supported yet, ToDo\n");
/* NOTE(review): i2 is initialized once and not reset per frame, so for
 * i1>0 the first point examined is the last point of the previous frame,
 * but it is recorded with besti1=i1 - confirm that this is intended */
for (i2=1,i1=0; i1<box1->num_frames; i1++) { /* every frame */
for (;i2<box1->num_frame_vectors[i1]-1; i2++) { /* every vector */
/* predecessor n */
nx = box1->frame_vector[i2-0][0] - box1->frame_vector[i2-1][0];
ny = box1->frame_vector[i2-0][1] - box1->frame_vector[i2-1][1];
/* successor m */
mx = box1->frame_vector[i2+1][0] - box1->frame_vector[i2-0][0];
my = box1->frame_vector[i2+1][1] - box1->frame_vector[i2-0][1];
/* angle is w = a*b/(|a|*|b|) = 1 means parallel */
/* normalized: minimize w^2 = (a*b/(|a|*|b|)-1)^2 */
/* -1=90grd, 0=0grd, -2=180grd */
/* signed cos^2 of the angle between n and m, minus 1; the division is
 * done in double (factor 1.), a zero length edge gives inf or nan */
sprod = /* fabs */(abs(nx*mx+ny*my)*(nx*mx+ny*my)
/(1.*(nx*nx+ny*ny)*(mx*mx+my*my))-1);
/* we dont include math.h because INFINITY conflicts to unicode,h */
if (sprod<0) sprod=-sprod;
/* NOTE: this is the product of the squared lengths, not their sum */
len = (mx*mx+my*my)*(nx*nx+ny*ny); /* sum lengths^2 */
// ..c ###c ... .. ...
// .b. len=2+2 #b.. len=2+5 #bc len=1+2 bc len=1+1 b#a len=4+5
// a.. spr=0 a... spr=1/10 a.. spr=1/4 a. spr=1 ##c spr=9/5
//
/* rating is len*sprod^4, smaller is better; the first examined point is
 * always taken (maxsprod<0) */
if ( len* sprod* sprod* sprod* sprod
<minlen*maxsprod*maxsprod*maxsprod*maxsprod
|| maxsprod<0) /* Bad! ToDo! */
{ maxsprod=sprod; besti1=i1; besti2=i2; minlen=len; }
}
}
/* remove point besti2: move all following points one position down */
if (box1->num_frames>0)
for (i2=besti2; i2<box1->num_frame_vectors[ box1->num_frames-1 ]-1; i2++) {
box1->frame_vector[i2][0]=box1->frame_vector[i2+1][0];
box1->frame_vector[i2][1]=box1->frame_vector[i2+1][1];
}
/* all end indices from frame besti1 on shrink by one
 * NOTE(review): if no point was examined above, the defaults besti1=0 and
 * besti2=2 are used and the counters are decremented anyway */
for (i1=besti1; i1<box1->num_frames; i1++)
box1->num_frame_vectors[i1]--;
// fprintf(stderr,"\nDBG_reduce_vectors i= %d nv= %d sprod=%f len2=%d\n# ...",
// besti2,box1->num_frame_vectors[ box1->num_frames-1 ],maxsprod,minlen);
// out_x(box1);
return 0;
}
/* add the contents of box2 to box1
 * especially add vectors of box2 to box1
 *
 * box1 receives: the common bounding box, the sum of the dots, updated
 * num_boxes/num_subboxes and the frames of both boxes (frames of the box
 * with the bigger bounding box area first).
 * If both boxes together hold more than MaxFrameVectors vectors,
 * reduce_vectors() is called on the box holding more vectors until they
 * fit - this may modify box2 as well.  At most MaxNumFrames frames are
 * kept, further frames of the smaller box are dropped.
 * box2 is not freed.  Always returns 0.
 */
int merge_boxes( struct box *box1, struct box *box2 ) {
int i1, i2, i3, i4;
struct box tmpbox, *bsmaller, *bbigger; /* for mixing and sorting */
/* DEBUG, use valgrind to check uninitialized memory */
#if 0
fprintf(stderr,"\nDBG merge_boxes_input:"); out_x(box1); out_x(box2);
#endif
/* pair distance is to expendable, taking borders is easier */
/* compare (x1-x0)*(y1-y0) of both boxes, box1 wins on equality */
if ((box2->x1 - box2->x0)*(box2->y1 - box2->y0)
>(box1->x1 - box1->x0)*(box1->y1 - box1->y0)) {
bbigger=box2; bsmaller=box1; }
else {
bbigger=box1; bsmaller=box2; }
/* ToDo: does not work if a third box is added */
/* bounding boxes do not overlap: count as separate objects,
 * otherwise box2 is counted as additional sub-box of box1 */
if (box2->y0>box1->y1 || box2->y1<box1->y0
|| box2->x0>box1->x1 || box2->x1<box1->x0) {
box1->num_boxes += box2->num_boxes; /* num seperate objects 2=ij */
} else {
if (box2->num_boxes>box1->num_boxes) box1->num_boxes=box2->num_boxes;
box1->num_subboxes += box2->num_subboxes+1; /* num holes 1=abdepq 2=B */
}
box1->dots += box2->dots; /* num i-dots */
/* expand box1 to the common bounding box */
if ( box2->x0 < box1->x0 ) box1->x0 = box2->x0;
if ( box2->x1 > box1->x1 ) box1->x1 = box2->x1;
if ( box2->y0 < box1->y0 ) box1->y0 = box2->y0;
if ( box2->y1 > box1->y1 ) box1->y1 = box2->y1;
/* i1, i2 = number of vectors of the bigger and the smaller box
 * (end index of their last frame) */
i1 = i2 = 0;
if (bbigger->num_frames)
i1 = bbigger->num_frame_vectors[ bbigger->num_frames - 1 ];
if (bsmaller->num_frames)
i2 = bsmaller->num_frame_vectors[ bsmaller->num_frames - 1 ];
/* the loop relies on reduce_vectors() removing exactly one vector */
while (i1+i2 > MaxFrameVectors) {
if (i1>i2) { reduce_vectors( bbigger, 1 ); i1--; }
else { reduce_vectors( bsmaller, 1 ); i2--; }
}
/* if i1+i2>MaxFrameVectors simplify the vectorgraph */
/* if sum num_frames>MaxNumFrames through shortest graph away and warn */
/* first copy the bigger box */
/* shallow copy, tmpbox is only used as buffer for the frame data */
memcpy(&tmpbox, bbigger, sizeof(struct box));
/* attach the smaller box */
/* i4 runs over all vectors of bsmaller, i1 is the write position in
 * tmpbox and becomes the new end index of each appended frame */
for (i4=i3=0; i3<bsmaller->num_frames; i3++) {
if (tmpbox.num_frames>=MaxNumFrames) break;
for (; i4<bsmaller->num_frame_vectors[i3]; i4++) {
memcpy(tmpbox.frame_vector[i1],
bsmaller->frame_vector[i4],2*sizeof(int));
i1++;
}
tmpbox.num_frame_vectors[ tmpbox.num_frames ] = i1;
tmpbox.frame_vol[ tmpbox.num_frames ] = bsmaller->frame_vol[ i3 ];
tmpbox.frame_per[ tmpbox.num_frames ] = bsmaller->frame_per[ i3 ];
tmpbox.num_frames++;
if (tmpbox.num_frames>=MaxNumFrames) {
if (JOB->cfg.verbose)
fprintf(stderr,"\nDBG merge_boxes MaxNumFrames reached");
break;
}
}
/* copy tmpbox to destination */
/* only the frame data is copied to box1
 * NOTE(review): the sizes assume int arrays with MaxNumFrames resp.
 * MaxFrameVectors*2 elements - confirm against struct box in gocr.h */
box1->num_frames = tmpbox.num_frames;
memcpy(box1->num_frame_vectors,
tmpbox.num_frame_vectors,sizeof(int)*MaxNumFrames);
memcpy(box1->frame_vol,
tmpbox.frame_vol,sizeof(int)*MaxNumFrames);
memcpy(box1->frame_per,
tmpbox.frame_per,sizeof(int)*MaxNumFrames);
memcpy(box1->frame_vector,
tmpbox.frame_vector,sizeof(int)*2*MaxFrameVectors);
#if 0
if (JOB->cfg.verbose)
fprintf(stderr,"\nDBG merge_boxes_result:"); out_x(box1);
#endif
return 0;
}
/* used for division of glued chars
 * after a box is splitted into 2, where vectors are copied to both,
 * vectors outside the new box are cutted and thrown away,
 * later replaced by
 * - 1st remove outside vectors with outside neighbours (complete frames?)
 * add vector on outside vector with inside neighbours
 * care about connections through box between outside vectors
 * - 2nd reduce outside crossings (inclusive splitting frames if necessary)
 * depending on direction (rotation) of outside connections
 * - 3th shift outside vectors to crossing points
 * - split add this points, connect only in-out...out-in,
 * - cutting can result in more objects
 * ToDo:
 * dont connect --1---2--------3----4-- new-y1 (inside above not drawn)
 * \ \->>>>-/ / outside
 * \----<<<<-----/ old-y1
 * |======| subtractable?
 *
 * only connect --1---2--------3----4-- new-y1
 * \>>/ \>>>/ old-y1 outside
 * ToDo: what about cutting 2 frames (example: 2fold melted MN)
 * better restart framing algo?
 *
 * ToDo: new vol, per
 *
 * What is implemented: the frame vectors of box1 are clipped to the
 * rectangle x0..x1, y0..y1 of box1 in two passes:
 *  1) frames with all points outside the rectangle are removed
 *  2) of every run of outside points the first one is moved to the
 *     border, the following ones are deleted, and the first inside point
 *     after the run is replaced by the clipped last outside point
 * frame_vol and frame_per are not updated.  Always returns 0.
 */
int cut_box( struct box *box1) {
int i1, i2, i3, i4, x, y, lx, ly, dbg=0;
if (JOB->cfg.verbose) dbg=1; // debug level, enlarge to get more output
if (dbg) fprintf(stderr,"\n cut box x= %3d %3d", box1->x0, box1->y0);
/* check if complete frames are outside the box */
for (i1=0; i1<box1->num_frames; i1++){
if (dbg>2) fprintf(stderr,"\n checking frame %d outside", i1);
/* frame i1 covers the points i2..i3-1 (num_frame_vectors holds end indices) */
i2 = ((i1)?box1->num_frame_vectors[ i1-1 ]:0); // this frame
i3 = box1->num_frame_vectors[ i1 ]; // next frame
for (i4=i2; i4 < i3; i4++) {
x = box1->frame_vector[i4][0];
y = box1->frame_vector[i4][1];
/* break, if one vector is lying inside */
if (x>=box1->x0 && x<=box1->x1 && y>=box1->y0 && y<=box1->y1) break;
}
if (i4==i3) { /* all vectors outside */
if (dbg>1) fprintf(stderr,"\n remove frame %d",i1);
/* replace all frames i1,i1+1,... by i1+1,i1+2,... */
/* replace (x,y) pairs first */
/* move the points of all following frames down by the frame size i3-i2 */
for (i4=i2; i4<box1->num_frame_vectors[ box1->num_frames-1 ]-(i3-i2);
i4++) {
box1->frame_vector[i4][0] = box1->frame_vector[i4+i3-i2][0];
box1->frame_vector[i4][1] = box1->frame_vector[i4+i3-i2][1];
}
/* replace the num_frame_vectors */
for (i4=i1; i4<box1->num_frames-1; i4++)
box1->num_frame_vectors[ i4 ] =
box1->num_frame_vectors[ i4+1 ]-(i3-i2);
/* i1-- : the next frame has moved to index i1, check it in the next turn */
box1->num_frames--; i1--;
}
}
/* remove vectors outside the box */
/* i3 = number of consecutive outside points seen so far (0 = inside) */
i3=0;
for (i1=0; i1<box1->num_frames; i1++){
if (dbg>2) fprintf(stderr,"\n check cutting vectors on frame %d", i1);
/* NOTE(review): the start point is taken from index 0 and the loop below
 * starts at i2=0 for every frame, not at the first point of frame i1,
 * so points of earlier frames are visited again - confirm intent */
x = box1->frame_vector[0][0]; /* last x */
y = box1->frame_vector[0][1]; /* last y */
/* ToDo: start inside to get a closed object */
if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) i3=1;
for (i2=0; i2<box1->num_frame_vectors[ i1 ]; i2++) {
lx = x; /* last x */
ly = y; /* last y */
x = box1->frame_vector[i2][0];
y = box1->frame_vector[i2][1];
// fprintf(stderr,"DBG LEV3 i2= %3d xy= %3d %3d",i2,x,y);
/* check if outside */
if (x<box1->x0 || x>box1->x1 || y<box1->y0 || y>box1->y1) {
/* replace by nearest point at border, ToDo: better crossingpoint */
if (i3==0) { /* wrong if it starts outside */
if (x < box1->x0) x = box1->frame_vector[i2][0] = box1->x0;
if (x > box1->x1) x = box1->frame_vector[i2][0] = box1->x1;
if (y < box1->y0) y = box1->frame_vector[i2][1] = box1->y0;
if (y > box1->y1) y = box1->frame_vector[i2][1] = box1->y1;
} else {
/* remove vector */
if (dbg>1) fprintf(stderr,"\n remove vector[%d][%d] x= %2d %2d",i1,i2,x-box1->x0,y-box1->y0);
/* close the gap in the point list and shrink the end indices of this
 * frame and of all following frames */
for (i4=i2;i4<box1->num_frame_vectors[ box1->num_frames-1 ]-1;i4++) {
box1->frame_vector[i4][0] = box1->frame_vector[i4+1][0];
box1->frame_vector[i4][1] = box1->frame_vector[i4+1][1];
}
for (i4=i1; i4<box1->num_frames; i4++)
box1->num_frame_vectors[ i4 ]--;
i2--; /* next element is shiftet now, setting back the counter */
}
i3++;
// fprintf(stderr," outside i3= %d\n",i3);
continue;
}
// fprintf(stderr," inside i3= %d",i3);
/* first inside point after a run of outside points: it is overwritten
 * by the previous point clipped to the border */
if (i3) { /* ToDo: better crossing point last vector and border */
if (lx < box1->x0) lx = box1->x0;
if (lx > box1->x1) lx = box1->x1;
if (ly < box1->y0) ly = box1->y0;
if (ly > box1->y1) ly = box1->y1;
x = box1->frame_vector[i2][0] = lx;
y = box1->frame_vector[i2][1] = ly;
i3 = 0;
}
// fprintf(stderr," xy= %3d %3d\n",x,y);
}
}
if (dbg>2) { fprintf(stderr,"\nDBG cut_box_result:"); out_x(box1); }
return 0;
}

View File

@@ -0,0 +1,462 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL address
*/
#include <stdio.h>
#include <stdlib.h>
#include "gocr.h"
#include "pnm.h"
#include "pgm2asc.h"
#include <string.h>
#include <time.h>
#define Blen 256
// load boxes from database into boxlist (for faster access)
// used as alternate engine, comparing chars with database
/* Reads <db_path>db.lst (default path "./db/") line by line.  Every line
 * has the form
 *     <image file> <char | "string" | hexcode>
 * empty lines and lines starting with '#' are skipped.  For every entry
 * the image is read with readpgm() and a box describing it is appended
 * to JOB->tmp.dblist.
 * Returns: 0 on success,
 *          1 if db.lst can not be opened or memory is exhausted
 *            (entries loaded so far stay in the list).
 * An unreadable image file terminates the program via exit(-1).
 */
int load_db(void) {
  FILE *f1;
  char s1[Blen+1],
       s2[Blen+1] = "./db/", /* ToDo: replace by constant! by configure */
       *s3, *quote;
  int i, j, ii, i2, line;
  struct box *box1;
  pix *pp;

  /* s2 is zero filled behind "./db/" and at most Blen-1 chars are copied,
   * so s2 stays terminated */
  if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
  i2=strlen(s2);   /* the file names are appended at s2+i2 */
  if (JOB->cfg.verbose)
    fprintf(stderr, "# load database %s %s ... ",s2,
            /* never hand a NULL pointer to %s */
            (JOB->cfg.db_path ? JOB->cfg.db_path : "(null)"));
  strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
  f1 = fopen(s2, "r");
  if (!f1) {
    fprintf(stderr, " DB %s not found\n",s2);
    return 1;
  }
  line = 0; /* line counter for better error report */
  ii = 0;   /* number of entries appended to the list */
  while (!feof(f1)) {
    /* bbg: should write a better input routine */
    if (!fgets(s1, Blen, f1)) break;
    line++;
    j = strlen(s1);
    /* remove carriage return sequences from line */
    while (j > 0 && (s1[j - 1] == '\r' || s1[j - 1] == '\n'))
      s1[--j] = 0;
    if (!j) continue;           /* skip empty line */
    if (s1[0]=='#') continue;   /* skip comments (v0.44) */
    /* copy file name */
    for (i = 0; i < j && i+i2 < Blen && strchr(" \t,;",s1[i]) == 0; i++)
      s2[i2 + i] = s1[i];
    s2[i2+i]=0;
    /* skip spaces */
    for (; i < j && strchr(" \t",s1[i]) != 0; i++);
    /* by now: read pix, fill box, goto next ??? */
    /* get the memory for image and box before anything is read, so that
     * a failure can be handled without leaking or using a NULL pointer */
    pp = (pix *)malloc(sizeof(pix));
    box1 = (struct box *)malloc_box(NULL);
    if (!pp || !box1) {
      if (!pp)   fprintf(stderr,"malloc error in load_db pix\n");
      if (!box1) fprintf(stderr,"malloc error in load_db box1\n");
      free(pp);         /* free(NULL) is harmless */
      free_box(box1);   /* accepts NULL too */
      fclose(f1);
      return 1;
    }
    // if (JOB->cfg.verbose) fprintf(stderr,"\n# readpgm %s ",s2);
    if (readpgm(s2, pp, 0 * JOB->cfg.verbose)!=0) {
      fprintf(stderr,"\ndatabase error: readpgm %s\n", s2);
      exit(-1);
    }
    box1->x0 = 0;
    box1->x1 = pp->x-1; // white border 1 pixel width
    box1->y0 = 0;
    box1->y1 = pp->y-1;
    box1->x = 1;
    box1->y = 1;
    box1->dots = 0;
    box1->c = 0;
    box1->modifier = 0; /* ToDo: obsolete */
    box1->tas[0]=NULL;
    box1->tac[0]=0;
    box1->wac[0]=100; /* really 100% sure? */
    box1->num_ac=1;
    if (s1[i]=='"'){ /* parse a string */
      /* we only look for first and last "" */
      /* strrchr() returns NULL if there is no second quote, the pointer
       * difference must not be computed in that case */
      quote = strrchr(s1+i+1,'"');
      j = (quote ? (int)(quote-(s1+i+1)) : 0);
      if (j>=1) {
        s3=(char *)malloc(j+1);
        if (!s3) fprintf (stderr, "malloc error in load_db s3\n");
        if (s3) {
          memcpy(s3,s1+i+1,j);
          s3[j]=0;
          box1->tas[0]=s3;
          // fprintf(stderr,"\nstring=%s",s3);
        }
      } else { fprintf(stderr,"load_db: string parse error L%d\n",line); }
    } else {
      box1->tac[0] = box1->c = s1[i]; /* try to interpret as ASCII */
      /* we can live without hexcode in future if we use UTF8-strings */
      s3=s1+i;
      j=strtol( s1+i, &s3, 16); /* try to read 4 to 8 digit hex unicode */
      /* if its an hexcode, ASCII interpretation is overwritten */
      if( j && i+3<=Blen && s3-s1-i>3 ) box1->tac[0] = box1->c = j;
      // fprintf(stderr,"\nhexcode=%04x=%04x %d",(int)j,(int)box1->c,s3-s1-i);
    }
    box1->num = 0;
    box1->line = -1;
    box1->m1 = 0; /* ToDo: should be given too in the database! */
    box1->m2 = 0;
    box1->m3 = 0;
    box1->m4 = 0;
    box1->p = pp;
    list_app(&JOB->tmp.dblist, box1); // append to list
    ii++;   /* count only real entries, no comments or empty lines */
#if 0
    out_x(box1);
#endif
  }
  fclose(f1);
  if (JOB->cfg.verbose)
    fprintf(stderr, " %d chars loaded\n", ii);
  return 0;
}
// expand database from box/boxlist name=db_$utime.pbm
// this is added in version v0.3.3
/* Writes the pixels of box1 to a new file
 *     <db_path>db_<char as hex>_<time as hex>.pbm
 * (default path "./db/") and appends a line describing the char to
 * <db_path>db.lst.
 * Returns: 0 on success,
 *          1 if the path is too long or db.lst can not be opened,
 *          2 if memory is exhausted,
 *         -1 if copybox() failed.
 * No file handle or buffer stays allocated on any of these paths.
 */
int store_db(struct box *box1) {
  FILE *f1;
  char s2[Blen+1] = "./db/", s3[Blen+1];
  int i2, dx, dy;
  size_t n3;        /* length of the generated file name */
  unsigned c_out;
  pix b; /* temporary mini page */

  if( JOB->cfg.db_path ) strncpy(s2,JOB->cfg.db_path,Blen-1);
  i2=strlen(s2);
  /* add (first) char and time to the file name for better debugging */
  /* decide between 7bit ASCII and UTF8-char or string */
  c_out = ((box1->num_ac && box1->tas[0]) ?
           (unsigned char )box1->tas[0][0] /* char */ :
           box1->c /* wchar */);
  /* (unsigned int)(( char)0x80) = 0xffffff80 */
  /* (unsigned int)((unsigned char)0x80) = 0x00000080 */
  /* name generation can cause problems, if called twice within a second */
  sprintf(s3,"db_%04x_%08lx.pbm", c_out, (unsigned long)time(NULL));
  /* ToDo: the file name may be not unique */
  n3 = strlen(s3);
  /* the generated name (always longer than "db.lst") has to fit behind
   * the path, otherwise s2 would be overrun below */
  if ((size_t)i2 + n3 > (size_t)Blen) {
    fprintf(stderr, " could not access %s (path too long)\n",s2);
    return 1;
  }
  strncpy(s2+i2,"db.lst",Blen-i2);s2[Blen]=0;
  f1 = fopen(s2, "a");
  if (!f1) {
    fprintf(stderr, " could not access %s\n",s2);
    return 1;
  }
  memcpy(s2+i2, s3, n3+1);  /* s2 = path + image file name, incl. \0 */
  /* store image and infos about the char */
  /* ToDo: store the vector list instead of the pixelarray */
  if (JOB->cfg.verbose)
    fprintf(stderr, "store_db: add file %s to database (nac=%d c=%04x)"
            "\n#",s3, box1->num_ac, c_out);
  dx=box1->x1-box1->x0+1;
  dy=box1->y1-box1->y0+1;
  b.p = (unsigned char *) malloc( dx * dy );
  if( !b.p ){
    fprintf( stderr, "\nFATAL: malloc failed, skip store_db" );
    fclose(f1);             /* do not leak the list file */
    return 2;
  }
  if (copybox(box1->p, box1->x0, box1->y0, dx, dy, &b, dx * dy)) {
    free(b.p);              /* neither the pixel buffer */
    fclose(f1);
    return -1;
  }
  writepbm(s2,&b); /* What is to do on error? */
  free(b.p);
  /* store the database line */
  /* some infos about box1->m1,..,m4 should added (base line, high etc.) */
  if (box1->num_ac && box1->tas[0]) {
    fprintf(f1, "%s \"%s\"\n",s3,box1->tas[0]);
    /* ToDo: what if tas contains '"'? */
  } else {
    /* alphanumeric chars are stored as char, everything else as hexcode */
    if( (box1->c >= '0' && box1->c <= '9')
     || (box1->c >= 'A' && box1->c <= 'Z')
     || (box1->c >= 'a' && box1->c <= 'z') )
      fprintf(f1, "%s %c\n",s3,(char)box1->c);
    else {
      if (((box1->c)>>16)>>16)
        fprintf(f1, "%s %08x\n",s3,(unsigned int)box1->c);
      else
        fprintf(f1, "%s %04x\n",s3,(unsigned int)box1->c);
    }
  }
  fclose(f1);
  return 0;
}
/* function is only for user prompt on console to identify chars
it prints out a part of pixmap b at point x0,y0 to stderr
using dots .,; if no pixel, and @xoO for pixels
*/
void out_env(struct box *px ){
int x0,y0,x1,y1,dx,dy,x,y,x2,y2,yy0,tx,ty,i,cs;
char c1, c2; pix *b;
cs=JOB->cfg.cs;
yy0=px->y0;
{ /* overwrite rest of arguments */
b=px->p;
x0=px->x0; x1=px->x1; dx=x1-x0+1;
y0=px->y0; y1=px->y1; dy=y1-y0+1;
y0-=2; y1+=2;
if (px->m4 && y0>px->m1) y0=px->m1;
if (px->m4 && y1<px->m4) y1=px->m4;
if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
if (x1-x0+1<52) { x0-=10; x1+=10; } /* fragment? expand frame */
if (x1-x0+1<62) { x0-=5; x1+=5; }
if (y1-y0+1<10) { y0-= 4; y1+= 4; } /* fragment? */
if (x0<0) x0=0; if (x1>=b->x) x1=b->x-1;
if (y0<0) y0=0; if (y1>=b->y) y1=b->y-1;
dx=x1-x0+1;
dy=y1-y0+1; yy0=y0;
fprintf(stderr,"\n# show box + environment");
fprintf(stderr,"\n# show box x= %4d %4d d= %3d %3d r= %d %d",
px->x0, px->y0, px->x1 - px->x0 + 1, px->y1 - px->y0 + 1,
px->x - px->x0, px->y - px->y0);
if (px->num_ac){ /* output table of chars and its probabilities */
fprintf(stderr,"\n# list box char: ");
for(i=0;i<px->num_ac && i<NumAlt;i++)
/* output the (xml-)string (picture position, barcodes, glyphs, ...) */
if (px->tas[i])
fprintf(stderr," %s(%d)", px->tas[i] ,px->wac[i]);
else
fprintf(stderr," %s(%d)",decode(px->tac[i],ASCII),px->wac[i]);
}
fprintf(stderr,"\n");
if (px->dots && px->m2 && px->m1<y0) { yy0=px->m1; dy=px->y1-yy0+1; }
}
tx=dx/80+1;
ty=dy/40+1; // step, usually 1, but greater on large maps
fprintf(stderr,"# show pattern x= %4d %4d d= %3d %3d t= %d %d\n",
x0,y0,dx,dy,tx,ty);
if (dx>0)
for(y=yy0;y<yy0+dy;y+=ty) { /* reduce the output to max 78x40 */
/* image is the boxframe + environment in the original bitmap */
for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
c1='.';
for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
for(x2=x;x2<x+tx && x2<x0+dx;x2++)
{ if((getpixel(b,x2,y2)<cs)) c1='#'; }
// show pixels outside the box thinner/weaker
if (x+tx-1 < px->x0 || x > px->x1
|| y+ty-1 < px->y0 || y > px->y1) c1=((c1=='#')?'O':',');
fprintf(stderr,"%c", c1 );
}
c1=c2=' ';
/* mark lines with < */
if (px) if (y==px->m1 || y==px->m2 || y==px->m3 || y==px->m4) c1='<';
if (y==px->y0 || y==px->y1) c2='-'; /* boxmarks */
fprintf(stderr,"%c%c\n",c1,c2);
}
}
/*
 // second variant, for database (with slightly other behaviour)
 // look at the environment of the pixel too (contrast etc.)
 // detailed analysis only of diff pixels!
 //
 // Returns 100% * distance, 0 is best fit, 100 is the maximum
 //  = similarity of 2 chars for recognition of noisy chars
 //
 // Box2 is scaled to the size of box1 and both are sampled on a raster
 // of at most about 16 x 32 points.  Equal sample points count as good,
 // different sample points count as bad, the more the 8 neighbours of
 // the point differ too.  Different box sizes, different positions
 // relative to the text line and a different width/height ratio add
 // badness.
 ToDo: especially on small boxes distance should only be 0 if
 characters are 100% identical!
*/
// #define DEBUG 2
int distance2( pix *p1, struct box *box1,
               pix *p2, struct box *box2, int cs){
  int rc, x, y, nx, ny, ndiff, rgood=0, rbad=0;
  int x1, y1, x2, y2, dx, dy, dx1, dy1, dx2, dy2, tx, ty;
  int sx1, sy1, sx2, sy2;   /* sample point in p1 and in p2 */
#if DEBUG == 2
  if(JOB->cfg.verbose)
    fprintf(stderr," DEBUG: distance2\n");
#endif
  x1=box1->x0; y1=box1->y0;
  x2=box2->x0; y2=box2->y0;
  dx1=box1->x1-box1->x0+1; dx2=box2->x1-box2->x0+1;
  dy1=box1->y1-box1->y0+1; dy2=box2->y1-box2->y0+1;
  dx=dx1; dy=dy1;           /* the raster follows the size of box1 */
  if(abs(dx1-dx2)>1+dx/16 || abs(dy1-dy2)>1+dy/16) rbad++; // how to weight?
  // compare relations to baseline and upper line
  if(box1->m4>0 && box2->m4>0){ // used ???
    if(2*box1->y1>box1->m3+box1->m4 && 2*box2->y1<box2->m3+box2->m4) rbad+=128;
    if(2*box1->y0>box1->m1+box1->m2 && 2*box2->y0<box2->m1+box2->m2) rbad+=128;
  }
  // raster
  tx=((dx<17)?1:dx/16);
  ty=((dy<33)?1:dy/32);
  // compare pixels
  for( y=0;y<dy;y+=ty )
    for( x=0;x<dx;x+=tx ) { // try global shift too ???
      sx1=x1+x*dx1/dx; sy1=y1+y*dy1/dy;
      sx2=x2+x*dx2/dx; sy2=y2+y*dy2/dy;
      if( (getpixel(p1,sx1,sy1)<cs) == (getpixel(p2,sx2,sy2)<cs) ) {
        rgood+=16; continue;  // all things are right!
      }
      // different pixel: test overlapp of surounding pixels
      rbad+=4;
      ndiff=-1;               // one different neighbour is tolerated
      for(nx=-1;nx<2;nx++)
        for(ny=-1;ny<2;ny++) {
          if(nx==0 && ny==0) continue;  // the sample point itself
          if( (getpixel(p1,sx1+nx*(1+dx1/32),sy1+ny*(1+dy1/32))<cs)
            !=(getpixel(p2,sx2+nx*(1+dx2/32),sy2+ny*(1+dy2/32))<cs) ) ndiff++;
        }
      if(ndiff>0)
        rbad+=16*ndiff;
    }
  if(rgood+rbad) rc= 100*rbad/(rgood+rbad); else rc=99;
  /* if width/high is not correct add badness */
  rc += ( abs(dx1*dy2-dx2*dy1) * 10 ) / (dy1*dy2);
  if (rc>100) rc=100;
  if(/* rc<10 && */ JOB->cfg.verbose /* &1024 */){
#if DEBUG == 2
    fprintf(stderr," distance2 rc=%d rgood=%d rbad=%d\n",rc,rgood,rbad);
    // out_b(NULL,p1,box1->x0,box1->y0,box1->x1-box1->x0+1,
    // box1->y1-box1->y0+1,cs);
    // out_b(NULL,p2,box2->x0,box2->y0,box2->x1-box2->x0+1,
    // box2->y1-box2->y0+1,cs);
    out_x(box1);
    out_x(box2);
#endif
  }
  return rc;
}
/* Recognize box1 with the help of the database list JOB->tmp.dblist.
 * Every box of the list is compared with box1 using distance2(); when a
 * new best fit is good enough (dist<100 and 100-dist >= cfg.certainty),
 * all alternatives of the database box are added to box1 via
 * setas()/setac() with the weight scaled by (100-dist).
 * If nothing was found and bit 128 of cfg.mode is set, the pattern is
 * shown and the user is asked on the console; the answer is stored in
 * box1, appended to the list and (unless finished with ALT+RET) written
 * to disk by store_db().
 * Returns the recognized char: tac[0] of the last accepted database box
 * (0 for string entries), the char typed by the user ('_' for a string),
 * or UNKNOWN.
 */
wchar_t ocr_db(struct box *box1) {
int dd = 1000, dist = 1000;
wchar_t c = UNKNOWN;
unsigned char buf[200]; /* Oct08 JS: add unsigned to avoid UTF problems */
Box *box2, *box3;
if (!list_empty(&JOB->tmp.dblist)){
box3 = (Box *)list_get_header(&JOB->tmp.dblist);
if(JOB->cfg.verbose)
fprintf(stderr,"\n#DEBUG: ocr_db (%d,%d) ",box1->x0, box1->y0);
for_each_data(&JOB->tmp.dblist) {
box2 = (Box *)list_get_current(&JOB->tmp.dblist);
/* do preselect!!! distance() slowly */
dd = distance2( box2->p, box2, box1->p, box1, JOB->cfg.cs);
/* "<=": of several entries with the same distance the later one wins */
if (dd <= dist) { /* new best fit */
dist = dd;
box3 = box2; /* box3 is a pointer and not copied box2 */
if (dist<100 && 100-dist >= JOB->cfg.certainty) {
/* some deviation of the pattern is tolerated */
int i, wa;
for (i=0;i<box3->num_ac;i++) {
wa = (100-dist)*box3->wac[i]/100; /* weight *= (100-dist) */
if (box3->tas[i]) setas(box1,box3->tas[i],wa);
else setac(box1,box3->tac[i],wa);
}
if (box3->num_ac) c=box3->tac[0]; /* 0 for strings (!UNKNOWN) */
/* NOTE(review): tas[0] and wac[0] are read here even if num_ac is 0 */
if (JOB->cfg.verbose)
fprintf(stderr, " dist=%4d c= %c 0x%02x %s wc= %3d", dist,
((box3->c>32 && box3->c<127) ? (char) box3->c : '.'),
(int)box3->c, ((box3->tas[0])?box3->tas[0]:""), box3->wac[0]);
}
/* perfect fit: stop searching, except for the chars "l1|I0O" where the
 * search goes on through the rest of the list */
if (dd<=0 && ((box3->num_ac && box3->tas[0]) || box3->c >= 128
|| !strchr ("l1|I0O", box3->c)))
break; /* speedup if found */
}
} end_for_each(&JOB->tmp.dblist);
}
if( (JOB->cfg.mode&128) != 0 && c == UNKNOWN ) { /* prompt the user */
/* should the output go to stderr or special pipe??? */
int utf8_ok=0; /* trigger this flag if input is ok */
int i, endchar; /* index */
out_env(box1); /* old: out_x(box1); */
fprintf(stderr,"The above pattern was not recognized.\n"
"Enter UTF8 char or string for above pattern. Leave empty if unsure.\n"
"Press RET at the end (ALT+RET to store into RAM only) : "
); /* ToDo: empty + alt-return (0x1b 0x0a) for help? ^a for skip all */
/* UTF-8 (man 7 utf-8):
* 7bit = 0xxxxxxx (0000-007F)
* 11bit = 110xxxxx 10xxxxxx (0080-07FF)
* 16bit = 1110xxxx 10xxxxxx 10xxxxxx (0800-FFFF)
* 21bit = 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* 26bit = 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
* 31bit = 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
/* buf is preset to the empty string because the result of fgets() is
 * not checked below */
buf[0]=0;
/* shift/ctrl/altgr-enter acts like enter or ^j or ^m,
* alt-enter returns 0x1b 0x0a and returns from fgets()
* ^d (EOF) returns (nil) from fgets()
* x+(2*)ctrl-d returns from fgets() without returning a 0x0a
* if not UTF-input-mode, we are in trouble?
* ^a=0x01, ^b=0x02, ^e=05, ..., ToDo: meaning of no-input or <=space
*/
/* NOTE(review): return value ignored, on a read error the content of buf
 * is not defined by the C standard */
fgets((char *)buf,200,stdin); /* including \n=0x0a */
dd=strlen((char *)buf);
/* output hexcode if verbose set */
if (JOB->cfg.verbose) {
fprintf(stderr, "\n# fgets [%d]:", dd);
for(i=0; i<dd; i++)
fprintf(stderr, " %02x", (unsigned)((unsigned char)buf[i]));
fprintf(stderr, "\n#");
}
/* we dont accept chars which could destroy database file */
/* the input is cut at the first control char; if there is none, i==dd and
 * endchar is the terminating 0 */
for (i=0; i<dd; i++) if (buf[i]<32) break; /* need unsigned char here */
endchar=buf[i]; /* last char is 0x0a (ret) 0x00 (EOF) or 0x1b (alt+ret) */
/* ^a drops the input and switches off the prompt (bit 128 of cfg.mode) */
if (endchar==0x01) { i=0;JOB->cfg.mode&=~128; } /* skip all */
buf[dd=i]=0; /* replace final 0x0a or other special codes */
if (dd==1 && !(buf[0]&128)) { c=buf[0]; utf8_ok=1; } /* single char */
if (dd>1 && dd<7) { /* try to decode single wide char (utf8) */
int u0, u1; /* define UTF8-start sequences, u0=0bits u1=1bits */
/* a start byte of a dd byte sequence has dd leading 1-bits (mask u1)
 * followed by a 0-bit (u0) */
u0= 1<<(7-dd); /* compute start byte from UTF8-length */
u1=255&~((1<<(8-dd))-1);
/* count number of following 10xxxxxx bytes to i */
for (i=1;i<dd;i++) if ((buf[i]&0xc0)!=0x80) break; /* 10xxxxxx */
if (i==dd && (buf[0]&(u0|u1))==u1) { utf8_ok=1;
c=buf[0]&(u0-1); /* 11..0x.. */
for (i=1;i<dd;i++) { c<<=6; c|=buf[i]&0x3F; } /* 10xxxxxx */
}
}
if (dd>0){ /* ToDo: skip space and tab too? */
if (utf8_ok==1) { setac(box1, c, 100); } /* store single wchar */
if (utf8_ok==0) { /* store a string of chars (UTF8-string) */
c='_'; /* what should we do with c? probably a bad idea? */
setas(box1, (char *)buf, 100);
}
/* decide between
* 0) just help gocr to find the results and (dont remember, 0x01)
* 1) help and remember in the same run (store to memory, 0x1b)
* 2) expand the database (dont store ugly chars to the database!)
*/
/* NOTE(review): after ^a dd is 0, so endchar is never 0x01 inside this
 * block and the tests for 0x01 below are always true */
if (endchar!=0x01){ /* ^a before hit return */
/* is there a reason to dont store to memory? */
/* box1 itself (not a copy) is appended to the database list */
list_app(&JOB->tmp.dblist, box1); /* append to list for 1+2 */
}
if (endchar!=0x01 && endchar!=0x1b){
store_db(box1); /* store to disk for 2 */
}
if (JOB->cfg.verbose)
fprintf(stderr, " got char= %c 16bit= 0x%04x string= \"%s\"\n",
((c>32 && c<127)?(char)c:'.'), (int)c, buf);
}
}
return c;
}

View File

@@ -0,0 +1,943 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2007 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
check README for my email address
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h> // toupper, tolower
#include "pgm2asc.h"
#include "gocr.h"
// ----- detect lines ---------------
/* suggestion: Fourier transform and set line frequency where the
amplitude has a maximum (JS: slow and not smarty enough).
option: range for line numbers 1..1000 or similar
todo: look for thickest line, and divide if thickness=2*mean_thickness
Set these elements of the box structs:
m1 <-- top of upper case letters and (bdfhkl) (can differ)
m2 <-- top of letters (acegmnopqrsuvwxyz)
m3 <-- baseline
m4 <-- bottom of hanging letters (gqpy)
performance can be improved by working with a temporary
list of boxes of the special text line
- Jun23,00 more robustness of m3 (test liebfrau1)
- Feb01,02 more robustness of m4 (test s46_084.pgm)
- Dec03,12 fix problems with footnotes
ToDo:
- generate lists of boxes per line (faster access)
- use statistics
- for each box look at it neighbours and set box-m1..m4
- m[1..4].max .min if m4.min-m3.max<1 probability lower
*/
/* Detect the text lines inside the region x0..x0+dx, y0..y0+dy and append
 * them to the line table JOB->res.lines.
 *
 * For every detected line the entries m1..m4 (see the description above),
 * x0/x1 (left/right end), wt (weight, 100 = no trouble seen), pitch and mono
 * are written; lines->num is updated at the end.
 *
 *   p            only p->x is read here (initial x0 of the dummy line 0)
 *   x0,y0,dx,dy  region of the image which is scanned for lines
 *
 * The boxes are taken from JOB->res.boxlist; boxes which already carry a
 * line number (box->line>0) and boxes marked as PICTURE are ignored.
 * lines->dx and lines->dy are used to correct crooked (skewed) lines.
 *
 * Returns 0 in all cases (also if nothing was detected).
 */
int detect_lines1(pix * p, int x0, int y0, int dx, int dy)
{
  int i, jj, j2, y, yy, my, mi, mc, i1, i2, i3, i4,
    m1, m2, m3, m4, ma1, ma2, ma3, ma4, m3pre, m4pre;
  struct box *box2, *box3;	/* box3 is for verbose / debugging */
  struct tlines *lines = &JOB->res.lines;

  /* ToDo: optional read line-data from external source??? */
  if (lines->num == 0) {	// initialize one dummy-line for pictures etc.
    lines->m4[0] = 0;
    lines->m3[0] = 0;
    lines->m2[0] = 0;
    lines->m1[0] = 0;
    lines->x0[0] = p->x;	/* expand to left end during detection */
    lines->x1[0] = 0;	/* expand to right end */
    lines->pitch[0] = JOB->cfg.spc;	/* default word pitch */
    lines->mono[0] = 0;	/* default spacing, 0 = prop */
    lines->num++;
  }
  i = lines->num;
  /* The table can already be full on entry (an earlier call stopped at
   * MAXlines and this function is called once per zone by detect_lines2).
   * The scan below stores to lines->...[i] before it checks the limit,
   * therefore stop here instead of writing behind the last entry. */
  if (i >= MAXlines)
    return 0;
  if (dy < 4)
    return 0;	/* image is to low for latin chars */
  my = jj = 0;
  // get the mean height of all hollow chars
  // (better than mean value of everything including bg-pattern or dust?)
  for_each_data(&(JOB->res.boxlist)) {
    box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
    if (    box2->c != PICTURE
         && box2->num_frames>1 && box2->num_frames<3 /* 1 or 2 holes */
         && box2->y0 >= y0 && box2->y1 <= y0 + dy
         && box2->x0 >= x0 && box2->x1 <= x0 + dx
         && box2->frame_vol[0]>0
         && box2->frame_vol[1]<0
       ) {
      jj++;
      my += box2->y1 - box2->y0 + 1;
    }
  } end_for_each(&(JOB->res.boxlist));
  if (jj==0) {
    // get the mean height of all chars
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (    box2->c != PICTURE
           && box2->y1 - box2->y0 + 1 >= 4 /* 4x6 font */
           && box2->y0 >= y0 && box2->y1 <= y0 + dy
           && box2->x0 >= x0 && box2->x1 <= x0 + dx ) {
        jj++;
        my += box2->y1 - box2->y0 + 1;
      }
    } end_for_each(&(JOB->res.boxlist));
  }
  if (jj == 0)
    return 0;	/* no chars detected */
  /* ToDo: a better way could be to mark good boxes (of typical high a-zA-Z0-9)
   *    first and handle only marked boxes for line scan, exclude ?!,.:;etc
   *    but without setect the chars itself (using good statistics)
   *    see adjust_text_lines()
   */
  my /= jj;          /* we only care about chars with high arround my */
  if (JOB->cfg.verbose & 16)
    fprintf(stderr,"\n# detect_lines1(%d %d %d %d) vvv&16 chars=%d my=%d\n# ",
      x0, y0, dx, dy, jj, my);
  // "my" is the average over the whole image (bad, if different fontsizes)
  if (my < 4)
    return 0;	/* mean high is to small => error */
  m4pre=m3pre=y0; /* lower bond of upper line */
  // better function for scanning line around a letter ???
  // or define lines around known chars "eaTmM"
  /* one pass of this loop handles one text line; y is moved below the
   * line just found at the end of each pass */
  for (j2 = y = y0; y < y0 + dy; y++) {
    // look for max. of upper and lower bound of next line
    m1 = y0 + dy;
    jj = 0;
#if 1
    /* this is only for test runs */
    if (JOB->cfg.verbose & 16)
      fprintf(stderr,"searching new line %d\n# ",i /* lines->num */);
#endif
    box3 = NULL; /* mark the most upper box starting next line */
    // find highest point of next line => store to m1-min (m1>=y)
    // only objects greater 2/3*my and smaller 3*my are allowed
    // a higher "!" at end of line can result in a to low m1
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (box2->line>0 || box2->c == PICTURE) continue;
      if (lines->dx)
        yy = lines->dy * box2->x0 / (lines->dx); /* correct crooked lines */
      else yy=0;
      if (    box2->y0 >= y + yy && box2->y1 < y0 + dy	// lower than y
           && box2->x0 >= x0 && box2->x1 < x0 + dx	// within box ?
           && box2->c != PICTURE	// no picture
           && box2->num_boxes <= 1 // ignore 2 for "!?i" 3 for "&auml;"
           && 3 * (box2->y1 - box2->y0) > 2 * my	// not to small
           && (box2->y1 - box2->y0) < 3 * my	// not to big
           && (box2->y1 - box2->y0) > 4)	// minimum absolute size
      {
        if (box2->y0 < m1 + yy) {
          m1 = box2->y0 - yy; /* highest upper boundary */
          box3 = box2;
        }
        // fprintf(stderr,"\n %3d %3d %+3d %d m1= %3d",
        //   box2->x0, box2->y0, box2->y1 - box2->y0 + 1, box2->num_boxes, m1);
      }
    } end_for_each(&(JOB->res.boxlist));
    if (!box3 || m1 >= y0+dy) break; /* no further line found */
    if (JOB->cfg.verbose & 16)
      fprintf(stderr," most upper box at new line xy= %4d %4d %+4d %+4d\n# ",
        box3->x0, box3->y0, box3->x1-box3->x0, box3->y1-box3->y0);
    // at the moment values depend from single chars, which can
    //   result in bad values (ex: 4x6 /\=)
    // ToDo: 2) mean size of next line (store list of y0,y1)
    // ToDo: 3) count num0[(y0-m1)*16/my], num1[(y1-m1)*16/my]
    // ToDo: or down-top search horizontal nerarest neighbours
    lines->x0[i] = x0 + dx - 1; /* expand during operation to left end */
    lines->x1[i] = x0; /* expand to the right end of line */
    m4=m2=m1; mi=m1+my; m3=m1+2*my; jj=0;
    // find limits for upper bound, base line and ground line
    //    m2-max m3-min m4-max
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (box2->line>0 || box2->c == PICTURE) continue;
      if ( box2->y0 < y0 || box2->y1 >= y0 + dy
        || box2->x0 < x0 || box2->x1 >= x0 + dx ) continue; // out of image
      if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx);
      else yy=0;
      /* check for ij-dots, used if chars of same high */
      if (    box2->y0 >= y + yy
           && box2->y0 >= y
           && (box2->y1 - box2->y0) < my
           && box2->y1 < m1 + yy + my/4
           && box2->y0 < mi + yy ) {
        mi = box2->y0 - yy; /* highest upper boundary i-dot */
      }
      // fprintf(stderr,"\n check %3d %3d-%3d y=%d yy=%d m1=%d", box2->x0, box2->y0, box2->y1, y, yy, m1);
      /* get m2-max m3-min m4-max */
      if (    box2->y0 >= y + yy	// lower than y
           && 3 * (box2->y1 - box2->y0 + 1) > 2 * my	// right size ?
           && (box2->y1 - box2->y0 + 1) < 3 * my	// font mix, size = 2.6*my
           && (box2->y1 - box2->y0 + 1) > 3	// 4x6 lowercase=4
           && box2->y0 >= m1	// in m1 range?
           && box2->y0 <= m1 + yy + 9 * my / 8 // my can be to small if mixed
           // ToDo: we need a better (local?) algorithm for big headlines > 2*my
           && box2->y1 <= m1 + yy + 3 * my
           && box2->y1 >= m1 + yy + my / 2
           // lines can differ in high, my may be to small (smaller headlines)
           && box2->y0+box2->y1 <= 2*box3->y1
         )
      {
        jj++; // count chars for debugging purpose
        if (box2->y0 > m2 + yy) {
          m2 = box2->y0 - yy; /* highest upper boundary */
          if (JOB->cfg.verbose & 16)
            fprintf(stderr," set m2= %d yy= %d\n# ",m2, yy);
        }
        if (box2->y1 > m4 + yy && (my>6 || box2->y1 < m3+my)) {
          m4 = box2->y1 - yy; /* lowest lower boundary, small font lines can touch */
        }
        if (   box2->y1 < m3 + yy
            && ( ( 2*box2->y1 > m2+ m4+yy && m2>m1)
              || ( 4*box2->y1 > m1+3*m4+yy) ) ) // care for TeX: \(^1\)Footnote 2003
          /* "'!?" could cause trouble here, therefore this lines */
          /* ToDo: get_bw costs time, check pre and next */
          if(    get_bw(box2->x0,box2->x1,box2->y1+1 ,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 0
              || get_bw(box2->x0,box2->x1,box2->y1+my/2,box2->y1+my/2,box2->p,JOB->cfg.cs,1) == 1
              || num_cross(box2->x0,box2->x1,(box2->y0+box2->y1)/2,(box2->y0+box2->y1)/2,box2->p,JOB->cfg.cs)>2 )
          {
            m3 = box2->y1 - yy; /* highest lower boundary */
            // printf("\n# set1 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
            // out_x(box2);
          }
        if (   box2->y0 + box2->y1 > 2*(m3 + yy)
            && box2->y1 < m4 + yy - my/4 -1
            && box2->y1 >= (m2 + m4)/2 // care for TeX: \(^1\)Footnote 2003
            && m2 > m1 ) // be sure to not use ', m2 must be ok
        {
          m3 = box2->y1 - yy; /* highest lower boundary */
          // printf("\n# set2 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
          // out_x(box2);
        }
        if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
        if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
        // printf(" m=%3d %+2d %+2d %+2d yy=%3d\n",m1,m2-m1,m3-m1,m4-m1,yy);
      }
    } end_for_each(&(JOB->res.boxlist));
#if 1
    /* this is only for test runs */
    if (JOB->cfg.verbose & 16)
      fprintf(stderr," step 1 y=%4d m= %4d %+3d %+3d %+3d"
        " my=%2d chars=%3d\n# ",
        y, m1, m2-m1, m3-m1, m4-m1, my, jj);
#endif
    if (m3 == m1)
      break;
#if 1	/* make averages about the line */
    // same again better estimation
    mc = (3 * m3 + m1) / 4;	/* lower center ? */
    ma1 = ma2 = ma3 = ma4 = i1 = i2 = i3 = i4 = jj = 0;
    /* every box near the limits found in step 1 votes for the nearer
     * upper limit (m1 or m2) and for the nearer lower limit (m3 or m4) */
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (box2->line>0 || box2->c == PICTURE) continue;
      if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
      if (    box2->y0 >= y + yy && box2->y1 < y0 + dy	// lower than y
           && box2->x0 >= x0 && box2->x1 < x0 + dx	// in box ?
           && box2->c != PICTURE	// no picture
           && 2 * (box2->y1 - box2->y0) > my	// right size ?
           && (box2->y1 - box2->y0) < 4 * my) {
        if (   box2->y0 - yy >= m1-my/4
            && box2->y0 - yy <= m2+my/4
            && box2->y1 - yy >= m3-my/4
            && box2->y1 - yy <= m4+my/4 ) { /* its within allowed range! */
          // jj++; // not used
          if (abs(box2->y0 - yy - m1) <= abs(box2->y0 - yy - m2))
               { i1++; ma1 += box2->y0 - yy; }
          else { i2++; ma2 += box2->y0 - yy; }
          if (abs(box2->y1 - yy - m3) < abs(box2->y1 - yy - m4))
               { i3++; ma3 += box2->y1 - yy; }
          else { i4++; ma4 += box2->y1 - yy; }
          if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
          if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
        }
      }
    } end_for_each(&(JOB->res.boxlist));
    if (i1) m1 = (ma1+i1/2) / i1; /* best rounded */
    if (i2) m2 = (ma2+i2/2) / i2;
    if (i3) m3 = (ma3+i3-1) / i3; /* round up */
    if (i4) m4 = (ma4+i4-1) / i4;
    // printf("\n# .. set3 m3 m=%3d %+2d %+2d %+2d",m1,m2-m1,m3-m1,m4-m1);
#endif
    /* expand right and left end of line */
    for_each_data(&(JOB->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
      if (box2->line>0 || box2->c == PICTURE) continue;
      if (lines->dx) yy = lines->dy * box2->x0 / (lines->dx); else yy=0;
      if (    box2->y0 >= y0 && box2->y1 < y0 + dy
           && box2->x0 >= x0 && box2->x1 < x0 + dx	// in box ?
           && box2->c != PICTURE	// no picture
           && box2->y0 >= m1-1
           && box2->y0 <= m4
           && box2->y1 >= m1
           && box2->y1 <= m4+1 ) { /* its within line */
        if (box2->x1>lines->x1[i]) lines->x1[i] = box2->x1; /* right end */
        if (box2->x0<lines->x0[i]) lines->x0[i] = box2->x0; /* left end */
      }
    } end_for_each(&(JOB->res.boxlist));
#if 1
    /* this is only for test runs */
    if (JOB->cfg.verbose & 16)
      fprintf(stderr," step 2 y=%4d m= %4d %+3d %+3d %+3d\n# ",
        y,m1,m2-m1,m3-m1,m4-m1);
#endif
    if (m4 == m1) {
      if(m3+m4>2*y) y = (m4+m3)/2; /* lower end may overlap the next line */
      continue;
    }
    /* jj is reused as trouble flags: bit 0 = m1,m2 too close (no upper
     * bound information), bit 1 = m3,m4 too close (no lower bound
     * information); each flag reduces the weight of the line to 75% */
    jj=0;
    lines->wt[i] = 100;
    if (5 * (m2 - m1 +1) < m3 - m2 || (m2 - m1) < 2) jj|=1; /* same high */
    if (5 * (m4 - m3 +1) < m3 - m2 || (m4 - m3) < 1) jj|=2; /* same base */
    if (jj&1) lines->wt[i] = 75*lines->wt[i]/100;
    if (jj&2) lines->wt[i] = 75*lines->wt[i]/100;
    if (jj>0 && JOB->cfg.verbose) {
      fprintf(stderr," trouble on line %d, wt*100= %d\n",i,lines->wt[i]);
      fprintf(stderr,"# m= %4d %+3d %+3d %+3d\n",m1,m2-m1,m3-m1,m4-m1);
      fprintf(stderr,"# i= %3d %3d %3d %3d (counts)\n",i1,i2,i3,i4);
      if (jj==3) fprintf(stderr,"# all boxes of same high!\n# ");
      if (jj==1) fprintf(stderr,"# all boxes of same upper bound!\n# ");
      if (jj==2) fprintf(stderr,"# all boxes of same lower bound!\n# ");
    }
    /* ToDo: check for dots ij,. to get the missing information */
#if 1
    /* jj=3: ABCDEF123456 or mnmno or gqpy or lkhfdtb => we are in trouble */
    if (jj==3 && (m4-m1)>my) { jj=0; m2=m1+my/8+1; m4=m3+my/8+1; } /* ABC123 */
    /* using idots, may fail on "ABCDEFG&Auml;&Uuml;&Ouml;" */
    if (jj==3 && mi>0 && mi<m1 && mi>m4pre) { jj=2; m1=mi; } /* use ij dots */
    if (jj==1 && m2-(m3-m2)/4>m3pre ) { /* expect: acegmnopqrsuvwxyz */
      if (m1-m4pre<m4-m1) /* fails for 0123ABCD+Q$ */
        m1 = ( m2 + m4pre ) / 2 ;
      else
        m1 = ( m2 - (m3 - m2) / 4 );
    }
    if (jj==3)
      m2 = m1 + (m3 - m1) / 4 + 1; /* expect: 0123456789ABCDEF */
    if ( (m2 - m1) < 2)
      m2 = m1 + 2; /* font hight < 8 pixel ? */
    if (jj&2)
      m4 = m3 + (m4 - m1) / 4 + 1; /* chars have same lower base */
    if (jj>0 && JOB->cfg.verbose & 16) {
      fprintf(stderr," m= %4d %+2d %+2d %+2d my= %4d\n# ",
        m1, m2-m1, m3-m1, m4-m1, my);
    }
#endif
    { // empty space between lines
      lines->m4[i] = m4;
      lines->m3[i] = m3;
      lines->m2[i] = m2;
      lines->m1[i] = m1;
      lines->pitch[i] = JOB->cfg.spc; /* default word pitch */
      lines->mono[i] = 0; /* default spacing, 0=prop, 1=mono */
      if (JOB->cfg.verbose & 16)
        fprintf(stderr, " m= %4d %+3d %+3d %+3d w= %d (line=%d)\n# ",
          m1, m2 - m1, m3 - m1, m4 - m1, lines->wt[i], i);
      /* the entry is only kept (i advanced) if the line is high enough,
       * otherwise slot i is overwritten by the next pass */
      if (i < MAXlines && m4 - m1 > 4)
        i++;
      if (i >= MAXlines) {
        fprintf(stderr, "Warning: lines>MAXlines\n");
        break;
      }
    }
    if (m3+m4>2*y) y = (m3+m4)/2; /* lower end may overlap the next line */
    if (m3>m3pre) m3pre = m3; else m3=y0; /* set for next-line scan */
    if (m4>m4pre) m4pre = m4; else m4=y0; /* set for next-line scan */
  }
  lines->num = i;
  if (JOB->cfg.verbose)
    fprintf(stderr, " num_lines= %d", lines->num-1);
  return 0;
}
// ----- layout analysis of the dx*dy region at x0,y0 -----
// ----- detect lines via recursive division (new version) ---------------
// what about text in frames???
// ToDo: change to bottom-top analysis and/or take rotation into account
/* Zoning: divide the region x0,y0,dx,dy recursively at the widest gap
 * between boxes and call detect_lines1() for every resulting zone.
 *
 *   p            image, passed on to detect_lines1(); p->y is read here
 *   x0,y0,dx,dy  region to analyse
 *   r            recursion depth, a region is only divided while r<8
 *
 * Returns 0 for empty or too small regions, -1 if r>1000, otherwise the
 * return value of the last recursive call or of detect_lines1().
 * The return value of the first of the two recursive calls is ignored.
 */
int detect_lines2(pix *p,int x0,int y0,int dx,int dy,int r){
int i,x2,y2,x3,y3,x4,y4,x5,y5,y6,mx,my,x30,x31,y30,y31;
struct box *box2,*box3;
// shrink box
if(dx<=0 || dy<=0) return 0;
if(y0+dy< p->y/128 && y0==0) return 0; /* looks like dust */
if(y0>p->y-p->y/128 && y0+dy==p->y) return 0; /* looks like dust */
if(r>1000){ return -1;} // something is wrong
if(JOB->cfg.verbose)fprintf(stderr,"\n# r=%2d ",r);
mx=my=i=0; // mean thickness
// remove border, shrink size
/* x2,y2 start at the far corner and x3,y3 at the near corner, so that
 * the loop below yields the bounding box of all boxes inside the region */
x2=x0+dx-1; // min x
y2=y0+dy-1; // min y
x3=x0; // max x
y3=y0; // max y
for_each_data(&(JOB->res.boxlist)) {
box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
if(box3->y0>=y0 && box3->y1<y0+dy &&
box3->x0>=x0 && box3->x1<x0+dx)
{
if( box3->x1 > x3 ) x3=box3->x1; // max x
if( box3->x0 < x2 ) x2=box3->x0; // min x
if( box3->y1 > y3 ) y3=box3->y1; // max y
if( box3->y0 < y2 ) y2=box3->y0; // min y
/* only non-picture boxes with y1-y0>4 count for the mean size mx,my */
if(box3->c!=PICTURE)
if( box3->y1 - box3->y0 > 4 )
{
i++;
mx+=box3->x1-box3->x0+1; // mean x
my+=box3->y1-box3->y0+1; // mean y
}
}
} end_for_each(&(JOB->res.boxlist));
x0=x2; dx=x3-x2+1;
y0=y2; dy=y3-y2+1;
if(i==0 || dx<=0 || dy<=0) return 0;
mx/=i;my/=i;
// better look for widest h/v-gap, ToDo: vertical lines?
if(r<8){ // max. depth
// detect widest horizontal gap
/* from here on x2,y2 = position and x3,y3 = width of the widest gap
 * found so far (x5,y5,y6 are set but not used below) */
y2=y3=y4=y5=y6=0;
x2=x3=x4=x5=y5=0;// min. 3 lines
// position and thickness of gap, y6=num_gaps, nbox^2 ops
for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
if( box2->c!=PICTURE ) /* ToDo: not sure, that this is a good idea */
if( box2->y0>=y0 && box2->y1<y0+dy
&& box2->x0>=x0 && box2->x1<x0+dx
&& box2->y1-box2->y0>my/2 ){ // no pictures & dust???
y4=y0+dy-1; // nearest vert. box
x4=x0+dx-1;
// ToDo: rotate back box2->x1,y1 to x21,y21
// look for nearest lowest (y4) and right (x4) neighbour
// of every box (box2)
for_each_data(&(JOB->res.boxlist)) {
box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
if(box3!=box2)
if(box3->y0>=y0 && box3->y1<y0+dy)
if(box3->x0>=x0 && box3->x1<x0+dx)
if(box3->c!=PICTURE) /* ToDo: not sure, that this is a good idea */
if(box3->y1-box3->y0>my/2 ){
// ToDo: here we need the rotation around box2
x30=box3->x0;
x31=box3->x1;
y30=box3->y0;
y31=box3->y1;
// get min. distances to lower and to right direction
if( y31 > box2->y1 && y30 < y4 ) y4=y30-1;
if( x31 > box2->x1 && x30 < x4 ) x4=x30-1;
}
} end_for_each(&(JOB->res.boxlist));
// set the witdht and position of largest hor./vert. gap
// largest gap: width position
if( y4-box2->y1 > y3 ) { y3=y4-box2->y1; y2=(y4+box2->y1)/2; }
if( x4-box2->x1 > x3 ) { x3=x4-box2->x1; x2=(x4+box2->x1)/2; }
}
} end_for_each(&(JOB->res.boxlist));
// fprintf(stderr,"\n widest y-gap= %4d %4d",y2,y3);
// fprintf(stderr,"\n widest x-gap= %4d %4d",x2,x3);
i=0; // i=1 at x, i=2 at y
// this is the critical point
// is this a good decision or not???
if(x3>0 || y3>0){
if(x3>mx && x3>2*y3 && (dy>5*x3 || (x3>10*y3 && y3>0))) i=1; else
if(dx>5*y3 && y3>my) i=2;
}
// compare with largest box???
/* a PICTURE box nearly as wide (high) as the region is taken as a
 * separator and forces a division directly beside it; the first such
 * box wins (break) */
for_each_data(&(JOB->res.boxlist)) { // not very efficient, sorry
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
if( box2->c == PICTURE )
if( box2->y0>=y0 && box2->y1<y0+dy
&& box2->x0>=x0 && box2->x1<x0+dx )
{ // hline ???
// largest gap: width position
if( box2->x1-box2->x0+4 > dx && box2->y1+4<y0+dy ) { y3=1; y2=box2->y1+1; i=2; break; }
if( box2->x1-box2->x0+4 > dx && box2->y0-4>y0 ) { y3=1; y2=box2->y0-1; i=2; break; }
if( box2->y1-box2->y0+4 > dy && box2->x1+4<x0+dx ) { x3=1; x2=box2->x1+1; i=1; break; }
if( box2->y1-box2->y0+4 > dy && box2->x0-4>x0 ) { x3=1; x2=box2->x0-1; i=1; break; }
}
} end_for_each(&(JOB->res.boxlist));
if(JOB->cfg.verbose)fprintf(stderr," i=%d",i);
if(JOB->cfg.verbose && i) fprintf(stderr," divide at %s x=%4d y=%4d dx=%4d dy=%4d",
((i)?( (i==1)?"x":"y" ):"?"),x2,y2,x3,y3);
// divide horizontally if v-gap is thicker than h-gap
// and length is larger 5*width
/* both parts include the dividing column x2 (row y2) */
if(i==1){ detect_lines2(p,x0,y0,x2-x0+1,dy,r+1);
return detect_lines2(p,x2,y0,x0+dx-x2+1,dy,r+1); }
// divide vertically
if(i==2){ detect_lines2(p,x0,y0,dx,y2-y0+1,r+1);
return detect_lines2(p,x0,y2,dx,y0+dy-y2+1,r+1);
}
}
/* reached if the region was not divided (no usable gap or r>=8) */
if(JOB->cfg.verbose) if(dx<5 || dy<7)fprintf(stderr," empty box");
if(dx<5 || dy<7) return 0; // do not care about dust
if(JOB->cfg.verbose)fprintf(stderr, " box detected at %4d %4d %4d %4d",x0,y0,dx,dy);
/* if the picture JOB->tmp.ppo exists, put() is called for every pixel
 * of the border of the zone */
if(JOB->tmp.ppo.p){
for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0 ,255,16);
for(i=0;i<dx;i++)put(&JOB->tmp.ppo,x0+i ,y0+dy-1,255,16);
for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0 ,y0+i ,255,16);
for(i=0;i<dy;i++)put(&JOB->tmp.ppo,x0+dx-1,y0+i ,255,16);
// writebmp("out10.bmp",p2,JOB->cfg.verbose); // colored should be better
}
/* all offsets are multiplied by 0, the zone is passed on unchanged */
return detect_lines1(p,x0-0*1,y0-0*2,dx+0*2,dy+0*3);
/*
struct tlines *lines = &JOB->res.lines;
i=lines->num; lines->num++;
lines->m1[i]=y0; lines->m2[i]=y0+5*dy/16;
lines->m3[i]=y0+12*dy/16; lines->m4[i]=y0+dy-1;
lines->x0[i]=x0; lines->x1[i]=x0+dx-1;
if(JOB->cfg.verbose)fprintf(stderr," - line= %d",lines->num);
return 0;
*/
}
/* ToDo: Heron's algorithm for the square root, x=(x+y/x)/2, is more efficient
 * than interval subdivision (?) (German: Intervallschachtelung)
 * and works without using the math library
 * see http://www.math.vt.edu/people/brown/doc/sqrts.pdf
 */
/* Integer square root: returns the largest y with y*y <= x,
 * and 0 for x <= 0.
 *
 * Found by interval subdivision. The comparison is written as
 * ym <= x/ym instead of ym*ym <= x, because the product overflows
 * an int for large x.
 */
int my_sqrt(int x){
  int y0, y1, ym;
  if (x <= 0) return 0;
  y0 = 1; y1 = x;        /* the result is always within y0..y1 */
  while (y0 < y1) {
    ym = y0 + (y1 - y0 + 1) / 2;   /* upper middle, so the loop always advances */
    if (ym <= x / ym) y0 = ym;     /* ym*ym <= x, ym is a candidate */
    else              y1 = ym - 1; /* ym is too big */
  }
  return y0;
}
/*
** Detect rotation angle (one for whole image)
** old: longest text-line and determining the angle of this line.
*
* search right nearest neighbour of each box and average vectors
* to get the text orientation,
* upside down decision is not made here (I dont know how to do it)
* ToDo: set job->res.lines.{dx,dy}
* pass 1: get mean vector to nearest char
* pass 2: get mean vector to nearest char without outriders to pass 1
* extimate direction as (dx,dy,num)[pass]
* ToDo: estimate an error, boxes only work fine for zero-rotation
* for 45 degree use vectors, not boxes to get base line
*/
#define INorm 1024 /* integer unit 1.0 */

/* Squared cosine of the angle between the vectors (vx,vy) and (px,py),
 * scaled to 0..INorm:  <v,p>^2 / (|v|^2 * |p|^2) * INorm
 *   0 = 90 degree, INorm/2 = 45 degree, INorm = same direction.
 * All products are calculated as double, so they can not overflow an int.
 * Returns 0 if one of the vectors has zero length (avoids 0/0).
 */
static int rotation_cos2(int vx, int vy, int px, int py)
{
  double dot  = (double)vx * px + (double)vy * py;
  double len2 = (double)vx * vx + (double)vy * vy;
  double ref2 = (double)px * px + (double)py * py;
  if (len2 <= 0 || ref2 <= 0) return 0;
  return (int)(dot * dot * INorm / (len2 * ref2));
}

/* Estimate the text direction of the whole image and store it as vector
 * in job->res.lines.dx and job->res.lines.dy.
 *
 * Up to 4 passes over job->res.boxlist. In every pass the nearest
 * neighbour to the right is searched for each box and the vectors between
 * the box centers are averaged. From pass 2 on only neighbours are
 * accepted whose direction deviates from the mean vector of the pass
 * before by not more than the mean deviation er[pass-1].
 * If no pair is found at all the default (1024,0) is stored.
 *
 * Returns 0 in all cases.
 */
int detect_rotation_angle(job_t *job){
  struct box *box2, *box3,
             *box_nn; /* nearest neighbour box */
  int x2, y2, x3, y3, dist, mindist, pass,
      rx=0, ry=0, re=0, // final result
      /* to avoid 2nd run, wie store pairs in 2 different categories */
      nn[4]={0,0,0,0}, /* num_pairs used for estimation [(pass-1)%2,pass%2] */
      dx[4]={0,0,0,0}, /* x-component of rotation vector per pass */
      dy[4]={0,0,0,0}, /* y-component of rotation vector per pass */
      er[4]={INorm/4,0,0,0}; /* mean angle deviation to pass-1 (radius^2) */
  // de; /* ToDo: absolute maximum error (dx^2+dy^2) */
  // ToDo: next pass: go to bigger distances and reduce max error
  // error is diff between passes? or diff of bottoms and top borders (?)
  rx=1024; ry=0; // default
  for (pass=0;pass<4;pass++) {
    for_each_data(&(job->res.boxlist)) {
      box2 = (struct box *)list_get_current(&(job->res.boxlist));
      if (box2->c==PICTURE) continue;
      /* subfunction probability of char */
      // i?
      // if (box2->x1 - box2->x0 < 3) continue; /* smallest font is 4x6 */
      if (box2->y1 - box2->y0 < 4) continue;
      /* set maximum possible distance */
      box_nn=box2; // initial box to compare with
      // ToDo: clustering or majority
      // the algorithm is far from being perfect, pitfalls are likely
      // but its better than the old algorithm, ToDo: database-rotated-images
      mindist = job->src.p.x * job->src.p.x + job->src.p.y * job->src.p.y;
      /* get middle point of the box */
      x2 = (box2->x0 + box2->x1)/2;
      y2 = (box2->y0 + box2->y1)/2;
      re=0;
      /* search for nearest neighbour box_nn[pass+1] of box_nn[pass] */
      for_each_data(&(job->res.boxlist)) {
        box3 = (struct box *)list_get_current(&(job->res.boxlist));
        /* try to select only potential neighbouring chars */
        /* select out all senseless combinations */
        if (box3->c==PICTURE || box3==box2) continue;
        x3 = (box3->x0 + box3->x1)/2;
        y3 = (box3->y0 + box3->y1)/2; /* get middle point of the box */
        if (x3<x2) continue; /* simplify by going right only */
        // through-away deviation of angles if > pass-1?
        // scalprod max in direction, cross prod min in direction
        // a,b (vectors): <a,b>^2/(|a|*|b|)^2 = 0(90deg)..0.5(45deg).. 1(0deg)
        if (pass>0) { // new variant = scalar product
          /* boxes with identical center give re=0 here (no division by
           * zero) and are dropped by this test or by dist<9 below */
          re = rotation_cos2(x3-x2, y3-y2, dx[pass-1], dy[pass-1]);
          if (INorm-re>er[pass-1]) continue; // hits mean deviation
        }
        /* neighbours should have same order of size (?) */
        if (3*(box3->y1-box3->y0+4) < 2*(box2->y1-box2->y0+1)) continue;
        if (2*(box3->y1-box3->y0+1) > 3*(box2->y1-box2->y0+4)) continue;
        if (2*(box3->x1-box3->x0+1) > 5*(box2->x1-box2->x0+4)) continue;
        if (5*(box3->x1-box3->x0+4) < 2*(box2->x1-box2->x0+1)) continue;
        /* should be in right range, Idea: center3 outside box2? noholes */
        if ((x3<box2->x1-1) && (x3>box2->x0+1)
         && (y3<box2->y1-1) && (y3>box2->y0+1)) continue;
        // if chars are of different size, connect careful
        if ( abs(x3-x2) > 2*(box2->x1 - box2->x0 + box3->x1 - box3->x0 + 2)) continue;
        if ( abs(y3-y2) >   (box2->x1 - box2->x0 + box3->x1 - box3->x0 + 2)) continue;
        dist = (y3-y2)*(y3-y2) + (x3-x2)*(x3-x2);
        // make distances in pass-1 directions shorter or continue if not in pass-1 range?
        if (dist<9) continue; /* minimum distance^2 is 3^2 */
        if (dist<mindist) { mindist=dist; box_nn=box3;}
        // fprintf(stderr,"x y %d %d %d %d dist %d min %d\n",
        //   x2,y2,x3,y3,dist,mindist);
      } end_for_each(&(job->res.boxlist));
      if (box_nn==box2) continue; /* has no neighbour, next box */
      box3=box_nn; dist=mindist;
      x3 = (box3->x0 + box3->x1)/2;
      y3 = (box3->y0 + box3->y1)/2; /* get middle point of the box */
      // dist = my_sqrt(1024*((x3-x2)*(x3-x2)+(y3-y2)*(y3-y2)));
      // compare with first box
      x2 = (box2->x0 + box2->x1)/2;
      y2 = (box2->y0 + box2->y1)/2;
      // if the high of neighbouring boxes differ, use min diff (y0,y1)
      if (pass>0 && 16*abs(dy[pass-1]) < dx[pass-1]) // dont work for strong rot.
        if (abs(box2->y1-box2->y0-box3->y1+box3->y0)>(box2->y1-box2->y0)/8) {
          // ad eh ck ...
          if (abs(box2->y1-box3->y1)<abs(y3-y2)) { y2=box2->y1; y3=box3->y1; }
          // ag ep qu ...
          if (abs(box2->y0-box3->y0)<abs(y3-y2)) { y2=box2->y0; y3=box3->y0; }
        }
      if (abs(x3-x2)<4) continue;
      dx[pass]+=(x3-x2)*1024; /* normalized before averaging */
      dy[pass]+=(y3-y2)*1024; /* 1024 is for the precision */
      nn[pass]++;
      if (pass>0) { // set error = mean deviation from pass -1
        re = INorm-rotation_cos2(x3-x2, y3-y2, dx[pass-1], dy[pass-1]);
        er[pass]+=re;
      }
#if 0
      if(job->cfg.verbose)
        fprintf(stderr,"# next nb (x,y,dx,dy,re) %6d %6d %5d %5d %5d pass %d\n",
          x2, y2, x3-x2, y3-y2, re, pass+1);
#endif
    } end_for_each(&(job->res.boxlist));
    if (!nn[pass]) break; /* no pairs, keep the result of the pass before */
    /* meanvalues */
    rx=dx[pass]/=nn[pass];
    ry=dy[pass]/=nn[pass];
    if (pass>0) er[pass]/=nn[pass];
    if(job->cfg.verbose)
      fprintf(stderr,"# rotation angle (x,y,maxr,num)"
        " %6d %6d %6d %4d pass %d\n",
        rx, ry, er[pass], nn[pass], pass+1);
  }
  if (abs(ry*100)>abs(rx*50))
    fprintf(stderr,"<!-- gocr will fail, strong rotation angle detected -->\n");
  /* ToDo: normalize to 2^10 bit (square fits to 32 it) */
  /* results go to the job given by the caller */
  job->res.lines.dx=rx;
  job->res.lines.dy=ry;
  return 0;
}
/* ----- detect lines --------------- */
/* Entry point of the text line detection.
 * Bit 2 of mo (mo & 4) selects the zoning variant detect_lines2(),
 * otherwise detect_lines1() scans the whole image pp as one region.
 * Returns 0 in all cases.
 */
int detect_text_lines(pix * pp, int mo) {
  int zoning = (mo & 4) ? 1 : 0;

  if (JOB->cfg.verbose) {
    fprintf(stderr, "# detect.c detect_text_lines (vvv=16 for more info) ");
    if (zoning)
      fprintf(stderr, "# zoning\n# ... ");
  }
  if (zoning) {
    detect_lines2(pp, 0, 0, pp->x, pp->y, 0); // later replaced by better algo
  } else {
    detect_lines1(pp, 0, 0, pp->x, pp->y); // old algo
  }
  if (JOB->cfg.verbose)
    fprintf(stderr,"\n");
  return 0;
}
/* ----- adjust lines --------------- */
// rotation angle? JOB->res.lines.dy, .x0 removed later
// this is for cases, where m1..m4 is not very sure detected before
// chars are recognized
/* Recalculate m1..m4 of all text lines from chars which are recognized
 * with high probability and adjust the boxes to the new line limits.
 *
 * Pass 1: for every box with a line number and a best alternative of
 *         weight >= 95 the skew corrected top and bottom are added to the
 *         statistics of its line, depending on the class of the char
 *         ("aemnr": m2,m3  "bdhkl..A..1..": m1,m3  "gq": m2,m4).
 * Pass 2: per line the mean values replace m1..m4 where statistics exist,
 *         missing limits are estimated, the line weight wt is updated.
 * Pass 3: boxes far outside their line get line=0, chars which look the
 *         same in upper and lower case ("cCoO...") are switched depending
 *         on their top relative to m1,m2, and box->m1..m4 are set.
 * Pass 1 and 3 are skipped if JOB->res.lines.dx is 0.
 *
 *   pp, mo  not used
 *
 * Returns the number of changed chars, 0 if there are less than 2 lines
 * or if malloc failed.
 *
 * NOTE(review): the case adjustment in pass 3 was never executed before
 *   (the guard tested num_ac instead of tac[0]); results should be
 *   compared against the test images.
 */
int adjust_text_lines(pix * pp, int mo) {
  struct box *box2;
  struct tlines *lines = &JOB->res.lines;
  int *m,  /* 16 ints per line: [0..3] sum m1..m4, [4..7] num_chars for
              m1..m4, [8..11] min m1..m4, [12..15] max. m1..m4 */
      *ml, /* the 16 ints of the line in work */
      l, i, dy, dx, diff=0, y0, y1;

  if ((l=lines->num)<2) return 0; // ???
  if (JOB->cfg.verbose)
    fprintf(stderr, "# adjust text lines ");
  m=(int *)malloc(l*16*sizeof(int));
  if (!m) { fprintf(stderr," malloc failed\n"); return 0;}
  for (i=0;i<16*l;i++) m[i]=0; /* initialize */
  dy=lines->dy; /* tan(alpha) of skewing */
  dx=lines->dx; /* old: width of image */
  // js: later skewing is replaced by one transformation of vectorized image
  if (dx)
  for_each_data(&(JOB->res.boxlist)) {
    box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
    if (box2->line<=0) continue;
    if (box2->line>=l) continue; /* no statistics slot for this line */
    if (box2->num_ac<1) continue;
    if (box2->wac[0]<95) continue;
    /* strchr() below gets tac[0] casted to char: bigger codes would be
     * truncated and a zero byte matches the end of every string */
    if (box2->tac[0]<32 || box2->tac[0]>126) continue;
    if (box2->m2==0 || box2->y1<box2->m2) continue; // char outside line
    /* NOTE(review): m3==4 looks like a typo for m3==0, kept unchanged */
    if (box2->m3==4 || box2->y0>box2->m3) continue; // char outside line
    y0=box2->y0-((box2->x1)*dy/dx); /* corrected by page skewing */
    y1=box2->y1-((box2->x1)*dy/dx);
    ml=m+box2->line*16;
    /* the min values [8..11] start at 0 and can only go below 0,
     * they are not used further down */
    if (strchr("aemnr",(char)box2->tac[0])) { // cC vV sS oO ... is unsure!
      ml[1]+=y0; ml[5]++; // num m2
      ml[2]+=y1; ml[6]++; // num m3
      if (ml[ 9]>y0) ml[ 9]=y0; /* min m2 */
      if (ml[13]<y0) ml[13]=y0; /* max m2 */
      if (ml[10]>y1) ml[10]=y1; /* min m3 */
      if (ml[14]<y1) ml[14]=y1; /* max m3 */
    }
    if (strchr("bdhklABDEFGHIKLMNRT123456789",(char)box2->tac[0])) {
      ml[0]+=y0; ml[4]++; // num m1
      ml[2]+=y1; ml[6]++; // num m3
      if (ml[ 8]>y0) ml[ 8]=y0; /* min m1 */
      if (ml[12]<y0) ml[12]=y0; /* max m1 */
      if (ml[10]>y1) ml[10]=y1; /* min m3 */
      if (ml[14]<y1) ml[14]=y1; /* max m3 */
    }
    if (strchr("gq",(char)box2->tac[0])) {
      ml[1]+=y0; ml[5]++; // num m2
      ml[3]+=y1; ml[7]++; // num m4
      if (ml[ 9]>y0) ml[ 9]=y0; /* min m2 */
      if (ml[13]<y0) ml[13]=y0; /* max m2 */
      if (ml[11]>y1) ml[11]=y1; /* min m4 */
      if (ml[15]<y1) ml[15]=y1; /* max m4 */
    }
  } end_for_each(&(JOB->res.boxlist));

  for (i=1;i<l;i++) {
    ml=m+i*16;
    diff=0; // show diff per line
    if (ml[4]) diff+=abs(lines->m1[i]-ml[0]/ml[4]);
    if (ml[5]) diff+=abs(lines->m2[i]-ml[1]/ml[5]);
    if (ml[6]) diff+=abs(lines->m3[i]-ml[2]/ml[6]);
    if (ml[7]) diff+=abs(lines->m4[i]-ml[3]/ml[7]);
    /* recalculate sureness, empirically */
    /* the counts are tested one by one, their product could overflow */
    if (ml[4] && ml[5] && ml[6] && ml[7])
      lines->wt[i]=(lines->wt[i]+100)/2;
    else
      lines->wt[i]=(lines->wt[i]*90)/100;
    // set mean values of sure detected bounds (rounded precisely)
    if (ml[4]) lines->m1[i]=(ml[0]+ml[4]/2)/ml[4];
    if (ml[5]) lines->m2[i]=(ml[1]+ml[5]/2)/ml[5];
    if (ml[6]) lines->m3[i]=(ml[2]+ml[6]/2)/ml[6];
    if (ml[7]) lines->m4[i]=(ml[3]+ml[7]/2)/ml[7];
    // care about very small fonts
    if (lines->m2[i]-lines->m1[i]<=1 && ml[5]==0 && ml[4])
      lines->m2[i]=lines->m1[i]+2;
    if (lines->m2[i]-lines->m1[i]<=1 && ml[4]==0 && ml[5])
      lines->m1[i]=lines->m2[i]-2;
    if (lines->m4[i]-lines->m3[i]<=1 && ml[7]==0 && ml[6])
      lines->m4[i]=lines->m3[i]+2;
    if (lines->m4[i]-lines->m3[i]<=1 && ml[6]==0 && ml[7])
      lines->m3[i]=lines->m4[i]-2;
    /* no m4 statistics: m4 is at least (m3-m2)/4 below m3 */
    if (   ml[7]<1
        && lines->m4[i]<=lines->m3[i]+(lines->m3[i]-lines->m2[i])/4 )
      lines->m4[i]=lines->m3[i]+(lines->m3[i]-lines->m2[i])/4;
    if (   ml[7]<1 && ml[14]>0 && // m4 < max.m3+..
           lines->m4[i] < 2*ml[14]-lines->m3[i]+2 )
      lines->m4[i] = 2*ml[14]-lines->m3[i]+2;
    if (lines->m4[i]<=lines->m3[i])
      lines->m4[i]= lines->m3[i]+1; /* 4x6 */
    if (JOB->cfg.verbose & 17)
      fprintf(stderr, "\n# line= %3d m= %4d %+3d %+3d %+3d "
        " n= %2d %2d %2d %2d w= %3d diff= %d",
        i, lines->m1[i],
        lines->m2[i] - lines->m1[i],
        lines->m3[i] - lines->m1[i],
        lines->m4[i] - lines->m1[i],
        ml[4],ml[5],ml[6],ml[7],
        lines->wt[i], diff);
  }
  diff=0; // count adjusted chars
#if 1
  if (dx)
  for_each_data(&(JOB->res.boxlist)) {
    box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
    if (box2->line<=0) continue;
    /* check if box was on the wrong line, ToDo: search a better line */
    if (2*box2->y0<2*lines->m1[box2->line]
                    -lines->m4[box2->line]
                    +lines->m1[box2->line]) box2->line=0;
    if (2*box2->y1>2*lines->m4[box2->line]
                    +lines->m4[box2->line]
                    -lines->m1[box2->line]) box2->line=0;
    /* do adjustments */
    /* only for boxes still on a real line (line 0 is the dummy line)
     * and only for codes 32..126, islower(>256) may SIGSEGV */
    if (box2->line>0 && box2->num_ac>0
     && box2->tac[0] > 31 && box2->tac[0] < 127
     && strchr("cCoOpPsSuUvVwWxXyYzZ",(char)box2->tac[0])) { // no_wchar
      if (box2->y0-((box2->x1)*dy/dx)
            < (lines->m1[box2->line]+lines->m2[box2->line])/2
       && islower(box2->tac[0])
         ) { setac(box2,toupper((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
      if (box2->y0-((box2->x1)*dy/dx)
            > (lines->m1[box2->line]+lines->m2[box2->line]+1)/2
       && isupper(box2->tac[0])
         ){ setac(box2,tolower((char)box2->tac[0]),(box2->wac[0]+101)/2); diff++; }
    }
    box2->m1=lines->m1[box2->line]+((box2->x1)*dy/dx);
    box2->m2=lines->m2[box2->line]+((box2->x1)*dy/dx);
    box2->m3=lines->m3[box2->line]+((box2->x1)*dy/dx);
    box2->m4=lines->m4[box2->line]+((box2->x1)*dy/dx);
  } end_for_each(&(JOB->res.boxlist));
#endif
  free(m);
  if(JOB->cfg.verbose) fprintf(stderr,"\n# changed_chars= %d\n",diff);
  return(diff);
}
/* ---- measure mean character size
 * recalculate mean width and height after changes in boxlist
 * ToDo: only within a range?
 */
/* Recalculate JOB->res.sumX, sumY, numC and from them the mean box size
 * JOB->res.avX, avY over all boxes of the boxlist which are no pictures.
 * If a mean size is already known (avX*avY>0), boxes more than 4 times
 * wider and higher and boxes lower than avY/4 or lower than 3 pixel are
 * left out; boxes smaller than 4x6 are always left out.
 * avX and avY stay untouched if no box is counted.
 * Returns 0 in all cases.
 */
int calc_average() {
  int seen = 0, width, height;
  struct box *bx;

  JOB->res.sumX = 0;
  JOB->res.sumY = 0;
  JOB->res.numC = 0;
  for_each_data(&(JOB->res.boxlist)) {
    bx = (struct box *)list_get_current(&(JOB->res.boxlist));
    if (bx->c == PICTURE) continue;
    width  = bx->x1 - bx->x0 + 1;
    height = bx->y1 - bx->y0 + 1;
    seen++; /* every non-picture box, also the ones skipped below */
    if (JOB->res.avX * JOB->res.avY > 0) {
      if (width > 4 * JOB->res.avX && height > 4 * JOB->res.avY)
        continue; /* small picture */
      if (4 * height < JOB->res.avY || height < 3)
        continue; // dots .,-_ etc.
    }
    if (width < 4 && height < 6)
      continue; /* dots etc */
    JOB->res.sumX += width;
    JOB->res.sumY += height;
    JOB->res.numC++;
  } end_for_each(&(JOB->res.boxlist));
  if (JOB->res.numC != 0) { /* avoid div 0 */
    JOB->res.avY = (JOB->res.sumY + JOB->res.numC/2) / JOB->res.numC;
    JOB->res.avX = (JOB->res.sumX + JOB->res.numC/2) / JOB->res.numC;
  }
  if (JOB->cfg.verbose)
    fprintf(stderr, "# averages: mXmY= %d %d nC= %d n= %d\n",
      JOB->res.avX, JOB->res.avY, JOB->res.numC, seen);
  return 0;
}
/* ---- analyse boxes, find pictures and mark (do this first!!!)
*/
/* Mark boxes of unusual size as PICTURE.
 * avX, avY are calculated from job->res.sumX, sumY, numC first. A box more
 * than 4 times wider than avX or higher than avY becomes a PICTURE, except
 * if more than 4 boxes (the box itself included) of similar height lie at
 * about the same vertical position (could be a big headline).
 * calc_average() is called at the end.
 * Returns -1 if job->res.numC is 0, otherwise 0.
 */
int detect_pictures(job_t *job) {
  int marked = 0, top, bottom, span, half, oh, peers, max_w, max_h;
  struct box *cand, *other;

  if ( job->res.numC == 0 ) {
    if (job->cfg.verbose)
      fprintf(stderr, "# detect.c L%d Warning: numC=0\n", __LINE__);
    return -1;
  }
  /* ToDo: set Y to uppercase mean value? */
  job->res.avY = (job->res.sumY+job->res.numC/2) / job->res.numC;
  job->res.avX = (job->res.sumX+job->res.numC/2) / job->res.numC;
  /* ToDo: two highest volumes? crosses, on extreme volume + on border */
  if (job->cfg.verbose)
    fprintf(stderr, "# detect.c L%d pictures, frames, mXmY= %d %d ... ",
      __LINE__, job->res.avX, job->res.avY);
  max_w = 4 * job->res.avX; /* the means do not change within the loop */
  max_h = 4 * job->res.avY;
  for_each_data(&(job->res.boxlist)) {
    cand = (struct box *)list_get_current(&(job->res.boxlist));
    if (cand->c == PICTURE) continue;
    top = cand->y0; bottom = cand->y1;
    /* pictures could be of unusual size */
    if (!(cand->x1 - cand->x0 + 1 > max_w || bottom - top + 1 > max_h))
      continue;
    span = bottom - top;
    half = (bottom - top + 1)/2;
    /* count objects on same baseline which could be chars */
    /* else: big headlines could be misinterpreted as pictures */
    peers = 0;
    for_each_data(&(job->res.boxlist)) {
      other = (struct box *)list_get_current(&(job->res.boxlist));
      oh = other->y1 - other->y0;
      // ToDo: continue if numcross() only 1, example: |||IIIll|||
      if (   other->c != PICTURE
          && oh <= 2*span && 2*oh >= span           /* similar height */
          && other->y0 <= top + half && other->y0 >= top - half
          && other->y1 <= bottom + half && other->y1 >= bottom - half)
        peers++;
    } end_for_each(&(job->res.boxlist));
    if (peers <= 4) {
      cand->c = PICTURE;
      marked++;
    }
    /* ToDo: pictures could have low contrast=Sum((pixel(p,x,y)-160)^2) */
  } end_for_each(&(job->res.boxlist));
  // start second iteration
  if (job->cfg.verbose)
    fprintf(stderr, " %d - boxes %d\n", marked, job->res.numC-marked);
  calc_average();
  return 0;
}

View File

@@ -0,0 +1,432 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
sometimes I have written comments in german language, sorry for that
This file was retrieved from pgm2asc.cc of Joerg, in order to have
a library of the ocr-engine from Klaas Freitag
*/
#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#ifdef HAVE_GETTIMEOFDAY
#include <sys/time.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "pnm.h"
#include "pgm2asc.h"
#include "pcx.h"
#include "ocr0.h" /* only_numbers */
#include "progress.h"
#include "version.h"
/* Print the program banner (name, version, copyright, license) to stderr.
 *   v != 0   additionally prints the hint to use -h
 *   v & 2    additionally terminates the program with exit status 1
 */
static void out_version(int v) {
  /* as recommended, (c) and license should be part of the binary */
  /* no email because of SPAM, see README for contacting the author */
  fprintf(stderr, " Optical Character Recognition --- gocr "
          version_string " " release_string "\n"
          " Copyright (C) 2001-2009 Joerg Schulenburg GPG=1024D/53BDFBE3\n"
          " released under the GNU General Public License\n");
  if (v != 0) {
    fputs(" use option -h for help\n", stderr);
  }
  if ((v & 2) != 0) {
    exit(1);
  }
}
/* Print the version banner followed by the usage summary to stderr and
 * terminate the program with exit status 0.  Never returns.
 */
static void help(void) {
  out_version(0);
  /* output is shortened to essentials, see manual page for details */
  fprintf(stderr,
          " using: gocr [options] pnm_file_name # use - for stdin\n"
          " options (see gocr manual pages for more details):\n"
          " -h, --help\n"
          " -i name - input image file (pnm,pgm,pbm,ppm,pcx,...)\n"
          " -o name - output file (redirection of stdout)\n"
          " -e name - logging file (redirection of stderr)\n"
          " -x name - progress output to fifo (see manual)\n"
          " -p name - database path including final slash (default is ./db/)\n");
  /* The quotes around the -C example must be escaped: written as ""...""
   * they merely end and restart the string literal, so adjacent-literal
   * concatenation silently dropped them from the printed text. */
  fprintf(stderr, /* string length less than 509 bytes for ISO C89 */
          " -f fmt - output format (ISO8859_1 TeX HTML XML UTF8 ASCII)\n"
          " -l num - threshold grey level 0<160<=255 (0 = autodetect)\n"
          " -d num - dust_size (remove small clusters, -1 = autodetect)\n"
          " -s num - spacewidth/dots (0 = autodetect)\n"
          " -v num - verbose (see manual page)\n"
          " -c string - list of chars (debugging, see manual)\n"
          " -C string - char filter (ex. hexdigits: \"0-9A-Fx\", only ASCII)\n"
          " -m num - operation modes (bitpattern, see manual)\n");
  fprintf(stderr, /* string length less than 509 bytes for ISO C89 */
          " -a num - value of certainty (in percent, 0..100, default=95)\n"
          " -u string - output this string for every unrecognized character\n");
  fprintf(stderr, /* string length less than 509 bytes for ISO C89 */
          " examples:\n"
          "\tgocr -m 4 text1.pbm # do layout analyzis\n"
          "\tgocr -m 130 -p ./database/ text1.pbm # extend database\n"
          "\tdjpeg -pnm -gray text.jpg | gocr - # use jpeg-file via pipe\n"
          "\n");
  fprintf(stderr, " webpage: http://jocr.sourceforge.net/\n");
  exit(0);
}
#ifdef HAVE_GETTIMEOFDAY
/* from the glibc documentation
 *
 * Store X - Y in *result and return 1 if the difference is negative,
 * otherwise 0.
 * Note: *y is rewritten in place while the microsecond carry is
 * normalised (it still denotes the same instant afterwards).
 */
static int timeval_subtract (struct timeval *result, struct timeval *x,
                             struct timeval *y) {
  int carry;    /* whole seconds moved between tv_usec and tv_sec of *y */

  /* Perform the carry for the later subtraction by updating Y. */
  if (y->tv_usec > x->tv_usec) {
    carry = (y->tv_usec - x->tv_usec) / 1000000 + 1;
    y->tv_usec -= 1000000 * carry;
    y->tv_sec += carry;
  }
  if (x->tv_usec - y->tv_usec > 1000000) {
    carry = (x->tv_usec - y->tv_usec) / 1000000;
    y->tv_usec += 1000000 * carry;
    y->tv_sec -= carry;
  }

  /* Compute the time remaining to wait.
     `tv_usec' is certainly positive. */
  result->tv_usec = x->tv_usec - y->tv_usec;
  result->tv_sec = x->tv_sec - y->tv_sec;

  /* Return 1 if result is negative. */
  return (x->tv_sec < y->tv_sec) ? 1 : 0;
}
#endif
/* Parse the command line into *job.
 *
 * job  - job structure to fill (must not be NULL)
 * argn - number of entries in argv
 * argv - argument vector, argv[0] is the program name
 *
 * With no arguments the version banner is printed and the program exits.
 * "--help" and any option whose second character is 'h' call help(), which
 * does not return.  Options are dispatched on the character following the
 * '-' only; every option except 'h' and unknown ones consumes the following
 * argument (an empty string is used when there is none).  An argument that
 * does not start with '-', or a lone "-", is taken as the input file name.
 */
static void process_arguments(job_t *job, int argn, char *argv[])
{
int i;
char *s1;
assert(job);
if (argn <= 1) {
/* out_version(1) prints the banner plus the "-h" hint and returns */
out_version(1);
exit(0);
}
#ifdef HAVE_PGM_H
pnm_init(&argn, &argv);
#endif
/* process arguments */
for (i = 1; i < argn; i++) {
if (strcmp(argv[i], "--help") == 0)
help(); /* and quits */
if (argv[i][0] == '-' && argv[i][1] != 0) {
/* s1 = value of the option: the next argument, or "" if this is the last */
s1 = "";
if (i + 1 < argn)
s1 = argv[i + 1];
switch (argv[i][1]) {
case 'h': /* help */
help();
break;
case 'i': /* input image file */
job->src.fname = s1;
i++;
break;
case 'e': /* logging file */
/* "-e -" sends stderr to wherever stdout currently goes */
if (s1[0] == '-' && s1[1] == '\0') {
#ifdef HAVE_UNISTD_H
dup2(STDOUT_FILENO, STDERR_FILENO); /* -e /dev/stdout works */
#else
fprintf(stderr, "stderr redirection not possible without unistd.h\n");
#endif
}
else if (!freopen(s1, "w", stderr)) {
fprintf(stderr, "stderr redirection to %s failed\n", s1);
}
i++;
break;
case 'p': /* database path */
job->cfg.db_path=s1;
i++;
break;
case 'o': /* output file */
/* "-o -" keeps stdout as it is */
if (s1[0] == '-' && s1[1] == '\0') { /* default */
}
else if (!freopen(s1, "w", stdout)) {
fprintf(stderr, "stdout redirection to %s failed\n", s1);
};
i++;
break;
case 'f': /* output format */
/* an unknown name only warns; out_format keeps its previous value */
if (strcmp(s1, "ISO8859_1") == 0) job->cfg.out_format=ISO8859_1; else
if (strcmp(s1, "TeX") == 0) job->cfg.out_format=TeX; else
if (strcmp(s1, "HTML") == 0) job->cfg.out_format=HTML; else
if (strcmp(s1, "XML") == 0) job->cfg.out_format=XML; else
if (strcmp(s1, "SGML") == 0) job->cfg.out_format=SGML; else
if (strcmp(s1, "UTF8") == 0) job->cfg.out_format=UTF8; else
if (strcmp(s1, "ASCII") == 0) job->cfg.out_format=ASCII; else
fprintf(stderr,"Warning: unknown format (-f %s)\n",s1);
i++;
break;
case 'c': /* list of chars (_ = not recognized chars) */
job->cfg.lc = s1;
i++;
break;
case 'C': /* char filter, default: NULL (all chars) */
/* ToDo: UTF8 input, wchar */
job->cfg.cfilter = s1;
i++;
break;
case 'd': /* dust size */
job->cfg.dust_size = atoi(s1);
i++;
break;
case 'l': /* grey level 0<160<=255, 0 for autodetect */
job->cfg.cs = atoi(s1);
i++;
break;
case 's': /* spacewidth/dots (0 = autodetect) */
job->cfg.spc = atoi(s1);
i++;
break;
case 'v': /* verbose mode */
/* OR-ed in, so repeated -v options accumulate bits */
job->cfg.verbose |= atoi(s1);
i++;
break;
case 'm': /* operation modes */
/* OR-ed in, so repeated -m options accumulate bits */
job->cfg.mode |= atoi(s1);
i++;
break;
case 'n': /* numbers only */
job->cfg.only_numbers = atoi(s1);
i++;
break;
case 'x': /* initialize progress output s1=fname */
ini_progress(s1);
i++;
break;
case 'a': /* set certainty */
job->cfg.certainty = atoi(s1);;
i++;
break;
case 'u': /* output marker for unrecognized chars */
job->cfg.unrec_marker = s1;
i++;
break;
default:
/* unknown options do not consume a value */
fprintf(stderr, "# unknown option use -h for help\n");
}
continue;
}
else /* argument can be filename v0.2.5 */ if (argv[i][0] != '-'
|| argv[i][1] == '\0' ) {
job->src.fname = argv[i];
}
}
}
/* In verbose mode: print the banner, build information, the effective
 * options and the input file name to stderr and, where gettimeofday() is
 * available, remember the start time in job->tmp.init_time for mark_end().
 * Does nothing when job->cfg.verbose is 0.
 *
 * String options may legitimately be NULL (job_init() leaves cfg.cfilter
 * as NULL); passing NULL to a %s conversion is undefined behaviour, so
 * such values are printed as "(null)" explicitly.
 */
static void mark_start(job_t *job) {
  assert(job);
  if (job->cfg.verbose) {
    const char *lc      = job->cfg.lc      ? job->cfg.lc      : "(null)";
    const char *cfilter = job->cfg.cfilter ? job->cfg.cfilter : "(null)";
    const char *fname   = job->src.fname   ? job->src.fname   : "(null)";

    out_version(0);
    /* insert some helpful info for support */
    fprintf(stderr, "# compiled: " __DATE__ );
#if defined(__GNUC__)
    fprintf(stderr, " GNUC-%d", __GNUC__ );
#endif
#ifdef __GNUC_MINOR__
    fprintf(stderr, ".%d", __GNUC_MINOR__ );
#endif
#if defined(__linux)
    fprintf(stderr, " linux");
#elif defined(__unix)
    fprintf(stderr, " unix");
#endif
#if defined(__WIN32) || defined(__WIN32__)
    fprintf(stderr, " WIN32");
#endif
#if defined(__WIN64) || defined(__WIN64__)
    fprintf(stderr, " WIN64");
#endif
#if defined(__VERSION__)
    fprintf(stderr, " version " __VERSION__ );
#endif
    fprintf(stderr, "\n");
    fprintf(stderr,
      "# options are: -l %d -s %d -v %d -c %s -m %d -d %d -n %d -a %d -C \"%s\"\n",
      job->cfg.cs, job->cfg.spc, job->cfg.verbose, lc, job->cfg.mode,
      job->cfg.dust_size, job->cfg.only_numbers, job->cfg.certainty,
      cfilter);
    fprintf(stderr, "# file: %s\n", fname);
#ifdef USE_UNICODE
    fprintf(stderr,"# using unicode\n");
#endif
#ifdef HAVE_GETTIMEOFDAY
    gettimeofday(&job->tmp.init_time, NULL);
#endif
  }
}
/* In verbose mode (and only where gettimeofday() is available) print the
 * time elapsed since mark_start() stored job->tmp.init_time, formatted as
 * minutes:seconds:milliseconds, to stderr.
 */
static void mark_end(job_t *job) {
#ifdef HAVE_GETTIMEOFDAY
  struct timeval now, elapsed;
#endif
  assert(job);
#ifdef HAVE_GETTIMEOFDAY
  if (!job->cfg.verbose)
    return;
  /* show elapsed time */
  gettimeofday(&now, NULL);
  timeval_subtract(&elapsed, &now, &job->tmp.init_time);
  fprintf(stderr,"Elapsed time: %d:%02d:%3.3f.\n",
          (int)elapsed.tv_sec/60,
          (int)elapsed.tv_sec%60,
          (float)elapsed.tv_usec/1000);
#endif
}
/* Load the image named by job->src.fname into job->src.p.
 * File names containing ".pcx" go through readpcx() (its result is not
 * used and 0 is returned); everything else goes through readpgm(), whose
 * result is returned: 1 for multiple images, 0 else.
 */
static int read_picture(job_t *job) {
  assert(job);
  if (strstr(job->src.fname, ".pcx") != NULL) {
    readpcx(job->src.fname, &job->src.p, job->cfg.verbose);
    return 0;
  }
  return readpgm(job->src.fname, &job->src.p, job->cfg.verbose);
}
/* Load an image from the memory block buf[0..size) into job->src.p via
 * readpgmFromBuffer() and hand back its result
 * (1 for multiple images, 0 else).
 */
static int read_picture2(job_t *job, char* buf, long size) {
  assert(job);
  return readpgmFromBuffer(buf, size, &job->src.p);
}
/* subject of change, we need more output for XML (ToDo)
 *
 * Write every stored text line to stdout, then release the stored lines.
 * HTML output gets "<br />" after each line; every format except XML gets
 * a newline after each line.
 */
void print_output(job_t *job) {
  int idx;
  const char *text;

  assert(job);
  /* notice: decode() is shiftet to getTextLine since 0.38 */
  for (idx = 0; (text = getTextLine(idx)) != NULL; idx++) {
    fputs(text, stdout);
    if (job->cfg.out_format == HTML)
      fputs("<br />", stdout);
    if (job->cfg.out_format != XML)
      fputc('\n', stdout);
  }
  free_textlines();
}
/* subject of change, we need more output for XML (ToDo)
 *
 * Same as print_output(), but the text is collected in a heap buffer
 * instead of being written to stdout.  HTML output gets "<br />" after
 * each line; every format except XML gets a newline after each line.
 * The stored text lines are released before returning.
 *
 * Returns a NUL-terminated string the caller must free(), or NULL if the
 * initial allocation failed.
 */
char* print_output2(job_t *job) {
  int linecounter = 0;
  const char *line;
  int len = 1024; // initial buffer length for text line
  char *tmp;

  /* validate the argument before allocating anything */
  assert(job);
  tmp = (char *)malloc(len);
  if ( !tmp ) {
    fprintf(stderr,"malloc failed!\n"); // ToDo: index_to_error_list
    return NULL;
  }
  *tmp = 0;
  line = getTextLine(linecounter++);
  while (line) {
    /* notice: decode() is shiftet to getTextLine since 0.38 */
    /* A stored line may be empty (blank line, or the trailing line after
     * the last newline).  append_to_line() reports empty input as a bug
     * on stderr, so only non-empty lines are handed to it. */
    if (line[0] != 0)
      tmp = append_to_line(tmp, line, &len);
    if (job->cfg.out_format==HTML)
    {
      tmp = append_to_line(tmp, "<br />", &len);
    }
    if (job->cfg.out_format!=XML)
    {
      tmp = append_to_line(tmp, "\n", &len);
    }
    line = getTextLine(linecounter++);
  }
  free_textlines();
  return tmp;
}
/* FIXME jb: remove JOB; */
job_t *JOB;

/* Run the OCR engine on a PNM image held in memory.
 *
 * buf, size      - the image data
 * outputformat   - one of "ISO8859_1", "TeX", "HTML", "XML", "SGML",
 *                  "UTF8", "ASCII"; NULL or an unknown name keeps the
 *                  default chosen by job_init()
 * graylevel      - threshold grey level, 0 = autodetect
 * dustsize       - dust size
 * spacewidthdots - spacewidth/dots, 0 = autodetect
 * certainty      - certainty limit
 *
 * Returns the recognised text as a malloc()ed string the caller must
 * free(), or NULL if buf is NULL, the image could not be read or memory
 * ran out.  If the reader reports several images, the text of the last
 * successfully processed one is returned.
 *
 * Side effects: stdout is switched to unbuffered mode; the global JOB
 * points to the local job while the engine runs and is NULL afterwards.
 */
char* PNMToText(char* buf, long size, char *outputformat, long graylevel, long dustsize, long spacewidthdots, long certainty) {
  int multipnm=1;
  job_t job;
  char *tmp = NULL;

  if (buf == NULL)
    return NULL; /* nothing to read */

  JOB = &job;
  setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
  while (multipnm==1) {
    job_init(&job);
    /* output format */
    if (outputformat != NULL) {
      if (strcmp(outputformat, "ISO8859_1") == 0) job.cfg.out_format=ISO8859_1;
      else if (strcmp(outputformat, "TeX") == 0) job.cfg.out_format=TeX;
      else if (strcmp(outputformat, "HTML") == 0) job.cfg.out_format=HTML;
      else if (strcmp(outputformat, "XML") == 0) job.cfg.out_format=XML;
      else if (strcmp(outputformat, "SGML") == 0) job.cfg.out_format=SGML;
      else if (strcmp(outputformat, "UTF8") == 0) job.cfg.out_format=UTF8;
      else if (strcmp(outputformat, "ASCII") == 0) job.cfg.out_format=ASCII;
    }
    /* grey level 0<160<=255, 0 for autodetect */
    job.cfg.cs = graylevel;
    /* dust size */
    job.cfg.dust_size = dustsize;
    /* spacewidth/dots (0 = autodetect) */
    job.cfg.spc = spacewidthdots;
    /* set certainty */
    job.cfg.certainty = certainty;
    // process_arguments(&job, argn, argv);
    mark_start(&job);
    // multipnm = read_picture(&job);
    multipnm = read_picture2(&job, buf, size);
    /* separation of main and rest for using as lib
       this will be changed later => introduction of set_option()
       for better communication to the engine */
    /* NOTE(review): the job is not released on this path because the state
     * of job.src.p after a failed read is not visible here - confirm
     * whether job_free() is safe to call. */
    if (multipnm<0) break; /* read error */
    /* call main loop */
    pgm2asc(&job);
    mark_end(&job);
    /* only one result can be returned: drop the text of an earlier image
     * instead of leaking it */
    free(tmp);
    tmp=print_output2(&job);
    job_free(&job);
  }
  /* job lives on this stack frame; do not leave a dangling global behind */
  JOB = NULL;
  return tmp;
}

View File

@@ -0,0 +1,168 @@
/* OCR Aug00 JS
// PGM gray ASCII=P2 RAW=P5
// PPM RGB ASCII=P3 RAW=P6
// PBM B/W ASCII=P1 RAW=P4
// ToDo:
// - pbm-raw to pgm also for x!=0 (mod 8)
// v0.01 bug eliminated
// v0.02 convert renamed into jconv because ImageMagick uses same name
// v0.03 code review bbg
// program is not used anymore, use "convert -verbose -crop 0x0+1+1" instead
*/
// #include <iostream.h>
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "pnm.h"
#ifdef HAVE_PAM_H
# include <pam.h>
#endif
#include "pcx.h"
#include "tga.h"
/* Print the jconv usage text to stdout and terminate with exit status 1. */
void help( void ) {
  static const char usage[] =
    "jconv version Aug2000 JS (pnm-raw,pcx8,tga24)\n"
    "use: jconv [options] ?infile.pnm? ?outfile.pgm? ?ox? ?oy? ?dx? ?dy?\n"
    "options: -shrink -pbm -? -help\n"
    "example: jconv -shrink -pbm font.pbm font.pbm 0 0 0 0\n";

  fputs(usage, stdout);
  exit(1);
}
/* jconv: cut a rectangle out of an image and write it to a new file.
 *
 * usage: jconv [options] infile outfile ox oy dx dy
 *   ox,oy  offset of the rectangle; out-of-range values are reset to 0
 *   dx,dy  size of the rectangle; values <= 0 or reaching beyond the image
 *          mean "up to the image border"
 *   -shrink  for images with one byte per pixel, shrink the rectangle to
 *            the bounding box of the pixels darker than 127 (keeping a
 *            one pixel frame)
 * The input reader is chosen by the input file suffix, the writer by the
 * output file suffix.
 */
int main(int argn, char *argv[])
{
  char *inam, *onam;
  pix bild;
  int ox, oy, dx, dy, x, y, i, vvv = 0;
#ifdef HAVE_PAM_H
  pnm_init(&argn, argv);
#endif
  // skip options
  for (i = 1; i < argn; i++) {
    if (argv[i][0] != '-')
      break;
    if (!strcmp(argv[i], "-?"))
      help();
    else if (!strcmp(argv[i], "-help"))
      help();
    else if (!strcmp(argv[i], "-shrink"))
      vvv |= 2;
    else if (!strcmp(argv[i], "-pbm"))
      vvv |= 4;
    else
      printf("unknown option: %s\n", argv[i]);
  }
  /* exactly six positional arguments must remain; help() does not return */
  if (argn - i != 6)
    help();
  inam = argv[i++];
  onam = argv[i++];
  ox = atoi(argv[i++]);
  oy = atoi(argv[i++]);
  dx = atoi(argv[i++]);
  dy = atoi(argv[i++]);
  printf("# in=%s out=%s offs=%d,%d len=%d,%d vvv=%d\n",
         inam, onam, ox, oy, dx, dy, vvv);
  // ----- read picture
  if (strstr(inam, ".pbm") ||
      strstr(inam, ".pgm") ||
      strstr(inam, ".ppm") ||
      strstr(inam, ".pnm") ||
      strstr(inam, ".pam"))
    readpgm(inam, &bild, 1);
  else if (strstr(inam, ".pcx"))
    readpcx(inam, &bild, 1);
  else if (strstr(inam, ".tga"))
    readtga(inam, &bild, ((vvv > 1) ? 0 : 1));
  else {
    printf("Error: unknown suffix\n");
    exit(1);
  }
  /* clamp the requested rectangle to the image */
  if (ox < 0 || ox >= bild.x)
    ox = 0;
  /* fixed: the vertical offset was validated against ox instead of oy */
  if (oy < 0 || oy >= bild.y)
    oy = 0;
  if (dx <= 0 || ox + dx > bild.x)
    dx = bild.x - ox;
  if (dy <= 0 || oy + dy > bild.y)
    dy = bild.y - oy;
  if ((vvv & 2) == 2 && bild.bpp == 1) { // -shrink
    printf("# shrinking PGM: offs=%d,%d len=%d,%d\n", ox, oy, dx, dy);
    for (y = 0; y < dy; y++) { // shrink upper border
      for (x = 0; x < dx; x++)
        if (bild.p[x + ox + (y + oy) * bild.x] < 127)
          break;
      if (x < dx) {             /* first row containing a dark pixel */
        if (y > 0)
          y--;                  /* keep one row of frame */
        oy += y;
        dy -= y;
        break;
      }
    }
    for (y = 0; y < dy; y++) { // shrink lower border
      for (x = 0; x < dx; x++)
        if (bild.p[ox + x + (oy + dy - y - 1) * bild.x] < 127)
          break;
      if (x < dx) {
        if (y > 0)
          y--;
        dy -= y;
        break;
      }
    }
    for (x = 0; x < dx; x++) { // shrink left border
      for (y = 0; y < dy; y++)
        if (bild.p[x + ox + (y + oy) * bild.x] < 127)
          break;
      if (y < dy) {
        if (x > 0)
          x--;
        ox += x;
        dx -= x;
        break;
      }
    }
    for (x = 0; x < dx; x++) { // shrink right border
      for (y = 0; y < dy; y++)
        if (bild.p[ox + dx - x - 1 + (oy + y) * bild.x] < 127)
          break;
      if (y < dy) {
        if (x > 0)
          x--;
        dx -= x;
        break;
      }
    }
  }
  printf("# final dimension: offs=%d,%d len=%d,%d bpp=%d\n",
         ox, oy, dx, dy, bild.bpp);
  /* bbg: could be changed to memmoves */
  // ---- new size
  /* Move the rectangle to the start of the buffer.  The destination index
   * never exceeds the source index, so the in-place copy is safe.
   * fixed: copy bild.bpp bytes per pixel; the former fixed count of 3
   * accessed up to two bytes beyond the pixel data of 1-byte images. */
  for (y = 0; y < dy; y++)
    for (x = 0; x < dx; x++)
      for (i = 0; i < bild.bpp; i++)
        bild.p[i + bild.bpp * (x + dx * y)] =
          bild.p[i + bild.bpp * (x + ox + (y + oy) * bild.x)];
  bild.x = dx;
  bild.y = dy;
  // ---- write internal picture of textsite
  printf("# write %s\n", onam);
  if (strstr(onam, ".pbm"))
    writepbm(onam, &bild);
  else if (strstr(onam, ".pgm"))
    writepgm(onam, &bild);
  else if (strstr(onam, ".ppm"))
    writeppm(onam, &bild);
  else if (strstr(onam, ".pnm"))
    writepgm(onam, &bild);
  else
    printf("Error: unknown suffix");
  free( bild.p );
  return 0;
}

View File

@@ -0,0 +1,84 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2006 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for email address */
#include "pgm2asc.h"
#include "gocr.h"
/* initialize job structure */
/* initialize job structure
 *
 * Puts *job into its default state: input name "-", no image data, empty
 * box/line/database lists, default mean character size 5x8, zeroed
 * statistics and the default configuration (autodetected threshold and
 * dust size, UTF8 output, certainty 95, "_" as marker for unrecognized
 * characters).
 */
void job_init(job_t *job) {
  /* init source */
  job->src.fname = "-";
  /* FIXME jb: init pix */
  job->src.p.p = NULL;
  /* init results */
  list_init( &job->res.boxlist );
  list_init( &job->res.linelist );
  job->res.avX = 5;
  job->res.avY = 8;
  job->res.sumX = 0;
  job->res.sumY = 0;
  job->res.numC = 0;
  /* The rotation vector is read as lines.dx / lines.dy when text lines are
   * stored; dx used to be left uninitialised here.
   * NOTE(review): presumably a later detection stage overwrites both -
   * confirm; 0 makes the "if (dx)" guards of the readers skip the
   * rotation correction until then. */
  job->res.lines.dx=0;
  job->res.lines.dy=0;
  job->res.lines.num=0;
  /* init temporaries */
  list_init( &job->tmp.dblist );
  job->tmp.n_run = 0;
  /* FIXME jb: init ppo */
  job->tmp.ppo.p = NULL;
  job->tmp.ppo.x = 0;
  job->tmp.ppo.y = 0;
  /* init cfg */
  job->cfg.cs = 0;
  job->cfg.spc = 0;
  job->cfg.mode = 0;
  job->cfg.dust_size = -1; /* auto detect */
  job->cfg.only_numbers = 0;
  job->cfg.verbose = 0;
  job->cfg.out_format = UTF8; /* old: ISO8859_1; */
  job->cfg.lc = "_";
  job->cfg.db_path = (char*)NULL;
  job->cfg.cfilter = (char*)NULL;
  job->cfg.certainty = 95;
  job->cfg.unrec_marker = "_";
}
/* free job structure
 *
 * Releases the box list (boxes included) and the source and temporary
 * pixel buffers of *job, and resets the buffer pointers to NULL.
 * The line list and the database list are not released here.
 */
void job_free(job_t *job) {
  /* if tmp is just a copy of the pointer to the original image,
   * forget the alias so the buffer is released only once below */
  if (job->src.p.p == job->tmp.ppo.p)
    job->tmp.ppo.p = NULL;

  /* FIMXE jb: free lists
   * list_free( &job->res.linelist );
   * list_free( &job->tmp.dblist );
   */
  list_and_data_free(&(job->res.boxlist), (void (*)(void *))free_box);

  /* FIXME jb: free pix */
  /* free(NULL) is a no-op, so no separate NULL tests are needed */
  free(job->src.p.p);
  job->src.p.p = NULL;

  /* FIXME jb: free pix */
  free(job->tmp.ppo.p);
  job->tmp.ppo.p = NULL;
}

View File

@@ -0,0 +1,353 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <assert.h>
#include "pgm2asc.h"
#include "gocr.h"
#include "unicode.h"
/* Return the text stored for line number `line` (counted from 0) in
 * JOB->res.linelist, or NULL if `line` is negative, larger than the number
 * of stored lines, or the list ends before that position.
 */
const char *getTextLine (int line) {
  Element *node;
  int remaining;

  if (line < 0)
    return NULL;
  if (line > list_total(&(JOB->res.linelist)))
    return NULL;

  /* walk `line` links forward from the first element */
  node = JOB->res.linelist.start.next;
  for (remaining = line; remaining > 0 && node != NULL; remaining--)
    node = node->next;

  return (node != NULL) ? (const char *)node->data : NULL;
}
/* Release every text line stored in JOB->res.linelist and then the list
 * itself.
 */
void free_textlines(void) {
  void *text;

  for_each_data(&(JOB->res.linelist)) {
    text = list_get_current(&(JOB->res.linelist));
    if (text != NULL)
      free(text);
  } end_for_each(&(JOB->res.linelist));
  list_free(&(JOB->res.linelist));
}
/* append a string (s1) to the string buffer (buffer) of length (len)
 * if buffer is to small or len==0 realloc buffer, len+=512
 *
 * buffer - heap buffer holding a NUL-terminated string, or NULL
 * s1     - string to append; NULL or "" is reported on stderr and ignored
 * len    - in/out: allocated size of buffer in bytes
 *
 * The buffer grows by 512 bytes, or by strlen(s1)+1 if that is more.
 * Returns the (possibly moved) buffer.  If realloc() fails, the original
 * buffer is returned unchanged and *len keeps its previous value.
 */
char *append_to_line(char *buffer, const char *s1, int *len) {
  char *temp;
  int slen=0, alen, newlen;

  if( s1==NULL || s1[0] == 0 ){
    fprintf(stderr,"\n#BUG: appending 0 to a line makes no sense!");
    return buffer;
  }
  /* without a buffer nothing is allocated, whatever *len claims */
  if ( buffer==NULL ) *len = 0;
  if ( *len>0 ) slen= strlen(buffer); // used buffer
  alen = strlen(s1);
  if ( slen+alen+1 >= *len ) {
    newlen = *len + ((alen+1<=512) ? 512 : alen+1);
    temp = (char *)realloc(buffer, newlen);
    if( !temp ) {
      /* *len is only updated on success, so it always matches the real
       * allocation (it used to be reduced by 512 even after growing by
       * alen+1) */
      fprintf(stderr,"realloc failed!\n");
      return buffer;
    }
    buffer = temp; // buffer successfull enlarged
    *len = newlen;
  }
  temp = buffer + slen; // end of buffered string
  memcpy(temp,s1,alen+1); // copy including end sign '\0'
  return buffer;
}
/* Return the median of the gaps between consecutive text lines, where a
 * gap is the next line's m2 minus this line's m3.
 * Returns 0 if fewer than two lines are known.
 */
int calc_median_gap(struct tlines * lines) {
  int gaps[MAXlines];
  int ngaps, k;

  if (lines->num < 2)
    return 0;
  ngaps = lines->num - 1;
  for (k = 0; k < ngaps; k++) {
    gaps[k] = lines->m2[k + 1] - lines->m3[k];
  }
  qsort(gaps, ngaps, sizeof(gaps[0]), intcompare);
  return gaps[ngaps / 2];
}
/*
 * Return the indent in pixels of the least-indented line.
 * Will be subtracted as base_indent to avoid negativ indent.
 *
 * The indent of each box is corrected for the rotation of the page as a
 * whole: on a page rotated clockwise, lower lines may be physically
 * closer to the left edge than higher lines that are logically less
 * indented.  The rotation is around (0,0), so lines can end up "off the
 * left margin" and the result can be negative.
 *
 * boxlist -- list of character boxes.
 * dx, dy -- rotation angle as vector
 *
 * Boxes with num == -1 are skipped.  Returns INT_MAX if no box was
 * considered.
 */
int get_least_line_indent(List * boxlist, int dx, int dy) {
  int least = INT_MAX;
  int candidate;
  struct box * cur;

  if (JOB->cfg.verbose)
    fprintf(stderr, "get_least_line_indent: rot.vector dxdy %d %d\n",
            dx, dy);

  for_each_data(boxlist) {
    cur = (struct box *)list_get_current(boxlist);
    /* if num == -1, indicates this is a space or newline box,
     * inserted in list_insert_spaces. */
    if (cur->num == -1)
      continue;
    /* without a rotation vector the plain x0 is used */
    candidate = dx ? cur->x0 + cur->y0 * dy / dx : cur->x0;
    if (candidate >= least)
      continue;
    least = candidate;
    if (JOB->cfg.verbose && dy != 0)
      fprintf(stderr,
              "# Line %2d, unadjusted xy %3d %3d, adjusted x %2d\n",
              cur->line, cur->x0, cur->y0, candidate);
  } end_for_each(boxlist);

  if (JOB->cfg.verbose)
    fprintf(stderr, "# Minimum adjusted x: %d (min_indent)\n", least);
  return least;
}
/* collect all the chars from the box tree and write them to a string buffer
mo is the mode: mode&8 means, use chars even if unsure recognized
ToDo: store full text(?), store decoded text+boxes+position chars (v0.4)
(HTML,UTF,ASCII,XML), not wchar incl. descriptions (at<95% in red)
remove decode(*c, job->cfg.out_format) from gocr.c!
XML add alternate-tags, format tags and position tags
ToDo: better output XML to stdout instead of circumstantial store to lines
not all texts/images follow the line concept?
Better use a tree of objects where leafes are chars instead of simple list.
Chars or objects are taken into account. Objects can be text strings
or XML strings.

Summary of what the code below does:
Walks JOB->res.boxlist once and builds the output text in a growing heap
buffer.  Each finished line is duplicated with strdup() and appended to
JOB->res.linelist; the remaining buffer content is appended as the last
line.  For XML output page/block/line/space/box elements are generated
instead of plain text.  Boxes whose best certainty is below
JOB->cfg.certainty are reset to UNKNOWN (this modifies the box list).
NOTE(review): the parameter mo is not read anywhere in this function.
*/
void store_boxtree_lines(int mo) {
char *buffer; /* temp buffer for text */
int i = 0, j = 0;
int len = 1024; // initial buffer length for text line
struct box *box2;
int median_gap = 0;
int max_single_space_gap = 0;
struct tlines line_info;
int line, line_gap, oldline=-1;
int left_margin;
int i1=0, i2=0;
buffer = (char *)malloc(len);
if ( !buffer ) {
fprintf(stderr,"malloc failed!\n"); // ToDo: index_to_error_list
return;
}
*buffer = 0;
if ( JOB->cfg.verbose&1 )
fprintf(stderr,"# store boxtree to lines ...");
/* wew: calculate the median line gap, to determine line spacing
* for the text output. The line gap used is between one line's
* m3 (baseline) and the next line's m2 (height of non-rising
* lowercase). We use these lines as they are the least likely
* to vary according to actual character content of lines.
*/
median_gap = calc_median_gap(&JOB->res.lines);
if (median_gap <= 0) {
fprintf(stderr, "# Warning: non-positive median line gap of %d\n",
median_gap);
median_gap = 8;
max_single_space_gap = 12; /* arbitrary */
} else {
max_single_space_gap = median_gap * 7 / 4;
}
// Will be subtracted as base_indent to avoid negativ indent.
left_margin = get_least_line_indent(&JOB->res.boxlist,
JOB->res.lines.dx,
JOB->res.lines.dy);
if (JOB->cfg.out_format==XML) { /* subject of change */
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
/* output lot of usefull information for XML filter */
sprintf(s1,"<page x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\">\n",
0,0,0,0);
buffer=append_to_line(buffer,s1,&len);
sprintf(s1,"<block x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\">\n",
0,0,0,0);
buffer=append_to_line(buffer,s1,&len);
}
for_each_data(&(JOB->res.boxlist)) {
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
line = box2->line;
/* NOTE: this copies the whole tlines structure for every box */
line_info = JOB->res.lines;
/* reset the output char if certainty is below the limit v0.44 */
if (box2->num_ac && box2->wac[0]<JOB->cfg.certainty) box2->c=UNKNOWN;
/* a box of a different text line starts: for XML close the previous
   <line> element (and store it) and open a new one */
if (line!=oldline) {
if (JOB->cfg.out_format==XML && oldline>-1) { /* subject of change */
buffer=append_to_line(buffer,"</line>\n",&len);
list_app( &(JOB->res.linelist), (void *)strdup(buffer) ); // wcsdup
memset(buffer, 0, len);
j=0; // reset counter for new line
}
if (JOB->cfg.out_format==XML) { /* subject of change */
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
/* output lot of usefull information for XML filter */
sprintf(s1,"<line x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" value=\"%d\">\n",
line_info.x0[line],line_info.m1[line],
line_info.x1[line]-line_info.x0[line]+1,
line_info.m4[line]-line_info.m1[line],line);
buffer=append_to_line(buffer,s1,&len);
}
oldline=line;
}
if (box2->c > ' ' &&
box2->c <= 'z') i1++; /* count non-space chars */
/* newline box: for non-XML output emit one extra "\n" per median_gap of
   vertical distance beyond max_single_space_gap, then store the line */
if (box2->c == '\n') {
if (JOB->cfg.out_format!=XML) { /* subject of change */
line_info = JOB->res.lines;
line = box2->line;
if (line > 0) {
line_gap = line_info.m2[line] - line_info.m3[line - 1];
for (line_gap -= max_single_space_gap; line_gap > 0;
line_gap -= median_gap) {
buffer=append_to_line(buffer,"\n",&len);
j++; /* count chars in line */
}
}
list_app( &(JOB->res.linelist), (void *)strdup(buffer) ); // wcsdup
memset(buffer, 0, len);
j=0; // reset counter for new line
}
}
if (box2->c == ' ') // fill large gaps with spaces
{
if (JOB->res.avX) { /* avoid SIGFPE */
if (JOB->cfg.out_format==XML) { /* subject of change */
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
/* output lot of usefull information for XML filter */
sprintf(s1," <space x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" />\n",
box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1);
buffer=append_to_line(buffer,s1,&len);
} else
/* NOTE(review): this loop reuses i, which is also incremented once per
   box at the end of the outer loop body and printed in the final verbose
   message - the printed count is therefore reset by this loop */
for (i = (box2->x1 - box2->x0) / (2 * JOB->res.avX) + 1; i > 0; i--) {
buffer=append_to_line(buffer," ",&len);
j++; /* number of chars in line */
}
}
}
else if (box2->c != '\n') {
/* j==0: nothing has been written to the current line yet, so emit the
   indentation of its first character */
if (j==0 && JOB->res.avX) /* first char in new line? */ {
int indent = box2->x0 - JOB->res.lines.x0[box2->line];
/* correct for angle of page as a whole. */
if (JOB->res.lines.dx)
indent += box2->y0 * JOB->res.lines.dy / JOB->res.lines.dx;
/* subtract the base margin. */
indent -= left_margin;
if (JOB->cfg.out_format==XML) { /* subject of change */
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
/* output lot of usefull information for XML filter */
sprintf(s1," <space x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" />\n",
box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1);
buffer=append_to_line(buffer,s1,&len);
} else
/* NOTE(review): also reuses the box counter i, see the note above */
for (i = indent / JOB->res.avX; i > 0; i--) {
buffer=append_to_line(buffer," ",&len); j++;
}
}
if (JOB->cfg.out_format==XML) { /* subject of change */
char s1[255]; /* ToDo: avoid potential buffer overflow !!! */
/* output lot of usefull information for XML filter */
/* the value="..." attribute opened here is filled and closed below */
sprintf(s1," <box x=\"%d\" y=\"%d\" dx=\"%d\" dy=\"%d\" value=\"",
box2->x0,box2->y0,box2->x1-box2->x0+1,box2->y1-box2->y0+1);
buffer=append_to_line(buffer,s1,&len);
if (box2->num_ac>1) { /* ToDo: output a list of alternatives */
}
}
if (box2->c != UNKNOWN && box2->c != 0) {
buffer=
append_to_line(buffer,decode(box2->c,JOB->cfg.out_format),&len);
if (box2->c > ' ' &&
box2->c <= 'z') i2++; /* count non-space chars */
} else { /* c == UNKNOWN or 0 */
/* NOTE(review): cc is assigned but never read */
wchar_t cc; cc=box2->c;
if (box2->num_ac>0 && box2->tas[0]
&& (JOB->cfg.out_format!=XML || box2->tas[0][0]!='<')) {
/* output glued chars or ... (?) Jan08 */
buffer=append_to_line(buffer,box2->tas[0],&len);
j+=strlen(box2->tas[0]);
} else { /* ToDo: leave string empty? set placeholder per option */
/* output dummy string to mark UNKNOWN */
if(JOB->cfg.unrec_marker[0])
buffer = append_to_line(buffer, JOB->cfg.unrec_marker, &len);
}
}
if (JOB->cfg.out_format==XML) {
if (box2->num_ac>0) {
/* output alist ToDo: separate <altbox ...> */
/* this i1 shadows the outer non-space counter i1 inside this block */
int i1; char s1[256];
sprintf(s1,"\" numac=\"%d\" weights=\"",box2->num_ac);
buffer=append_to_line(buffer,s1,&len);
for (i1=0;i1<box2->num_ac;i1++) {
sprintf(s1,"%d",box2->wac[i1]);
buffer=append_to_line(buffer,s1,&len);
if (i1+1<box2->num_ac) buffer=append_to_line(buffer,",",&len);
}
if (box2->num_ac>1)
buffer=append_to_line(buffer,"\" achars=\"",&len);
/* alternatives start at index 1; index 0 is the value written above */
for (i1=1;i1<box2->num_ac;i1++) {
if (box2->tas[i1] && box2->tas[i1][0]!='<')
buffer=append_to_line(buffer,box2->tas[i1],&len);
else
buffer=append_to_line(buffer,
decode(box2->tac[i1],JOB->cfg.out_format),&len);
// ToDo: add tas[] (achars->avalues or alternate_strings?
if (i1+1<box2->num_ac) buffer=append_to_line(buffer,",",&len);
}
}
buffer=append_to_line(buffer,"\" />\n",&len);
}
if (box2->num_ac && box2->tas[0]) {
if (box2->tas[0][0]=='<') { /* output special XML object */
buffer=append_to_line(buffer,box2->tas[0],&len);
buffer=append_to_line(buffer,"\n",&len);
j+=strlen(box2->tas[0]);
}
}
j++; /* number of chars in line */
}
i++;
} end_for_each(&(JOB->res.boxlist));
if (JOB->cfg.out_format==XML && oldline>-1) { /* subject of change */
buffer=append_to_line(buffer,"</line>\n",&len);
}
if (JOB->cfg.out_format==XML) { /* subject of change */
buffer=append_to_line(buffer,"</block>\n</page>\n",&len);
}
/* do not forget last line */
// is there no \n in the last line? If there is, delete next line.
list_app( &(JOB->res.linelist), (void *)strdup(buffer) );
free(buffer);
if( JOB->cfg.verbose&1 )
fprintf(stderr,"... %d lines, boxes= %d, chars= %d\n",i,i1,i2);
}

View File

@@ -0,0 +1,334 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2006 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for email address
***********************************IMPORTANT*********************************
Notes to the developers: read the following notes before using these
functions.
* Be careful when using for_each_data() recursively while calling list_del.
It may corrupt the current[] pointers and lead to a segfault or to
unpredictable or simply undesirable behavior. We have been working on a
solution for this problem, and have solved some of the biggest problems.
In a few words, the problem is this: when you delete a node, it may be
the current node of a lower level loop. The current code takes care of
access to previous/next elements of the now defunct node. So, if you do
something like:
for_each_data(l) {
for_each_data(l) {
list_del(l, header_data);
free(header_data);
} end_for_each(l);
+ tempnode = list_cur_next(l);
} end_for_each(l);
It will work, even though the current node in the outer loop was deleted.
However, if you replace the line marked with + with the following code:
tempnode = list_next(l, list_get_current(l));
it will break, since list_get_current is likely to return NULL or garbage,
since you deleted header_data().
Conclusion: use list_del carefully. The best way to avoid this problem is
to not use list_del inside a big stack of loops.
* If you have two elements with the same data, the functions will assume
that the first one is the wanted one. Not a bug, a feature. ;-)
* avoid calling list_prev and list_next. They are intensive and slow
functions. Keep the result in a variable or, if you need something more,
use list_get_element_from_data.
*/
#include <stdio.h>
#include <stdlib.h>
#include "list.h"
#include "progress.h"
/* Puts *l into the empty state: the two sentinel elements (start, stop)
   point at each other, carry no data, the loop-level stack is empty
   (current == NULL, level == -1) and the element count is 0.
   A NULL list pointer is ignored. */
void list_init( List *l ) {
  if ( l != NULL ) {
    /* bookkeeping */
    l->n = 0;
    l->level = -1;
    l->current = NULL;
    /* sentinels hold no payload */
    l->stop.data = NULL;
    l->start.data = NULL;
    /* outer ends of the chain */
    l->stop.next = NULL;
    l->start.previous = NULL;
    /* sentinels are linked directly to each other */
    l->stop.previous = &l->start;
    l->start.next = &l->stop;
  }
}
/* Inserts data in front of the element holding data_after.
   If data_after is NULL or the list is empty, data is appended instead.
   Returns 0 on success, 1 on error (bad arguments, data_after not in the
   list, or out of memory). */
int list_ins( List *l, void *data_after, void *data) {
  Element *node, *succ;

  if ( l == NULL || data == NULL )
    return 1;

  /* no anchor or nothing stored yet: same as appending */
  if ( data_after == NULL || l->n == 0 )
    return list_app(l, data);

  /* locate the element that will follow the new one */
  succ = list_element_from_data(l, data_after);
  if ( succ == NULL )
    return 1;

  node = (Element *)malloc(sizeof(Element));
  if ( node == NULL )
    return 1;

  /* splice node between succ->previous and succ */
  node->data = data;
  node->previous = succ->previous;
  node->next = succ;
  succ->previous->next = node;
  succ->previous = node;
  l->n++;
  return 0;
}
/* Appends data as the last element of the list (just before the stop
   sentinel). Returns 0 on success, 1 on bad arguments or out of memory. */
int list_app( List *l, void *data ) {
  Element *node, *tail;

  if ( l == NULL || data == NULL )
    return 1;

  node = (Element *)malloc(sizeof(Element));
  if ( node == NULL )
    return 1;

  tail = l->stop.previous;      /* current last element (or start sentinel) */
  node->data = data;
  node->previous = tail;
  node->next = tail->next;      /* whatever followed the old tail */
  tail->next = node;
  l->stop.previous = node;
  l->n++;
  return 0;
}
/* Returns the first element whose data pointer equals data, or NULL if
   the list is NULL or empty, data is NULL, or no element matches.
   The walk stops at the stop sentinel; a NULL link (damaged chain) also
   ends the search instead of being dereferenced. */
Element *list_element_from_data( List *l, void *data ) {
  Element *temp;

  if ( !l || !data || !l->n )
    return NULL;

  /* test the pointer BEFORE touching temp->data */
  for ( temp = l->start.next; temp && temp != &l->stop; temp = temp->next ) {
    if ( temp->data == data )
      return temp;
  }
  return NULL;
}
/* Removes the first element holding data from the list and frees the
   element itself; the caller remains responsible for freeing data.
   Any active loop level whose cursor points at the removed element is
   moved back to its predecessor.
   Returns 0 if OK, 1 on error (NULL data or data not found).
   This is the internal version, that shouldn't be called usually. Use the
   list_del() macro instead.
*/
int list_del( List *l, void *data ) {
  Element *victim;
  int lvl;

  /* the sentinels carry NULL data and must never be deleted */
  if ( data == NULL )
    return 1;

  victim = list_element_from_data(l, data);
  if ( victim == NULL )
    return 1;

  /* repair the cursors of all nested loops that sit on the victim */
  for ( lvl = l->level; lvl >= 0; lvl-- )
    if ( l->current[lvl] == victim )
      l->current[lvl] = victim->previous;

  /* unlink */
  victim->next->previous = victim->previous;
  victim->previous->next = victim->next;
  victim->next = NULL;          /* mark as freed */
  victim->previous = NULL;

  free(victim);                 /* element only, not the data */
  l->n--;
  return 0;
}
/* Frees all elements of the list and the loop-level array, leaving the
   list empty (sentinels linked to each other, n == 0). The data pointers
   stored in the elements are NOT freed; see also list_and_data_free().
   Does nothing for a NULL list or a list whose count is already 0. */
void list_free( List *l ) {
  Element *temp, *next;

  if ( !l || !l->n )
    return;

  free(l->current);             /* free(NULL) is a no-op */
  l->current = NULL;

  for ( temp = l->start.next; temp && temp != &l->stop; temp = next ) {
    next = temp->next;          /* read the link before the node is gone */
    free(temp);
  }

  l->start.next = &l->stop;
  l->stop.previous = &l->start;
  /* keep the element counter in sync with the (now empty) chain; a stale
     count makes list_ins() search an empty list instead of appending */
  l->n = 0;
}
/* Opens a new for_each nesting level: grows the cursor array by one slot
   and points the new cursor at the first element of the list.
   Returns 0 on success, 1 if l is NULL or the array could not be grown
   (in which case level and current are left untouched). */
int list_higher_level( List *l ) {
  Element **grown;
  int failed;

  if ( l == NULL )
    return 1;

  /*
    l->current may be NULL here (level == -1); ANSI C defines realloc(NULL, n)
    as malloc(n), but very old libraries may not.
  */
  grown = (Element **)realloc(l->current, (l->level+2)*sizeof(Element *));
  failed = (grown == NULL);
  if ( !failed ) {
    l->current = grown;
    l->level += 1;
    l->current[l->level] = l->start.next;
  }
  g_debug(fprintf(stderr, " level++=%d current[]=%p\n",
                  l->level, l->current);)
  if ( failed ) {
    fprintf(stderr, " realloc failed! abort\n");
    return 1;
  }
  return 0;
}
/* Closes the innermost for_each nesting level: drops the last cursor slot
   and decrements level. When the outermost level is closed the cursor
   array is freed and current is reset to NULL.
   A NULL list, or a list with no open level (level < 0), is ignored. */
void list_lower_level( List *l ) {
  Element **shrunk;

  if ( !l )
    return;
  if ( l->level < 0 )           /* unbalanced call: nothing to close */
    return;

  if ( l->level == 0 ) {
    free(l->current);           /* calm -lefence */
    l->current = NULL;          /* could be important */
  } else {
    /* Shrinking may still fail; realloc then leaves the old block intact.
       Keep using it rather than losing (and leaking) the cursor array. */
    shrunk = (Element **)realloc(l->current, l->level*sizeof(Element *));
    if ( shrunk )
      l->current = shrunk;
  }
  l->level--;
  g_debug(fprintf(stderr, " level--=%d current[]=%p\n", l->level,
                  l->current);)
}
/* Returns the data stored in the element following the one that holds
   data, or NULL if l is NULL, data is not in the list, or there is no
   following link. For the last element this yields the stop sentinel's
   data, i.e. NULL. */
void *list_next( List *l, void *data ) {
  Element *found = l ? list_element_from_data(l, data) : NULL;

  if ( found == NULL || found->next == NULL )
    return NULL;
  return found->next->data;
}
/* Returns the data stored in the element preceding the one that holds
   data, or NULL if l is NULL, data is not in the list, or there is no
   preceding link. For the first element this yields the start sentinel's
   data, i.e. NULL. */
void *list_prev( List *l, void *data ) {
  Element *found = l ? list_element_from_data(l, data) : NULL;

  if ( found == NULL || found->previous == NULL )
    return NULL;
  return found->previous->data;
}
/* Similar to qsort. Sorts list, using the (*compare) function, which is
provided by the user. The comparison function must return an integer less
than, equal to, or greater than zero if the first argument is considered to
be respectively less than, equal to, or greater than the second.
compare receives the data pointers stored in two adjacent elements.
Uses the bubble sort algorithm: at most l->n passes, each pass swapping
adjacent elements that are out of order by relinking the nodes in place.
Stops early after a pass without any swap.
Does nothing if l is NULL. compare is called without a NULL check.
*/
void list_sort( List *l, int (*compare)(const void *, const void *) ) {
Element *temp, *prev;
int i, sorted;
progress_counter_t *pc = NULL;
if ( !l )
return;
/* start progress meter, sorting is slow for huge number of elements */
/* l->n is the worst case, real time is less or equal estimated time */
pc = open_progress(l->n,"list_sort");
for (i = 0; i < l->n; i++ ) {
sorted = 1; /* Flag for early break */
/* start at the second element so that temp->previous is a real element */
for ( temp = l->start.next->next;
temp != NULL && temp != &l->stop; temp = temp->next ) {
/* after a swap temp may have become the first element: nothing to compare */
if ( temp->previous == &l->start ) continue;
if ( compare((const void *)temp->previous->data,
(const void *)temp->data) > 0 ) {
sorted = 0; /* reset flag */
/* swap with the previous node: relink the four affected neighbours */
prev = temp->previous;
prev->previous->next = temp;
temp->next->previous = prev;
temp->previous = prev->previous;
prev->next = temp->next;
prev->previous = temp;
temp->next = prev;
/* and make sure the node in the for loop is correct:
   prev now sits where temp was, so temp = temp->next moves on correctly */
temp = prev;
#ifdef SLOWER_BUT_KEEP_BY_NOW
/* this is a slower version, but guaranteed to work */
void *data;
data = temp->data;
prev = temp->previous;
list_del(l, data);
list_ins(l, prev->data, data);
temp = prev;
#endif
}
}
if (sorted) break;
progress(i,pc); /* progress meter */
}
close_progress(pc);
g_debug(fprintf(stderr, "list_sort()\n");)
}
/* Releases a list together with its payload: free_data() is invoked on
 * every non-NULL data pointer, then the elements themselves are released
 * with list_free().
 * Returns 0 on success (also for a NULL list), 1 if free_data is NULL. */
int list_and_data_free( List *l, void (*free_data)(void *data)) {
  void *payload;

  if ( l == NULL )
    return 0;
  if ( free_data == NULL )
    return 1;

  for_each_data(l) {
    payload = list_get_current(l);
    if ( payload != NULL )
      free_data(payload);
  } end_for_each(l);

  list_free(l);
  g_debug(fprintf(stderr, "list_and_data_free()\n");)
  return 0;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,85 @@
// test routines - faster to compile
#include <stdlib.h>
#include <stdio.h>
#include "pgm2asc.h"
#include "unicode.h"
#include "amiga.h"
#include "gocr.h"
// for learn_mode/analyze_mode high, with, yoffset, num of pattern_i,
// - holes (center,radius in relative coordinates) etc. => cluster analyze
// num_hole => min-volume, tolerance border
// pattern: @@ @. @@
// .@ @. ..
// regular filter for large resolutions to make edges more smooth (on boxes)
// extra-filter (only if not recognized?)
// map + same color to (#==change)
// - anti color
// . not used
// strongest neighbour pixels (3x3) => directions
// second/third run with more and more tolerance!?
/* FIXME jb: following is unused */
#if 0
struct lobj { // line-object (for fitting to near lines)
int x0,y0; // starting point (left up)
int x1,y1; // end point (right down)
int mt; // minimum thickness
int q; // quality, overlapp
};
/* FIXME jb global */
struct lobj obj1;
#endif
// that is the first draft of feature extraction
// detect main lines and bows
// seems bad implemented, looking for better algorithms (ToDo: use autotrace)
/* number of line candidates kept in the result table of ocr2() */
#define MAXL 10
/* Experimental feature extraction: brute-force search for straight lines
   in pixmap b and print the MAXL best candidates to stdout.
   b  - pixmap to analyse (b->x, b->y are used as width and height)
   cs - passed through to get_line2() (presumably the gray threshold --
        TODO confirm against get_line2)
   Every pair of points (x1,y1)-(x2,y2) with y2>y1 is tested, so the cost
   grows with (b->x * b->y)^2.
   Result table: xa/ya = start point, xb/yb = end point, ll = squared
   length; unused slots stay 0. */
void ocr2(pix *b,int cs){
int x1,y1,x2,y2,l,i,j,xa[MAXL],ya[MAXL],xb[MAXL],yb[MAXL],ll[MAXL];
for(i=0;i<MAXL;i++)xa[i]=ya[i]=xb[i]=yb[i]=ll[i]=0;
for(x1=0;x1<b->x;x1++) // very slowly, but simple to program
for(y1=0;y1<b->y;y1++) // brute force
for(x2=0;x2<b->x;x2++)
for(y2=y1+1;y2<b->y;y2++)
{
// a result above 99 is treated as "these two points are joined by a line"
if( get_line2(x1,y1,x2,y2,b,cs,100)>99 )
{ // line ???
l=(x2-x1)*(x2-x1)+(y2-y1)*(y2-y1); // len (squared, no sqrt taken)
for(i=0;i<MAXL;i++)
{ // remove similar lines (same middle point) IMPROVE IT !!!!!! ???
// "similar" = close middle point AND close start point to table entry i
if(
abs(x1+x2-xa[i]-xb[i])<1+b->x/2
&& abs(y1+y2-ya[i]-yb[i])<1+b->y/2
&& abs(y1-ya[i])<1+b->y/4
&& abs(x1-xa[i])<1+b->x/4
)
{
if( l>ll[i] )
{
// new line is longer: drop entry i by moving the tail up one slot
for(j=i;j<MAXL-1;j++)
{ // shift table
xa[j]=xa[j+1];ya[j]=ya[j+1];
xb[j]=xb[j+1];yb[j]=yb[j+1];ll[j]=ll[j+1];
}
ll[MAXL-1]=0;
}
else break; // forget it if shorter
}
// note: after a removal above, ll[i] already refers to the entry that moved up
if( l>ll[i] ){ // insert if larger
for(j=MAXL-1;j>i;j--){ // shift table (the last entry falls off)
xa[j]=xa[j-1];ya[j]=ya[j-1];
xb[j]=xb[j-1];yb[j]=yb[j-1];ll[j]=ll[j-1];
}
xa[i]=x1;ya[i]=y1;xb[i]=x2;yb[i]=y2;ll[i]=l;
break;
}
}
}
}
// dump the table: x1 y1 x2 y2 squared-length
for(i=0;i<MAXL;i++){
printf(" %2d %2d %2d %2d %3d\n",xa[i],ya[i],xb[i],yb[i],ll[i]);
}
}

View File

@@ -0,0 +1,289 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
the following code was sent by Ryan Dibble <dibbler@umich.edu>
The algorithm is very simple but hopefully works well.
Compare the grayscale histogram with a mass density diagram:
I think the algorithm is a kind of
division of a body into two parts in a way that the mass
centers have the largest distance from each other;
the function is weighted in a way that equal masses have an advantage
- the otsu algorithm fails on discrete multi-color images
TODO:
RGB: do the same with all colors (CMYG?) seperately
test: hardest case = two colors
bbg: test done, using a two color gray file. Output:
# threshold: Value = 43 gmin=43 gmax=188
my changes:
- float -> double
- debug option added (vvv & 1..2)
- **image => *image, &image[i][1] => &image[i*cols+1]
- do only count pixels near contrast regions
this makes otsu much better for shadowed fonts or multi colored text
on white background
(m) Joerg Schulenburg (see README for email address)
ToDo:
- measure contrast
- detect low-contrast regions
*/
#include <stdio.h>
#include <string.h>
/* Absolute value of x. Every use of the argument is parenthesized so that
   expressions with low-precedence operators (e.g. ?:) expand correctly.
   Note: x is evaluated twice, so it must not have side effects. */
#define Abs(x) (((x)<0)?-(x):(x))
/*======================================================================*
* global thresholding routine *
* takes a 2D unsigned char array pointer, number of rows, and *
* number of cols in the array. returns the value of the threshold *
* x0,y0,x0+dx,y0+dy are the edgepoints of the interesting region *
* vvv is the verbosity for debugging purpose *
* *
* notes: *
* - image is addressed as image[y*cols+x]; rows is not used here *
* - only every k-th row (k=dy/512+1) is sampled for the histograms *
* - if the region looks mostly dark (2*i1 > 7*i2) the region is *
* inverted IN PLACE and the threshold is mirrored accordingly *
* - returns 160 if no pixel qualified for the contrast histogram *
*======================================================================*/
int
otsu (unsigned char *image, int rows, int cols,
int x0, int y0, int dx, int dy, int vvv) {
unsigned char *np; // pointer to position in the image we are working with
unsigned char op1, op2; // predecessor of pixel *np (start value)
int maxc=0; // maximum contrast (start value)
int thresholdValue=1; // value we will threshold at
int ihist[256]; // image histogram
int chist[256]; // contrast histogram
int i, j, k; // various counters
int is, i1, i2, ns, n1, n2, gmin, gmax;
double m1, m2, sum, csum, fmax, sb;
// zero out histogram ...
memset(ihist, 0, sizeof(ihist));
memset(chist, 0, sizeof(chist));
op1=op2=0;
gmin=255; gmax=0; k=dy/512+1; // k = row step, samples at most ~512 rows
// v0.43 first get max contrast, dont do it together with next step
// because it fails if we have pattern as background (on top)
// pass 1: full histogram, gray range and max. difference to the two
// previously visited pixels (op1/op2 are NOT reset at row starts)
for (i = 0; i < dy ; i+=k) {
np = &image[(y0+i)*cols+x0];
for (j = 0; j < dx ; j++) {
ihist[*np]++;
if(*np > gmax) gmax=*np;
if(*np < gmin) gmin=*np;
if (Abs(*np-op1)>maxc) maxc=Abs(*np-op1); /* new maximum contrast */
if (Abs(*np-op2)>maxc) maxc=Abs(*np-op2); /* new maximum contrast */
/* we hope that maxc will be find its maximum very fast */
op2=op1; /* shift old pixel to next older */
op1=*np; /* store old pixel for contrast check */
np++; /* next pixel */
}
}
// generate the histogram
// Aug06 images with large white or black homogeneous
// areas give bad results, so we only add pixels on contrast edges
// pass 2: op1/op2 still hold the last two pixels of pass 1 when this starts
for (i = 0; i < dy ; i+=k) {
np = &image[(y0+i)*cols+x0];
for (j = 0; j < dx ; j++) {
if (Abs(*np-op1)>maxc/4
|| Abs(*np-op2)>maxc/4)
chist[*np]++; // count only relevant pixels
op2=op1; /* shift old pixel to next older */
op1=*np; /* store old pixel for contrast check */
np++; /* next pixel */
}
}
// set up everything
sum = csum = 0.0;
ns = 0;
is = 0;
for (k = 0; k <= 255; k++) {
sum += (double) k * (double) chist[k]; /* x*f(x) cmass moment */
ns += chist[k]; /* f(x) cmass */
is += ihist[k]; /* f(x) imass */
// Debug: output to out_hist.dat?
// fprintf(stderr,"\chistogram %3d %6d (brightness weight)", k, ihist[k]);
}
if (!ns) {
// if n has no value we have problems...
fprintf (stderr, "NOT NORMAL, thresholdValue = 160\n");
return (160);
}
// ToDo: only care about extremas in a 3 pixel environment
// check if there are more than 2 mass centers (more colors)
// return object colors and color radius instead of threshold value
// also the region, where colored objects are found
// what if more than one background color? no otsu at all?
// whats background? box with lot of other boxes in it
// threshold each box (examples/invers.png,colors.png)
// get maximum white and minimum black pixel color (possible range)
// check range between them for low..high contrast ???
// typical scenes (which must be covered):
// - white page with text of different colors (gray values)
// - binear page: background (gray=1) + black text (gray=0)
// - text mixed with big (dark) images
// ToDo: recursive clustering for maximum multipol moments?
// idea: normalize ihist to max=1024 before otsu?
// do the otsu global thresholding method
if ((vvv&1)) // Debug
fprintf(stderr,"# threshold: value ihist chist mass_dipol_moment\n");
fmax = -1.0;
n1 = 0;
// try every split point k: grays 0..k go left, k+1..255 go right
for (k = 0; k < 255; k++) {
n1 += chist[k]; // left mass (integration)
if (!n1) continue; // we need at least one foreground pixel
n2 = ns - n1; // right mass (num pixels - left mass)
if (n2 == 0) break; // we need at least one background pixel
csum += (double) k *chist[k]; // left mass moment
m1 = csum / n1; // left mass center (black chars)
m2 = (sum - csum) / n2; // right mass center (white background)
// max. dipol moment?
// orig: sb = (double) n1 *(double) n2 * (m1 - m2) * (m1 - m2);
// (the original form is the classic between-class variance; the form
//  used below is linear in the distance of the two mass centers)
sb = (double) n1 *(double) n2 * (m2 - m1); // seems to be better Aug06
/* bbg: note: can be optimized. */
if (sb > fmax) {
fmax = sb;
thresholdValue = k + 1;
// thresholdValue = (m1 + 3 * m2) / 4;
}
if ((vvv&1) && ihist[k]) // Debug
fprintf(stderr,"# threshold: %3d %6d %6d %8.2f\n",
k, ihist[k], chist[k],
sb/(dx*dy)); /* normalized dipol moment */
}
// ToDo: error = left/right point where sb is 90% of maximum?
// now we count all pixels for background detection
// (uses the full histogram ihist, not the contrast histogram)
i1 = 0;
for (k = 0; k < thresholdValue; k++) {
i1 += ihist[k]; // left mass (integration)
}
i2 = is - i1; // right mass (num pixels - left mass)
// at this point we have our thresholding value
// black_char: value<cs, white_background: value>=cs
// can it happen? check for sureness
// clamp the threshold into gmin+1..gmax
if (thresholdValue > gmax) {
fprintf(stderr,"# threshold: Value >gmax\n");
thresholdValue = gmax;
}
if (thresholdValue <= gmin) {
fprintf(stderr,"# threshold: Value<=gmin\n");
thresholdValue = gmin+1;
}
// debug code to display thresholding values
if ( vvv & 1 )
fprintf(stderr,"# threshold: Value = %d gmin=%d gmax=%d cmax=%d"
" b/w= %d %d\n",
thresholdValue, gmin, gmax, maxc, i1, i2);
// this is a primitive criteria for inversion and should be improved
// old: i1 >= 4*i2, but 0811qemu1.png has a bit above 1/4
if (2*i1 > 7*i2) { // more black than white, obviously black is background
if ( vvv & 1 )
fprintf(stderr,"# threshold: invert the image\n");
// we do inversion here (no data lost)
// note: every row is inverted here (step 1), not only the sampled rows
for (i = 0; i < dy ; i++) {
np = &image[(y0+i)*cols+x0];
for (j = 0; j < dx ; j++) {
*np=255-*np;
np++; /* next pixel */
}
}
// mirror the threshold so that "value < threshold" still means foreground
thresholdValue=255-thresholdValue+1;
}
return(thresholdValue);
/* range: 0 < thresholdValue <= 255, example: 1 on b/w images */
/* 0..threshold-1 is foreground */
/* threshold..255 is background */
/* ToDo: min=blackmasscenter/2,thresh,max=(whitemasscenter+255)/2 */
}
/*======================================================================*/
/* thresholding the image (set threshold to 128+32=160=0xA0)            */
/* now we have a fixed thresholdValue good to recognize on gray image   */
/* - so lower bits can used for other things (bad design?)              */
/* ToDo: different foreground colors, gray on black/white background    */
/*                                                                      */
/* Rescales the region x0..x0+dx-1, y0..y0+dy-1 of image IN PLACE:      */
/*   values below thresholdValue are mapped to   0..149 (foreground)    */
/*   values from thresholdValue on are mapped to 175..255 (background)  */
/* image is addressed as image[y*cols+x]; rows is not used.             */
/* The gray range gmin..gmax is measured on the interior of the region  */
/* (one pixel border left out); if the region has no interior the whole */
/* region is measured instead. A thresholdValue outside gmin+1..gmax is */
/* replaced by the middle of the range (reported on stderr).            */
/* Pixels lying outside gmin..gmax (possible on the border) are clamped */
/* to 0..255 instead of wrapping around in the unsigned char result.    */
/* Returns the new, normalized threshold, always 160.                   */
/*======================================================================*/
int
thresholding (unsigned char *image, int rows, int cols,
              int x0, int y0, int dx, int dy, int thresholdValue) {
  unsigned char *np; /* pointer to position in the image we are working with */
  int i, j;          /* row / column counters */
  int v;             /* rescaled value before clamping */
  int gmin=255, gmax=0;

  (void) rows;       /* kept in the signature for the callers, not needed */

  /* calculate min/max on the interior of the region */
  for (i = y0 + 1; i < y0 + dy - 1; i++) {
    np = &image[i*cols+x0+1];
    for (j = x0 + 1; j < x0 + dx - 1; j++) {
      if (*np > gmax) gmax = *np;
      if (*np < gmin) gmin = *np;
      np++; /* next pixel */
    }
  }
  /* regions narrower than 3 pixels have no interior: without this
     fallback the range would stay at the inverted start values 255..0 */
  if (gmin > gmax) {
    for (i = y0; i < y0 + dy; i++) {
      np = &image[i*cols+x0];
      for (j = x0; j < x0 + dx; j++) {
        if (*np > gmax) gmax = *np;
        if (*np < gmin) gmin = *np;
        np++;
      }
    }
  }

  /* allowed_threshold=gmin+1..gmax v0.43 */
  if (thresholdValue<=gmin || thresholdValue>gmax){
    thresholdValue=(gmin+gmax+1)/2; /* range=0..1 -> threshold=1 */
    fprintf(stderr,"# thresholdValue out of range %d..%d, reset to %d\n",
            gmin, gmax, thresholdValue);
  }
  /* b/w: min=0,tresh=1,max=1 v0.43 */

  /* actually performs the thresholding of the image...
     later: grayvalues should also be used, only rescaling threshold=160=0xA0
     sometimes images have no contrast (thresholdValue == gmin) */
  for (i = y0; i < y0+dy; i++) {
    np = &image[i*cols+x0];
    for (j = x0; j < x0+dx; j++) {
      if (*np >= thresholdValue || thresholdValue == gmin)
        v = 255-(gmax - *np)* 80/(gmax - thresholdValue + 1);
      else
        v = 0+(*np - gmin)*150/(thresholdValue - gmin );
      /* pixels outside the measured range would otherwise wrap around
         (e.g. a very dark border pixel turning almost white) */
      if (v < 0)   v = 0;
      if (v > 255) v = 255;
      *np = (unsigned char) v;
      np++;
    }
  }
  return(128+32); /* return the new normalized threshold value */
  /*   0..159 is foreground */
  /* 160..255 is background */
}

View File

@@ -0,0 +1,289 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL address
*/
#include <string.h>
#include "unicode.h"
#include "output.h"
#include "pcx.h"
#include "gocr.h" /* extern job_t JOB; */
/* function is only for debugging and for developing
it prints out a part of pixmap b at point x0,y0 to stderr
using dots .,; if no pixel, and @xoO for pixels
modify n_run and print out what would happen on 2nd, 3th loop!
new: output original and copied pixmap in the same figure

px - box to describe (may be NULL); if given, its properties are listed
     first and its own pixmap px->p is drawn as first picture
b  - pixmap for the second picture; if px is given and b is NULL,
     b, x0, y0, dx, dy are all taken from the box
x0,y0,dx,dy - region to draw
cs - gray threshold, a pixel counts as set if getpixel(...)<cs;
     with px given, cs==0 selects JOB->cfg.cs
Characters used: '.' empty, '@' set, 'S'/'$' frame vector point on an
empty/set cell, '<' after rows lying on m1..m4, '-' after the first and
last row. Large regions are reduced by merging tx*ty pixels per cell.
*/
void out_b(struct box *px, pix *b, int x0, int y0, int dx, int dy, int cs ){
int x,y,x2,y2,yy0,tx,ty,n1,i;
char c1, c2;
yy0=y0;
if (px) { /* overwrite rest of arguments */
if (!b) {
b=px->p;
x0=px->x0; dx=px->x1-px->x0+1;
y0=px->y0; dy=px->y1-px->y0+1; yy0=y0;
}
if(cs==0) cs=JOB->cfg.cs;
/* header line 1: position, size, reference point relative to the box */
fprintf(stderr,"\n# list box x= %4d %4d d= %3d %3d r= %3d %3d"
" nrun=%d p=%p", /* ToDo: r,nrun is obsolete */
px->x0, px->y0, px->x1 - px->x0 + 1, px->y1 - px->y0 + 1,
px->x - px->x0, px->y - px->y0, JOB->tmp.n_run, (void*)px);
/* header line 2: counters, recognized char, line number, m1..m4 relative to y0 */
fprintf(stderr,"\n# dots=%d boxes=%d subboxes=%d c=%s mod=%s"
" line=%d m= %d %d %d %d",
px->dots, px->num_boxes, px->num_subboxes,
decode(px->c,ASCII), decode(px->modifier,ASCII), px->line,
px->m1 - px->y0, px->m2 - px->y0, px->m3 - px->y0, px->m4 - px->y0);
if (px->num_frames) {
int i,j,jo;
/* num_frame_vectors[] is cumulative: entry i is the end index of frame i */
fprintf(stderr,"\n# frames= %d (sumvects=%d)",px->num_frames,
((px->num_frames)?px->num_frame_vectors[px->num_frames-1]:-1));
for (jo=j=i=0; i<px->num_frames; i++, jo=j) {
fprintf(stderr,"\n# frame %d (%+4d,%3d,%2d) ",
i, px->frame_vol[i], px->frame_per[i],
px->num_frame_vectors[i]-jo);
/* print only the first vectors of each frame */
for (;j<px->num_frame_vectors[i] && j<MaxFrameVectors; j++)
fprintf(stderr," #%02d %2d %2d", j,
px->frame_vector[j][0] - px->x0,
px->frame_vector[j][1] - px->y0);
}
}
if (px->num_ac){ /* output table of chars and its probabilities */
fprintf(stderr,"\n# list box char: ");
for(i=0;i<px->num_ac && i<NumAlt;i++)
/* output the (xml-)string (picture position, barcodes, glyphs, ...) */
if (px->tas[i])
fprintf(stderr," %s(%d)", px->tas[i] ,px->wac[i]);
else
fprintf(stderr," %s(%d)",decode(px->tac[i],ASCII),px->wac[i]);
}
fprintf(stderr,"\n");
/* extend the drawn region upwards to m1 if m1 lies above the box */
if (px->m2 && px->m1<y0 && (px->dots || y0>px->m2)) {
yy0=px->m1; dy=px->y1-yy0+1;
}
}
tx=dx/80+1;
ty=dy/40+1; /* step, usually 1, but greater on large maps */
fprintf(stderr,"# list pattern x= %4d %4d d= %3d %3d t= %d %d yy0= %d\n",
x0,y0,dx,dy,tx,ty,yy0);
if (dx>0)
for(y=yy0;y<yy0+dy;y+=ty) { /* reduce the output to max 78x40 */
/* first image is the copied and modified bitmap of the box */
if (px)
for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
n1=0; c1='.';
/* the cell is set if any of its tx*ty sub-pixels is below cs */
for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
for(x2=x;x2<x+tx && x2<x0+dx;x2++)
{
if((getpixel(px->p,x2-x0+px->x0,
y2-y0+px->y0)<cs)) c1='@';
}
if (px->num_frames) { /* mark vectors */
int i;
if (c1!='$' && c1!='S') /* dont mark twice */
for (i=0;i<px->num_frame_vectors[px->num_frames-1];i++)
if ((px->frame_vector[i][0]-px->x0)/tx==(x-x0)/tx
&& (px->frame_vector[i][1]-px->y0)/ty==(y-y0)/ty)
{ c1=((c1=='@')?'$':'S'); break; }
}
fprintf(stderr,"%c", c1 );
}
/* 2nd image is the boxframe in the original bitmap */
if (dx<40) fprintf(stderr," ");
if (dx<40) /* do it only, if we have enough place */
for(x=x0;x<x0+dx;x+=tx){ /* by merging sub-pixels */
c1='.';
for(y2=y;y2<y+ty && y2<y0+dy;y2++) /* sub-pixels */
for(x2=x;x2<x+tx && x2<x0+dx;x2++)
{ if((getpixel(b,x2,y2)<cs)) c1='@'; }
fprintf(stderr,"%c", c1 );
}
c1=c2=' ';
/* mark lines with < */
if (px) if (y-y0+px->y0==px->m1 || y-y0+px->y0==px->m2
|| y-y0+px->y0==px->m3 || y-y0+px->y0==px->m4) c1='<';
if (y==y0 || y==yy0+dy-1) c2='-'; /* boxmarks */
fprintf(stderr,"%c%c\n",c1,c2);
}
}
/* Convenience form of out_b(): dumps a single box to stderr, taking the
   pixmap and region from the box itself and the gray threshold from the
   job configuration.
*/
void out_x(struct box *px) {
  const int cs = JOB->cfg.cs;   /* configured gray threshold */

  out_b(px, NULL, 0, 0, 0, 0, cs);
}
/* print out two boxes side by side, for debugging comparison algos
   Both boxes are drawn from the source pixmap JOB->src.p to stderr.
   box2 may be NULL, then only box1 is drawn; a NULL box1 prints nothing.
   The character for a cell is picked from a 16 entry table: index 0..7
   for pixels below the threshold JOB->cfg.cs, 8..15 otherwise, plus the
   value returned by marked() for that pixel.
   The step widths tx,ty are derived from box1 and used for both boxes. */
void out_x2(struct box *box1, struct box *box2){
  int x,y,i,tx,ty,dy;
  /*FIXME jb static*/static const char *c1="OXXXXxx@.,,,,,,,";
  pix *b=&JOB->src.p;

  if (!box1) return;

  /* number of rows = height of the taller box; box2 is optional and must
     not be touched before it has been checked */
  dy=(box1->y1-box1->y0+1);
  if(box2 && dy<box2->y1-box2->y0+1)dy=box2->y1-box2->y0+1;
  tx=(box1->x1-box1->x0)/40+1;
  ty=(box1->y1-box1->y0)/40+1; /* step, usually 1, but greater on large maps */
  if(box2)fprintf(stderr,"\n# list 2 patterns");
  for(i=0;i<dy;i+=ty) { /* reduce the output to max 78x40??? */
    fprintf(stderr,"\n"); y=box1->y0+i;
    for(x=box1->x0;x<=box1->x1;x+=tx)
      fprintf(stderr,"%c", c1[ ((getpixel(b,x,y)<JOB->cfg.cs)?0:8)+marked(b,x,y) ] );
    if(!box2) continue;
    fprintf(stderr," "); y=box2->y0+i;
    for(x=box2->x0;x<=box2->x1;x+=tx)
      fprintf(stderr,"%c", c1[ ((getpixel(b,x,y)<JOB->cfg.cs)?0:8)+marked(b,x,y) ] );
  }
}
/* ---- list output ---- for debugging ---
 * list all boxes where the results can be found within the c-option
 *
 * job->cfg.lc is the list of characters of interest; a NULL list selects
 * every box. A box is listed if its recognized char, one of its
 * alternative chars (tac[]) or alternative strings (tas[]) occurs in lc;
 * '_' in lc selects boxes whose char is UNKNOWN.
 * With verbose bit 4 set the box is additionally dumped via out_x().
 * Output goes to stderr. Always returns 0.
 */
int output_list(job_t *job) {
  int i = 0, j;
  struct box *box2;
  char *lc = job->cfg.lc;

  /* lc may be NULL (see the !lc tests below); passing NULL to %s is
     undefined, so print a placeholder instead */
  fprintf(stderr,"\n# list shape for charlist %s", lc ? lc : "(null)");
  for_each_data(&(JOB->res.boxlist)) {
    box2 = (struct box *) list_get_current(&(JOB->res.boxlist));
    /* look for the first alternative that is contained in the charlist;
       strchr() converts its argument to char, so only values below 256
       can be compared meaningfully */
    for (j=0; j<box2->num_ac; j++)
      if (!lc || (box2->tac[j] && box2->tac[j] < 256
                  && strchr(lc, box2->tac[j]))
              || (box2->tas[j] && strstr(lc, box2->tas[j]))) break;
    if (j<box2->num_ac)
      fprintf(stderr,"\n# box found in charlist");
    /* range checks first, then the lookup */
    if (!lc || (box2->c && box2->c < 256 && strchr(lc, box2->c))
            || (box2->c==UNKNOWN && strchr(lc, '_')) /* for compability */
            || j<box2->num_ac ){ /* also list alternative chars */
      fprintf(stderr,
        "\n# list shape %3d x=%4d %4d d= %3d %3d vf=%d ac=%d %04x %s",
        i, box2->x0, box2->y0,
        box2->x1 - box2->x0 + 1,
        box2->y1 - box2->y0 + 1,
        box2->num_frames, box2->num_ac,
        (int)box2->c, /* wchar_t -> char ???? */
        decode(box2->c,ASCII) );
      if (JOB->cfg.verbose & 4) out_x(box2);
    }
    i++;
  } end_for_each(&(JOB->res.boxlist));
  fprintf(stderr,"\n");
  return 0;
}
/* --- output of image incl. colored lines useful for developers ---
* debugging
* bit 0+1 is used for color coding (optical marker)
* color/gray: 0x01=red, 0x02=blue, 0x04=green???
* opt: 1 - mark unknown boxes red (first pass)
* 2 - mark unknown boxes more red (final pass)
* 4 - mark lines blue
* 8 - reset coloring (remove old marker)
*
* Works on job->tmp.ppo: marker bits are OR-ed into the low bits of bright
* pixels (value >= 160), then the pixmap is written with writeppm(fname,..).
* NOTE(review): the color names in the comments of this function do not
* agree with each other; which bit ends up as which color depends on
* writeppm()/put(), which are not visible here -- verify before relying on it.
* Always returns 0.
*/
int debug_img(char *fname, struct job_s *job, int opt) {
struct box *box2;
int x, y, ic, dx, i, j, col;
unsigned char *np;
pix *pp = &job->tmp.ppo;
if ( opt & 8 ) { /* clear debug bits in image */
for(y=0;y<pp->y;y++) {
np=&pp->p[(pp->x)*y];
for(x=0;x<pp->x;x++) {
*np = *np & 0xF1; /* clears bits 1..3, keeps bit 0 */
np++;
}
}
}
/* mark longest line which was used to estimate the rotation angle */
/* drawn through the vertical middle of the image with slope dy/dx */
if ((job->cfg.verbose&32) && job->res.lines.dx)
for(i=0;i<pp->x;i++) {
y=pp->y/2;
if (job->res.lines.dx) y+=job->res.lines.dy*i/job->res.lines.dx;
x=i;
if (x<0 || x>=pp->x || y<0 || y>=pp->y) continue;
np=&pp->p[x + (pp->x)*y];
if (*np<160) continue;
if((x&7)<5 && !(x&1)) /* dotted line */
put(pp,x,y,255,8);
}
ic = ((opt & 2) ? 1 : 2); /* obsolete */ /* ic is not used below */
for_each_data(&(job->res.boxlist)) {
box2 = (struct box *) list_get_current(&(job->res.boxlist));
/* mark boxes in 32=0x40=blue */
if (box2->c == ' ' || box2->c == '\n') continue;
/* mark chars with green left and under line */
col = 4; /* green */
if (box2->c == UNKNOWN && (opt & 3)) col=2; /* red */
/* column left of the box; only bright pixels get the marker bit */
if (box2->x0>1)
for (y = box2->y0; y <= box2->y1; y++) {
np=&pp->p[box2->x0-1 + y * pp->x]; if (*np<160) continue; *np|=col; }
/* row below the box */
if (box2->y1+1<pp->y)
for (x = box2->x0; x <= box2->x1; x++) {
np=&pp->p[x + (box2->y1+1) * pp->x]; if (*np<160) continue; *np|=col; }
/* mark pictures by green cross */
/* both diagonals of the box; no brightness or bounds test is done here */
if (box2->c == PICTURE)
for (x = 0; x < box2->x1-box2->x0+1; x++){
y=(box2->y1-box2->y0+1)*x/(box2->x1-box2->x0+1);
pp->p[(box2->x0+x) + (box2->y0+y) * pp->x] |= 4;
pp->p[(box2->x1-x) + (box2->y0+y) * pp->x] |= 4;
}
} end_for_each(&(job->res.boxlist));
if( opt & 4 )
{
struct tlines *lines = &job->res.lines;
int yr;
if (job->cfg.verbose)
fprintf(stderr, "# mark lines for %s.ppm\n", fname);
/* or read from outside??? */
for (i = 0; i < lines->num; i++) { /* mark lines by 0x08 = blue */
dx = lines->x1[i] - lines->x0[i] + 1;
/* j runs one column beyond both ends of the text line */
for (j = -1; j < dx+1; j++) {
x = lines->x0[i] + j;
if (x<0 || x>=pp->x) continue;
for (y=lines->m1[i];y<=lines->m4[i];y++) {
/* box arround m2-m3 */
/* i.e. the area between m2 and m3 inside the line is left unmarked */
if (y>=lines->m2[i] && y<=lines->m3[i] && j>-1 && j<dx) continue;
yr = y; /* y.rotated */
if (lines->dx) yr += lines->dy * x / (lines->dx);
if (yr<0 || yr>=pp->y) continue;
np = &(pp->p[x + (pp->x)*yr]);
if (*np<160) continue; /* do not touch dark pixels */
if ((*np&6)!=0) continue; /* only change white pixels */
put(pp, x, yr, 255, 6); /* UPN: 255 and 6 or */
}
}
}
}
if (job->cfg.verbose&1)
fprintf(stderr,"# writing %s.ppm\n", fname);
writeppm(fname, pp);
return 0;
}

View File

@@ -0,0 +1,153 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 1999 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
*/
/* plan: use popen("ppm2pcx -packed ...","w"); for writing pcx */
#include <stdio.h>
#include <stdlib.h>
/* #include <assert.h> */
#include "pcx.h"
/* 8 bit value, used for the decoded pixel samples */
typedef unsigned char byte;
/* Fatal error: prints "ERROR <file> L<line>: <x>" to stderr and terminates
   the program with exit status 1. x must be a string literal because it is
   concatenated into the format string. Expands to a brace block. */
#define ERR(x) { fprintf(stderr,"ERROR "__FILE__" L%d: " x "\n",__LINE__);exit(1);}
/* read error flag: cleared at the start of readpcx(), set to 1 by read_b()
   when end of file or an I/O error is detected */
int err;
/* --- needed for reading PCX-files
   Reads one byte from f1 and returns it. If the stream is at end of file
   or in error state afterwards, the global flag err is set to 1; the
   returned value is then the fgetc() result truncated to a byte. */
unsigned char read_b(FILE *f1){
  const int raw = fgetc(f1);

  if (ferror(f1) || feof(f1))
    err = 1;
  return (unsigned char) raw;
}
/* Reads the PCX file `name` into *p (see pcx.format.txt).
   name - path of the file
   p    - receives a malloc'ed buffer of nx*ny bytes in p->p, the size in
          p->x, p->y and p->bpp=1
   vvv  - if nonzero, header information is printed to stderr
   Only 1 or 8 bits per pixel are accepted; any format or I/O problem in
   the header ends the program through ERR(). A read error inside the
   pixel data is reported on stderr and ends the decoding early.

   something here is wrong!
   NOTE(review): x is only reset after ALL planes of a scan line, so for
   files with more than one plane every plane after the first is decoded
   for a single run only. This looks unintended but is left as it was --
   verify against multi-plane sample files before changing it.
   NOTE(review): for 8 bit files the palette is fread() byte-wise into the
   int array pal[][]; pal is never used afterwards, so this only affects
   the (unused) palette values. */
void readpcx(char *name,pix *p,int vvv){
  int page,pages,nx,ny,i,j,b,x,y,bpl,bits,pal[256][3];
  size_t idx;
  FILE *f1;
  unsigned char *pic,h[128],bb,b1,b2,b3;

  err=0;
  for(i=0;i<256;i++)for(j=0;j<3;j++)pal[i][j]=i;
  f1=fopen(name,"rb"); if(!f1) ERR("open");
  if(fread(h,1,128,f1)!=128)ERR("read PCX header"); /* read 128 bytes -> h[] */
  if(h[0]!=10)ERR("no ZSoft sign");                 /* ZSoft sign */
  if(h[2]> 1)ERR("unknown coding");                 /* run length encoding */
  bits = h[3];                                      /* 1 or 8 */
  if(bits!=1 && bits!=8)ERR("only 1 or 8 bits supported");
  nx = h[ 9]*256+h[ 8] - h[ 5]*256-h[ 4] +1;        /* Xmax-Xmin */
  ny = h[11]*256+h[10] - h[ 7]*256-h[ 6] +1;        /* Ymax-Ymin */
  /* the size comes straight from the (untrusted) file header: refuse
     empty/negative sizes and sizes whose product does not fit size_t,
     otherwise the buffer below would be too small for the pixel writes */
  if(nx<=0 || ny<=0)ERR("invalid image size");
  if((size_t)nx > ((size_t)-1)/(size_t)ny)ERR("image too large");
  pages=h[65]; bpl=h[66]+256*h[67];                 /* bytes per line */
  if(vvv)
    fprintf(stderr,"# PCX version=%d bits=%d x=%d y=%d HRes=%d VRes=%d\n"
                   "# NPlanes=%d BytesPerLine=%d Palette=%s",
      h[1],bits,nx,ny,h[12]+256*h[13],h[14]+256*h[15],
      pages,bpl,((h[68]==1)?"1=color/bw":"2=gray"));
  /* line1(NP=4): RRRRR...,GGGG....,BBBBB...,IIII...., line2: RRRR...,GGGG.... */
  /* C4 EF = (C4&3F)*EF = EF EF EF EF */
  fflush(stdout);
  /* palette: for(i=0;i<16;i++) for(j=0;j<3;j++) h[16+3*i+j] */
  if(pages>1)for(b=0;b<16;b++) for(i=0;i<16;i++)
    for(j=0;j< 3;j++) pal[b*16+i][j]=h[16+3*i+j]>>2;
  if(bits>7){
    fseek(f1,-3*256,SEEK_END); if(fread(pal,3,256,f1)!=256)ERR("read palette");
    for(i=0;i<256;i++) for(j=0;j<3;j++) pal[i][j]>>=2;
  }
  fseek(f1,128,SEEK_SET);                           /* back to the pixel data */
  pic=(unsigned char *)malloc( (size_t)nx*(size_t)ny );
  if(pic==NULL)ERR("no memory");                    /* no memory */
  x=y=0;
  do {
    for(page=0;page<pages;page++) /* 192 == 0xc0 => b1=counter */
    do {
      /* one run: b1 = repeat count, b2 = data byte */
      b1=1; bb=read_b(f1); b2=bb;
      if((b2>=192) && (h[2]==1)){b1=b2&63;bb=read_b(f1);b2=bb;}
      if(err){fprintf(stderr,"\nread error x=%d y=%d\n",x,y);x=nx;y=ny;break;}
      for(b3=0;b3<b1;b3++)for(b=0;b<8;b+=bits,x++)if(x<nx){
        /* index in size_t: nx*y can exceed the range of int */
        idx=(size_t)nx*(size_t)y+(size_t)x;
        /* take the next `bits` bits of b2, most significant first */
        bb=(unsigned char)((b2>>(8-bits-b)) & ((1u<<bits)-1u));
        if(bits==1 && bb==1) bb=240;
        if(page==0) pic[idx] =(byte)bb;
        /* bits shifted to position 8 or above never reach the byte; skipping
           them gives the same result and avoids oversized shift counts */
        else if(page*bits<8) pic[idx]|=(byte)(bb<<(page*bits));
      }
    } while(x<(9-bits)*bpl); x=0; y++;
  } while(y<ny);
  /* */
  fclose(f1);
  p->p=pic; p->x=nx; p->y=ny; p->bpp=1;
  if(vvv)fprintf(stderr,"\n");
}
/* -----------------------------------------------------------------------
// write bmp 8bit palette no RLE
// bit 2+3 used for color coding (markers)
// replaced by writeppm (ppm.gz) and is obsolete now; to be removed later
*/
/*
 * Write picture p as an uncompressed 8-bit palettised BMP file.
 *
 * name: output file name
 * p:    picture; p.p holds p.x * p.y bytes, one per pixel
 * vvv:  non-zero prints the written dimensions to stderr
 *
 * Rows are written bottom-up and padded with zero bytes to a multiple of
 * four.  Every failure is fatal (ERR prints a message and exits).
 */
void writebmp(char *name,pix p,int vvv){ /* see pcx.format.txt */
  int nx,ny,i,y;
  const unsigned char rest[4]={0,0,0,0};  /* source of the row padding */
  FILE *f1;
  unsigned char *pic, h[54+4*256];        /* file+info header, palette */
  long fs,fo,hs,is; /* filesize, offset, headersize, imagesize */

  nx=p.x; ny=p.y; pic=p.p;
  if (nx&3) nx+=4-(nx&3); /* must be mod4 ? */
  hs=40;         /* bmi headersize fix */
  is=(long)nx*ny; /* imagesize, computed in long to avoid int overflow */
  fo=14+hs+4*256;
  fs=fo+is;
  /* clear everything, including the reserved 4th byte of each palette
     entry, now that the buffer is no longer static */
  for(i=0;i<54+4*256;i++){ h[i]=0; }
  /* BITMAPFILEHEADER */
  h[ 0]='B'; h[ 1]='M';  /* type of file BMP */
  h[ 2]= fs     &255; h[ 3]=(fs>> 8)&255;
  h[ 4]=(fs>>16)&255; h[ 5]=(fs>>24)&255; /* size of file */
  h[10]= fo     &255; h[11]=(fo>> 8)&255;
  h[12]=(fo>>16)&255; h[13]=(fo>>24)&255; /* offset to image data */
  /* BITMAPINFO (BITMAPCOREHEADER not used here) */
  /* 14 - HEADER */
  h[14]= hs     &255; h[15]=(hs>> 8)&255;
  h[16]=(hs>>16)&255; h[17]=(hs>>24)&255; /* bmi-header size */
  /* width and height are 32-bit fields: store all four bytes */
  h[18]= nx     &255; h[19]=(nx>> 8)&255;
  h[20]=(nx>>16)&255; h[21]=(nx>>24)&255; /* WIDTH/pixel */
  h[22]= ny     &255; h[23]=(ny>> 8)&255;
  h[24]=(ny>>16)&255; h[25]=(ny>>24)&255; /* HEIGHT/pixel */
  h[26]=1;        /* planes */
  h[28]=8;        /* bits/pixel 1,4,8,24 */
  h[30]=0;        /* compression */
  h[34]= is     &255; h[35]=(is>> 8)&255;
  h[36]=(is>>16)&255; h[37]=(is>>24)&255; /* sizeImage (can be 0 if ~RLE) */
  h[38]=0;h[39]=1;    /* ca 100dpi, x/meter */
  h[42]=0;h[43]=1;    /* y/meter */
  h[46]=0;h[47]=1;    /* colorused (0=maximum) */
  h[50]=0;h[51]=1;    /* colorimportant (0=all) */
  /* 54 - endofheader */
  /* palette: gray from the two top bits, bits 1 and 2 (markers) tint it */
  for(i=0;i<256;i++){
    h[54+4*i+0]=((~((i & 2)*64)) & (i & (128+64)))|63;
    h[54+4*i+1]=((~((i & 2)*64)) & (~((i & 4)*32)) & (i & (128+64)))|63;
    h[54+4*i+2]=( ((i & 2)* 8) | ((~((i & 4)*32)) & (i & (128+64)))|63);
  } /* blue-green-red */
  f1=fopen(name,"wb");
  if(!f1){
    fprintf(stderr," error opening file\n");
    ERR("open"); /* open-error */
  }
  if(fwrite(h,1,54+4*256,f1)!=54+4*256)ERR("write head");
  if(vvv) fprintf(stderr,"# write BMP x=%d y=%d\n",nx,ny);
  for(y=ny-1;y>=0;y--){   /* BMP stores the bottom row first */
    if(((int)fwrite(pic+p.x*y,1,p.x,f1))!=p.x)ERR("write");
    if(nx>p.x)
      if(((int)fwrite(rest,1,nx-p.x,f1))!=nx-p.x)ERR("write");
  }
  /* buffered data is flushed here, so a failing close means a bad file */
  if(fclose(f1)!=0)ERR("close");
}
/* ---------------------------------------------------------------------- */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,537 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2006 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Joerg.Schulenburg@physik.uni-magdeburg.de */
/* Filter by tree, filter by number methods added by
* William Webber, william@williamwebber.com. */
#include "pgm2asc.h"
#include <assert.h>
#include <string.h>
/*
* Defining this causes assert() calls to be turned off runtime.
*
* This is normally taken care of by make.
*/
/* #define NDEBUG */
// ------------------ (&~7)-pixmap-functions ------------------------
/* test if pixel marked?
* Returns: 0 if not marked, least 3 bits if marked.
*/
/* Return the three mark bits of pixel (x,y); positions outside the
 * picture count as unmarked and yield 0. */
int marked (pix * p, int x, int y) {
  int inside = (x >= 0 && y >= 0 && x < p->x && y < p->y);

  return inside ? (pixel_atp(p, x, y) & 7) : 0;
}
#define Nfilt3 6 /* number of 3x3 filter */
/*
* Filters to correct possible scanning or image errors.
*
* Each of these filters represents a 3x3 pixel area.
* 0 represents a white or background pixel, 1 a black or
* foreground pixel, and 2 represents a pixel of either value.
* Note that this differs from the meaning of pixel values in
* the image, where a high value means "white" (background),
* and a low value means "black" (foreground).
*
* These filters are applied to the 3x3 environment of a pixel
* to be retrieved from the image, centered around that pixel
* (that is, the to-be-retrieved pixel corresponds with the
* the fifth position of the filter).
* If the filter matches that pixel environment, then
* the returned value of the pixel is inverted (black->white
* or white->black).
*
* So, for instance, the second filter below matches this
* pattern:
*
* 000
* X0X
* 000
*
* and "fills in" the middle (retrieved) pixel to rejoin a line
* that may have been broken by a scanning or image error.
*/
/* Each row lists the nine cells of one 3x3 pattern, row by row starting
 * at the top-left neighbour; cell index 4 is the pixel being retrieved. */
const char filt3[Nfilt3][9]={
{0,0,0, 0,0,1, 1,0,0}, /* (-1,-1) (0,-1) (1,-1) (-1,0) (0,0) ... */
{0,0,0, 1,0,1, 0,0,0}, /* centre 0 with left and right neighbours 1 */
{1,0,0, 0,0,1, 0,0,0},
{1,1,0, 0,1,0, 2,1,1}, /* the only pattern whose centre cell is 1 */
{0,0,1, 0,0,0, 2,1,0},
{0,1,0, 0,0,0, 1,2,0}
};
/* 2=ignore_pixel, 0=white_background, 1=black_pixel */
/*
* Filter by matrix uses the above matrix of filters directly. Pixel
* environments to be filtered are compared pixel by pixel against
* these filters.
*
* Filter by number converts these filters into integer representations
* and stores them in a table. Pixel environments are similarly
* converted to integers, and looked up in the table.
*
* Filter by tree converts these filters into a binary tree. Pixel
* environments are matched by traversing the tree.
*
* A typical performance ratio for these three methods is 20:9:7
* respectively (i.e., the tree method takes around 35% of the
* time of the matrix method).
*/
#define FILTER_BY_MATRIX 0
#define FILTER_BY_NUMBER 1
#define FILTER_BY_TREE 2
#define FILTER_METHOD FILTER_BY_TREE
/*
* Defining FILTER_CHECKED causes filter results from either the tree
* or the number method to be checked against results of the other
* two methods to ensure correctness. This is for bug checking purposes
* only.
*/
/* #define FILTER_CHECKED */
/*
* Defining FILTER_STATISTICS causes statistics to be kept on how many
* times the filters are tried, how many times a filter matches, and
* of these matches how many flip a black pixel to white, and how many
* the reverse. These statistics are printed to stderr at the end of
* the program run. Currently, statistics are only kept if the tree
* filter method is being used.
*/
/* #define FILTER_STATISTICS */
#ifdef FILTER_STATISTICS
static int filter_tries = 0;
static int filter_matches = 0;
static int filter_blackened = 0;
static int filter_whitened = 0;
#endif
#ifdef FILTER_STATISTICS
/* Dump the four error-filter counters to stderr on a single line. */
void print_filter_stats() {
  fprintf(stderr,
          "\n# Error filter statistics: tries %d, matches %d, "
          "blackened %d, whitened %d\n",
          filter_tries,
          filter_matches,
          filter_blackened,
          filter_whitened);
}
#endif
#if FILTER_METHOD == FILTER_BY_MATRIX || defined(FILTER_CHECKED)
/*
* Filter the pixel at (x,y) by directly applying the matrix.
*/
/* Compare the 3x3 neighbourhood of (x,y) cell by cell with every row of
 * filt3.  On the first matching row the inverted centre value is returned
 * (JOB->cfg.cs when the pattern's centre is 1, else 0); without a match
 * the stored pixel is returned with its mark bits cleared. */
int pixel_filter_by_matrix(pix * p, int x, int y) {
  char c33[9];        /* top bit of each neighbour, 0 where off-image */
  int f, k, dx, dy;

  memset(c33, 0, sizeof(c33));
  for (dy = -1; dy <= 1; dy++) {
    if (dy < 0 && y <= 0) continue;          /* no row above */
    if (dy > 0 && y + 1 >= p->y) continue;   /* no row below */
    for (dx = -1; dx <= 1; dx++) {
      if (dx < 0 && x <= 0) continue;        /* no column to the left */
      if (dx > 0 && x + 1 >= p->x) continue; /* no column to the right */
      c33[(dy + 1) * 3 + (dx + 1)] = pixel_atp(p, x + dx, y + dy) >> 7;
    }
  }

  for (f = 0; f < Nfilt3; f++) {
    int hit = 1;
    for (k = 0; k < 9 && hit; k++) {
      int want = filt3[f][k];
      /* a cell fails when it is not "either" (2) and the image bit
         equals the pattern bit (image and pattern polarity are opposite) */
      if (!(want >> 1) && c33[k] == (1 & want))
        hit = 0;
    }
    if (hit)
      return ((filt3[f][4]) ? JOB->cfg.cs : 0);
  }
  return pixel_atp(p, x, y) & ~7;
}
#endif
#if FILTER_METHOD == FILTER_BY_NUMBER || defined(FILTER_CHECKED)
#define NUM_TABLE_SIZE 512 /* one slot per possible 9-bit value */
/*
 * Enter one matrix filter into the number table.
 *
 * num_table:    lookup table with NUM_TABLE_SIZE entries
 * filter:       the nine cells of the filter (0, 1 or 2)
 * i:            index of the next cell to process; callers start at 0
 * gen_num_filt: bits collected so far, cell k stored in bit (8 - k)
 *
 * A cell holding 2 ("either value") forks the recursion, so such a
 * filter yields one table entry per alternative.
 */
void rec_generate_number_table(char * num_table, const char * filter,
                               int i, unsigned short gen_num_filt) {
  unsigned short with_bit;

  if (i == 9) {
    /* All cells consumed.  The image uses the opposite polarity
     * (white is the high bit), so the key is the 9-bit complement. */
    unsigned short key = (unsigned short)((~gen_num_filt) & 0x01ff);
    assert(key < NUM_TABLE_SIZE);
    num_table[key] = 1;
    return;
  }

  with_bit = (unsigned short)(gen_num_filt | (1 << (8 - i)));
  switch (filter[i]) {
  case 0:
    rec_generate_number_table(num_table, filter, i + 1, gen_num_filt);
    break;
  case 1:
    rec_generate_number_table(num_table, filter, i + 1, with_bit);
    break;
  case 2:
    rec_generate_number_table(num_table, filter, i + 1, gen_num_filt);
    rec_generate_number_table(num_table, filter, i + 1, with_bit);
    break;
  default:
    /* any other cell value produces no table entry */
    break;
  }
}
/*
* Filter the pixel at (x, y) using a number table.
*
* Each filter can be converted into a 9-bit representation, where
* filters containing 2 (either value) pixels are converted into
* a separate numerical representation for each value of such a pixel, where
* position i in the filter corresponds to bit (8 - i) in the number. Each resulting
* numerical representation N is represented as a 1 value in the Nth
* position of a lookup table. A pixel's environment is converted in
* the same way to a numeric representation P, and that environment
* matches a filter if num_table[P] == 1.
*/
/* Look the 3x3 neighbourhood of (x,y) up in a 512-entry table that is
 * filled from filt3 on the first call.  Returns the inverted centre
 * value on a hit, otherwise the stored pixel without its mark bits. */
int pixel_filter_by_number(pix * p, int x, int y) {
  static char num_table[NUM_TABLE_SIZE];
  static int num_table_generated = 0;
  unsigned short val = 0;
  int dx, dy;

  if (!num_table_generated) {
    int f = 0;
    memset(num_table, 0, sizeof(num_table));
    while (f < Nfilt3) {
      rec_generate_number_table(num_table, filt3[f], 0, 0);
      f++;
    }
    num_table_generated = 1;
  }

  /* pack the top bit of every neighbour inside the image; cell k of the
     3x3 square lands in bit (8 - k), off-image cells stay 0 */
  for (dy = -1; dy <= 1; dy++) {
    if (dy < 0 && y <= 0) continue;
    if (dy > 0 && y + 1 >= p->y) continue;
    for (dx = -1; dx <= 1; dx++) {
      int cell;
      if (dx < 0 && x <= 0) continue;
      if (dx > 0 && x + 1 >= p->x) continue;
      cell = (dy + 1) * 3 + (dx + 1);
      val |= (pixel_atp(p, x + dx, y + dy) >> 7) << (8 - cell);
    }
  }
  assert(val < NUM_TABLE_SIZE);

  if (!num_table[val])
    return pixel_atp(p, x, y) & ~7;
  /* bit 4 is the centre pixel: set (white) -> black, clear -> white */
  return (val & (1 << 4)) ? 0 : JOB->cfg.cs;
}
#endif
#if FILTER_METHOD == FILTER_BY_TREE || defined(FILTER_CHECKED)
#define TREE_ARRAY_SIZE 1024
/* 1+ number of nodes in a complete binary tree of height 10 */
/*
 * Mark the branch (or branches) of one filter in the array-encoded tree.
 *
 * tree:   array of TREE_ARRAY_SIZE node slots
 * filter: the nine cells of the filter
 * i:      index of the cell to process next; callers start at 0
 * n:      slot of the current node, -1 for the virtual root
 *
 * Inner nodes on the path are set to 1.  The leaf reached after nine
 * cells is set to 2 when the filter's centre cell is 0, otherwise to 1.
 */
void rec_generate_tree(char * tree, const char * filter, int i, int n) {
  int left_child, right_child;

  assert(i >= 0 && i <= 9);
  assert(n < TREE_ARRAY_SIZE);

  if (i == 9) {
    tree[n] = (filter[4] == 0) ? 2 : 1;
    return;
  }

  /* the virtual root (n == -1) has no slot of its own; it only decides
     whether the branch starts to the left or to the right */
  if (n != -1)
    tree[n] = 1;

  left_child = n * 2 + 2;
  right_child = n * 2 + 3;
  switch (filter[i]) {
  case 0:
    rec_generate_tree(tree, filter, i + 1, left_child);
    break;
  case 1:
    rec_generate_tree(tree, filter, i + 1, right_child);
    break;
  default:  /* "either value": follow both children */
    rec_generate_tree(tree, filter, i + 1, left_child);
    rec_generate_tree(tree, filter, i + 1, right_child);
    break;
  }
}
/*
* Filter the pixel at (x, y) using the tree method.
*
* Each filter is represented by a single branch of a binary
* tree, except for filters contain "either value" entries, which
* bifurcate at that point in the branch. Each white pixel in the filter
* is a left branch in the tree, each black pixel a right branch. The
* final node of a branch indicates whether this filter turns a white
* pixel black, or a black one white.
*
* We match a pixel's environment against this tree by similarly
* using the pixels in that environment to traverse the tree. If
* we run out of nodes before getting to the end of a branch, then
* the environment doesn't match against any of the filters represented
* by the tree. Otherwise, we return the value specified by the
* final node.
*
* Since the total tree size, even including missing nodes, is small
* (2 ^ 10), we can use a standard array representation of a binary
* tree, where for the node tree[n], the left child is tree[2n + 2],
* and the right tree[2n + 3]. The only information we want
* from a non-leaf node is whether it exists (that is, is part of
* a filter-representing branch). We represent this with the value
* 1 at the node's slot in the array, the contrary by 0. For the
* leaf node, 0 again represents non-existence, 1 that the filter
* represented by this branch turns a black pixel white, and 2 a
* white pixel black.
*/
/*
 * Returns the stored pixel without its mark bits when no filter matches,
 * JOB->cfg.cs when the matching leaf holds 1, and 0 when it holds 2.
 * (x, y) itself is read without a bounds check here.
 */
int pixel_filter_by_tree(pix * p, int x, int y) {
static char tree[TREE_ARRAY_SIZE];
static int tree_generated = 0;
int n;
/* value handed back whenever the traversal leaves the tree */
int pixel_val = pixel_atp(p, x, y) & ~7;
#ifdef FILTER_STATISTICS
static int registered_filter_stats = 0;
/* arrange, once, for the counters to be printed at program exit */
if (!registered_filter_stats) {
atexit(print_filter_stats);
registered_filter_stats = 1;
}
filter_tries++;
#endif /* FILTER_STATISTICS */
/* build the tree from filt3 on the first call only */
if (!tree_generated) {
int f;
memset(tree, 0, sizeof(tree));
for (f = 0; f < Nfilt3; f++) {
const char * filter = filt3[f];
rec_generate_tree(tree, filter, 0, -1);
}
tree_generated = 1;
}
/* start at the virtual root, which has no slot in the array */
n = -1;
/* Note that for the image, low is black, high is white, whereas
* for the filter, 0 is white, 1 is black. For the image, then,
* high (white) means go left, low (black) means go right. */
/* The helper macros below are not #undef'ed after the function. */
#define IS_BLACK(_dx,_dy) !(pixel_atp(p, x + (_dx), y + (_dy)) >> 7)
#define IS_WHITE(_dx,_dy) (pixel_atp(p, x + (_dx), y + (_dy)) >> 7)
#define GO_LEFT n = n * 2 + 2
#define GO_RIGHT n = n * 2 + 3
#define CHECK_NO_MATCH if (tree[n] == 0) return pixel_val
/* Top row */
if (y == 0) {
/* top 3 pixels off edge == black == right
n = 2 * (2 * (2 * -1 + 3) + 3) + 3 = 13 */
n = 13;
} else {
/* columns left of / right of the image are treated as black as well */
if (x == 0 || IS_BLACK(-1, -1))
GO_RIGHT;
else
GO_LEFT;
if (IS_WHITE(0, -1))
GO_LEFT;
else
GO_RIGHT;
CHECK_NO_MATCH;
if (x + 1 == p->x || IS_BLACK(+1, -1))
GO_RIGHT;
else
GO_LEFT;
CHECK_NO_MATCH;
}
/* Second row */
if (x == 0 || IS_BLACK(-1, 0))
GO_RIGHT;
else
GO_LEFT;
CHECK_NO_MATCH;
if (IS_WHITE(0, 0))
GO_LEFT;
else
GO_RIGHT;
CHECK_NO_MATCH;
if (x + 1 == p->x || IS_BLACK(+1, 0))
GO_RIGHT;
else
GO_LEFT;
CHECK_NO_MATCH;
/* bottom row */
if (y + 1 == p->y) {
/* bottom 3 pixels off edge == black == right
n' = 2 * (2 * (2n + 3) + 3) + 3
= 2 * (4n + 9) + 3
= 8n + 21 */
n = 8 * n + 21;
} else {
if (x == 0 || IS_BLACK(-1, +1))
GO_RIGHT;
else
GO_LEFT;
CHECK_NO_MATCH;
if (IS_WHITE(0, 1))
GO_LEFT;
else
GO_RIGHT;
CHECK_NO_MATCH;
if (x + 1 == p->x || IS_BLACK(+1, +1))
GO_RIGHT;
else
GO_LEFT;
}
/* n now addresses a leaf slot: 0 = no filter, 1 or 2 = filter result */
assert(n < TREE_ARRAY_SIZE);
assert(tree[n] == 0 || tree[n] == 1 || tree[n] == 2);
CHECK_NO_MATCH;
#ifdef FILTER_STATISTICS
filter_matches++;
#endif
/* leaf 1: hand back the threshold value JOB->cfg.cs */
if (tree[n] == 1) {
#ifdef FILTER_STATISTICS
if (pixel_atp(p, x, y) < JOB->cfg.cs)
filter_whitened++;
#endif
return JOB->cfg.cs;
} else {
/* leaf 2: hand back 0 */
#ifdef FILTER_STATISTICS
if (pixel_atp(p, x, y) >= JOB->cfg.cs)
filter_blackened++;
#endif
return 0;
}
}
#endif /* FILTER_METHOD == FILTER_BY_TREE */
/*
* This simple filter attempts to correct "fax"-like scan errors.
*/
/*
 * Fax-style correction for the pixel at (x,y).
 *
 * A white pixel (top bit set) is returned as 64 when the pixel below it
 * is black and one horizontal neighbour is black while the pixel
 * diagonally below on that same side is white:
 *   {2,2,2, 2,0,1, 2,1,0}   or   {2,2,2, 1,0,2, 0,1,2}
 * In every other case the stored value is returned with the mark bits
 * cleared.
 *
 * Neighbours are only read when they lie inside the picture; a pattern
 * that would need an off-image pixel is treated as not matching.  (x,y)
 * itself must be inside the picture.
 */
int pixel_faxfilter(pix *p, int x, int y) {
  int r = pixel_atp(p,x,y) & ~7;

  if (!(r & 128))
    return r;                         /* only white pixels are changed */
  if (y + 1 >= p->y)
    return r;                         /* both patterns need the row below */
  if (pixel_atp(p, x, y+1) & 128)
    return r;                         /* pixel below must be black */

  /* {2,2,2, 2,0,1, 2,1,0}: right black, lower right white */
  if (x + 1 < p->x
      && !(pixel_atp(p, x+1, y  ) & 128)
      &&  (pixel_atp(p, x+1, y+1) & 128))
    return 64; /* faxfilter */

  /* {2,2,2, 1,0,2, 0,1,2}: left black, lower left white */
  if (x > 0
      && !(pixel_atp(p, x-1, y  ) & 128)
      &&  (pixel_atp(p, x-1, y+1) & 128))
    return 64; /* faxfilter */

  return r;
}
#ifdef FILTER_CHECKED
/*
* Print out the 3x3 environment of a pixel as a 9-bit binary.
*
* For debugging purposes only.
*/
/* Write the 3x3 surroundings of (x,y) to `out` as nine characters, row
 * by row: '?' for a position outside the picture, '0' when the top bit
 * of the pixel is set and '1' when it is clear. */
void print_pixel_env(FILE * out, pix * p, int x, int y) {
  int cell;

  for (cell = 0; cell < 9; cell++) {
    int xx = x - 1 + cell % 3;
    int yy = y - 1 + cell / 3;
    int ch;

    if (xx < 0 || xx >= p->x || yy < 0 || yy >= p->y)
      ch = '?';
    else
      ch = (pixel_atp(p, xx, yy) >> 7) ? '0' : '1';
    fputc(ch, out);
  }
}
#endif
/* this function is heavily used
* test if pixel was set, remove low bits (marks) --- later with error-correction
* result depends on n_run, if n_run>0 filter are used
* Returns: pixel-color (without marks)
*/
/* Return the colour of pixel (x,y) with the three mark bits cleared.
 * Positions outside the picture read as 255 & ~7.  While JOB->tmp.n_run
 * is positive the value comes from the error-correcting filter selected
 * by FILTER_METHOD instead of straight from the picture. */
int getpixel(pix *p, int x, int y){
  if (!(x >= 0 && y >= 0 && x < p->x && y < p->y))
    return 255 & ~7;

  /* filter will be used only once later, when vectorization replaces pixel
   * processing
   */
  if (JOB->tmp.n_run <= 0)
    return (pixel_atp(p,x,y) & ~7);

  { /* use the filters (correction of errors) */
#if FILTER_METHOD == FILTER_BY_NUMBER
    int by_number = pixel_filter_by_number(p, x, y);
#ifdef FILTER_CHECKED
    int by_matrix = pixel_filter_by_matrix(p, x, y);
    if (by_number != by_matrix) {
      fprintf(stderr,
              "# BUG: pixel_filter: by number: %d; by matrix: %d, "
              "by atp %d; env: ", by_number, by_matrix,
              pixel_atp(p, x, y) & ~7);
      print_pixel_env(stderr, p, x, y);
      fputc('\n', stderr);
    }
#endif /* FILTER_CHECKED */
    return by_number;
#elif FILTER_METHOD == FILTER_BY_MATRIX
    return pixel_filter_by_matrix(p, x, y);
#elif FILTER_METHOD == FILTER_BY_TREE
    int by_tree = pixel_filter_by_tree(p, x, y);
#ifdef FILTER_CHECKED
    int by_matrix = pixel_filter_by_matrix(p, x, y);
    int by_number = pixel_filter_by_number(p, x, y);
    if (by_tree != by_matrix || by_tree != by_number) {
      fprintf(stderr,
              "# BUG: pixel_filter: tree: %d; matrix: %d, "
              "number: %d, atp %d; env: ", by_tree, by_matrix, by_number,
              pixel_atp(p, x, y) & ~7);
      print_pixel_env(stderr, p, x, y);
      fputc('\n', stderr);
    }
#endif /* FILTER_CHECKED */
    return by_tree;
#else
#error FILTER_METHOD not defined
#endif /* FILTER_BY_NUMBER */
  }
}
/* modify pixel, test if out of range */
/* Replace pixel (x,y) by (old & ia) | io; positions outside the picture
 * are silently ignored. */
void put(pix * p, int x, int y, int ia, int io) {
  if (x < 0 || y < 0 || x >= p->x || y >= p->y)
    return;
  pixel_atp(p, x, y) = (pixel_atp(p, x, y) & ia) | io;
}

View File

@@ -0,0 +1,733 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
v0.1.0 initial version (stdin added)
v0.2.0 popen added
v0.2.7 review by Bruno Barberi Gnecco
v0.39 autoconf
v0.41 fix integer and heap overflow, change color output
v0.46 fix blank spaces problem in filenames
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#ifdef HAVE_UNISTD_H
/* #include <unistd.h> */
#endif
/* Windows needs extra code to work fine, ^Z in BMP's will stop input else.
* I do not have any idea when this text mode will be an advantage
* but the MS community seems to like to do simple things in a complex way. */
#if defined(O_BINARY) && (defined(__WIN32) || defined(__WIN32__)\
|| defined(__WIN64) || defined(__WIN64__) || defined(__MSDOS__))
# include <fcntl.h>
# define SET_BINARY(_f) do {if (!isatty(_f)) setmode (_f, O_BINARY);} while (0)
#else
# define SET_BINARY(f) (void)0
#endif
#include "pnm.h"
#ifdef HAVE_PAM_H
# include <pam.h>
# include <sys/types.h>
# include <sys/stat.h>
# include <fcntl.h>
#else
# include <ctype.h>
#endif
#define EE() fprintf(stderr,"\nERROR "__FILE__" L%d: ",__LINE__)
#define E0(x0) {EE();fprintf(stderr,x0 "\n"); }
#define F0(x0) {EE();fprintf(stderr,x0 "\n"); exit(1);}
#define F1(x0,x1) {EE();fprintf(stderr,x0 "\n",x1); exit(1);}
/*
* Weights to use for the different colours when converting a ppm
* to greyscale. These weights should sum to 1.0
*
* The below values have been chosen to reflect the fact that paper
* goes a reddish-yellow as it ages.
*
* v0.41: for better performance, we use integer instead of double
* this integer value divided by 1024 (2^10) gives the factor
*/
#define PPM_RED_WEIGHT 511 /* .499 */
#define PPM_GREEN_WEIGHT 396 /* .387 */
#define PPM_BLUE_WEIGHT 117 /* .114 */
/*
feel free to expand this list of usable converting programs
Note 1: the last field must be NULL.
Note 2: "smaller" extensions must come later: ".pnm.gz" must come
before ".pnm".
calling external programs is a security risk
ToDo: for better security replace gzip by /usr/bin/gzip !
*/
/* Pairs of (file name suffix, converter command writing PNM to stdout),
 * terminated by a single NULL. */
char *xlist[]={
  ".pnm.gz",	"gzip -cd",  /* compressed pnm-files, gzip package */
  ".pbm.gz",	"gzip -cd",
  ".pgm.gz",	"gzip -cd",
  ".ppm.gz",	"gzip -cd",
  ".pnm.bz2",	"bzip2 -cd",
  ".pbm.bz2",	"bzip2 -cd",
  ".pgm.bz2",	"bzip2 -cd",
  ".ppm.bz2",	"bzip2 -cd",
  ".jpg", 	"djpeg -gray -pnm",  /* JPG/JPEG, jpeg package */
  ".jpeg",	"djpeg -gray -pnm",
  ".gif",	"giftopnm -image=all",  /* GIF, netpbm package */
  ".bmp",	"bmptoppm",
  ".tiff",	"tifftopnm",
  ".png",	"pngtopnm", /* Portable Network Graphics (PNG) format */
  ".ps",	"pstopnm -stdout -portrait -pgm", /* postscript */
  ".eps",	"pstopnm -stdout -portrait -pgm", /* encapsulated postscript */
  /* gs -sDEVICE=pgmraw -sOutputFile=- -g609x235 -r141x141 -q -dNOPAUSE */
  ".fig",	"fig2dev -L ppm -m 3", /* xfig files, transfig package */
  NULL
};
/*
 * Return the converter command for `name`, or NULL when the name does
 * not end in one of the suffixes listed in xlist (or name is NULL).
 *
 * The suffix is compared against the tail of the name.  Searching with
 * strstr() stopped at the first occurrence, so a name such as
 * "dir.jpg.d/photo.jpg" was not recognised; "x.eps.pbm" is still
 * correctly rejected.
 */
char *testsuffix(char *name){
  size_t name_len, suffix_len;
  int i;

  if (name == NULL)
    return NULL;
  name_len = strlen(name);
  for (i = 0; xlist[i] != NULL; i += 2) {
    suffix_len = strlen(xlist[i]);
    if (name_len >= suffix_len
        && strcmp(name + (name_len - suffix_len), xlist[i]) == 0)
      return xlist[i+1];
  }
  return NULL;
}
/*
 * Read the next character from f1, skipping '#' comments (everything
 * from '#' up to and including the end of that line).
 *
 * At end of file a message is printed and (char)EOF is returned, also
 * when the file ends inside a comment; previously that case looped
 * forever.  A stream error is fatal (F0 exits).
 */
char read_char(FILE *f1){ // filter #-comments
  int ch;               /* int, as delivered by fgetc() */
  int in_comment = 0;

  for(;;){
    int at_eof;
    ch = fgetc(f1);
    at_eof = feof(f1);
    if( at_eof ) E0("read feof");
    if( ferror(f1) ) F0("read ferror");
    if( at_eof ) return (char)ch;   /* nothing more will ever arrive */
    if( ch == '#' ) { in_comment = 1; continue; }
    if( !in_comment ) return (char)ch;
    if( ch == '\n' ) in_comment = 0; /* comment ends with the line */
  }
}
/*
* read char from buffer
* buf: pointer to buffer
* pos: pointer to current pos in buffer
* size: size of buffer
*
*/
/*
 * Memory counterpart of fgetc().
 *
 * Returns the byte at *pos as a value in 0..255 and advances *pos.  Once
 * *pos has reached `size` no byte is read: EOF is returned and *pos is
 * moved to size+1, so that feof2() reports the end only after a read has
 * failed, as with stdio.
 *
 * The cast to unsigned char keeps a data byte 0xFF distinct from EOF.
 */
int fgetc2(char* buf, long* pos, long size)
{
  if (*pos >= size) {
    if (*pos == size)
      *pos = size + 1;   /* remember that a read ran past the data */
    return EOF;
  }
  return (unsigned char)buf[(*pos)++];
}
/*
* get end of buffer
* pos: current pos in buffer
* size: size of buffer
*
*/
/* Non-zero once the read position has moved beyond `size`. */
int feof2(long pos, long size)
{
  if (pos > size)
    return 1;
  return 0;
}
/*
 * Buffer counterpart of read_char(): return the next character of buf,
 * skipping '#' comments up to and including the end of their line.
 *
 * When feof2() reports the end of the buffer a message is printed and
 * the character just obtained is returned, also inside a comment;
 * previously a buffer ending inside a comment made this loop forever.
 */
char read_char2(char *buf, long* pos, long size){ // filter #-comments
  char ch;
  int in_comment = 0;

  for(;;){
    ch = fgetc2(buf, pos, size);
    if( feof2(*pos, size) ) {
      E0("read feof");
      return ch;                      /* no further input can follow */
    }
    if( ch == '#' ) { in_comment = 1; continue; }
    if( !in_comment ) return ch;
    if( ch == '\n' ) in_comment = 0;  /* comment ends with the line */
  }
}
/*
for simplicity only PAM of netpbm is used, the older formats
PBM, PGM and PPM can be handled implicitly by PAM routines (js05)
v0.43: return 1 if multiple file (hold it open), 0 otherwise
*/
#ifdef HAVE_PAM_H
/*
 * Read one image through the netpbm PAM routines into *p as 8-bit gray.
 *
 * name: file name, or "-" for stdin; names with a suffix known to
 *       testsuffix() are read through a popen()'ed converter
 * p:    receives the malloc'ed buffer (p->p), width, height and bpp = 1
 * vvv:  non-zero prints progress information to stderr
 *
 * Returns 1 when a further image follows in the same stream (the stream
 * is kept open in a static variable for the next call) and 0 after the
 * last image, when the stream has been closed.  Open and allocation
 * failures are fatal (F0/F1 exit).
 */
int readpgm(char *name, pix * p, int vvv) {
  static FILE *fp=NULL;
  static char *pip=NULL;
  char magic1, magic2;
  int i, j, sample, minv = 0, maxv = 0, eofP=0;
  struct pam inpam;
  tuple *tuplerow;

  assert(p);

  if (!fp) { // fp!=0 for multi-pnm and idx>0
    /* a converter remembered from a previous file must not decide how
       this stream is closed (pclose on stdin or on a plain file) */
    pip = NULL;
    /* open file; test if conversion is needed. */
    if (name[0] == '-' && name[1] == '\0') {
      fp = stdin;
      SET_BINARY (fileno(fp));  // Windows needs it for correct work
    }
    else {
      pip = testsuffix(name);
      if (!pip) {
        fp = fopen(name, "rb");
        if (!fp)
          F1("opening file %s", name);
      }
      else {
        /* command + blank + two quotes + name + terminating NUL */
        char *buf = (char *)malloc((strlen(pip)+strlen(name)+4));
        if (!buf)
          F0("Error at malloc: popen command");
        sprintf(buf, "%s \"%s\"", pip, name); /* allow spaces in filename */
        if (vvv) {
          fprintf(stderr, "# popen( %s )\n", buf);
        }
#ifdef HAVE_POPEN
        /* potential security vulnerability, if name contains tricks */
        /* example: gunzip -c dummy | rm -rf * */
        /* windows needs "rb" for correct work, linux not, cygwin? */
        /* ToDo: do you have better code to go arround this? */
#if defined(__WIN32) || defined(__WIN32__) || defined(__WIN64) || defined(__WIN64__)
        fp = popen(buf, "rb");  /* ToDo: may fail, please report */
        if (!fp) fp = popen(buf, "r"); /* 2nd try, the gnu way */
#else
        fp = popen(buf, "r");
#endif
#else
        F0("sorry, compile with HAVE_POPEN to use pipes");
#endif
        if (!fp)
          F1("opening pipe %s", buf);
        free(buf);
      }
    }
  }

  /* netpbm 0.10.36 tries to write a comment to nonzero char** comment_p */
  /* patch by C.P.Schmidt 21Nov06 */
  memset (&inpam, 0, sizeof(inpam));

  /* read pgm-header */
  /* struct pam may change between netpbm-versions, causing problems? */
#ifdef PAM_STRUCT_SIZE /* ok for netpbm-10.35 */
  /* new-and-better? but PAM_STRUCT_SIZE is not defined in netpbm-10.18 */
  pnm_readpaminit(fp, &inpam, PAM_STRUCT_SIZE(tuple_type));
#else /* ok for netpbm-10.18 old-and-bad for new netpbms */
  pnm_readpaminit(fp, &inpam, sizeof(inpam));
#endif

  p->x = inpam.width;
  p->y = inpam.height;
  magic1=(inpam.format >> 8) & 255; /* 'P' for PNM,PAM */
  magic2=(inpam.format     ) & 255; /* '7' for PAM */
  minv=inpam.maxval;
  if (vvv) {
    fprintf(stderr, "# readpam: format=0x%04x=%c%c h*w(d*b)=%d*%d(%d*%d)\n",
            inpam.format, /* magic1*256+magic2 */
            ((magic1>31 && magic1<127)?magic1:'.'),
            ((magic2>31 && magic2<127)?magic2:'.'),
            inpam.height,
            inpam.width,
            inpam.depth,
            inpam.bytes_per_sample);
  }
  if ( (1.*(p->x*p->y))!=((1.*p->x)*p->y) )
    F0("Error integer overflow");
  if ( !(p->p = (unsigned char *)malloc(p->x*p->y)) )
    F1("Error at malloc: p->p: %d bytes", p->x*p->y);
  tuplerow = pnm_allocpamrow(&inpam);
  for ( i=0; i < inpam.height; i++ ) {
    pnm_readpamrow(&inpam, tuplerow);  /* exit on error */
    for ( j = 0; j < inpam.width; j++ ) {
      if (inpam.depth>=3)
        /* tuplerow is unsigned long (see pam.h sample) */
        /* we expect 8bit or 16bit integers,
           no overflow up to 32-10-2=20 bits */
        sample
          = ((PPM_RED_WEIGHT   * tuplerow[j][0] + 511)>>10)
          + ((PPM_GREEN_WEIGHT * tuplerow[j][1] + 511)>>10)
          + ((PPM_BLUE_WEIGHT  * tuplerow[j][2] + 511)>>10);
      else
        sample = tuplerow[j][0];
      sample = 255 * sample / inpam.maxval; /* normalize to 8 bit */
      p->p[i*inpam.width+j] = sample;
      if (maxv<sample) maxv=sample;
      if (minv>sample) minv=sample;
    }
  }
  pnm_freepamrow(tuplerow);
  pnm_nextimage(fp,&eofP);
  if (vvv)
    fprintf(stderr,"# readpam: min=%d max=%d eof=%d\n", minv, maxv, eofP);
  p->bpp = 1;
  if (eofP) {
    if (!pip) fclose(fp);
#ifdef HAVE_POPEN
    else      pclose(fp);  /* close pipe (v0.43) */
#endif
    fp=NULL; return 0;
  }
  return 1; /* multiple image = concatenated pnm */
}
#else
/*
if PAM not installed, here is the fallback routine,
which is not so powerful but needs no dependencies from other libs
*/
/*
 * Read one unsigned decimal number in text form from f1.
 *
 * buf: receives the value as `bps` bytes, least significant byte first
 * bps: number of bytes to fill; higher digits are silently dropped
 * f1:  input stream, read through read_char() (comments are skipped)
 *
 * Leading white space is skipped; the number ends at the first white
 * space after a digit or at end of file.  Any other character is fatal
 * (F0 exits).  Always returns 0.
 */
static int fread_num(char *buf, int bps, FILE *f1) {
  int mode, j2;
  unsigned int carry;
  char c1;

  for (j2=0;j2<bps;j2++) buf[j2]=0; // initialize value to zero
  for(mode=0;!feof(f1);){ // mode=0 means skip leading spaces, 1 scan digits
    c1=read_char(f1);
    if (feof(f1)) break;  // input exhausted: c1 carries no data
    // ctype functions need an unsigned char value
    if (isspace((unsigned char)c1)) { if (mode==0) continue; else break; }
    mode=1;  // digits scan mode
    if( !isdigit((unsigned char)c1) ) F0("unexpected char");
    // value = value*10 + digit over all bytes; the new digit enters as
    // the initial carry so that it can ripple into the higher bytes
    carry = (unsigned int)(c1-'0');
    for (j2=0;j2<bps;j2++) {
      carry += (unsigned char)buf[j2]*10u;  // unsigned: no sign extension
      buf[j2]=(char)(carry & 255u); carry>>=8;
    }
  }
  return 0;
}
/*
if PAM not installed, here is the fallback routine,
which is not so powerful but needs no dependencies from other libs
*/
/*
 * Read one unsigned decimal number in text form from a memory buffer.
 *
 * buf:    receives the value as `bps` bytes, least significant byte first
 * bps:    number of bytes to fill; higher digits are silently dropped
 * buffer: input data, read through read_char2() (comments are skipped)
 * pos:    current read position in buffer, advanced while reading
 * size:   size of buffer
 *
 * Leading white space is skipped; the number ends at the first white
 * space after a digit or once feof2() reports the end of the buffer.
 * Any other character is fatal (F0 exits).  Always returns 0.
 */
static int fread_num2(char *buf, int bps, char *buffer, long *pos, long size) {
  int mode, j2;
  unsigned int carry;
  char c1;

  for (j2=0;j2<bps;j2++) buf[j2]=0; // initialize value to zero
  for(mode=0;!feof2(*pos, size);){ // mode=0 means skip leading spaces, 1 scan digits
    c1=read_char2(buffer, pos, size);
    if (feof2(*pos, size)) break;  // ran past the data: c1 carries no data
    // ctype functions need an unsigned char value
    if (isspace((unsigned char)c1)) { if (mode==0) continue; else break; }
    mode=1;  // digits scan mode
    if( !isdigit((unsigned char)c1) ) F0("unexpected char");
    // value = value*10 + digit over all bytes; the new digit enters as
    // the initial carry so that it can ripple into the higher bytes
    carry = (unsigned int)(c1-'0');
    for (j2=0;j2<bps;j2++) {
      carry += (unsigned char)buf[j2]*10u;  // unsigned: no sign extension
      buf[j2]=(char)(carry & 255u); carry>>=8;
    }
  }
  return 0;
}
/*
* read image file, used to read the OCR-image and database images,
* image file can be PBM/PGM/PPM in RAW or TEXT
* name: filename of image (input)
* p: pointer where to store the loaded image (input)
* vvv: verbose mode (input)
* return: 0=ok, 1=further image follows (multiple image), -1 on error
* this is the fall back routine if libpnm cant be used
*/
/*
 * Fallback PNM reader: loads one P1..P6 image as 8-bit gray into *p.
 *  name: file to read; "-" selects stdin
 *  p:    on success receives the malloc'ed pixel buffer (p->p), the size
 *        (p->x, p->y) and bpp=1
 *  vvv:  non-zero prints diagnostics to stderr
 * The stream f1, the magic byte c1 and pip are static: after a return of 1
 * the next call continues in the same stream with the following image
 * (the 'P' of that image has already been consumed into c1).
 */
int readpgm(char *name, pix *p, int vvv){
static char c1, c2; /* magic bytes, file type */
static char *pip; // static to survive multiple calls
int nx,ny,nc,mod,i,j; // buffer
static FILE *f1=NULL; // trigger read new file or multi image file
unsigned char *pic;
char buf[512];
int lx, ly, dx;
int bps=1; /* bytes per sample (0..255..65535...) */
if (!f1) { /* first of multiple image, on MultipleImageFiles c1 was read */
pip=NULL;
if (name[0]=='-' && name[1]==0) {
f1=stdin; /* is this correct ??? */
SET_BINARY (fileno(f1)); // Windows needs it for correct work
} else {
/* a non-NULL result of testsuffix() is used below as the command prefix
 * of a popen() pipeline; presumably a converter chosen by file suffix */
pip=testsuffix(name);
if (!pip) {
f1=fopen(name,"rb"); if (!f1) F1("opening file %s",name);
} else {
/* NOTE(review): unbounded sprintf into buf[512]; a long name overflows */
sprintf(buf,"%s \"%s\"",pip,name); /* ToDo: how to prevent OVL ? */
if (vvv) { fprintf(stderr,"# popen( %s )\n",buf); }
#ifdef HAVE_POPEN
#if defined(__WIN32) || defined(__WIN32__) || defined(__WIN64) || defined(__WIN64__)
f1 = popen(buf, "rb"); /* ToDo: may fail, please report */
if (!f1) f1 = popen(buf, "r"); /* 2nd try, the gnu way */
#else
f1=popen(buf,"r");
#endif
#else
F0("only PNM files supported (compiled without HAVE_POPEN)");
#endif
if (!f1) F1("opening pipe %s",buf);
}
}
c1=fgetc(f1); if (feof(f1)) { E0("unexpected EOF"); return -1; }
}
c2=fgetc(f1); if (feof(f1)) { E0("unexpected EOF"); return -1; }
// check the first two bytes of the PNM file
// PBM PGM PPM
// TXT P1 P2 P3
// RAW P4 P5 P6
if (c1!='P' || c2 <'1' || c2 >'6') {
fprintf(stderr,"\nread-PNM-error: file number is %2d,"
" position %ld", fileno(f1), ftell(f1));
fprintf(stderr,"\nread-PNM-error: bad magic bytes, expect 0x50 0x3[1-6]"
" but got 0x%02x 0x%02x", 255&c1, 255&c2);
/* NOTE(review): fclose() is used even when f1 came from popen() */
if (f1) fclose(f1); f1=NULL; return(-1);
}
/* Header parser. mod is a small state counter: even values skip white
 * space, odd values collect the digits of nx (1), ny (3) and nc (5).
 * Gray and colour formats need three numbers (stop at 6), bitmaps only
 * two (stop at 4); for bitmaps nc is preset to 1. */
nx=ny=nc=0; if (c2=='4' || c2=='1') nc=1;
for(mod=0;((c2=='5' || c2=='2') && (mod&7)<6)
|| ((c2=='6' || c2=='3') && (mod&7)<6)
|| ((c2=='4' || c2=='1') && (mod&7)<4);)
{ // mode: 0,2,4=[ |\t|\r|\n]
// 1=nx 3=ny 5=nc 8-13=#rem
c1=read_char(f1); // former: # mod|=8
if( (mod & 1)==0 ) // whitespaces
if( !isspace(c1) ) mod++;
if( (mod & 1)==1 ) {
if( !isdigit(c1) ) {
if( !isspace(c1) )F0("unexpected character");
mod++; }
else if(mod==1) nx=nx*10+c1-'0';
else if(mod==3) ny=ny*10+c1-'0';
else if(mod==5) nc=nc*10+c1-'0';
}
}
if(vvv)
fprintf(stderr,"# PNM P%c h*w=%d*%d c=%d head=%ld",c2,ny,nx,nc,ftell(f1));
if( c2=='4' && (nx&7)!=0 ){
/* nx=(nx+7)&~7;*/ if(vvv)fprintf(stderr," PBM2PGM nx %d",(nx+7)&~7);
}
/* derive the sample width from the maximum value nc */
if (nc>> 8) bps=2; // bytes per color and pixel
if (nc>>16) bps=3;
if (nc>>24) bps=4;
fflush(stdout);
/* compare the int product with the same product computed in double */
if ( (1.*(nx*ny))!=((1.*nx)*ny) )
F0("Error integer overflow");
pic=(unsigned char *)malloc( nx*ny );
if(pic==NULL)F0("memory failed"); // no memory
for (i=0;i<nx*ny;i++) pic[i]=255; // init to white if reading fails
/* this is a slow but short routine for P1 to P6 formats */
if( c2=='5' || c2=='2' ) /* slow PGM-RAW/ASC read pixelwise */
for (i=0;i<nx*ny;i++) {
if (c2=='5') { if(bps!=(int)fread(buf,1,bps,f1)) {
fprintf(stderr," ERROR reading at head+%d*%d\n", bps, i); break; } }
/* NOTE(review): the ASCII gray branch calls fread_num three times per
 * pixel, exactly like the PPM branch below - confirm this is intended */
else for (j=0;j<3;j++) fread_num(buf+j*bps, bps, f1);
pic[i]=buf[bps-1]; /* store the most significant byte */
}
// we want to normalize brightness to 0..255
if (c2=='6' || c2=='3') { // PPM-RAW/ASC
for (i=0;i<nx*ny;i++) {
if (c2=='6') { if (3*bps!=(int)fread(buf,1,3*bps,f1)){
fprintf(stderr," ERROR reading at head+3*%d*%d\n", bps, i); break; } }
else for (j=0;j<3;j++) fread_num(buf+j*bps, bps, f1);
/* weighted sum of the last byte of each of the three samples; the
 * weights are the PPM_*_WEIGHT constants scaled by 1024 (>>10) */
pic[i]
= ((PPM_RED_WEIGHT * (unsigned char)buf[ bps-1] + 511)>>10)
+ ((PPM_GREEN_WEIGHT * (unsigned char)buf[2*bps-1] + 511)>>10)
+ ((PPM_BLUE_WEIGHT * (unsigned char)buf[3*bps-1] + 511)>>10);
/* normalized to 0..255 */
}
}
if( c2=='1' )
/* every digit is one pixel: '0' becomes 255 (white), others 0 (black) */
for(mod=j=i=0,nc=255;i<nx*ny && !feof(f1);){ // PBM-ASCII 0001100
c1=read_char(f1);
if( isdigit(c1) ) { pic[i]=((c1=='0')?255:0); i++; }
else if( !isspace(c1) )F0("unexpected char");
}
if( c2=='4' ){ // PBM-RAW
dx=(nx+7)&~7; // dx (mod 8)
/* the packed rows (dx/8 bytes each) are read into the start of pic and
 * expanded in place; walking backwards from the last pixel keeps the
 * packed bytes intact until they have been read */
if(ny!=(int)fread(pic,dx>>3,ny,f1))F0("read"); // read all bytes
for(ly=ny-1;ly>=0;ly--)
for(lx=nx-1;lx>=0;lx--)
pic[lx+ly*nx]=( (128 & (pic[(lx+ly*dx)>>3]<<(lx & 7))) ? 0 : 255 );
nc=255;
}
{
/* gray range of the loaded image, only used for the verbose output */
int minc=255, maxc=0;
for (i=0;i<nx*ny;i++) {
if (pic[i]>maxc) maxc=pic[i];
if (pic[i]<minc) minc=pic[i];
}
if (vvv) fprintf(stderr," min=%d max=%d", minc, maxc);
}
p->p=pic; p->x=nx; p->y=ny; p->bpp=1;
if (vvv) fprintf(stderr,"\n");
/* look one byte ahead: another 'P' means a further image follows */
c1=0; c1=fgetc(f1); /* needed to trigger feof() */
if (feof(f1) || c1!='P') { /* EOF ^Z or not 'P' -> single image */
if (vvv) fprintf(stderr,"# PNM EOF\n");
if(name[0]!='-' || name[1]!=0){ /* do not close stdin */
if(!pip) fclose(f1);
#ifdef HAVE_POPEN
else pclose(f1); /* close pipe (Jul00) */
#endif
}
f1=NULL; /* set file is closed flag */
return 0;
}
return 1; /* multiple image = concatenated pnm's */
}
#endif /* HAVE_PAM_H */
/*
 * fread()-like read from an in-memory buffer.
 *  bufOut: destination, must have room for size*count bytes
 *  size:   size in bytes of one element
 *  count:  number of elements to read
 *  bufIn:  source buffer
 *  pos:    in/out, current read position within bufIn (advanced by the
 *          number of bytes consumed)
 *  sizeb:  total size of bufIn in bytes
 * return: number of COMPLETE elements copied, which is smaller than count
 *         when the end of the buffer is reached (same contract as fread).
 *         The bytes of a trailing partial element are still copied and
 *         *pos is parked at sizeb, but that element is not counted.
 */
size_t fread2(void *bufOut, size_t size, size_t count, char* bufIn, long* pos, long sizeb)
{
  char *dst = (char *)bufOut;
  size_t done = 0;
  size_t avail;

  /* zero-sized elements need no data; report them all as read */
  if (size == 0) return count;

  /* a cursor outside the buffer leaves nothing to read; this also keeps
   * the length passed to memcpy from ever becoming negative */
  avail = (*pos >= 0 && *pos < sizeb) ? (size_t)(sizeb - *pos) : 0;

  while (done < count)
  {
    if (size > avail)
    {
      /* short read: hand over what is left, then stop at end of buffer */
      if (avail > 0)
      {
        memcpy(dst, bufIn + *pos, avail);
        *pos += (long)avail;
      }
      break;
    }
    memcpy(dst, bufIn + *pos, size);
    dst   += size;
    *pos  += (long)size;
    avail -= size;
    ++done;
  }
  return done;
}
/*
 * read an image from memory, used to read the OCR-image and database images,
 * image data can be PBM/PGM/PPM in RAW or TEXT
 * buffer: pointer to the encoded image bytes (input)
 * size: number of bytes in buffer (input)
 * p: pointer where to store the loaded image (output: p->p is a malloc'ed
 *    8-bit gray buffer, p->x / p->y the size, p->bpp is set to 1)
 * return: 0=ok, 1=further image follows (multiple image), -1 on error
 * this is the fall back routine if libpnm cant be used
 * Unlike readpgm() the read position is a local variable, so every call
 * starts again at the beginning of buffer.
 */
int readpgmFromBuffer(char* buffer, long size, pix *p){
/* NOTE(review): c1/c2 are static although both are assigned before use */
static char c1, c2; /* magic bytes, file type */
int nx,ny,nc,mod,i,j; // buffer
unsigned char *pic;
char buf[512];
int lx, ly, dx;
int bps=1; /* bytes per sample (0..255..65535...) */
long pos = 0;
c1=fgetc2(buffer, &pos, size); if (feof2(pos, size)) { E0("unexpected EOF"); return -1; }
c2=fgetc2(buffer, &pos, size); if (feof2(pos, size)) { E0("unexpected EOF"); return -1; }
// check the first two bytes of the PNM file
// PBM PGM PPM
// TXT P1 P2 P3
// RAW P4 P5 P6
if (c1!='P' || c2 <'1' || c2 >'6') {
return(-1);
}
/* Header parser. mod is a small state counter: even values skip white
 * space, odd values collect the digits of nx (1), ny (3) and nc (5).
 * Gray and colour formats need three numbers (stop at 6), bitmaps only
 * two (stop at 4); for bitmaps nc is preset to 1. */
nx=ny=nc=0; if (c2=='4' || c2=='1') nc=1;
for(mod=0;((c2=='5' || c2=='2') && (mod&7)<6)
|| ((c2=='6' || c2=='3') && (mod&7)<6)
|| ((c2=='4' || c2=='1') && (mod&7)<4);)
{ // mode: 0,2,4=[ |\t|\r|\n]
// 1=nx 3=ny 5=nc 8-13=#rem
c1=read_char2(buffer, &pos, size); // former: # mod|=8
if( (mod & 1)==0 ) // whitespaces
if( !isspace(c1) ) mod++;
if( (mod & 1)==1 ) {
if( !isdigit(c1) ) {
if( !isspace(c1) )F0("unexpected character");
mod++; }
else if(mod==1) nx=nx*10+c1-'0';
else if(mod==3) ny=ny*10+c1-'0';
else if(mod==5) nc=nc*10+c1-'0';
}
}
/* derive the sample width from the maximum value nc */
if (nc>> 8) bps=2; // bytes per color and pixel
if (nc>>16) bps=3;
if (nc>>24) bps=4;
fflush(stdout);
/* compare the int product with the same product computed in double */
if ( (1.*(nx*ny))!=((1.*nx)*ny) )
F0("Error integer overflow");
pic=(unsigned char *)malloc( nx*ny );
if(pic==NULL)F0("memory failed"); // no memory
for (i=0;i<nx*ny;i++)pic[i]=255; // init to white if reading fails
/* this is a slow but short routine for P1 to P6 formats */
if( c2=='5' || c2=='2' ) /* slow PGM-RAW/ASC read pixelwise */
for (i=0;i<nx*ny;i++) {
if (c2=='5') { if(bps!=(int)fread2(buf,1,bps,buffer,&pos,size)) {
fprintf(stderr," ERROR reading at head+%d*%d\n", bps, i); break; } }
/* NOTE(review): the ASCII gray branch calls fread_num2 three times per
 * pixel, exactly like the PPM branch below - confirm this is intended */
else for (j=0;j<3;j++) fread_num2(buf+j*bps, bps, buffer, &pos, size);
pic[i]=buf[bps-1]; /* store the most significant byte */
}
// we want to normalize brightness to 0..255
if (c2=='6' || c2=='3') { // PPM-RAW/ASC
for (i=0;i<nx*ny;i++) {
if (c2=='6') { if (3*bps!=(int)fread2(buf,1,3*bps,buffer,&pos,size)){
fprintf(stderr," ERROR reading at head+3*%d*%d\n", bps, i); break; } }
else for (j=0;j<3;j++) fread_num2(buf+j*bps, bps, buffer, &pos, size);
/* weighted sum of the last byte of each of the three samples; the
 * weights are the PPM_*_WEIGHT constants scaled by 1024 (>>10) */
pic[i]
= ((PPM_RED_WEIGHT * (unsigned char)buf[ bps-1] + 511)>>10)
+ ((PPM_GREEN_WEIGHT * (unsigned char)buf[2*bps-1] + 511)>>10)
+ ((PPM_BLUE_WEIGHT * (unsigned char)buf[3*bps-1] + 511)>>10);
/* normalized to 0..255 */
}
}
if( c2=='1' )
/* every digit is one pixel: '0' becomes 255 (white), others 0 (black) */
for(mod=j=i=0,nc=255;i<nx*ny && !feof2(pos, size);){ // PBM-ASCII 0001100
c1=read_char2(buffer, &pos, size);
if( isdigit(c1) ) { pic[i]=((c1=='0')?255:0); i++; }
else if( !isspace(c1) )F0("unexpected char");
}
if( c2=='4' ){ // PBM-RAW
dx=(nx+7)&~7; // dx (mod 8)
/* the packed rows (dx/8 bytes each) are copied to the start of pic and
 * expanded in place; walking backwards from the last pixel keeps the
 * packed bytes intact until they have been read */
if(ny!=(int)fread2(pic,dx>>3,ny,buffer,&pos,size))F0("read"); // read all bytes
for(ly=ny-1;ly>=0;ly--)
for(lx=nx-1;lx>=0;lx--)
pic[lx+ly*nx]=( (128 & (pic[(lx+ly*dx)>>3]<<(lx & 7))) ? 0 : 255 );
nc=255;
}
{
/* NOTE(review): minc/maxc are computed but never read in this variant
 * (readpgm() prints them in verbose mode) */
int minc=255, maxc=0;
for (i=0;i<nx*ny;i++) {
if (pic[i]>maxc) maxc=pic[i];
if (pic[i]<minc) minc=pic[i];
}
}
p->p=pic; p->x=nx; p->y=ny; p->bpp=1;
/* look one byte ahead: another 'P' means a further image follows */
c1=0; c1=fgetc2(buffer, &pos, size); /* needed to trigger feof() */
if (feof2(pos, size) || c1!='P') { /* EOF ^Z or not 'P' -> single image */
return 0;
}
return 1; /* multiple image = concatenated pnm's */
}
/*
 * Write image p as raw PGM (P5, maxval 255) to file nam.
 * A 3-bytes-per-pixel image is first reduced to gray IN PLACE (mean of the
 * three channels stored at the front of p->p), so p->p is modified.
 * return: 0 (open and write failures are reported through F0)
 */
int writepgm(char *nam,pix *p){
  FILE *out = fopen(nam,"wb");
  int row, col, idx;

  if (out == NULL) F0("open");
  fprintf(out,"P5\n%d %d\n255\n",p->x,p->y);

  if (p->bpp == 3) {
    for (row = 0; row < p->y; row++) {
      for (col = 0; col < p->x; col++) {
        idx = col + row * p->x;
        /* idx <= 3*idx, so the compacted gray value never overwrites
         * colour samples that are still to be read */
        p->p[idx] = (p->p[3*idx+0] + p->p[3*idx+1] + p->p[3*idx+2]) / 3;
      }
    }
  }

  /* one fwrite item per image row */
  if ((int)fwrite(p->p, p->x, p->y, out) != p->y) F0("write");
  fclose(out);
  return 0;
}
/*
 * Tint one RGB pixel, saturating every channel to 0..255.
 * Dark pixels (channel sum below 3*160) get twice the requested amounts
 * added; bright pixels instead get the two OTHER channels' amounts
 * subtracted, so the requested colour still stands out.
 *  rgb: pixel to modify in place
 *  sr, sg, sb: requested red / green / blue strength
 */
void addrgb(unsigned char rgb[3], int sr, int sg, int sb) {
  int delta[3];
  int ch;
  int brightness = (int)rgb[0] + (int)rgb[1] + (int)rgb[2];

  if (brightness >= 3*160) {
    /* bright pixel: darken the complementary channels */
    delta[0] = -sg - sb;
    delta[1] = -sr - sb;
    delta[2] = -sr - sg;
  } else {
    /* dark pixel: brighten the requested channels */
    delta[0] = 2*sr;
    delta[1] = 2*sg;
    delta[2] = 2*sb;
  }

  for (ch = 0; ch < 3; ch++) {
    int value = rgb[ch];
    int step;
    if (delta[ch] < 0) {
      /* never subtract more than the channel holds */
      step = -delta[ch];
      if (value < step) step = value;
      rgb[ch] = (unsigned char)(value - step);
    } else {
      /* never add more than the headroom up to 255 */
      step = 255 - value;
      if (!(step < delta[ch])) step = delta[ch];
      rgb[ch] = (unsigned char)(value + step);
    }
  }
}
/*
* pgmtoppm or pnmtopng, use the low 4 (marker) bits for colour coding
* replaces old writebmp variant
*/
/*
 * Write image p as colour PPM (P6); the low 4 bits of every gray pixel are
 * treated as marker bits and rendered as blue/green/red tints.
 *  nam: output name. Names containing ".ppm" are written directly;
 *       otherwise (with HAVE_POPEN) the data is piped to
 *       "pnmtopng > nam.png", then "gzip -c > nam.ppm.gz", and as last
 *       resort written to "nam.ppm".
 *  p:   image with bpp 1 (gray + marker bits) or bpp 3 (written as is)
 * return: 0, or -1 if nam contains '|' (open failure goes through F0)
 *
 * Over-long names are truncated to fit buf. The terminator is now written
 * directly behind the last byte strncpy() may have copied; previously it
 * was placed one byte further, leaving an uninitialised/stale byte in the
 * command or file name.
 */
int writeppm(char *nam, pix *p){ /* P6 raw-ppm */
  FILE *f1=NULL; int x,y,f1t=0; unsigned char rgb[3], gray, bits;
  char buf[128];
  if (strchr(nam,'|')) return -1; /* no nasty code */
  if (strstr(nam,".ppm")) { f1=fopen(nam,"wb"); }
#ifdef HAVE_POPEN
  /* be sure that nam contains NO hacker code like "dummy | rm -rf *" */
  /* NOTE(review): nam is pasted unquoted into a shell command and only '|'
   * is rejected above; other shell metacharacters (; & ` $ > space) still
   * pass - callers must not hand in untrusted names */
  if (!f1) {
    strncpy(buf,"pnmtopng > ",12); /* no spaces within filenames allowed! */
    strncpy(buf+11,nam,111); buf[122]=0; /* 11+111 bytes, then terminate */
    strncpy(buf+strlen(buf),".png",5);
    /* we dont care about win "wb" here, never debug on win systems */
    f1 = popen(buf, "w"); if(f1) f1t=1; else E0("popen pnmtopng");
  }
  if (!f1) {
    strncpy(buf,"gzip -c > ",11);
    strncpy(buf+10,nam,109); buf[119]=0; /* 10+109 bytes, then terminate */
    strncpy(buf+strlen(buf),".ppm.gz",8);
    /* we dont care about win "wb" here, never debug on win systems */
    f1 = popen(buf, "w"); if(f1) f1t=1; else E0("popen gzip -c");
  }
#endif
  if (!f1) {
    strncpy(buf,nam,113); buf[113]=0; /* 113 bytes, then terminate */
    strncpy(buf+strlen(buf),".ppm",5);
    f1=fopen(buf,"wb");
  }
  if (!f1) F0("open"); /* open-error */
  fprintf(f1,"P6\n%d %d\n255\n",p->x,p->y);
  if ( p->bpp==1 )
    for (y=0;y<p->y;y++)
      for (x=0;x<p->x;x++){
        gray=p->p[x+y*p->x];
        bits=(gray&0x0F); /* save marker bits */
        /* replace used bits to get max. contrast, 160=0xA0 */
        gray = ((gray<160) ? (gray&~0x0F)>>1 : 0xC3|(gray>>1) );
        rgb[0] = rgb[1] = rgb[2] = gray;
        if ((bits & 1)==1) { addrgb(rgb,0,0,8+8*((x+y)&1)); } /* dark blue */
        if ((bits & 8)==8) { addrgb(rgb,0,0, 16); } /* blue (low priority) */
        if ((bits & 6)==6) { addrgb(rgb,0,0, 32); } /* blue */
        if ((bits & 6)==4) { addrgb(rgb,0,48,0); } /* green */
        if ((bits & 6)==2) { addrgb(rgb,32,0,0); } /* red */
        /* on a write error also force the outer loop to end */
        if ( 1!=(int)fwrite(rgb,3,1,f1) ) { E0("write"); y=p->y; break; }
      }
  if ( p->bpp==3 )
    if ( p->y!=(int)fwrite(p->p,3*p->x,p->y,f1) ) E0("write");
#ifdef HAVE_POPEN
  if (f1t) { pclose (f1); f1=NULL; } /* pipes need pclose, not fclose */
#endif
  if (f1) fclose(f1);
  return 0;
}
/*
 * Write image p as raw PBM (P4) to file nam; the leftmost pixel of each
 * byte is stored in the highest bit.
 * The image is thresholded (gray > 127 -> white/0, else black/1) and packed
 * IN PLACE into the front of p->p, so the pixel data of p is destroyed.
 * return: 0 (open and write failures are reported through F0)
 */
int writepbm(char *nam,pix *p){
  FILE *out;
  int col, row, byteidx, bit, pixel;
  int padded = (p->x + 7) & ~7;   /* row width rounded up to a multiple of 8 */

  for (row = 0; row < p->y; row++) {
    for (col = 0; col < p->x; col++) {
      byteidx = (col + row * padded) >> 3;  /* target byte ...           */
      bit     = 7 - (col & 7);              /* ... and bit within it     */
      pixel   = col + row * p->x;
      if (p->bpp == 3) {
        pixel = (p->p[3*pixel+0] + p->p[3*pixel+1] + p->p[3*pixel+2]) / 3;
      } else {
        pixel = p->p[pixel];
      }
      pixel = (pixel > 127) ? 0 : 1;
      /* the mask clears the target bit and all bits below it; the lower
       * bits are filled by the following pixels of the same byte */
      p->p[byteidx] = (p->p[byteidx] & (~1<<bit)) | (pixel<<bit);
    }
  }

  out = fopen(nam,"wb");
  if (out == NULL) F0("open");
  fprintf(out,"P4\n%d %d\n",p->x,p->y);
  /* one fwrite item per packed row */
  if ((int)fwrite(p->p, padded>>3, p->y, out) != p->y) F0("write");
  fclose(out);
  return 0;
}
// ------------------------------------------------------------------------

View File

@@ -0,0 +1,87 @@
/* ---------------------------- progress output ---------------------- */
#include <stdlib.h>
#include <stdio.h>
#include "progress.h"
/* Set by ini_progress(); while it is NULL progress() returns immediately
 * without printing anything. */
FILE *fp=NULL; /* output stream for progress info */
/* progress() compares the time since the last printout against fractions
 * and multiples of this value to decide when to print and how many calls
 * to skip. */
time_t printinterval = 10; /* approx. seconds between printouts, 1.. */
/* initialization of progress output, fname="<fileID>","<filename>","-"
 *  fname: NULL or "" - progress output stays disabled
 *         "-"        - write to stdout
 *         "1".."255" - (only with __USE_POSIX) write to that already open
 *                      file descriptor; the string must consist of the
 *                      number only
 *         otherwise  - name of a file to create (or append to)
 * A stream opened by a previous call is closed first. stdout is never
 * closed: it is not owned by this module, and closing it would break all
 * later output of the program when ini_progress() is called again.
 * return: 0 on success, -1 if the stream could not be opened
 */
int ini_progress(char *fname){
  int fd;
  if (fp) {
    if (fp != stdout) fclose(fp);
    fp=NULL;
  }
  if (fname) if (fname[0]) {
    fd=atoi(fname);
    /* accept fd only if nothing follows the 1..3 digits atoi() consumed */
    if(fd>255 || fname[((fd>99)?3:((fd>9)?2:1))]) fd=-1; /* be sure */
    if (fname[0]=='-' && fname[1]==0) { fp=stdout; }
#ifdef __USE_POSIX
    else if (fd>0) { fp=fdopen(fd,"w"); } /* not sure that "w" is ok ???? */
#endif
    else { fp=fopen(fname,"w");if(!fp)fp=fopen(fname,"a"); }
    if (!fp) {
      fprintf(stderr,"could not open %s for progress output\n",fname);
      return -1; /* no success */
    }
  }
  /* fprintf(stderr,"# progress: fd=%d\n",fileno(fp)); */
  return 0; /* no error */
}
/*
 * Allocate and initialise a progress counter.
 *  maxcount: expected final counter value (printed as the total)
 *  name:     label printed with every progress line; the pointer is stored,
 *            not copied
 * return: the new counter, or NULL if the allocation failed (nonfatal)
 */
progress_counter_t *open_progress(int maxcount, const char *name){
  progress_counter_t *counter =
      (progress_counter_t*) malloc( sizeof(progress_counter_t) );

  if (counter == NULL) {
    return 0; /* nonfatal */
  }

  counter->starttime      = time(NULL);
  counter->maxcount       = maxcount;
  counter->numskip        = 0;
  counter->lastprintcount = -1;
  counter->name           = name;
  /* nothing printed yet: pretend the last printout happened at the start */
  counter->lastprinttime  = counter->starttime;
  return counter;
}
/* release a counter created by open_progress(); NULL is accepted.
 * return: always 0 */
int close_progress(progress_counter_t *counter){
  if (counter != NULL) {
    free(counter);
  }
  return 0;
}
/* progress meter output
 * only 1output/10s, + estimated endtime (test on pixelfields)
 * ToDo: to stderr by default? remove subprogress, ini_progress? rm_progress?
 * test on tcl
 *
 *  counter: current position, compared against pc->maxcount
 *  pc:      counter from open_progress(); may be NULL (open_progress()
 *           returns NULL when out of memory and calls that "nonfatal"),
 *           in which case nothing is printed
 * return: always 0
 *
 * To stay cheap when called very often, the function adapts pc->numskip,
 * the number of calls that are skipped without even reading the clock:
 * it grows while calls arrive much faster than printinterval and is
 * halved when a printout came too late.
 */
int progress(int counter, progress_counter_t *pc){
  /* we try to save computing time, so we skip early */
  if ((!fp) || (!pc) || counter - pc->lastprintcount <= pc->numskip) return 0;
  {
    char cr='\n';
    time_t now = time(NULL);
#if 0 /* debugging */
    if (counter)
      fprintf(fp," progress %s %3d / %d time %d skip %d\n",
        pc->name,counter,pc->maxcount,(int)(now - pc->starttime),
        pc->numskip); fflush(fp);
#endif
    if (5*(now - pc->lastprinttime) < 2*printinterval
     && counter - pc->lastprintcount >= pc->numskip) { /* save for tests */
      if (pc->numskip < 1024) pc->numskip += pc->numskip+1;
    }
    if (3*(now - pc->lastprinttime) < 2*printinterval ) {
      return 0; /* to early for printing */
    }
    if (2*(now - pc->lastprinttime) > 3*printinterval ) {
      pc->numskip >>= 1; /* to late for printing */
    }
    /* on stdin/stdout/stderr descriptors overwrite the same line */
    if (fileno(fp)<3) cr='\r'; /* may be choosen in ini? */
    if (counter)
      fprintf(fp," progress %s %5d / %d time[s] %5d / %5d (skip=%d)%c",
        pc->name,counter,pc->maxcount,
        (int)(now - pc->starttime), /* time gone since start */
        (int)(now - pc->starttime)*pc->maxcount/(counter), /* estimated */
        pc->numskip, cr);
    fflush(fp);
    pc->lastprintcount=counter;
    pc->lastprinttime=now;
  }
  return 0; /* no error */
}
/* --------------------- end of progress output ---------------------- */

View File

@@ -0,0 +1,703 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 2000-2009 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
*/
#include <stdlib.h>
#include <stdio.h>
#include "pgm2asc.h"
#include "gocr.h"
#include "progress.h"
/*
 * Measure the mean stroke thickness of a box, used as a criterion for big
 * chars. For every row strictly between y0 and y1 two runs are measured
 * from the left edge with loop(): first with colour argument 0, then,
 * starting where that run ended, with colour argument 1 (presumably the
 * white gap followed by the first black stroke - confirm against loop()).
 * return: the rounded mean of the second run length over those rows, or
 *         the plain sum if the box has no inner rows to divide by
 */
int mean_thickness( struct box *box2 ){
  int width = box2->x1 - box2->x0 + 1;
  int rows  = box2->y1 - box2->y0 - 1;   /* number of inner rows */
  int total = 0;
  int row, run;

  row = box2->y0 + 1;
  while (row < box2->y1) {
    run = loop(box2->p, box2->x0+0,   row, width, JOB->cfg.cs, 0, RI);
    run = loop(box2->p, box2->x0+run, row, width, JOB->cfg.cs, 1, RI);
    total += run;
    row++;
  }

  if (rows == 0) return total;
  return (total + rows/2) / rows;        /* rounded division */
}
/* ---- remove dust ---------------------------------
What is dust? I think, this is a very small pixel cluster without
neighbours. Of course not all dust clusters can be detected correctly.
This feature should be possible to switch off via option.
-> may be, all clusters should be stored here?
speed is very slow, I know, but I am happy that it is working well

The function works in three passes over job->res.boxlist:
 1. collect statistics (histogram of frame volumes, mean box size) and,
    if cfg.dust_size is negative, derive the dust size from them
 2. delete every box whose volume is <= cfg.dust_size (pixels are cleared,
    the box is removed from the list and freed)
 3. inside large black boxes clear isolated white pixels
return: always 0
*/
int remove_dust( job_t *job ){
/* new dust removing */
/* FIXME jb:remove pp */
pix *pp = &job->src.p;
int i1,i,j,x,y,x0,x1,y0,y1,nC,sX,sY,sP, cs,vvv=job->cfg.verbose;
struct box *box2;
#define HISTSIZE 220 /* histogramm size */
int histo[HISTSIZE];
cs=job->cfg.cs; sP=sX=sY=nC=0;
/*
* count number of black pixels within a box and store it in .dots
* later .dots is re-used for number of objects belonging to the character
* should be done in the flood-fill algorithm
* volume of white pixels is estimated to big here (left/right rot)
* ToDo: mean thickness of char lines?
* or interval nesting (minP..maxP) to remove outriders
*/
j=0;
for (i1=0;i1<HISTSIZE;i1++) histo[i1]=0;
/* mean value over every black object which is big enough */
for_each_data(&(job->res.boxlist)) {
box2 = (struct box *)list_get_current(&(job->res.boxlist));
if (!box2->num_frames) continue;
if (box2->frame_vol[0]<0) continue; /* don't count inner holes */
j = abs(box2->frame_vol[0]);
/* boxes higher than 3 pixels feed the mean size: nC boxes with summed
 * width sX, height sY and volume sP */
if ((box2->y1-box2->y0+1)>3) {
nC++; /* only count potential chars v0.42 */
sX+=box2->x1 - box2->x0 + 1;
sY+=box2->y1 - box2->y0 + 1;
sP+=j;
}
/* histo[v] = number of objects with volume v (all heights) */
if (j<HISTSIZE) histo[j]++;
} end_for_each(&(job->res.boxlist));
if (job->cfg.dust_size < 0 && nC > 0) { /* auto detection */
/* this formula is empirically, high resolution scans have bigger dust */
/* maximum allowed dustsize (min=4*7 ca. 32)
* does not work for background pattern!
*/
job->cfg.dust_size = ( ( sX/nC ) * ( sY/nC ) + 16) / 32;
if (vvv) fprintf(stderr, "# remove.c remove_dust(): ");
if (vvv) fprintf(stderr, "\n# dust size detection, vol num"
" #obj=%d maxDust=%d mpixel= %3d mxy= %2d %2d",
nC, job->cfg.dust_size, sP/nC, sX/nC, sY/nC);
/* we assume that for random dust applies histo[i+1]<histo[i] */
/* walk up the histogram until one of the stop criteria hits; the value
 * of i at the break determines the final dust size (i-1) below */
for (i=1;i+3<HISTSIZE;i++){
if (vvv) fprintf(stderr,"\n# dust size histogram %3d %5d",i,histo[i]);
if (histo[i]>=nC) continue; /* v0.42 lot of pixels -> bg pattern < 3 */
if (i>=job->cfg.dust_size) break; /* maximum = mean size / 32 */
if (histo[i/*+1*/]==0) break; /* bad statistic */
if ((histo[i+2]+histo[i+3])
>=(histo[i] +histo[i+1])) break; /* no noise, but to late? */
if ( histo[i-1] > 1024*histo[i] &&
2*histo[i+1] >=histo[i]) break; /* bg pattern */
}
if (vvv) fprintf(stderr," break");
/* verbose only: dump (at most about 20 lines of) the rest of the histogram;
 * the j==HISTSIZE-1 test comes first so histo[j+1] is never read out of range */
if (vvv) for (i1=0,j=i+1;j<HISTSIZE;j++) {
/* compressed, output only if something is changing */
if (j==HISTSIZE-1 || histo[j]!=histo[j-1] || histo[j]!=histo[j+1]) {
fprintf(stderr,"\n# dust size histogram %3d %5d",j,histo[j]);
if (++i1>20) break; /* dont do excessive output */
}
}
job->cfg.dust_size=i-1;
/* what is the statistic of random dust?
* if we have p pixels on a x*y image we should have
* (p/(x*y))^1 * (x*y) = p singlets
* (p/(x*y))^2 * (x*y) = p^2/(x*y) doublets and
* (p/(x*y))^3 * (x*y) = p^3/(x*y)^2 triplets
*/
/* NOTE(review): divides by job->res.numC, which is not checked here
 * (only nC is known to be > 0) */
if (vvv) fprintf(stderr,"\n# auto dust size = %d nC= %3d .. %3d"
" avD= %2d %2d .. %2d %2d\n",
job->cfg.dust_size, nC, job->res.numC,
(job->res.sumX+job->res.numC/2)/job->res.numC,
(job->res.sumY+job->res.numC/2)/job->res.numC, sX/nC, sY/nC);
}
if (job->cfg.dust_size)
{ i=0;
if(vvv){
fprintf(stderr,"# remove dust of size %2d",job->cfg.dust_size);
/* Warning: better use (1/(x*y))^2 as 1/((x*y)^2),
* because (x*y)^2 may overflow */
/* NOTE(review): divides by pp->x*pp->y, zero for an empty image */
fprintf(stderr," histo=%d,%d(?=%d),%d(?=%d),...\n# ...",
histo[1],histo[2],histo[1]*histo[1]/(pp->x*pp->y),
histo[3], histo[1]*histo[1]/(pp->x*pp->y)
*histo[1]/(pp->x*pp->y));
}
i = 0;
for_each_data(&(job->res.boxlist)) {
box2 = (struct box *)list_get_current(&(job->res.boxlist));
x0=box2->x0;x1=box2->x1;y0=box2->y0;y1=box2->y1; /* box */
j=abs(box2->frame_vol[0]);
if(j<=job->cfg.dust_size) /* remove this tiny object */
{ /* here we should distinguish dust and i-dots,
* may be we should sort out dots to a separate dot list and
* after line detection decide, which is dust and which not
* dust should be removed to make recognition easier (ToDo)
*/
#if 0
if(get_bw((3*x0+x1)/4,(x0+3*x1)/4,y1+y1-y0+1,y1+8*(y1-y0+1),pp,cs,1))
continue; /* this idea was to simple, see kscan003.jpg sample */
#endif
/* remove from average */
job->res.numC--;
job->res.sumX-=x1-x0+1;
job->res.sumY-=y1-y0+1;
/* remove pixels (should only be done with dust) */
for(x=x0;x<=x1;x++)
for(y=y0;y<=y1;y++){ put(pp,x,y,0,255&~7); }
/* remove from list */
/* the box is deleted while the list is being iterated; this relies on
 * the list macros supporting deletion of the current element */
list_del(&(job->res.boxlist),box2);
/* free memory */
free_box(box2);
i++; /* count as dust particle */
continue;
}
} end_for_each(&(job->res.boxlist));
if(vvv)fprintf(stderr," %3d cluster removed, nC= %3d\n",i,job->res.numC);
}
/* reset dots to 0 and remove white pixels (new) */
i=0;
for_each_data(&(job->res.boxlist)) {
box2 = ((struct box *)list_get_current(&(job->res.boxlist)));
if (box2->frame_vol[0]<0) continue; /* for black areas only */
x0=box2->x0;x1=box2->x1;y0=box2->y0;y1=box2->y1; /* box */
if (x1-x0>16 && y1-y0>30) /* only on large enough chars */
for(x=x0+1;x<=x1-1;x++)
for(y=y0+1;y<=y1-1;y++){
/* a pixel at or above the threshold cs whose four direct neighbours
 * are all below cs is an isolated white pixel */
if( pixel_atp(pp,x ,y )>=cs
&& pixel_atp(pp,x-1,y ) <cs
&& pixel_atp(pp,x+1,y ) <cs
&& pixel_atp(pp,x ,y-1) <cs
&& pixel_atp(pp,x ,y+1) <cs ) /* remove it */
{
put(pp,x,y,0,0); i++; /* (x and 0) or 0 */
}
}
} end_for_each(&(job->res.boxlist));
if (vvv) fprintf(stderr,"# ... %3d white pixels removed, cs=%d nC= %3d\n",
i,cs,job->res.numC);
return 0;
}
/* ---- smooth big chars ---------------------------------
* Big chars often do not have smooth borders, which makes the
* engine fail. Here we smooth the borders of big chars (>7x16).
* Smoothing is important for b/w scans, where we often have
* comb like patterns on a vertical border. I also received
* samples with a lot of white pixels (sample: 04/02/25).
* ToDo: obsolete if vector code is complete
*
* For every pixel of a qualifying box the 8 neighbours at distance 1 and
* at distance 2 are compared with the centre pixel (same/other side of
* the threshold cs); a pixel that sticks out of its surrounding is
* flipped to the other side of the threshold.
* return: always 0
*/
int smooth_borders( job_t *job ){
pix *pp = &job->src.p;
/* NOTE(review): dx and dy are assigned below but never read */
int ii=0,x,y,x0,x1,y0,y1,dx,dy,cs,i0,i1,i2,i3,i4,n1,n2,
cn[8],cm,vvv=job->cfg.verbose; /* dust found */
struct box *box2;
cs=job->cfg.cs; n1=n2=0;
if(vvv){ fprintf(stderr,"# smooth big chars 7x16 cs=%d",cs); }
/* filter for each big box */
for_each_data(&(job->res.boxlist)) { n2++; /* count boxes */
box2 = (struct box *)list_get_current(&(job->res.boxlist));
/* do not touch small characters! but how we define small characters? */
if (box2->x1-box2->x0+1<7 || box2->y1-box2->y0+1<16 ) continue;
if (box2->c==PICTURE) continue;
if (mean_thickness(box2)<3) continue;
n1++; /* count boxes matching big-char criteria */
x0=box2->x0; y0=box2->y0;
x1=box2->x1; y1=box2->y1;
dx=x1-x0+1; dy=y1-y0-1;
/* out_x(box2);
* dont change to much! only change if absolutely sure!
* ....... 1 2 3
* ex: .?##### 0 * 4
* ....... 7 6 5
* we should also avoid removing lines by systematic remove
* from left end to the right, so we concern also about distance>1
*/
for(x=box2->x0;x<=box2->x1;x++)
for(y=box2->y0;y<=box2->y1;y++){ /* filter out high frequencies */
/* this is a very primitive solution, only for learning */
/* cn[0..7]: the 8 direct neighbours, numbered clockwise as in the
 * sketch above; cm: the centre pixel */
cn[0]=getpixel(pp,x-1,y);
cn[4]=getpixel(pp,x+1,y); /* horizontal */
cn[2]=getpixel(pp,x,y-1);
cn[6]=getpixel(pp,x,y+1); /* vertical */
cn[1]=getpixel(pp,x-1,y-1);
cn[3]=getpixel(pp,x+1,y-1); /* diagonal */
cn[7]=getpixel(pp,x-1,y+1);
cn[5]=getpixel(pp,x+1,y+1);
cm=getpixel(pp,x,y);
/* check for 5 other and 3 same surrounding pixels */
/* i0: first neighbour equal to cm whose predecessor differs,
 * i1: length of the run of equal neighbours starting at i0,
 * i2: length of the following run of different neighbours */
for (i0=0;i0<8;i0++)
if ((cn[i0 ]<cs)==(cm<cs)
&& (cn[(i0+7) & 7]<cs)!=(cm<cs)) break; /* first same */
for (i1=0;i1<8;i1++)
if ((cn[(i0+i1) & 7]<cs)!=(cm<cs)) break; /* num same */
for (i2=0;i2<8;i2++)
if ((cn[(i0+i1+i2) & 7]<cs)==(cm<cs)) break; /* num other */
/* same measurement on the ring at distance 2 -> i3 (same), i4 (other) */
cn[0]=getpixel(pp,x-2,y);
cn[4]=getpixel(pp,x+2,y); /* horizontal */
cn[2]=getpixel(pp,x,y-2);
cn[6]=getpixel(pp,x,y+2); /* vertical */
cn[1]=getpixel(pp,x-2,y-2);
cn[3]=getpixel(pp,x+2,y-2); /* diagonal */
cn[7]=getpixel(pp,x-2,y+2);
cn[5]=getpixel(pp,x+2,y+2);
/* check for 5 other and 3 same surrounding pixels */
for (i0=0;i0<8;i0++)
if ((cn[i0 ]<cs)==(cm<cs)
&& (cn[(i0+7) & 7]<cs)!=(cm<cs)) break; /* first same */
for (i3=0;i3<8;i3++)
if ((cn[(i0+i3) & 7]<cs)!=(cm<cs)) break; /* num same */
for (i4=0;i4<8;i4++)
if ((cn[(i0+i3+i4) & 7]<cs)==(cm<cs)) break; /* num other */
if (i1<=3 && i2>=5 && i3>=3 && i4>=3) { /* change only on borders */
ii++; /* white : black */
/* a pixel below cs gets a value >= cs (cs|32), one at/above cs gets
 * cs/2; &~7 keeps the three low bits of the new value clear */
put(pp,x,y,7,((cm<cs)?(cs|32):cs/2)&~7);
#if 0
/* NOTE(review): this disabled debug line passes i3 twice, i4 is missing */
printf(" x y i0 i1 i2 i3 i4 cm new cs %3d %3d"
" %3d %3d %3d %3d %3d %3d %3d %3d\n",
x-box2->x0,y-box2->y0,i0,i1,i2,i3,i3,cm,getpixel(pp,x,y),cs);
#endif
}
}
#if 0 /* debugging */
out_x(box2);
#endif
} end_for_each(&(job->res.boxlist));
if(vvv)fprintf(stderr," ... %3d changes in %d of %d\n",ii,n1,n2);
return 0;
}
/*
 * Test whether a corner of box1 lies within box2, where box2 is enlarged
 * by a one pixel frame on every side (for pixel-patterns).
 * return: 1 if at least one of box1's x edges is inside box2's x range AND
 *         at least one of its y edges is inside box2's y range, else 0
 */
int box_nested( struct box *box1, struct box *box2){
  int left   = box2->x0 - 1, right = box2->x1 + 1;
  int top    = box2->y0 - 1, bottom = box2->y1 + 1;

  int x0_inside = (box1->x0 >= left && box1->x0 <= right);
  int x1_inside = (box1->x1 >= left && box1->x1 <= right);
  int y0_inside = (box1->y0 >= top  && box1->y0 <= bottom);
  int y1_inside = (box1->y1 >= top  && box1->y1 <= bottom);

  if (!(x0_inside || x1_inside)) return 0;
  if (!(y0_inside || y1_inside)) return 0;
  return 1;
}
/*
 * Test whether box1 lies completely within box2, where box2 is enlarged
 * by a one pixel frame on every side (for pixel-patterns).
 * return: 1 if box1 is covered, else 0
 */
int box_covered( struct box *box1, struct box *box2){
  /* reject as soon as one edge of box1 sticks out of the enlarged box2 */
  if (box1->x0 < box2->x0 - 1) return 0;
  if (box1->x1 > box2->x1 + 1) return 0;
  if (box1->y0 < box2->y0 - 1) return 0;
  if (box1->y1 > box2->y1 + 1) return 0;
  return 1;
}
/* ---- remove pictures ------------------------------------------
* may be, not deleting or moving to another list is much better!
* should be renamed to remove_pictures and border boxes
*
* Works on job->res.boxlist in three steps:
*  1. delete big PICTURE boxes (more than half of the image in both
*     directions) that have more than 8 other boxes nested -> table frames
*  2. delete smaller PICTURE boxes touching more than two image borders
*     whose corner pixels are mostly dark -> dark scan borders
*  3. merge PICTURE boxes that overlap another PICTURE box into it
*     (the surviving box grows, the other one is deleted)
* In verbose mode the picture/other counts are printed before, between and
* after these steps.
* return: always 0
*/
int remove_pictures( job_t *job){
struct box *box4,*box2;
int j=0, j2=0, num_del=0;
if (job->cfg.verbose)
fprintf(stderr, "# "__FILE__" L%d: remove pictures\n# ...",
__LINE__);
/* ToDo: output a list for picture handle scripts */
j=0; j2=0;
/* verbose only: j = number of PICTURE boxes, j2 = all other boxes */
if(job->cfg.verbose)
for_each_data(&(job->res.boxlist)) {
box4 = (struct box *)list_get_current(&(job->res.boxlist));
if (box4->c==PICTURE) j++; else j2++;
} end_for_each(&(job->res.boxlist));
if (job->cfg.verbose)
fprintf(stderr," status: pictures= %d other= %d nC= %d\n# ...",
j, j2, job->res.numC);
/* remove table frames */
if (job->res.numC > 8)
for_each_data(&(job->res.boxlist)) {
box2 = (struct box *)list_get_current(&(job->res.boxlist));
if (box2->c==PICTURE
&& box2->num_ac==0 /* dont remove barcodes */
&& box2->x1-box2->x0+1>box2->p->x/2 /* big table? */
&& box2->y1-box2->y0+1>box2->p->y/2 ){ j=0;
/* count boxes nested with the picture */
for_each_data(&(job->res.boxlist)) {
box4 = (struct box *)list_get_current(&(job->res.boxlist));
if( box4 != box2 ) /* not count itself */
if (box_nested(box4,box2)) j++; /* box4 in box2 */
} end_for_each(&(job->res.boxlist));
if( j>8 ){ /* remove box if more than 8 chars are within box */
list_del(&(job->res.boxlist), box2); /* does not work proper ?! */
free_box(box2); num_del++;
}
}
} end_for_each(&(job->res.boxlist));
if (job->cfg.verbose)
fprintf(stderr, " deleted= %d pictures (table frames)\n# ...",
num_del);
num_del=0;
/* remove dark-border-boxes (typical for hard copy of book site,
* or spam random border) */
if (job->res.numC > 1) /* dont remove the only char */
for_each_data(&(job->res.boxlist)) {
box2 = (struct box *)list_get_current(&(job->res.boxlist));
if (box2->c!=PICTURE) continue; // ToDo: PICTUREs set already?
/* boxes covering more than half of the image in both directions are
 * left alone here */
if ( box2->x1-box2->x0+1 > box2->p->x/2
&& box2->y1-box2->y0+1 > box2->p->y/2 ) continue;
/* j = number of image borders the box touches */
j=0;
if (box2->x0==0) j++;
if (box2->y0==0) j++; /* on border? */
if (box2->x1==box2->p->x-1) j++;
if (box2->y1==box2->p->y-1) j++;
if (j>2){ /* ToDo: check corner pixel */
int cs=job->cfg.cs;
/* j = number of box corners that are dark (below the threshold) */
j=0;
if (getpixel(box2->p,box2->x0,box2->y0)<cs) j++;
if (getpixel(box2->p,box2->x1,box2->y0)<cs) j++;
if (getpixel(box2->p,box2->x0,box2->y1)<cs) j++;
if (getpixel(box2->p,box2->x1,box2->y1)<cs) j++;
if (j>2) {
list_del(&(job->res.boxlist), box2);
free_box(box2); num_del++;
}
}
} end_for_each(&(job->res.boxlist));
if (job->cfg.verbose)
fprintf(stderr, " deleted= %d pictures (on border)\n# ...",
num_del);
num_del=0;
j=0; j2=0;
if(job->cfg.verbose)
for_each_data(&(job->res.boxlist)) {
box4 = (struct box *)list_get_current(&(job->res.boxlist));
if( box4->c==PICTURE ) j++; else j2++;
} end_for_each(&(job->res.boxlist));
if (job->cfg.verbose)
fprintf(stderr," status: pictures= %d other= %d nC= %d\n# ...",
j, j2, job->res.numC);
/* Merge overlapping pictures. The outer for(j=1;j;) repeats the whole
 * list walk as long as j is non-zero when the walk ends; j is also the
 * "found a box to merge" flag of the inner for(j=1;j;) loop. */
for(j=1;j;){ j=0; /* this is only because list_del does not work */
/* can be slow on gray images */
for_each_data(&(job->res.boxlist)) {
box2 = (struct box *)list_get_current(&(job->res.boxlist));
if( box2->c==PICTURE && box2->num_ac==0)
for(j=1;j;){ /* let it grow to max before leave */
j=0; box4=NULL;
/* find boxes nested with the picture and remove */
/* its for pictures build by compounds */
for_each_data(&(job->res.boxlist)) {
box4 = (struct box *)list_get_current(&(job->res.boxlist));
if( box4!=box2 /* not destroy self */
&& (box4->num_ac==0) /* dont remove barcodes etc. */
&& (/* box4->c==UNKNOWN || */
box4->c==PICTURE) ) /* dont remove valid chars */
if(
/* box4 in box2, +1..-1 frame for pixel-patterns */
box_nested(box4,box2)
/* or box2 in box4 */
|| box_nested(box2,box4) /* same? */
)
/* only merge boxes whose size is far from the average char size
 * (avX, avY), or which are completely covered by box2 */
if ( box4->x1-box4->x0+1>2*job->res.avX
|| box4->x1-box4->x0+1<job->res.avX/2
|| box4->y1-box4->y0+1>2*job->res.avY
|| box4->y1-box4->y0+1<job->res.avY/2
|| box_covered(box4,box2) ) /* box4 completely within box2 */
/* dont remove chars! see rotate45.fig */
{
/* do not remove boxes in inner loop (bug?) ToDo: check why! */
/* instead we leave inner loop and mark box4 as valid */
/* grow box2 to the bounding box of box2 and box4 */
if( box4->x0<box2->x0 ) box2->x0=box4->x0;
if( box4->x1>box2->x1 ) box2->x1=box4->x1;
if( box4->y0<box2->y0 ) box2->y0=box4->y0;
if( box4->y1>box2->y1 ) box2->y1=box4->y1;
j=1; /* mark box4 as valid */
break; /* and leave inner loop */
}
} end_for_each(&(job->res.boxlist));
/* box4 is only meaningful here when the walk was left via the break
 * above, which is signalled by j!=0 */
if (j!=0 && box4!=NULL) { /* check for valid box4 */
/* ToDo: melt */
list_del(&(job->res.boxlist), box4); /* does not work proper ?! */
free_box(box4); /* break; ToDo: necessary to leave after del??? */
num_del++;
}
}
} end_for_each(&(job->res.boxlist));
}
if (job->cfg.verbose)
fprintf(stderr, " deleted= %d nested pictures\n# ...", num_del);
/* output a list for picture handle scripts */
j=0; j2=0;
if(job->cfg.verbose)
for_each_data(&(job->res.boxlist)) {
box4 = (struct box *)list_get_current(&(job->res.boxlist));
if( box4->c==PICTURE ) {
fprintf(stderr," found picture at %4d %4d size %4d %4d\n# ...",
box4->x0, box4->y0, box4->x1-box4->x0+1, box4->y1-box4->y0+1 );
j++;
} else j2++;
} end_for_each(&(job->res.boxlist));
if (job->cfg.verbose)
fprintf(stderr," status: pictures= %d other= %d nC= %d\n",
j, j2, job->res.numC);
return 0;
}
/* ---- remove melted serifs --------------------------------- v0.2.5
>>v<<
##########.######## <-y0
################### like X VW etc.
...###.......###... <-y
...###......###....
j1 j2 j3
- can generate new boxes if two characters were glued
*/
int remove_melted_serifs( pix *pp ){
int x,y,j1,j2,j3,j4,i2,i3,i,ii,ni,cs,x0,x1,xa,xb,y0,y1,vvv=JOB->cfg.verbose;
struct box *box2, *box3;
progress_counter_t *pc = NULL;
cs=JOB->cfg.cs; i=0; ii=0; ni=0;
for_each_data(&(JOB->res.boxlist)) {
ni++;
} end_for_each(&(JOB->res.boxlist));
pc = open_progress(ni,"remove_melted_serifs");
ni = 0;
if(vvv){ fprintf(stderr,"# searching melted serifs ..."); }
for_each_data(&(JOB->res.boxlist)) {
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
if (box2->c != UNKNOWN) continue; /* dont try on pictures */
x0=box2->x0; x1=box2->x1;
y0=box2->y0; y1=box2->y1; /* box */
/* upper serifs */
for(j1=x0;j1+4<x1;){
j1+=loop(pp,j1,y0 ,x1-x0,cs,0,RI);
x =loop(pp,j1,y0 ,x1-x0,cs,1,RI); if(j1+x>x1+1) break;
y =loop(pp,j1,y0+1,x1-x0,cs,1,RI); if(y>x) x=y; if(j1+x>x1+1) break;
/* measure mean thickness of serif pos: (j1,y0)-(j1+x,y0) */
for(j2=j3=j4=0,i2=j1;i2<j1+x;i2++){
/* 2009-07: bug, j1 used instead of i2 */
i3 =loop(pp,i2,y0 ,y1-y0,cs,0,DO); if(8*i3>y1-y0) break;
i3+=loop(pp,i2,y0+i3,y1-y0,cs,1,DO); if(8*i3>y1-y0) continue;
if(8*i3<y1-y0){ j2+=i3; j3++; } /* sum vert. thickness */
} if(j3==0){ j1+=x; continue; } /* no serif, skip this object */
y = y0+(j2+j3-1)/j3+(y1-y0+1)/32; /* y0 + mean thickness + dy/32 + 1 */
if (vvv&1)
fprintf(stderr, "\n# upper serif x0,y0,j1-x0+x,y-y0 %4d %4d %2d+%2d %2d",
x0,y0,j1-x0,x,y-y0);
/* check if really melted serifs */
if (loop(pp,j1,y,x1-x0,cs,0,RI)<1) { j1+=x; continue; }
if(num_cross(j1 ,j1+x,y,y,pp,cs) < 2 ){ j1+=x;continue; }
if (vvv&1)
fprintf(stderr, " ok1");
j2 = j1 + loop(pp,j1,y,x1-x0,cs,0,RI);
j2 = j2 + loop(pp,j2,y,x1-x0,cs,1,RI);
i3 = loop(pp,j2,y,x1-x0,cs,0,RI); if(i3<2){j1+=x;continue;}
j2 += i3/2;
j3 = j2 + loop(pp,j2,y ,x1-j2,cs,0,RI);
i3 = j2 + loop(pp,j2,y+1,x1-j2,cs,0,RI); if(i3>j3)j3=i3;
j3 = j3 + loop(pp,j3,y ,x1-j3,cs,1,RI);
i3 = loop(pp,j3,y ,x1-j3,cs,0,RI);
if(i3<2 || j3>=j1+x){j1+=x;continue;}
j3 += i3/2;
if(x>5)
{
i++; /* snip! */
for(y=0;y<(y1-y0+1+4)/8;y++)put(pp,j2,y0+y,255,128+64); /* clear highest bit */
if(vvv&4){
fprintf(stderr,"\n");
out_x(box2);
fprintf(stderr,"# melted serifs corrected on %d %d j1=%d j3=%d",
j2-x0, y, j1-x0, j3-x0);
// ToDo: vector cut with line from xa,ya to xb,yb
// two frames of double melted MN become one frame if cut one
// of the melted serifs (new function cut_frames_at_line())
}
for(xb=0,xa=0;xa<(x1-x0+4)/8;xa++){ /* detect vertical gap */
i3=y1;
if(box2->m3>y0 && 2*y1>box2->m3+box2->m4) i3=box2->m3; /* some IJ */
if( loop(pp,j2-xa,i3,i3-y0,cs,0,UP) > (y1-y0+1)/2
&& loop(pp,j2,(y0+y1)/2,xa+1,cs,0,LE) >=xa ){ xb=-xa; break; }
if( loop(pp,j2+xa,i3,i3-y0,cs,0,UP) > (y1-y0+1)/2
&& loop(pp,j2,(y0+y1)/2,xa+1,cs,0,RI) >=xa ){ xb= xa; break; }
}
if( get_bw(j2 ,j2 ,y0,(y0+y1)/2,pp,cs,1) == 0
&& get_bw(j2+xb,j2+xb,(y0+y1)/2,i3,pp,cs,1) == 0 )
{ /* divide */
box3=malloc_box(box2);
box3->x1=j2-1;
box2->x0=j2+1; x1=box2->x1;
cut_box(box2); /* cut vectors outside the box, see box.c */
cut_box(box3);
box3->num=JOB->res.numC;
list_ins(&(JOB->res.boxlist),box2,box3); JOB->res.numC++; ii++; /* insert box3 before box2 */
if(vvv&4) fprintf(stderr," => splitted");
j1=x0=box2->x0; x=0; /* hopefully ok, UVW */
}
}
j1+=x;
}
/* same on lower serifs -- change this later to better function
// #### ###
// #### v ### # <-y
// #################### <-y1
// j1 j2 j3
*/
for(j1=x0;j1<x1;){
j1+=loop(pp,j1,y1 ,x1-x0,cs,0,RI);
x =loop(pp,j1,y1 ,x1-x0,cs,1,RI); if(j1+x>x1+1) break;
y =loop(pp,j1,y1-1,x1-x0,cs,1,RI); if(y>x) x=y; if(j1+x>x1+1) break;
/* measure mean thickness of serif */
for(j2=j3=j4=0,i2=j1;i2<j1+x;i2++){
/* 2009-07: bug, j1 used instead of i2 */
i3 =loop(pp,i2,y1 ,y1-y0,cs,0,UP); if(8*i3>y1-y0) break;
i3+=loop(pp,i2,y1-i3,y1-y0,cs,1,UP); if(8*i3>y1-y0) continue;
if(8*i3<y1-y0){ j2+=i3; j3++; }
} if(j3==0){ j1+=x; continue; }
y = y1-(j2+j3-1)/j3-(y1-y0+1)/32;
if (vvv&1)
fprintf(stderr, "\n# lower serif x0,y0,j1-x0+x,y1-y %4d %4d %2d+%2d %2d",
x0,y0,j1-x0,x,y1-y);
/* check if really melted serifs */
if( loop(pp,j1,y,x1-x0,cs,0,RI)<1 ) { j1+=x; continue; }
if(num_cross(j1 ,j1+x,y,y,pp,cs) < 2 ){ j1+=x;continue; }
if (vvv&1) fprintf(stderr, " ok1");
j2 = j1 + loop(pp,j1,y,x1-x0,cs,0,RI);
j2 = j2 + loop(pp,j2,y,x1-x0,cs,1,RI);
i3 = loop(pp,j2,y,x1-x0,cs,0,RI); if(i3<2){j1+=x;continue;}
j2 += i3/2;
j3 = j2 + loop(pp,j2,y ,x1-j2,cs,0,RI);
i3 = j2 + loop(pp,j2,y-1,x1-j2,cs,0,RI); if(i3>j3)j3=i3;
j3 = j3 + loop(pp,j3,y ,x1-j3,cs,1,RI);
i3 = loop(pp,j3,y,x1-j3,cs,0,RI);
if(i3<2 || j3>=j1+x){j1+=x;continue;}
j3 += i3/2;
/* y =y1-(y1-y0+1+4)/8; */
if(x>5)
{
i++; /* snip! */
for(i3=0;i3<(y1-y0+1+4)/8;i3++)
put(pp,j2,y1-i3,255,128+64); /* clear highest bit */
if(vvv&4){
fprintf(stderr,"\n");
out_x(box2);
fprintf(stderr,"# melted serifs corrected on %d %d j1=%d j3=%d",j2-x0,y-y0,j1-x0,j3-x0);
}
for(xb=0,xa=0;xa<(x1-x0+4)/8;xa++){ /* detect vertical gap */
if( loop(pp,j2-xa,y0,y1-y0,cs,0,DO) > (y1-y0+1)/2
&& loop(pp,j2,(y0+y1)/2,xa+1,cs,0,LE) >=xa ){ xb=-xa; break; }
if( loop(pp,j2+xa,y0,y1-y0,cs,0,DO) > (y1-y0+1)/2
&& loop(pp,j2,(y0+y1)/2,xa+1,cs,0,RI) >=xa ){ xb= xa; break; }
}
if( get_bw(j2 ,j2 ,(y0+y1)/2,y1,pp,cs,1) == 0
&& get_bw(j2+xb,j2+xb,y0,(y0+y1)/2,pp,cs,1) == 0 )
{ /* divide */
box3=malloc_box(box2);
box3->x1=j2-1;
box2->x0=j2; x1=box2->x1;
cut_box(box2); /* cut vectors outside the box */
cut_box(box3);
box3->num=JOB->res.numC;
list_ins(&(JOB->res.boxlist),box2,box3); JOB->res.numC++; ii++;
/* box3,box2 in correct order??? */
if(vvv&4) fprintf(stderr," => splitted");
j1=x0=box2->x0; x=0; /* hopefully ok, NMK */
}
}
j1+=x;
}
progress(ni++,pc);
} end_for_each(&(JOB->res.boxlist));
close_progress(pc);
if(vvv)fprintf(stderr," %3d cluster corrected, %d new boxes\n",i,ii);
return 0;
}
/* remove_rest_of_dust: delete leftover dust boxes from JOB->res.boxlist.
   Often seen as black borders / dust around the border on badly scanned
   copies of books.  Works in two passes over the box list:
     pass 1 - drop small UNKNOWN boxes lying in the top or bottom quarter
              of the page that have m4 == 0,
     pass 2 - drop tiny boxes (less than 3 pixels wide and high) that have
              no bigger box nearby.
   Deleted boxes are unlinked with list_del() and released with free_box().
   Only pass 1 decrements JOB->res.numC.
   Always returns 0.
*/
int remove_rest_of_dust() {
int i1, i2, vvv = JOB->cfg.verbose, x0, x1, y0, y1, cnt=0;
struct box *box2, *box4;
progress_counter_t *pc = NULL;
i1 = i2 = 0; /* counters for removed boxes: i1 = pass 1, i2 = pass 2 */
if (vvv)
fprintf(stderr, "# detect dust (avX,nC), ... ");
/* pass 1: remove fragments from border */
for_each_data(&(JOB->res.boxlist)) {
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
if (box2->c == UNKNOWN) {
x0 = box2->x0; x1 = box2->x1;
y0 = box2->y0; y1 = box2->y1; /* box */
/* box in char ??? */
/* height < 1.5 * res.sumY/res.numC
   (presumably sumY is the summed height of all counted boxes, which
   would make this 1.5 * mean box height -- TODO confirm) */
if ( 2 * JOB->res.numC * (y1 - y0 + 1) < 3 * JOB->res.sumY
/* box lies completely in the upper or the lower quarter of box2->p->y
   (presumably the image height -- TODO confirm) */
&& ( y1 < box2->p->y/4 || y0 > 3*box2->p->y/4 ) /* not single line */
&& JOB->res.numC > 1 /* do not remove everything */
/* NOTE(review): m4 == 0 presumably means "no text line assigned" -- confirm */
&& ( box2->m4 == 0 ) ) /* remove this */
{
JOB->res.numC--; /* ToDo: dont count tiny pixels */
/* ToDo: res.sumX,Y must also be corrected */
i1++;
/* the current element is deleted while iterating;
   NOTE(review): assumes list_del() is safe inside for_each_data -- confirm */
list_del(&(JOB->res.boxlist), box2);
free_box(box2);
}
}
} end_for_each(&(JOB->res.boxlist));
/* pass 2: remove isolated tiny boxes; progress is reported per visited box */
pc = open_progress(JOB->res.boxlist.n,"remove_dust2");
for_each_data(&(JOB->res.boxlist)) {
box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
progress(cnt++,pc);
if (box2->c == PICTURE) continue;
x0 = box2->x0; x1 = box2->x1;
y0 = box2->y0; y1 = box2->y1; /* box */
/* remove tiny box2 if too far away from bigger boxes */
/* ToDo: remove clouds of tiny pixels (count near small, compare with num bigger) */
/* 0.42: remove far away pixel? ToDo: do it at earlier? */
if (x1-x0+1<3 && y1-y0+1<3){
int xn, yn, xs, ys;
int found=0; /* set to 1 once a near bigger box was seen */
/* search near bigger box (nested walk over the same list) */
for_each_data(&(JOB->res.boxlist)) {
box4 = (struct box *)list_get_current(&(JOB->res.boxlist));
/* after a hit the remaining elements are only skipped, the loop is not
   left early; NOTE(review): presumably because for_each_data and
   end_for_each must stay balanced -- confirm */
if (found || box4 == box2) continue;
/* other tiny boxes do not count as neighbours */
if (box4->x1-box4->x0+1<3 && box4->y1-box4->y0+1<3) continue;
xs = box4->x1-box4->x0+1; /* width  of the bigger box */
ys = box4->y1-box4->y0+1; /* height of the bigger box */
/* distance from the centre of box4 to the upper left corner of box2 */
xn = abs((box4->x0+box4->x1)/2 - box2->x0);
yn = abs((box4->y0+box4->y1)/2 - box2->y0);
/* near = closer than 1.5 * size of box4 in both directions */
if (2*xn < 3*xs && 2*yn < 3*ys) { found=1; }
} end_for_each(&(JOB->res.boxlist));
if (!found) { /* found nothing, box2 too far from big boxes */
i2++;
/* note: JOB->res.numC is not decremented here, unlike in pass 1 */
list_del(&(JOB->res.boxlist), box2);
free_box(box2);
}
}
} end_for_each(&(JOB->res.boxlist));
close_progress(pc);
if (vvv)
fprintf(stderr, " %3d + %3d boxes deleted, nC= %d ?\n",
i1, i2, JOB->res.numC);
return 0;
}

View File

@@ -0,0 +1,87 @@
/*
This is a Optical-Character-Recognition program
Copyright (C) 1999 Joerg Schulenburg
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
see README for EMAIL-address
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "tga.h"
typedef unsigned char byte;
// --- needed for reading TGA-files
#if 0
char read_b(FILE *f1){
  /* Fetch the next byte of f1; hitting end-of-file or a stream error
     trips an assertion.  No filtering of the data is done here. */
  int ch = fgetc(f1);
  assert(!feof(f1));
  assert(!ferror(f1));
  return (char)ch;
}
#endif
//byte tga[18]={ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,24,32};
/* header_hex= 00 00 02 00 00 00 00 00 00 00 00 00 xl xh yl yh
* 18 20 -- -- -- -- -- -- -- -- -- -- -- -- -- -- */
/* Read an uncompressed 24 bit TGA file into *p.
 *
 *   name  path of the TGA file
 *   p     receives the pixel buffer (p->p, allocated with malloc), the
 *         dimensions (p->x, p->y) and the bytes per pixel (p->bpp)
 *   mode  0 = convert to gray (1 byte per pixel),
 *         1 = convert BGR to RGB (3 bytes per pixel)
 *
 * Every violated expectation (open error, short read, unsupported header,
 * out of memory, wrong mode) trips an assert().  All file reads are done
 * outside of the assert() expressions, so they are still executed when the
 * file is compiled with NDEBUG (assert() then expands to nothing).
 */
void readtga(char *name,pix *p,int mode){	// see pcx.format.txt
  // mode: 0=gray,1=RGB
  int nx,ny,i;
  size_t npix,k;		/* number of pixels, pixel index */
  size_t got;			/* result of the fread calls */
  FILE *f1;
  unsigned char *pic,h[18];

  f1=fopen(name,"rb"); if(!f1) fprintf(stderr," error opening file\n");
  assert(f1);			// open-error
  got=fread(h,1,18,f1);		/* read the 18 byte header -> h[] */
  assert(got==18);
  (void)got;			/* only inspected by assert() */
  assert(h[ 0]== 0);		// TGA0
  assert(h[ 1]== 0);		// TGA1
  assert(h[ 2]== 2);		// TGA2 no run length encoding
  for(i=3;i<12;i++)
    assert(h[ i]== 0);		// ???
  assert(h[16]==0x18);		// TGA16
  assert(h[17]==0x20);		// TGA17
  nx = h[12] + (h[13]<<8);	/* x-dimension low high */
  ny = h[14] + (h[15]<<8);	/* y-dimension low high */
  fprintf(stderr,"# TGA version=%d x=%d y=%d", h[2],nx,ny );
  fflush(stdout);
  /* nx and ny can be up to 65535 each: do the size arithmetic in size_t,
     3*nx*ny does not fit into an int for large images */
  npix=(size_t)nx*(size_t)ny;
  pic=(unsigned char *)malloc( 3*npix );
  assert(pic!=NULL);		// no memory
  got=fread(pic,3*(size_t)nx,(size_t)ny,f1);	// read all lines BGR
  assert(got==(size_t)ny);
  (void)got;
  if(mode==0)
  {
    /* BGR => gray, converted in place: the write position k never
       overtakes the read position 3*k */
    for(k=0;k<npix;k++)
      pic[k]=(pic[k*3+0]+pic[k*3+1]+pic[k*3+2])/3;
  }
  else
  if(mode==1)
  {
    byte b;
    for(k=0;k<npix;k++)		/* BGR => RGB: swap first and third byte */
    { b=pic[k*3+0]; pic[k*3+0]=pic[k*3+2]; pic[k*3+2]=b; }
  }
  else assert(0);		// wrong mode
  fclose(f1);
  p->p=pic;  p->x=nx;  p->y=ny;  p->bpp=1+2*mode;
  fprintf(stderr," mode=%d\n",mode);
}
// ------------------------------------------------------------------------

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,206 @@
<?xml version="1.0" encoding="windows-1251"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9,00"
Name="PNM"
ProjectGUID="{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}"
Keyword="AtlProj"
TargetFrameworkVersion="196613"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="_DEBUG"
MkTypLibCompatible="false"
TargetEnvironment="1"
GenerateStublessProxies="true"
TypeLibraryName="$(IntDir)/PNM.tlb"
HeaderFileName="PNM.h"
DLLDataFileName=""
InterfaceIdentifierFileName="PNM_i.c"
ProxyFileName="PNM_p.c"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="headers"
PreprocessorDefinitions="WIN32;_DEBUG"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
RuntimeTypeInfo="false"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Debug/pnm.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="NDEBUG"
MkTypLibCompatible="false"
TargetEnvironment="1"
GenerateStublessProxies="true"
TypeLibraryName="$(IntDir)/PNM.tlb"
HeaderFileName="PNM.h"
DLLDataFileName=""
InterfaceIdentifierFileName="PNM_i.c"
ProxyFileName="PNM_p.c"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="headers"
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Release/pnm.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
Description=""
CommandLine=""
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\src\ImageToPNM.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\headers\ImageToPNM.h"
>
</File>
<File
RelativePath=".\headers\pm_c_util.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -0,0 +1,357 @@
<?xml version="1.0" encoding="windows-1251"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8,00"
Name="PNM"
ProjectGUID="{56BDD4BE-4F4B-458C-BAA4-5E058BE94E60}"
RootNamespace="PNM"
Keyword="AtlProj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="Debug"
IntermediateDirectory="Debug"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="_DEBUG"
MkTypLibCompatible="false"
TargetEnvironment="1"
GenerateStublessProxies="true"
TypeLibraryName="$(IntDir)/PNM.tlb"
HeaderFileName="PNM.h"
DLLDataFileName=""
InterfaceIdentifierFileName="PNM_i.c"
ProxyFileName="PNM_p.c"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="headers"
PreprocessorDefinitions="WIN32;_DEBUG"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
RuntimeTypeInfo="false"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Debug/pnm.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="Release"
IntermediateDirectory="Release"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="NDEBUG"
MkTypLibCompatible="false"
TargetEnvironment="1"
GenerateStublessProxies="true"
TypeLibraryName="$(IntDir)/PNM.tlb"
HeaderFileName="PNM.h"
DLLDataFileName=""
InterfaceIdentifierFileName="PNM_i.c"
ProxyFileName="PNM_p.c"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="headers"
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Release/pnm.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
<Configuration
Name="ReleaseASC|Win32"
OutputDirectory="ReleaseASC"
IntermediateDirectory="ReleaseASC"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="NDEBUG"
MkTypLibCompatible="false"
TargetEnvironment="1"
GenerateStublessProxies="true"
TypeLibraryName="$(IntDir)/PNM.tlb"
HeaderFileName="PNM.h"
DLLDataFileName=""
InterfaceIdentifierFileName="PNM_i.c"
ProxyFileName="PNM_p.c"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="headers"
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Release/pnm.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG; ASCBUILD"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
<Configuration
Name="ReleaseOpenSource|Win32"
OutputDirectory="$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="4"
UseOfATL="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="NDEBUG"
MkTypLibCompatible="false"
TargetEnvironment="1"
GenerateStublessProxies="true"
TypeLibraryName="$(IntDir)/PNM.tlb"
HeaderFileName="PNM.h"
DLLDataFileName=""
InterfaceIdentifierFileName="PNM_i.c"
ProxyFileName="PNM_p.c"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="headers"
PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_ATL_ATTRIBUTES;BUILD_CONFIG_OPENSOURCE_VERSION"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\$(ConfigurationName)/pnm.pch"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1049"
AdditionalIncludeDirectories="$(IntDir)"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
CommandLine=""
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\src\ImageToPNM.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\headers\ImageToPNM.h"
>
</File>
<File
RelativePath=".\headers\pm_c_util.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@@ -0,0 +1,3 @@
#pragma once
/* Declaration of the image-to-PNM conversion entry point; the definition is
   not part of this header.
   Parameters as far as the names tell (NOTE(review): semantics are not
   visible here -- confirm against the implementation):
     Image        source raster
     Width/Height dimensions of the raster
     BitCount     presumably bits per pixel of the source raster
     ColorPresent / GrayPresent  presumably select color, gray or bitmap output
     Count        passed by reference; presumably receives the length of the
                  returned buffer
   NOTE(review): ownership of the returned buffer is not stated -- confirm. */
char* convertToPNM ( unsigned char* const Image, unsigned long Width, unsigned long Height, unsigned long BitCount, bool ColorPresent, bool GrayPresent, long &Count );

View File

@@ -0,0 +1,97 @@
#ifndef PM_C_UTIL_INCLUDED
#define PM_C_UTIL_INCLUDED
/* Netpbm-style format constants, pixel types and accessor macros.
   A format code packs the two magic characters of an image as
   MAGIC1 * 256 + MAGIC2.  In Netpbm naming the R-prefixed constants belong
   to the raw (binary) variant of a format, the others to the plain (ASCII)
   variant.  The *_TYPE code of a family equals its plain format code. */
/* PPM (color) magic constants. */
#define PPM_MAGIC1 'P'
#define PPM_MAGIC2 '3'
#define RPPM_MAGIC2 '6'
#define PPM_FORMAT (PPM_MAGIC1 * 256 + PPM_MAGIC2)
#define RPPM_FORMAT (PPM_MAGIC1 * 256 + RPPM_MAGIC2)
#define PPM_TYPE PPM_FORMAT
/* PBM (black and white) magic constants. */
#define PBM_MAGIC1 'P'
#define PBM_MAGIC2 '1'
#define RPBM_MAGIC2 '4'
#define PBM_FORMAT (PBM_MAGIC1 * 256 + PBM_MAGIC2)
#define RPBM_FORMAT (PBM_MAGIC1 * 256 + RPBM_MAGIC2)
#define PBM_TYPE PBM_FORMAT
/* PGM (gray) magic constants. */
#define PGM_MAGIC1 'P'
#define PGM_MAGIC2 '2'
#define RPGM_MAGIC2 '5'
#define PGM_FORMAT (PGM_MAGIC1 * 256 + PGM_MAGIC2)
#define RPGM_FORMAT (PGM_MAGIC1 * 256 + RPGM_MAGIC2)
#define PGM_TYPE PGM_FORMAT
/* Store the three color components into pixel p.  Wrapped in
   do { } while (0) so that the macro behaves like a single statement. */
#define PPM_ASSIGN(p,red,grn,blu) \
do { (p).r = (red); (p).g = (grn); (p).b = (blu); } while (0)
#define PNM_ASSIGN(x,r,g,b) PPM_ASSIGN(x,r,g,b)
/* Macro for turning a format number into a type number:
   PBM_TYPE for both PBM formats, -1 for anything else. */
#define PBM_FORMAT_TYPE(f) \
((f) == PBM_FORMAT || (f) == RPBM_FORMAT ? PBM_TYPE : -1)
/* Macro for turning a format number into a type number:
   PGM_TYPE for both PGM formats, otherwise the result of PBM_FORMAT_TYPE. */
#define PGM_FORMAT_TYPE(f) ((f) == PGM_FORMAT || (f) == RPGM_FORMAT ? PGM_TYPE : PBM_FORMAT_TYPE(f))
/* Macro for turning a format number into a type number:
   PPM_TYPE for both PPM formats, otherwise the result of PGM_FORMAT_TYPE. */
#define PPM_FORMAT_TYPE(f) \
((f) == PPM_FORMAT || (f) == RPPM_FORMAT ? PPM_TYPE : PGM_FORMAT_TYPE(f))
/* PNM covers all three families, so it uses the full PPM -> PGM -> PBM chain. */
#define PNM_FORMAT_TYPE(f) PPM_FORMAT_TYPE(f)
/* Number of bytes needed to hold 'cols' bits (rounded up). */
#define pbm_packed_bytes(cols) (((cols)+7)/8)
/* Sample types: one gray sample, one color component. */
typedef unsigned int gray;
typedef gray pixval;
/* One color pixel. */
typedef struct {
pixval r, g, b;
} pixel;
/* Component accessors for a pixel. */
#define PPM_GETR(p) ((p).r)
#define PPM_GETG(p) ((p).g)
#define PPM_GETB(p) ((p).b)
/* A xel is the generic PNM element; here it is the same type as a pixel. */
typedef pixel xel;
typedef pixval xelval;
/* Single-sample value of a xel: it is read from the blue component. */
#define PNM_GET1(x) PPM_GETB(x)
/* One PBM bit, stored in a whole byte. */
typedef unsigned char bit;
#define PBM_WHITE 0
#define PBM_BLACK 1
/* NOTE: do not use "bool" as a type in an external interface. It could
have different definitions on either side of the interface. Even if both
sides include this interface header file, the conditional compilation
here means one side may use the typedef below and the other side may
use some other definition. For an external interface, be safe and just
use "int".
*/
/* We used to assume that if TRUE was defined, then bool was too.
However, we had a report on 2001.09.21 of a Tru64 system that had
TRUE but not bool and on 2002.03.21 of an AIX 4.3 system that was
likewise. So now we define bool all the time, unless the macro
HAVE_BOOL is defined. If someone is using the Netpbm libraries and
also another library that defines bool, he can either make the
other library define/respect HAVE_BOOL or just define HAVE_BOOL in
the file that includes pm_config.h or with a compiler option. Note
that C++ always has bool.
A preferred way of getting booleans is <stdbool.h>. But it's not
available on all platforms, and it's easy to reproduce what it does
here.
*/
/* NOTE(review): the two comments above describe a bool typedef, but this
   copy of the header contains none; only TRUE and FALSE are provided below. */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#endif

View File

@@ -0,0 +1,781 @@
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "pm_c_util.h"
#include "ImageToPNM.h"
static xelval const bmpMaxval = 255;
/* The maxval for intensity values in a BMP image -- either in a
truecolor raster or in a colormap
*/
/* MAXCOLORS is the maximum size of a color map in a BMP image */
#define MAXCOLORS 256
/* NOTE(review): presumably the order in which the raster rows are stored
   in the BMP (bottom row first or top row first) -- confirm at the users */
enum rowOrder {BOTTOMUP, TOPDOWN};
/* Location of one component (e.g. red) inside a packed raster value.
   The component is recovered as (value >> shift) & mask. */
struct bitPosition {
/* mask and shift count to describe a set of bits in a binary value.
Example: if 16 bits are laid out as XRRRRRGGGGGBBBBB then the shift
count for the R component is 10 and the mask is 0000000000011111.
*/
unsigned int shift;
/* How many bits right you have to shift the value to get the subject
bits in the least significant bit positions.
*/
unsigned int mask;
/* Has one bits in positions where the subject bits are after
shifting.  A mask of 0 means the component is not present.
*/
};
/* Describes how the components of one pixel are packed in the raster. */
struct pixelformat {
/* The format of a pixel representation from the raster. i.e. which
bits apply to red, green, blue, and transparency
*/
struct bitPosition red;
struct bitPosition blu;
struct bitPosition grn;
struct bitPosition trn;
/* Note the member order red, blu, grn, trn: it matters for any
   positional (aggregate) initialization of this struct. */
bool conventionalBgr;
/* This means that the above bit positions are just the conventional
BGR format -- one byte Blue, one byte Green, one byte Red,
no alpha. Though it's totally redundant with the members above,
this member speeds up computation: We've never actually seen
a BMP file that doesn't use conventional BGR, and it doesn't
require any masking or shifting at all to interpret.
*/
};
/* Append lenline bytes taken from s1 to a growable byte buffer.
 *
 *   buffer     start of the buffer (may be NULL while *lenbuffer is 0);
 *              updated in place when the buffer had to be moved
 *   s1         bytes to append; NULL makes the call a no-op
 *   lenbuffer  in/out: allocated size of the buffer in bytes
 *   lenline    number of bytes to append; a negative value is a no-op
 *   pos        in/out: number of bytes in use, i.e. the write position
 *
 * The buffer is enlarged with realloc() by 512 bytes, or by lenline when
 * that is larger, as soon as *pos + lenline reaches *lenbuffer.  When the
 * enlargement fails, nothing is appended and buffer, *lenbuffer and *pos
 * keep their values.  No terminating '\0' is written.
 * Returns the (possibly moved) buffer.
 */
char *append_to_buffer(char *&buffer, const char *s1, long *lenbuffer, long lenline, long *pos) {
    if (s1 == NULL || lenline < 0)
        return buffer;

    long const used = *pos;     /* keep the full width of *pos */

    if (used + lenline >= *lenbuffer) {
        long const growth = (lenline <= 512) ? 512 : lenline;
        char *grown = (char *)realloc(buffer, *lenbuffer + growth);
        if (!grown)
            return buffer;      /* old block is still valid, size unchanged */
        buffer = grown;
        *lenbuffer += growth;   /* book the new size only after success */
    }

    memcpy(buffer + used, s1, lenline);
    *pos += lenline;
    return buffer;
}
/* Append lenline bytes taken from s1 to a growable byte buffer
 * (unsigned char variant).
 *
 *   buffer     start of the buffer (may be NULL while *lenbuffer is 0);
 *              updated in place when the buffer had to be moved
 *   s1         bytes to append; NULL makes the call a no-op
 *   lenbuffer  in/out: allocated size of the buffer in bytes
 *   lenline    number of bytes to append; a negative value is a no-op
 *   pos        in/out: number of bytes in use, i.e. the write position
 *
 * The buffer is enlarged with realloc() by 512 bytes, or by lenline when
 * that is larger, as soon as *pos + lenline reaches *lenbuffer.  When the
 * enlargement fails, nothing is appended and buffer, *lenbuffer and *pos
 * keep their values.  No terminating '\0' is written.
 * Returns the (possibly moved) buffer.
 */
unsigned char *append_to_buffer(unsigned char *&buffer, const unsigned char *s1, long *lenbuffer, long lenline, long *pos) {
    if (s1 == NULL || lenline < 0)
        return buffer;

    long const used = *pos;     /* keep the full width of *pos */

    if (used + lenline >= *lenbuffer) {
        long const growth = (lenline <= 512) ? 512 : lenline;
        unsigned char *grown = (unsigned char *)realloc(buffer, *lenbuffer + growth);
        if (!grown)
            return buffer;      /* old block is still valid, size unchanged */
        buffer = grown;
        *lenbuffer += growth;   /* book the new size only after success */
    }

    memcpy(buffer + used, s1, lenline);
    *pos += lenline;
    return buffer;
}
static struct pixelformat
defaultPixelformat(unsigned int const bitCount) {
struct pixelformat retval = { 0 };
switch (bitCount) {
case 16:
retval.conventionalBgr = FALSE;
retval.red.shift = 10;
retval.grn.shift = 5;
retval.blu.shift = 0;
retval.trn.shift = 0;
retval.red.mask = 0x1f; /* 5 bits */
retval.grn.mask = 0x1f; /* 5 bits */
retval.blu.mask = 0x1f; /* 5 bits */
retval.trn.mask = 0;
break;
case 24:
case 32:
retval.conventionalBgr = TRUE;
retval.red.shift = 16;
retval.grn.shift = 8;
retval.blu.shift = 0;
retval.trn.shift = 0;
retval.red.mask = 0xff; /* 8 bits */
retval.grn.mask = 0xff; /* 8 bits */
retval.blu.mask = 0xff; /* 8 bits */
retval.trn.mask = 0;
break;
default:
/* colormapped - masks are undefined */
break;
}
return retval;
}
static void
extractBitFields(unsigned int const rasterval,
struct pixelformat const pixelformat,
pixval const maxval,
pixval * const rP,
pixval * const gP,
pixval * const bP,
pixval * const aP) {
unsigned int const rbits =
(rasterval >> pixelformat.red.shift) & pixelformat.red.mask;
unsigned int const gbits =
(rasterval >> pixelformat.grn.shift) & pixelformat.grn.mask;
unsigned int const bbits =
(rasterval >> pixelformat.blu.shift) & pixelformat.blu.mask;
unsigned int const abits =
(rasterval >> pixelformat.trn.shift) & pixelformat.trn.mask;
*rP = pixelformat.red.mask ?
(unsigned int) rbits * maxval / pixelformat.red.mask : 0;
*gP = pixelformat.grn.mask ?
(unsigned int) gbits * maxval / pixelformat.grn.mask : 0;
*bP = pixelformat.blu.mask ?
(unsigned int) bbits * maxval / pixelformat.blu.mask : 0;
*aP = pixelformat.trn.mask ?
(unsigned int) abits * maxval / pixelformat.trn.mask : 0;
}
static void
convertRow16(unsigned char const bmprow[],
xel xelrow[],
int const cols,
struct pixelformat const pixelformat) {
/* It's truecolor. */
unsigned int col;
unsigned int cursor;
cursor = 0;
for (col=0; col < cols; ++col) {
unsigned short const rasterval = (unsigned short)
bmprow[cursor+1] << 8 | bmprow[cursor+0];
pixval r, g, b, a;
extractBitFields(rasterval, pixelformat, 255, &r, &g, &b, &a);
PNM_ASSIGN(xelrow[col], r, g, b);
cursor += 2;
}
}
static void
convertRow24(unsigned char const bmprow[],
xel xelrow[],
int const cols,
struct pixelformat const pixelformat) {
/* It's truecolor */
/* There is a document that gives a much different format for
24 bit BMPs. But this seems to be the de facto standard, and is,
with a little ambiguity and contradiction resolved, defined in the
Microsoft BMP spec.
*/
unsigned int col;
unsigned int cursor;
cursor = 0;
for (col = 0; col < cols; ++col) {
pixval r, g, b, a;
if (pixelformat.conventionalBgr) {
r = bmprow[cursor+2];
g = bmprow[cursor+1];
b = bmprow[cursor+0];
a = 0;
} else {
unsigned int const rasterval =
(bmprow[cursor+0] << 16) +
(bmprow[cursor+1] << 8) +
(bmprow[cursor+2] << 0);
extractBitFields(rasterval, pixelformat, 255, &r, &g, &b, &a);
}
PNM_ASSIGN(xelrow[col], r, g, b);
cursor += 3;
}
}
/* Convert one row of a 32 bits per pixel truecolor raster (4 bytes per
   pixel) into xelrow[].
   With the conventional layout the bytes are blue, green, red and one
   byte of padding.  Otherwise the four bytes are combined, first byte most
   significant, and decoded according to 'pixelformat'; the decoded
   components are what gets stored (formerly the decoded values were
   dropped and the raw bytes were stored in either case).
   The transparency component is decoded but not stored. */
static void
convertRow32(unsigned char      const bmprow[],
             xel                      xelrow[],
             int                const cols,
             struct pixelformat const pixelformat) {
    /* It's truecolor */
    unsigned int col;
    unsigned int cursor;

    cursor = 0;
    for (col = 0; col < (unsigned int) cols; ++col) {
        pixval r, g, b, a;

        if (pixelformat.conventionalBgr) {
            /* bmprow[cursor+3] is just padding */
            r = bmprow[cursor+2];
            g = bmprow[cursor+1];
            b = bmprow[cursor+0];
            a = 0;
        } else {
            /* shift as unsigned: a byte value >= 128 shifted left by 24
               does not fit into a signed int */
            unsigned int const rasterval =
                ((unsigned int) bmprow[cursor+0] << 24) +
                ((unsigned int) bmprow[cursor+1] << 16) +
                ((unsigned int) bmprow[cursor+2] <<  8) +
                ((unsigned int) bmprow[cursor+3] <<  0);
            extractBitFields(rasterval, pixelformat, 255, &r, &g, &b, &a);
        }
        (void) a;   /* transparency is not part of a xel */
        PNM_ASSIGN(xelrow[col], r, g, b);
        cursor += 4;
    }
}
/*----------------------------------------------------------------------------
   Turn the raw BMP raster row bmprow[] into the row of xels xelrow[],
   with maxval 255 for the output xels.

   'cBitCount' is the number of bits per pixel of the BMP image:
     16, 24, 32   truecolor, handled by the matching convertRowNN()
     8            every byte is an index into colormap[]
     less than 8  the indices are bit fields, packed from the most
                  significant bit of each byte downwards
   Any other value leaves xelrow[] untouched.

   colormap[i] is the color with color index i (colormapped images only).
-----------------------------------------------------------------------------*/
static void
convertRow(unsigned char      const bmprow[],
           xel                      xelrow[],
           int                const cols,
           unsigned int       const cBitCount,
           struct pixelformat const pixelformat,
           xel                const colormap[]
           ) {
    switch (cBitCount) {
    case 24:
        convertRow24(bmprow, xelrow, cols, pixelformat);
        return;
    case 16:
        convertRow16(bmprow, xelrow, cols, pixelformat);
        return;
    case 32:
        convertRow32(bmprow, xelrow, cols, pixelformat);
        return;
    case 8:
        /* whole byte colormap index */
        for (unsigned int col = 0; col < cols; ++col)
            xelrow[col] = colormap[bmprow[col]];
        return;
    default:
        break;
    }

    if (cBitCount >= 8)
        return;     /* depth without a converter */

    /* bit field colormap index */
    unsigned char const mask = ( 1 << cBitCount ) - 1;
    for (unsigned int col = 0; col < cols; ++col) {
        unsigned int const bitOffset = col * cBitCount;
        unsigned int const cursor = bitOffset / 8;
        unsigned int const shift = 8 - (bitOffset % 8) - cBitCount;
        unsigned int const index =
            (bmprow[cursor] & (mask << shift)) >> shift;
        xelrow[col] = colormap[index];
    }
}
static void
format1bpsRow(const pixel * const pixelrow,
unsigned int const cols,
unsigned char * const rowBuffer) {
/* single byte samples. */
unsigned int col;
unsigned int bufferCursor;
bufferCursor = 0;
for (col = 0; col < cols; ++col) {
rowBuffer[bufferCursor++] = PPM_GETR(pixelrow[col]);
rowBuffer[bufferCursor++] = PPM_GETG(pixelrow[col]);
rowBuffer[bufferCursor++] = PPM_GETB(pixelrow[col]);
}
}
static void
format1bpsRow(const gray * const grayrow,
unsigned int const cols,
unsigned char * const rowBuffer) {
/* single byte samples. */
unsigned int col;
unsigned int bufferCursor;
bufferCursor = 0;
for (col = 0; col < cols; ++col)
rowBuffer[bufferCursor++] = grayrow[col];
}
static void
format2bpsRow(const pixel * const pixelrow,
unsigned int const cols,
unsigned char * const rowBuffer) {
/* two byte samples. */
unsigned int col;
unsigned int bufferCursor;
bufferCursor = 0;
for (col = 0; col < cols; ++col) {
pixval const r = PPM_GETR(pixelrow[col]);
pixval const g = PPM_GETG(pixelrow[col]);
pixval const b = PPM_GETB(pixelrow[col]);
rowBuffer[bufferCursor++] = r >> 8;
rowBuffer[bufferCursor++] = (unsigned char)r;
rowBuffer[bufferCursor++] = g >> 8;
rowBuffer[bufferCursor++] = (unsigned char)g;
rowBuffer[bufferCursor++] = b >> 8;
rowBuffer[bufferCursor++] = (unsigned char)b;
}
}
static void
format2bpsRow(const gray * const grayrow,
unsigned int const cols,
unsigned char * const rowBuffer) {
/* two byte samples. */
unsigned int col;
unsigned int bufferCursor;
bufferCursor = 0;
for (col = 0; col < cols; ++col) {
gray const val = grayrow[col];
rowBuffer[bufferCursor++] = val >> 8;
rowBuffer[bufferCursor++] = (unsigned char) val;
}
}
/* Append one row of color pixels in raw form to the output image
   PNMImage (write position *pos, allocated size *lenbuffer).
   One byte per sample is used while maxval is below 256, two bytes
   otherwise.  The row is formatted in a temporary buffer first; when that
   buffer cannot be allocated the row is dropped without any notice. */
static void
ppm_writeppmrowraw(unsigned char *& PNMImage,
                   long *           pos,
                   long *           lenbuffer,
                   const pixel *    const pixelrow,
                   unsigned int     const cols,
                   pixval           const maxval ) {
    bool const narrowSamples = maxval < 256;
    unsigned int const bytesPerRow = cols * 3 * (narrowSamples ? 1 : 2);
    unsigned char * const rowBuffer = (unsigned char *)malloc(bytesPerRow);

    if (!rowBuffer)
        return;

    if (narrowSamples)
        format1bpsRow(pixelrow, cols, rowBuffer);
    else
        format2bpsRow(pixelrow, cols, rowBuffer);

    PNMImage = append_to_buffer ( PNMImage, rowBuffer, lenbuffer, bytesPerRow, pos );
    free(rowBuffer);
}
/* Append one row of gray values in raw form to the output image
   PNMImage (write position *pos, allocated size *lenbuffer).
   One byte per sample is used while maxval is below 256, two bytes
   otherwise.  The row is formatted in a temporary buffer first; when that
   buffer cannot be allocated the row is dropped without any notice. */
static void
writepgmrowraw(unsigned char *& PNMImage,
               long *           pos,
               long *           lenbuffer,
               const gray *     const grayrow,
               unsigned int     const cols,
               gray             const maxval) {
    bool const narrowSamples = maxval < 256;
    unsigned int const bytesPerRow = cols * (narrowSamples ? 1 : 2);
    unsigned char * const rowBuffer = (unsigned char *)malloc(bytesPerRow);

    if (!rowBuffer)
        return;

    if (narrowSamples)
        format1bpsRow(grayrow, cols, rowBuffer);
    else
        format2bpsRow(grayrow, cols, rowBuffer);

    PNMImage = append_to_buffer ( PNMImage, rowBuffer, lenbuffer, bytesPerRow, pos );
    free(rowBuffer);
}
/* Append one PBM row in plain (ASCII) form: one '1' or '0' character
   per pixel (non-zero bit -> "1"), with a newline inserted before a
   digit whenever 70 digits are already on the current line, and a
   final newline terminating the row. */
static void
writePbmRowPlain(char *& PNMImage,
                 long * pos,
                 long * lenbuffer,
                 bit * const bitrow,
                 int const cols) {
    int const maxDigitsPerLine = 70;
    int digitsOnLine = 0;
    int idx;

    for (idx = 0; idx < cols; ++idx) {
        if (digitsOnLine >= maxDigitsPerLine) {
            /* current line is full: wrap before writing the next digit */
            PNMImage = append_to_buffer ( PNMImage, "\n", lenbuffer, 1, pos );
            digitsOnLine = 0;
        }
        PNMImage = append_to_buffer ( PNMImage, bitrow[idx] ? "1" : "0", lenbuffer, 1, pos );
        ++digitsOnLine;
    }

    /* every row ends with its own newline, even an empty one */
    PNMImage = append_to_buffer ( PNMImage, "\n", lenbuffer, 1, pos );
}
/* Append one PPM row to the in-memory image PNMImage.
   Thin public wrapper: forwards every argument unchanged to
   ppm_writeppmrowraw(), which always writes the raw (binary) sample
   encoding.  Note the argument order here is (pos, lenbuffer), the
   reverse of the *init functions in this file. */
void
ppm_writeppmrow(unsigned char *& PNMImage,
long * pos,
long * lenbuffer,
pixel * const pixelrow,
int const cols,
pixval const maxval) {
/* cols is converted from int to the callee's unsigned int parameter */
ppm_writeppmrowraw(PNMImage, pos, lenbuffer, pixelrow, cols, maxval);
}
/* Append one PGM row to the in-memory image PNMImage.
   Thin public wrapper: forwards every argument unchanged to
   writepgmrowraw(), which always writes the raw (binary) sample
   encoding.  Note the argument order here is (pos, lenbuffer), the
   reverse of the *init functions in this file. */
void
pgm_writepgmrow(unsigned char *& PNMImage,
long * pos,
long * lenbuffer,
const gray * const grayrow,
int const cols,
gray const maxval) {
/* cols is converted from int to the callee's unsigned int parameter */
writepgmrowraw(PNMImage, pos, lenbuffer, grayrow, cols, maxval);
}
/* Append one PBM row to the in-memory image PNMImage.
   Thin public wrapper around writePbmRowPlain(), so the row is always
   written as ASCII '0'/'1' digits.
   NOTE(review): pbm_writepbminit() in this file writes RPBM_MAGIC2 in
   the header while this function emits plain-text rows -- confirm the
   consumer of the buffer accepts that combination. */
void
pbm_writepbmrow(char *& PNMImage,
long * pos,
long * lenbuffer,
bit * const bitrow,
int const cols) {
writePbmRowPlain(PNMImage, pos, lenbuffer, bitrow, cols);
}
/* Append one row of xels to the in-memory image in the encoding selected
   by 'format'.

     PPM: the xel row is reinterpreted as a pixel row and written as-is.
     PGM: the PNM_GET1() value of every xel is copied into a temporary
          gray row, which is then written.
     PBM: every xel is mapped to PBM_BLACK when PNM_GET1() is 0 and to
          PBM_WHITE otherwise, and the resulting bit row is written.

   Any other format type writes nothing.

   If the temporary row needed for PGM/PBM cannot be allocated the row is
   skipped (the same silent-skip policy the raw row writers in this file
   use on allocation failure) instead of writing through a null pointer. */
void
pnm_writepnmrow(char *& PNMImage,
                long * pos,
                long * lenbuffer,
                xel * const xelrow,
                int const cols,
                xelval const maxval,
                int const format) {
    switch (PNM_FORMAT_TYPE(format)) {
    case PPM_TYPE:
        ppm_writeppmrow((unsigned char*&)PNMImage, pos, lenbuffer, (pixel*) xelrow, cols, (pixval) maxval);
        break;
    case PGM_TYPE: {
        gray* grayrow;
        unsigned int col;
        grayrow = (gray*)malloc(sizeof (gray) * cols);
        /* malloc(0) may legitimately return NULL, so only a NULL result
           for a non-empty row is treated as an allocation failure */
        if (grayrow == NULL && cols != 0)
            break;
        for (col = 0; col < cols; ++col)
            grayrow[col] = PNM_GET1(xelrow[col]);
        pgm_writepgmrow((unsigned char*&)PNMImage, pos, lenbuffer, grayrow, cols, (gray) maxval);
        free( grayrow );
    }
    break;
    case PBM_TYPE: {
        bit* bitrow;
        unsigned int col;
        bitrow = (bit*)malloc(sizeof(bit) * cols);
        /* see the PGM case: NULL is only an error for a non-empty row */
        if (bitrow == NULL && cols != 0)
            break;
        for (col = 0; col < cols; ++col)
            bitrow[col] = PNM_GET1(xelrow[col]) == 0 ? PBM_BLACK : PBM_WHITE;
        pbm_writepbmrow(PNMImage, pos, lenbuffer, bitrow, cols);
        free(bitrow);
    }
    break;
    }
}
/* Append a PPM header ("<magic>\n<cols> <rows>\n<maxval>\n") to the
   in-memory image.  The second magic character is PPM_MAGIC2 when
   maxval >= 65536 and RPPM_MAGIC2 otherwise.
   NOTE(review): the row writers in this file always emit raw samples,
   so the PPM_MAGIC2 branch looks inconsistent with them -- confirm
   whether maxval >= 65536 can ever be passed in. */
void
ppm_writeppminit(char *& PNMImage,
                 long * lenbuffer,
                 long * pos,
                 int const cols,
                 int const rows,
                 pixval const maxval) {
    char header[256] = {0};
    int const secondMagic = (maxval >= (1 << 16)) ? PPM_MAGIC2 : RPPM_MAGIC2;
    long headerLen;

    sprintf(header, "%c%c\n%d %d\n%d\n",
            PPM_MAGIC1, secondMagic,
            cols, rows, maxval );
    headerLen = strlen(header);

    PNMImage = append_to_buffer(PNMImage, header, lenbuffer, headerLen, pos);
}
/* Append a PGM header ("<magic>\n<cols> <rows>\n<maxval>\n") to the
   in-memory image.  The second magic character is PGM_MAGIC2 when
   maxval >= 65536 and RPGM_MAGIC2 otherwise. */
void
pgm_writepgminit(char *& PNMImage,
                 long * lenbuffer,
                 long * pos,
                 int const cols,
                 int const rows,
                 gray const maxval) {
    char header[256] = {0};
    int const secondMagic = (maxval >= (1 << 16)) ? PGM_MAGIC2 : RPGM_MAGIC2;
    long headerLen;

    sprintf(header, "%c%c\n%d %d\n%d\n",
            PGM_MAGIC1,
            secondMagic,
            cols, rows, maxval );
    headerLen = strlen(header);

    PNMImage = append_to_buffer(PNMImage, header, lenbuffer, headerLen, pos);
}
/* Append a PBM header ("<magic>\n<cols> <rows>\n") to the in-memory
   image.  The magic is always PBM_MAGIC1 followed by RPBM_MAGIC2; PBM
   has no maxval line. */
void
pbm_writepbminit(char *& PNMImage,
                 long * lenbuffer,
                 long * pos,
                 int const cols,
                 int const rows) {
    char header[256] = {0};
    long headerLen;

    sprintf(header, "%c%c\n%d %d\n", PBM_MAGIC1, RPBM_MAGIC2, cols, rows);
    headerLen = strlen(header);

    PNMImage = append_to_buffer(PNMImage, header, lenbuffer, headerLen, pos);
}
/* Append the header matching 'format' to the in-memory image by
   dispatching to the PPM, PGM or PBM header writer.  maxval is cast to
   the sample type of the chosen format and is ignored for PBM.  A format
   type that is none of the three writes nothing. */
void
pnm_writepnminit(char *& PNMImage,
                 long * lenbuffer,
                 long * pos,
                 int const cols,
                 int const rows,
                 xelval const maxval,
                 int const format) {
    int const formatType = PNM_FORMAT_TYPE(format);

    if (formatType == PPM_TYPE) {
        ppm_writeppminit(PNMImage, lenbuffer, pos, cols, rows, (pixval) maxval);
    } else if (formatType == PGM_TYPE) {
        pgm_writepgminit(PNMImage, lenbuffer, pos, cols, rows, (gray) maxval);
    } else if (formatType == PBM_TYPE) {
        pbm_writepbminit(PNMImage, lenbuffer, pos, cols, rows);
    }
}
/* Append one already-packed PBM row to the in-memory image: copies
   pbm_packed_bytes(cols) bytes from packed_bits via append_to_buffer()
   and stores the returned buffer pointer back into PNMImage. */
static void
writePackedRawRow(unsigned char *& PNMImage,
long * lenbuffer,
long * pos,
const unsigned char * const packed_bits,
int const cols) {
PNMImage = append_to_buffer(PNMImage, packed_bits, lenbuffer, pbm_packed_bytes(cols), pos);
}
/* Append one packed (raw) PBM row to the in-memory image.
   Public wrapper that forwards every argument unchanged to
   writePackedRawRow().  Note the argument order here is
   (lenbuffer, pos), unlike pbm_writepbmrow() which takes
   (pos, lenbuffer). */
void
pbm_writepbmrow_packed(unsigned char *& PNMImage,
long * lenbuffer,
long * pos,
const unsigned char * const packed_bits,
int const cols) {
writePackedRawRow(PNMImage, lenbuffer, pos, packed_bits, cols);
}
static void
writeRasterPbm(unsigned char * const BMPraster,
               int const cols,
               int const rows,
               xel const colormap[],
               char *& PNMImage,
               long * lenbuffer,
               long * pos) {
/*----------------------------------------------------------------------------
   Append the PBM raster corresponding to the raw 1-bit raster BMPraster
   to the in-memory image PNMImage (via pbm_writepbmrow_packed()).  The
   PBM image has dimensions 'cols' by 'rows'.

   colormap[] is the colormap (colormap[i] is the color with color
   index i).  If the red component of colormap[0] is greater than zero
   the raster bits are written unchanged; otherwise every bit is
   inverted first.  We cannot handle the abnormal case in which
   colormap[0] and colormap[1] have the same value (i.e. both white or
   both black.)

   'colormap' may be NULL (convertToPNM() in this file passes a null
   pointer).  A NULL colormap is treated like a colormap whose entry 0
   is all zero, i.e. the bits are inverted, instead of dereferencing
   the null pointer.

   Row 'row' is read starting at byte offset 4 * row * cols of BMPraster.
   NOTE(review): that stride is taken from the code as found -- confirm
   it matches the layout of the raster the caller supplies.

   We destroy *BMPraster as a side effect: rows are inverted and their
   padding bits cleared in place.
-----------------------------------------------------------------------------*/
    unsigned int const charBits = (sizeof(unsigned char) * 8);
        /* Number of bits in a character */
    unsigned int const colChars = pbm_packed_bytes(cols);
    int row;
    enum colorFormat {BlackWhite, WhiteBlack};
    enum colorFormat colorformat;

    /* Guard the colormap lookup: a null colormap must not be indexed. */
    if (colormap != NULL && PPM_GETR(colormap[0]) > 0)
        colorformat = WhiteBlack;
    else
        colorformat = BlackWhite;

    for (row = 0; row < rows; ++row) {
        /* Compute the offset in size_t so large images do not overflow
           int arithmetic. */
        unsigned char * const bitrow = BMPraster + ( (size_t)4 * row * cols );

        if (colorformat == BlackWhite) {
            unsigned int i;
            for (i = 0; i < colChars; ++i)
                bitrow[i] = ~bitrow[i]; /* flip all pixels */
        }

        if (cols % 8 > 0) {
            /* adjust final partial byte: shift the unused low-order bits
               out and back in so they end up as zero */
            bitrow[colChars-1] >>= charBits - cols % charBits;
            bitrow[colChars-1] <<= charBits - cols % charBits;
        }

        pbm_writepbmrow_packed(( unsigned char *& )PNMImage, lenbuffer, pos, bitrow, cols);
    }
}
static void
writeRasterGen(unsigned char * const BMPraster,
               int const cols,
               int const rows,
               int const format,
               unsigned int const cBitCount,
               struct pixelformat const pixelformat,
               xel const colormap[],
               char *& PNMImage,
               long * lenbuffer,
               long * pos) {
/*----------------------------------------------------------------------------
   Append the PNM raster corresponding to the raw raster BMPraster to the
   in-memory image PNMImage.  The PNM image has dimensions 'cols' by
   'rows' and format 'format'; rows are written with maxval bmpMaxval.

   The source raster has 'cBitCount' bits per pixel, arranged in format
   'pixelformat'; both are passed through to convertRow() together with
   'colormap' (colormap[i] is the color with color index i).

   Row 'row' is read starting at byte offset 4 * row * cols of BMPraster.
   NOTE(review): that stride is taken from the code as found -- confirm
   it matches the layout of the raster the caller supplies.

   Nothing is written when 'rows' is not positive or when the temporary
   xel row cannot be allocated.

   writeRasterPbm() is faster for a PBM image.
-----------------------------------------------------------------------------*/
    xel * xelrow;
    int row;

    xelrow = (xel *)malloc(sizeof (xel) * cols);
    /* malloc(0) may legitimately return NULL, so only a NULL result for a
       non-empty row is treated as an allocation failure */
    if (xelrow == NULL && cols != 0)
        return;

    /* 'row' is signed so that a negative 'rows' yields no iterations
       rather than being converted to a huge unsigned bound */
    for (row = 0; row < rows; ++row)
    {
        /* offset computed in size_t so large images do not overflow */
        convertRow(BMPraster + ( (size_t)4 * row * cols ), xelrow, cols, cBitCount, pixelformat, colormap);
        pnm_writepnmrow(PNMImage, pos, lenbuffer, xelrow, cols, bmpMaxval, format);
    }

    free(xelrow);
}
/* Convert a raw raster into an in-memory PNM image.

   Image        raw input raster; the 1-bit PBM path (writeRasterPbm())
                modifies it in place.
   Width/Height image dimensions in pixels.
   BitCount     bits per pixel of the input raster.
   ColorPresent selects PPM output when true.
   GrayPresent  selects PGM output when ColorPresent is false and this is
                true; when both are false the output is PBM.
   Count        receives the number of bytes in the returned buffer; it is
                left untouched when NULL is returned.

   Returns a malloc()ed buffer holding the header and raster that the
   caller must free(), or NULL when nothing was written or an allocation
   failed. */
char* convertToPNM ( unsigned char * const Image, unsigned long Width, unsigned long Height, unsigned long BitCount, bool ColorPresent, bool GrayPresent, long &Count )
{
    long lPos = 0;
    /* Initial capacity of the working buffer; the writers pass &lLength
       to append_to_buffer() along with the buffer. */
    long lLength = Width*Height;
    char *pPNMTmp = (char *)malloc(lLength);
    if (NULL == pPNMTmp)
        return NULL;

    /* Format of the raster bits for a single pixel */
    struct pixelformat pixelformat;
    pixelformat = defaultPixelformat ( BitCount );

    /* No colormap (palette) is supplied by this entry point, so the
       raster writers receive a null colormap pointer. */
    xel * colormap = NULL;

    int outputType;
    if (ColorPresent)
        outputType = PPM_TYPE;
    else if (GrayPresent)
        outputType = PGM_TYPE;
    else
        outputType = PBM_TYPE;

    if (outputType == PBM_TYPE && BitCount == 1)
    {
        /* 1-bit input going to PBM: rows are written as packed bits */
        pbm_writepbminit(pPNMTmp, &lLength, &lPos, Width, Height);
        writeRasterPbm(Image, Width, Height, colormap, pPNMTmp, &lLength, &lPos);
    }
    else
    {
        pnm_writepnminit(pPNMTmp, &lLength, &lPos, Width, Height, bmpMaxval, outputType);
        writeRasterGen(Image, Width, Height, outputType, BitCount, pixelformat, colormap, pPNMTmp, &lLength, &lPos);
    }

    /* Nothing written, or the working buffer was lost along the way
       (NOTE(review): assumes append_to_buffer() may hand back NULL on
       failure -- confirm against its definition). */
    if (0 >= lPos || NULL == pPNMTmp)
    {
        free (pPNMTmp);
        return NULL;
    }

    /* Hand back a buffer trimmed to exactly the bytes written. */
    char *pPNM = (char *)malloc(lPos);
    if (NULL == pPNM)
    {
        free (pPNMTmp);
        return NULL;
    }
    memcpy(pPNM, pPNMTmp, lPos);
    free (pPNMTmp);

    Count = lPos;
    return pPNM;
}

Binary file not shown.

View File

@@ -0,0 +1,855 @@
ChangeLog file for zlib
Changes in 1.2.3 (18 July 2005)
- Apply security vulnerability fixes to contrib/infback9 as well
- Clean up some text files (carriage returns, trailing space)
- Update testzlib, vstudio, masmx64, and masmx86 in contrib [Vollant]
Changes in 1.2.2.4 (11 July 2005)
- Add inflatePrime() function for starting inflation at bit boundary
- Avoid some Visual C warnings in deflate.c
- Avoid more silly Visual C warnings in inflate.c and inftrees.c for 64-bit
compile
- Fix some spelling errors in comments [Betts]
- Correct inflateInit2() error return documentation in zlib.h
- Added zran.c example of compressed data random access to examples
directory, shows use of inflatePrime()
- Fix cast for assignments to strm->state in inflate.c and infback.c
- Fix zlibCompileFlags() in zutil.c to use 1L for long shifts [Oberhumer]
- Move declarations of gf2 functions to right place in crc32.c [Oberhumer]
- Add cast in trees.c to avoid a warning [Oberhumer]
- Avoid some warnings in fitblk.c, gun.c, gzjoin.c in examples [Oberhumer]
- Update make_vms.com [Zinser]
- Initialize state->write in inflateReset() since copied in inflate_fast()
- Be more strict on incomplete code sets in inflate_table() and increase
ENOUGH and MAXD -- this repairs a possible security vulnerability for
invalid inflate input. Thanks to Tavis Ormandy and Markus Oberhumer for
discovering the vulnerability and providing test cases.
- Add ia64 support to configure for HP-UX [Smith]
- Add error return to gzread() for format or i/o error [Levin]
- Use malloc.h for OS/2 [Necasek]
Changes in 1.2.2.3 (27 May 2005)
- Replace 1U constants in inflate.c and inftrees.c for 64-bit compile
- Typecast fread() return values in gzio.c [Vollant]
- Remove trailing space in minigzip.c outmode (VC++ can't deal with it)
- Fix crc check bug in gzread() after gzungetc() [Heiner]
- Add the deflateTune() function to adjust internal compression parameters
- Add a fast gzip decompressor, gun.c, to examples (use of inflateBack)
- Remove an incorrect assertion in examples/zpipe.c
- Add C++ wrapper in infback9.h [Donais]
- Fix bug in inflateCopy() when decoding fixed codes
- Note in zlib.h how much deflateSetDictionary() actually uses
- Remove USE_DICT_HEAD in deflate.c (would mess up inflate if used)
- Add _WIN32_WCE to define WIN32 in zconf.in.h [Spencer]
- Don't include stderr.h or errno.h for _WIN32_WCE in zutil.h [Spencer]
- Add gzdirect() function to indicate transparent reads
- Update contrib/minizip [Vollant]
- Fix compilation of deflate.c when both ASMV and FASTEST [Oberhumer]
- Add casts in crc32.c to avoid warnings [Oberhumer]
- Add contrib/masmx64 [Vollant]
- Update contrib/asm586, asm686, masmx86, testzlib, vstudio [Vollant]
Changes in 1.2.2.2 (30 December 2004)
- Replace structure assignments in deflate.c and inflate.c with zmemcpy to
avoid implicit memcpy calls (portability for no-library compilation)
- Increase sprintf() buffer size in gzdopen() to allow for large numbers
- Add INFLATE_STRICT to check distances against zlib header
- Improve WinCE errno handling and comments [Chang]
- Remove comment about no gzip header processing in FAQ
- Add Z_FIXED strategy option to deflateInit2() to force fixed trees
- Add updated make_vms.com [Coghlan], update README
- Create a new "examples" directory, move gzappend.c there, add zpipe.c,
fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html.
- Add FAQ entry and comments in deflate.c on uninitialized memory access
- Add Solaris 9 make options in configure [Gilbert]
- Allow strerror() usage in gzio.c for STDC
- Fix DecompressBuf in contrib/delphi/ZLib.pas [ManChesTer]
- Update contrib/masmx86/inffas32.asm and gvmat32.asm [Vollant]
- Use z_off_t for adler32_combine() and crc32_combine() lengths
- Make adler32() much faster for small len
- Use OS_CODE in deflate() default gzip header
Changes in 1.2.2.1 (31 October 2004)
- Allow inflateSetDictionary() call for raw inflate
- Fix inflate header crc check bug for file names and comments
- Add deflateSetHeader() and gz_header structure for custom gzip headers
- Add inflateGetHeader() to retrieve gzip headers
- Add crc32_combine() and adler32_combine() functions
- Add alloc_func, free_func, in_func, out_func to Z_PREFIX list
- Use zstreamp consistently in zlib.h (inflate_back functions)
- Remove GUNZIP condition from definition of inflate_mode in inflate.h
and in contrib/inflate86/inffast.S [Truta, Anderson]
- Add support for AMD64 in contrib/inflate86/inffas86.c [Anderson]
- Update projects/README.projects and projects/visualc6 [Truta]
- Update win32/DLL_FAQ.txt [Truta]
- Avoid warning under NO_GZCOMPRESS in gzio.c; fix typo [Truta]
- Deprecate Z_ASCII; use Z_TEXT instead [Truta]
- Use a new algorithm for setting strm->data_type in trees.c [Truta]
- Do not define an exit() prototype in zutil.c unless DEBUG defined
- Remove prototype of exit() from zutil.c, example.c, minigzip.c [Truta]
- Add comment in zlib.h for Z_NO_FLUSH parameter to deflate()
- Fix Darwin build version identification [Peterson]
Changes in 1.2.2 (3 October 2004)
- Update zlib.h comments on gzip in-memory processing
- Set adler to 1 in inflateReset() to support Java test suite [Walles]
- Add contrib/dotzlib [Ravn]
- Update win32/DLL_FAQ.txt [Truta]
- Update contrib/minizip [Vollant]
- Move contrib/visual-basic.txt to old/ [Truta]
- Fix assembler builds in projects/visualc6/ [Truta]
Changes in 1.2.1.2 (9 September 2004)
- Update INDEX file
- Fix trees.c to update strm->data_type (no one ever noticed!)
- Fix bug in error case in inflate.c, infback.c, and infback9.c [Brown]
- Add "volatile" to crc table flag declaration (for DYNAMIC_CRC_TABLE)
- Add limited multitasking protection to DYNAMIC_CRC_TABLE
- Add NO_vsnprintf for VMS in zutil.h [Mozilla]
- Don't declare strerror() under VMS [Mozilla]
- Add comment to DYNAMIC_CRC_TABLE to use get_crc_table() to initialize
- Update contrib/ada [Anisimkov]
- Update contrib/minizip [Vollant]
- Fix configure to not hardcode directories for Darwin [Peterson]
- Fix gzio.c to not return error on empty files [Brown]
- Fix indentation; update version in contrib/delphi/ZLib.pas and
contrib/pascal/zlibpas.pas [Truta]
- Update mkasm.bat in contrib/masmx86 [Truta]
- Update contrib/untgz [Truta]
- Add projects/README.projects [Truta]
- Add project for MS Visual C++ 6.0 in projects/visualc6 [Cadieux, Truta]
- Update win32/DLL_FAQ.txt [Truta]
- Update list of Z_PREFIX symbols in zconf.h [Randers-Pehrson, Truta]
- Remove an unnecessary assignment to curr in inftrees.c [Truta]
- Add OS/2 to exe builds in configure [Poltorak]
- Remove err dummy parameter in zlib.h [Kientzle]
Changes in 1.2.1.1 (9 January 2004)
- Update email address in README
- Several FAQ updates
- Fix a big fat bug in inftrees.c that prevented decoding valid
dynamic blocks with only literals and no distance codes --
Thanks to "Hot Emu" for the bug report and sample file
- Add a note to puff.c on no distance codes case.
Changes in 1.2.1 (17 November 2003)
- Remove a tab in contrib/gzappend/gzappend.c
- Update some interfaces in contrib for new zlib functions
- Update zlib version number in some contrib entries
- Add Windows CE definition for ptrdiff_t in zutil.h [Mai, Truta]
- Support shared libraries on Hurd and KFreeBSD [Brown]
- Fix error in NO_DIVIDE option of adler32.c
Changes in 1.2.0.8 (4 November 2003)
- Update version in contrib/delphi/ZLib.pas and contrib/pascal/zlibpas.pas
- Add experimental NO_DIVIDE #define in adler32.c
- Possibly faster on some processors (let me know if it is)
- Correct Z_BLOCK to not return on first inflate call if no wrap
- Fix strm->data_type on inflate() return to correctly indicate EOB
- Add deflatePrime() function for appending in the middle of a byte
- Add contrib/gzappend for an example of appending to a stream
- Update win32/DLL_FAQ.txt [Truta]
- Delete Turbo C comment in README [Truta]
- Improve some indentation in zconf.h [Truta]
- Fix infinite loop on bad input in configure script [Church]
- Fix gzeof() for concatenated gzip files [Johnson]
- Add example to contrib/visual-basic.txt [Michael B.]
- Add -p to mkdir's in Makefile.in [vda]
- Fix configure to properly detect presence or lack of printf functions
- Add AS400 support [Monnerat]
- Add a little Cygwin support [Wilson]
Changes in 1.2.0.7 (21 September 2003)
- Correct some debug formats in contrib/infback9
- Cast a type in a debug statement in trees.c
- Change search and replace delimiter in configure from % to # [Beebe]
- Update contrib/untgz to 0.2 with various fixes [Truta]
- Add build support for Amiga [Nikl]
- Remove some directories in old that have been updated to 1.2
- Add dylib building for Mac OS X in configure and Makefile.in
- Remove old distribution stuff from Makefile
- Update README to point to DLL_FAQ.txt, and add comment on Mac OS X
- Update links in README
Changes in 1.2.0.6 (13 September 2003)
- Minor FAQ updates
- Update contrib/minizip to 1.00 [Vollant]
- Remove test of gz functions in example.c when GZ_COMPRESS defined [Truta]
- Update POSTINC comment for 68060 [Nikl]
- Add contrib/infback9 with deflate64 decoding (unsupported)
- For MVS define NO_vsnprintf and undefine FAR [van Burik]
- Add pragma for fdopen on MVS [van Burik]
Changes in 1.2.0.5 (8 September 2003)
- Add OF to inflateBackEnd() declaration in zlib.h
- Remember start when using gzdopen in the middle of a file
- Use internal off_t counters in gz* functions to properly handle seeks
- Perform more rigorous check for distance-too-far in inffast.c
- Add Z_BLOCK flush option to return from inflate at block boundary
- Set strm->data_type on return from inflate
- Indicate bits unused, if at block boundary, and if in last block
- Replace size_t with ptrdiff_t in crc32.c, and check for correct size
- Add condition so old NO_DEFLATE define still works for compatibility
- FAQ update regarding the Windows DLL [Truta]
- INDEX update: add qnx entry, remove aix entry [Truta]
- Install zlib.3 into mandir [Wilson]
- Move contrib/zlib_dll_FAQ.txt to win32/DLL_FAQ.txt; update [Truta]
- Adapt the zlib interface to the new DLL convention guidelines [Truta]
- Introduce ZLIB_WINAPI macro to allow the export of functions using
the WINAPI calling convention, for Visual Basic [Vollant, Truta]
- Update msdos and win32 scripts and makefiles [Truta]
- Export symbols by name, not by ordinal, in win32/zlib.def [Truta]
- Add contrib/ada [Anisimkov]
- Move asm files from contrib/vstudio/vc70_32 to contrib/asm386 [Truta]
- Rename contrib/asm386 to contrib/masmx86 [Truta, Vollant]
- Add contrib/masm686 [Truta]
- Fix offsets in contrib/inflate86 and contrib/masmx86/inffas32.asm
[Truta, Vollant]
- Update contrib/delphi; rename to contrib/pascal; add example [Truta]
- Remove contrib/delphi2; add a new contrib/delphi [Truta]
- Avoid inclusion of the nonstandard <memory.h> in contrib/iostream,
and fix some method prototypes [Truta]
- Fix the ZCR_SEED2 constant to avoid warnings in contrib/minizip
[Truta]
- Avoid the use of backslash (\) in contrib/minizip [Vollant]
- Fix file time handling in contrib/untgz; update makefiles [Truta]
- Update contrib/vstudio/vc70_32 to comply with the new DLL guidelines
[Vollant]
- Remove contrib/vstudio/vc15_16 [Vollant]
- Rename contrib/vstudio/vc70_32 to contrib/vstudio/vc7 [Truta]
- Update README.contrib [Truta]
- Invert the assignment order of match_head and s->prev[...] in
INSERT_STRING [Truta]
- Compare TOO_FAR with 32767 instead of 32768, to avoid 16-bit warnings
[Truta]
- Compare function pointers with 0, not with NULL or Z_NULL [Truta]
- Fix prototype of syncsearch in inflate.c [Truta]
- Introduce ASMINF macro to be enabled when using an ASM implementation
of inflate_fast [Truta]
- Change NO_DEFLATE to NO_GZCOMPRESS [Truta]
- Modify test_gzio in example.c to take a single file name as a
parameter [Truta]
- Exit the example.c program if gzopen fails [Truta]
- Add type casts around strlen in example.c [Truta]
- Remove casting to sizeof in minigzip.c; give a proper type
to the variable compared with SUFFIX_LEN [Truta]
- Update definitions of STDC and STDC99 in zconf.h [Truta]
- Synchronize zconf.h with the new Windows DLL interface [Truta]
- Use SYS16BIT instead of __32BIT__ to distinguish between
16- and 32-bit platforms [Truta]
- Use far memory allocators in small 16-bit memory models for
Turbo C [Truta]
- Add info about the use of ASMV, ASMINF and ZLIB_WINAPI in
zlibCompileFlags [Truta]
- Cygwin has vsnprintf [Wilson]
- In Windows16, OS_CODE is 0, as in MSDOS [Truta]
- In Cygwin, OS_CODE is 3 (Unix), not 11 (Windows32) [Wilson]
Changes in 1.2.0.4 (10 August 2003)
- Minor FAQ updates
- Be more strict when checking inflateInit2's windowBits parameter
- Change NO_GUNZIP compile option to NO_GZIP to cover deflate as well
- Add gzip wrapper option to deflateInit2 using windowBits
- Add updated QNX rule in configure and qnx directory [Bonnefoy]
- Make inflate distance-too-far checks more rigorous
- Clean up FAR usage in inflate
- Add casting to sizeof() in gzio.c and minigzip.c
Changes in 1.2.0.3 (19 July 2003)
- Fix silly error in gzungetc() implementation [Vollant]
- Update contrib/minizip and contrib/vstudio [Vollant]
- Fix printf format in example.c
- Correct cdecl support in zconf.in.h [Anisimkov]
- Minor FAQ updates
Changes in 1.2.0.2 (13 July 2003)
- Add ZLIB_VERNUM in zlib.h for numerical preprocessor comparisons
- Attempt to avoid warnings in crc32.c for pointer-int conversion
- Add AIX to configure, remove aix directory [Bakker]
- Add some casts to minigzip.c
- Improve checking after insecure sprintf() or vsprintf() calls
- Remove #elif's from crc32.c
- Change leave label to inf_leave in inflate.c and infback.c to avoid
library conflicts
- Remove inflate gzip decoding by default--only enable gzip decoding by
special request for stricter backward compatibility
- Add zlibCompileFlags() function to return compilation information
- More typecasting in deflate.c to avoid warnings
- Remove leading underscore from _Capital #defines [Truta]
- Fix configure to link shared library when testing
- Add some Windows CE target adjustments [Mai]
- Remove #define ZLIB_DLL in zconf.h [Vollant]
- Add zlib.3 [Rodgers]
- Update RFC URL in deflate.c and algorithm.txt [Mai]
- Add zlib_dll_FAQ.txt to contrib [Truta]
- Add UL to some constants [Truta]
- Update minizip and vstudio [Vollant]
- Remove vestigial NEED_DUMMY_RETURN from zconf.in.h
- Expand use of NO_DUMMY_DECL to avoid all dummy structures
- Added iostream3 to contrib [Schwardt]
- Replace rewind() with fseek() for WinCE [Truta]
- Improve setting of zlib format compression level flags
- Report 0 for huffman and rle strategies and for level == 0 or 1
- Report 2 only for level == 6
- Only deal with 64K limit when necessary at compile time [Truta]
- Allow TOO_FAR check to be turned off at compile time [Truta]
- Add gzclearerr() function [Souza]
- Add gzungetc() function
Changes in 1.2.0.1 (17 March 2003)
- Add Z_RLE strategy for run-length encoding [Truta]
- When Z_RLE requested, restrict matches to distance one
- Update zlib.h, minigzip.c, gzopen(), gzdopen() for Z_RLE
- Correct FASTEST compilation to allow level == 0
- Clean up what gets compiled for FASTEST
- Incorporate changes to zconf.in.h [Vollant]
- Refine detection of Turbo C need for dummy returns
- Refine ZLIB_DLL compilation
- Include additional header file on VMS for off_t typedef
- Try to use _vsnprintf where it supplants vsprintf [Vollant]
- Add some casts in inffast.c
- Enhance comments in zlib.h on what happens if gzprintf() tries to
write more than 4095 bytes before compression
- Remove unused state from inflateBackEnd()
- Remove exit(0) from minigzip.c, example.c
- Get rid of all those darn tabs
- Add "check" target to Makefile.in that does the same thing as "test"
- Add "mostlyclean" and "maintainer-clean" targets to Makefile.in
- Update contrib/inflate86 [Anderson]
- Update contrib/testzlib, contrib/vstudio, contrib/minizip [Vollant]
- Add msdos and win32 directories with makefiles [Truta]
- More additions and improvements to the FAQ
Changes in 1.2.0 (9 March 2003)
- New and improved inflate code
- About 20% faster
- Does not allocate 32K window unless and until needed
- Automatically detects and decompresses gzip streams
- Raw inflate no longer needs an extra dummy byte at end
- Added inflateBack functions using a callback interface--even faster
than inflate, useful for file utilities (gzip, zip)
- Added inflateCopy() function to record state for random access on
externally generated deflate streams (e.g. in gzip files)
- More readable code (I hope)
- New and improved crc32()
- About 50% faster, thanks to suggestions from Rodney Brown
- Add deflateBound() and compressBound() functions
- Fix memory leak in deflateInit2()
- Permit setting dictionary for raw deflate (for parallel deflate)
- Fix const declaration for gzwrite()
- Check for some malloc() failures in gzio.c
- Fix bug in gzopen() on single-byte file 0x1f
- Fix bug in gzread() on concatenated file with 0x1f at end of buffer
and next buffer doesn't start with 0x8b
- Fix uncompress() to return Z_DATA_ERROR on truncated input
- Free memory at end of example.c
- Remove MAX #define in trees.c (conflicted with some libraries)
- Fix static const's in deflate.c, gzio.c, and zutil.[ch]
- Declare malloc() and free() in gzio.c if STDC not defined
- Use malloc() instead of calloc() in zutil.c if int big enough
- Define STDC for AIX
- Add aix/ with approach for compiling shared library on AIX
- Add HP-UX support for shared libraries in configure
- Add OpenUNIX support for shared libraries in configure
- Use $cc instead of gcc to build shared library
- Make prefix directory if needed when installing
- Correct Macintosh avoidance of typedef Byte in zconf.h
- Correct Turbo C memory allocation when under Linux
- Use libz.a instead of -lz in Makefile (assure use of compiled library)
- Update configure to check for snprintf or vsnprintf functions and their
return value, warn during make if using an insecure function
- Fix configure problem with compile-time knowledge of HAVE_UNISTD_H that
is lost when library is used--resolution is to build new zconf.h
- Documentation improvements (in zlib.h):
- Document raw deflate and inflate
- Update RFCs URL
- Point out that zlib and gzip formats are different
- Note that Z_BUF_ERROR is not fatal
- Document string limit for gzprintf() and possible buffer overflow
- Note requirement on avail_out when flushing
- Note permitted values of flush parameter of inflate()
- Add some FAQs (and even answers) to the FAQ
- Add contrib/inflate86/ for x86 faster inflate
- Add contrib/blast/ for PKWare Data Compression Library decompression
- Add contrib/puff/ simple inflate for deflate format description
Changes in 1.1.4 (11 March 2002)
- ZFREE was repeated on same allocation on some error conditions.
This creates a security problem described in
http://www.zlib.org/advisory-2002-03-11.txt
- Returned incorrect error (Z_MEM_ERROR) on some invalid data
- Avoid accesses before window for invalid distances with inflate window
less than 32K.
- force windowBits > 8 to avoid a bug in the encoder for a window size
of 256 bytes. (A complete fix will be available in 1.1.5).
Changes in 1.1.3 (9 July 1998)
- fix "an inflate input buffer bug that shows up on rare but persistent
occasions" (Mark)
- fix gzread and gztell for concatenated .gz files (Didier Le Botlan)
- fix gzseek(..., SEEK_SET) in write mode
- fix crc check after a gzseek (Frank Faubert)
- fix miniunzip when the last entry in a zip file is itself a zip file
(J Lillge)
- add contrib/asm586 and contrib/asm686 (Brian Raiter)
See http://www.muppetlabs.com/~breadbox/software/assembly.html
- add support for Delphi 3 in contrib/delphi (Bob Dellaca)
- add support for C++Builder 3 and Delphi 3 in contrib/delphi2 (Davide Moretti)
- do not exit prematurely in untgz if 0 at start of block (Magnus Holmgren)
- use macro EXTERN instead of extern to support DLL for BeOS (Sander Stoks)
- added a FAQ file
- Support gzdopen on Mac with Metrowerks (Jason Linhart)
- Do not redefine Byte on Mac (Brad Pettit & Jason Linhart)
- define SEEK_END too if SEEK_SET is not defined (Albert Chin-A-Young)
- avoid some warnings with Borland C (Tom Tanner)
- fix a problem in contrib/minizip/zip.c for 16-bit MSDOS (Gilles Vollant)
- emulate utime() for WIN32 in contrib/untgz (Gilles Vollant)
- allow several arguments to configure (Tim Mooney, Frodo Looijaard)
- use libdir and includedir in Makefile.in (Tim Mooney)
- support shared libraries on OSF1 V4 (Tim Mooney)
- remove so_locations in "make clean" (Tim Mooney)
- fix maketree.c compilation error (Glenn, Mark)
- Python interface to zlib now in Python 1.5 (Jeremy Hylton)
- new Makefile.riscos (Rich Walker)
- initialize static descriptors in trees.c for embedded targets (Nick Smith)
- use "foo-gz" in example.c for RISCOS and VMS (Nick Smith)
- add the OS/2 files in Makefile.in too (Andrew Zabolotny)
- fix fdopen and halloc macros for Microsoft C 6.0 (Tom Lane)
- fix maketree.c to allow clean compilation of inffixed.h (Mark)
- fix parameter check in deflateCopy (Gunther Nikl)
- cleanup trees.c, use compressed_len only in debug mode (Christian Spieler)
- Many portability patches by Christian Spieler:
. zutil.c, zutil.h: added "const" for zmem*
. Make_vms.com: fixed some typos
. Make_vms.com: msdos/Makefile.*: removed zutil.h from some dependency lists
. msdos/Makefile.msc: remove "default rtl link library" info from obj files
. msdos/Makefile.*: use model-dependent name for the built zlib library
. msdos/Makefile.emx, nt/Makefile.emx, nt/Makefile.gcc:
new makefiles, for emx (DOS/OS2), emx&rsxnt and mingw32 (Windows 9x / NT)
- use define instead of typedef for Bytef also for MSC small/medium (Tom Lane)
- replace __far with _far for better portability (Christian Spieler, Tom Lane)
- fix test for errno.h in configure (Tim Newsham)
Changes in 1.1.2 (19 March 98)
- added contrib/minizip, mini zip and unzip based on zlib (Gilles Vollant)
See http://www.winimage.com/zLibDll/unzip.html
- preinitialize the inflate tables for fixed codes, to make the code
completely thread safe (Mark)
- some simplifications and slight speed-up to the inflate code (Mark)
- fix gzeof on non-compressed files (Allan Schrum)
- add -std1 option in configure for OSF1 to fix gzprintf (Martin Mokrejs)
- use default value of 4K for Z_BUFSIZE for 16-bit MSDOS (Tim Wegner + Glenn)
- added os2/Makefile.def and os2/zlib.def (Andrew Zabolotny)
- add shared lib support for UNIX_SV4.2MP (MATSUURA Takanori)
- do not wrap extern "C" around system includes (Tom Lane)
- mention zlib binding for TCL in README (Andreas Kupries)
- added amiga/Makefile.pup for Amiga powerUP SAS/C PPC (Andreas Kleinert)
- allow "make install prefix=..." even after configure (Glenn Randers-Pehrson)
- allow "configure --prefix $HOME" (Tim Mooney)
- remove warnings in example.c and gzio.c (Glenn Randers-Pehrson)
- move Makefile.sas to amiga/Makefile.sas
Changes in 1.1.1 (27 Feb 98)
- fix macros _tr_tally_* in deflate.h for debug mode (Glenn Randers-Pehrson)
- remove block truncation heuristic which had very marginal effect for zlib
(smaller lit_bufsize than in gzip 1.2.4) and degraded a little the
compression ratio on some files. This also allows inlining _tr_tally for
matches in deflate_slow.
- added msdos/Makefile.w32 for WIN32 Microsoft Visual C++ (Bob Frazier)
Changes in 1.1.0 (24 Feb 98)
- do not return STREAM_END prematurely in inflate (John Bowler)
- revert to the zlib 1.0.8 inflate to avoid the gcc 2.8.0 bug (Jeremy Buhler)
- compile with -DFASTEST to get compression code optimized for speed only
- in minigzip, try mmap'ing the input file first (Miguel Albrecht)
- increase size of I/O buffers in minigzip.c and gzio.c (not a big gain
on Sun but significant on HP)
- add a pointer to experimental unzip library in README (Gilles Vollant)
- initialize variable gcc in configure (Chris Herborth)
Changes in 1.0.9 (17 Feb 1998)
- added gzputs and gzgets functions
- do not clear eof flag in gzseek (Mark Diekhans)
- fix gzseek for files in transparent mode (Mark Diekhans)
- do not assume that vsprintf returns the number of bytes written (Jens Krinke)
- replace EXPORT with ZEXPORT to avoid conflict with other programs
- added compress2 in zconf.h, zlib.def, zlib.dnt
- new asm code from Gilles Vollant in contrib/asm386
- simplify the inflate code (Mark):
. Replace ZALLOC's in huft_build() with single ZALLOC in inflate_blocks_new()
. ZALLOC the length list in inflate_trees_fixed() instead of using stack
. ZALLOC the value area for huft_build() instead of using stack
. Simplify Z_FINISH check in inflate()
- Avoid gcc 2.8.0 comparison bug a little differently than zlib 1.0.8
- in inftrees.c, avoid cc -O bug on HP (Farshid Elahi)
- in zconf.h move the ZLIB_DLL stuff earlier to avoid problems with
the declaration of FAR (Gilles Vollant)
- install libz.so* with mode 755 (executable) instead of 644 (Marc Lehmann)
- read_buf buf parameter of type Bytef* instead of charf*
- zmemcpy parameters are of type Bytef*, not charf* (Joseph Strout)
- do not redeclare unlink in minigzip.c for WIN32 (John Bowler)
- fix check for presence of directories in "make install" (Ian Willis)
Changes in 1.0.8 (27 Jan 1998)
- fixed offsets in contrib/asm386/gvmat32.asm (Gilles Vollant)
- fix gzgetc and gzputc for big endian systems (Markus Oberhumer)
- added compress2() to allow setting the compression level
- include sys/types.h to get off_t on some systems (Marc Lehmann & QingLong)
- use constant arrays for the static trees in trees.c instead of computing
them at run time (thanks to Ken Raeburn for this suggestion). To create
trees.h, compile with GEN_TREES_H and run "make test".
- check return code of example in "make test" and display result
- pass minigzip command line options to file_compress
- simplifying code of inflateSync to avoid gcc 2.8 bug
- support CC="gcc -Wall" in configure -s (QingLong)
- avoid a flush caused by ftell in gzopen for write mode (Ken Raeburn)
- fix test for shared library support to avoid compiler warnings
- zlib.lib -> zlib.dll in msdos/zlib.rc (Gilles Vollant)
- check for TARGET_OS_MAC in addition to MACOS (Brad Pettit)
- do not use fdopen for Metrowerks on Mac (Brad Pettit)
- add checks for gzputc and gzgetc in example.c
- avoid warnings in gzio.c and deflate.c (Andreas Kleinert)
- use const for the CRC table (Ken Raeburn)
- fixed "make uninstall" for shared libraries
- use Tracev instead of Trace in infblock.c
- in example.c use correct compressed length for test_sync
- suppress +vnocompatwarnings in configure for HPUX (not always supported)
Changes in 1.0.7 (20 Jan 1998)
- fix gzseek which was broken in write mode
- return error for gzseek to negative absolute position
- fix configure for Linux (Chun-Chung Chen)
- increase stack space for MSC (Tim Wegner)
- get_crc_table and inflateSyncPoint are EXPORTed (Gilles Vollant)
- define EXPORTVA for gzprintf (Gilles Vollant)
- added man page zlib.3 (Rick Rodgers)
- for contrib/untgz, fix makedir() and improve Makefile
- check gzseek in write mode in example.c
- allocate extra buffer for seeks only if gzseek is actually called
- avoid signed/unsigned comparisons (Tim Wegner, Gilles Vollant)
- add inflateSyncPoint in zconf.h
- fix list of exported functions in nt/zlib.dnt and msdos/zlib.def
Changes in 1.0.6 (19 Jan 1998)
- add functions gzprintf, gzputc, gzgetc, gztell, gzeof, gzseek, gzrewind and
gzsetparams (thanks to Roland Giersig and Kevin Ruland for some of this code)
- Fix a deflate bug occurring only with compression level 0 (thanks to
Andy Buckler for finding this one).
- In minigzip, pass transparently also the first byte for .Z files.
- return Z_BUF_ERROR instead of Z_OK if output buffer full in uncompress()
- check Z_FINISH in inflate (thanks to Marc Schluper)
- Implement deflateCopy (thanks to Adam Costello)
- make static libraries by default in configure, add --shared option.
- move MSDOS or Windows specific files to directory msdos
- suppress the notion of partial flush to simplify the interface
(but the symbol Z_PARTIAL_FLUSH is kept for compatibility with 1.0.4)
- suppress history buffer provided by application to simplify the interface
(this feature was not implemented anyway in 1.0.4)
- next_in and avail_in must be initialized before calling inflateInit or
inflateInit2
- add EXPORT in all exported functions (for Windows DLL)
- added Makefile.nt (thanks to Stephen Williams)
- added the unsupported "contrib" directory:
contrib/asm386/ by Gilles Vollant <info@winimage.com>
386 asm code replacing longest_match().
contrib/iostream/ by Kevin Ruland <kevin@rodin.wustl.edu>
A C++ I/O streams interface to the zlib gz* functions
contrib/iostream2/ by Tyge Løvset <Tyge.Lovset@cmr.no>
Another C++ I/O streams interface
contrib/untgz/ by "Pedro A. Aranda Gutiérrez" <paag@tid.es>
A very simple tar.gz file extractor using zlib
contrib/visual-basic.txt by Carlos Rios <c_rios@sonda.cl>
How to use compress(), uncompress() and the gz* functions from VB.
- pass params -f (filtered data), -h (huffman only), -1 to -9 (compression
level) in minigzip (thanks to Tom Lane)
- use const for rommable constants in deflate
- added test for gzseek and gztell in example.c
- add undocumented function inflateSyncPoint() (hack for Paul Mackerras)
- add undocumented function zError to convert error code to string
(for Tim Smithers)
- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code.
- Use default memcpy for Symantec MSDOS compiler.
- Add EXPORT keyword for check_func (needed for Windows DLL)
- add current directory to LD_LIBRARY_PATH for "make test"
- create also a link for libz.so.1
- added support for FUJITSU UXP/DS (thanks to Toshiaki Nomura)
- use $(SHAREDLIB) instead of libz.so in Makefile.in (for HPUX)
- added -soname for Linux in configure (Chun-Chung Chen)
- assign numbers to the exported functions in zlib.def (for Windows DLL)
- add advice in zlib.h for best usage of deflateSetDictionary
- work around compiler bug on Atari (cast Z_NULL in call of s->checkfn)
- allow compilation with ANSI keywords only enabled for TurboC in large model
- avoid "versionString"[0] (Borland bug)
- add NEED_DUMMY_RETURN for Borland
- use variable z_verbose for tracing in debug mode (L. Peter Deutsch).
- allow compilation with CC
- defined STDC for OS/2 (David Charlap)
- limit external names to 8 chars for MVS (Thomas Lund)
- in minigzip.c, use static buffers only for 16-bit systems
- fix suffix check for "minigzip -d foo.gz"
- do not return an error for the 2nd of two consecutive gzflush() (Felix Lee)
- use _fdopen instead of fdopen for MSC >= 6.0 (Thomas Fanslau)
- added makelcc.bat for lcc-win32 (Tom St Denis)
- in Makefile.dj2, use copy and del instead of install and rm (Frank Donahoe)
- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion.
- check for unistd.h in configure (for off_t)
- remove useless check parameter in inflate_blocks_free
- avoid useless assignment of s->check to itself in inflate_blocks_new
- do not flush twice in gzclose (thanks to Ken Raeburn)
- rename FOPEN as F_OPEN to avoid clash with /usr/include/sys/file.h
- use NO_ERRNO_H instead of enumeration of operating systems with errno.h
- work around buggy fclose on pipes for HP/UX
- support zlib DLL with BORLAND C++ 5.0 (thanks to Glenn Randers-Pehrson)
- fix configure if CC is already equal to gcc
Changes in 1.0.5 (3 Jan 98)
- Fix inflate to terminate gracefully when fed corrupted or invalid data
- Use const for rommable constants in inflate
- Eliminate memory leaks on error conditions in inflate
- Removed some vestigial code in inflate
- Update web address in README
Changes in 1.0.4 (24 Jul 96)
- In very rare conditions, deflate(s, Z_FINISH) could fail to produce an EOF
bit, so the decompressor could decompress all the correct data but went
on to attempt decompressing extra garbage data. This affected minigzip too.
- zlibVersion and gzerror return const char* (needed for DLL)
- port to RISCOS (no fdopen, no multiple dots, no unlink, no fileno)
- use z_error only for DEBUG (avoid problem with DLLs)
Changes in 1.0.3 (2 Jul 96)
- use z_streamp instead of z_stream *, which is now a far pointer in MSDOS
small and medium models; this makes the library incompatible with previous
versions for these models. (No effect in large model or on other systems.)
- return OK instead of BUF_ERROR if previous deflate call returned with
avail_out as zero but there is nothing to do
- added memcmp for non STDC compilers
- define NO_DUMMY_DECL for more Mac compilers (.h files merged incorrectly)
- define __32BIT__ if __386__ or i386 is defined (pb. with Watcom and SCO)
- better check for 16-bit mode MSC (avoids problem with Symantec)
Changes in 1.0.2 (23 May 96)
- added Windows DLL support
- added a function zlibVersion (for the DLL support)
- fixed declarations using Bytef in infutil.c (pb with MSDOS medium model)
- Bytef is define'd instead of typedef'd only for Borland C
- avoid reading uninitialized memory in example.c
- mention in README that the zlib format is now RFC1950
- updated Makefile.dj2
- added algorithm.doc
Changes in 1.0.1 (20 May 96) [1.0 skipped to avoid confusion]
- fix array overlay in deflate.c which sometimes caused bad compressed data
- fix inflate bug with empty stored block
- fix MSDOS medium model which was broken in 0.99
- fix deflateParams() which could generate bad compressed data.
- Bytef is define'd instead of typedef'ed (work around Borland bug)
- added an INDEX file
- new makefiles for DJGPP (Makefile.dj2), 32-bit Borland (Makefile.b32),
Watcom (Makefile.wat), Amiga SAS/C (Makefile.sas)
- speed up adler32 for modern machines without auto-increment
- added -ansi for IRIX in configure
- static_init_done in trees.c is an int
- define unlink as delete for VMS
- fix configure for QNX
- add configure branch for SCO and HPUX
- avoid many warnings (unused variables, dead assignments, etc...)
- no fdopen for BeOS
- fix the Watcom fix for 32 bit mode (define FAR as empty)
- removed redefinition of Byte for MWERKS
- work around an MWERKS bug (incorrect merge of all .h files)
Changes in 0.99 (27 Jan 96)
- allow preset dictionary shared between compressor and decompressor
- allow compression level 0 (no compression)
- add deflateParams in zlib.h: allow dynamic change of compression level
and compression strategy.
- test large buffers and deflateParams in example.c
- add optional "configure" to build zlib as a shared library
- suppress Makefile.qnx, use configure instead
- fixed deflate for 64-bit systems (detected on Cray)
- fixed inflate_blocks for 64-bit systems (detected on Alpha)
- declare Z_DEFLATED in zlib.h (possible parameter for deflateInit2)
- always return Z_BUF_ERROR when deflate() has nothing to do
- deflateInit and inflateInit are now macros to allow version checking
- prefix all global functions and types with z_ with -DZ_PREFIX
- make falloc completely reentrant (inftrees.c)
- fixed very unlikely race condition in ct_static_init
- free in reverse order of allocation to help memory manager
- use zlib-1.0/* instead of zlib/* inside the tar.gz
- make zlib warning-free with "gcc -O3 -Wall -Wwrite-strings -Wpointer-arith
-Wconversion -Wstrict-prototypes -Wmissing-prototypes"
- allow gzread on concatenated .gz files
- deflateEnd now returns Z_DATA_ERROR if it was premature
- deflate is finally (?) fully deterministic (no matches beyond end of input)
- Document Z_SYNC_FLUSH
- add uninstall in Makefile
- Check for __cplusplus in zlib.h
- Better test in ct_align for partial flush
- avoid harmless warnings for Borland C++
- initialize hash_head in deflate.c
- avoid warning on fdopen (gzio.c) for HP cc -Aa
- include stdlib.h for STDC compilers
- include errno.h for Cray
- ignore error if ranlib doesn't exist
- call ranlib twice for NeXTSTEP
- use exec_prefix instead of prefix for libz.a
- renamed ct_* as _tr_* to avoid conflict with applications
- clear z->msg in inflateInit2 before any error return
- initialize opaque in example.c, gzio.c, deflate.c and inflate.c
- fixed typo in zconf.h (_GNUC__ => __GNUC__)
- check for WIN32 in zconf.h and zutil.c (avoid farmalloc in 32-bit mode)
- fix typo in Make_vms.com (f$trnlnm -> f$getsyi)
- in fcalloc, normalize pointer if size > 65520 bytes
- don't use special fcalloc for 32 bit Borland C++
- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc...
- use Z_BINARY instead of BINARY
- document that gzclose after gzdopen will close the file
- allow "a" as mode in gzopen.
- fix error checking in gzread
- allow skipping .gz extra-field on pipes
- added reference to Perl interface in README
- put the crc table in FAR data (I dislike more and more the medium model :)
- added get_crc_table
- added a dimension to all arrays (Borland C can't count).
- workaround Borland C bug in declaration of inflate_codes_new & inflate_fast
- guard against multiple inclusion of *.h (for precompiled header on Mac)
- Watcom C pretends to be Microsoft C small model even in 32 bit mode.
- don't use unsized arrays to avoid silly warnings by Visual C++:
warning C4746: 'inflate_mask' : unsized array treated as '__far'
(what's wrong with far data in far model?).
- define enum out of inflate_blocks_state to allow compilation with C++
Changes in 0.95 (16 Aug 95)
- fix MSDOS small and medium model (now easier to adapt to any compiler)
- inlined send_bits
- fix the final (:-) bug for deflate with flush (output was correct but
not completely flushed in rare occasions).
- default window size is same for compression and decompression
(it's now sufficient to set MAX_WBITS in zconf.h).
- voidp -> voidpf and voidnp -> voidp (for consistency with other
typedefs and because voidnp was not near in large model).
Changes in 0.94 (13 Aug 95)
- support MSDOS medium model
- fix deflate with flush (could sometimes generate bad output)
- fix deflateReset (zlib header was incorrectly suppressed)
- added support for VMS
- allow a compression level in gzopen()
- gzflush now calls fflush
- For deflate with flush, flush even if no more input is provided.
- rename libgz.a as libz.a
- avoid complex expression in infcodes.c triggering Turbo C bug
- work around a problem with gcc on Alpha (in INSERT_STRING)
- don't use inline functions (problem with some gcc versions)
- allow renaming of Byte, uInt, etc... with #define.
- avoid warning about (unused) pointer before start of array in deflate.c
- avoid various warnings in gzio.c, example.c, infblock.c, adler32.c, zutil.c
- avoid reserved word 'new' in trees.c
Changes in 0.93 (25 June 95)
- temporarily disable inline functions
- make deflate deterministic
- give enough lookahead for PARTIAL_FLUSH
- Set binary mode for stdin/stdout in minigzip.c for OS/2
- don't even use signed char in inflate (not portable enough)
- fix inflate memory leak for segmented architectures
Changes in 0.92 (3 May 95)
- don't assume that char is signed (problem on SGI)
- Clear bit buffer when starting a stored block
- no memcpy on Pyramid
- suppressed inftest.c
- optimized fill_window, put longest_match inline for gcc
- optimized inflate on stored blocks.
- untabify all sources to simplify patches
Changes in 0.91 (2 May 95)
- Default MEM_LEVEL is 8 (not 9 for Unix) as documented in zlib.h
- Document the memory requirements in zconf.h
- added "make install"
- fix sync search logic in inflateSync
- deflate(Z_FULL_FLUSH) now works even if output buffer too short
- after inflateSync, don't scare people with just "lo world"
- added support for DJGPP
Changes in 0.9 (1 May 95)
- don't assume that zalloc clears the allocated memory (the TurboC bug
was Mark's bug after all :)
- let again gzread copy uncompressed data unchanged (was working in 0.71)
- deflate(Z_FULL_FLUSH), inflateReset and inflateSync are now fully implemented
- added a test of inflateSync in example.c
- moved MAX_WBITS to zconf.h because users might want to change that.
- document explicitly that zalloc(64K) on MSDOS must return a normalized
pointer (zero offset)
- added Makefiles for Microsoft C, Turbo C, Borland C++
- faster crc32()
Changes in 0.8 (29 April 95)
- added fast inflate (inffast.c)
- deflate(Z_FINISH) now returns Z_STREAM_END when done. Warning: this
is incompatible with previous versions of zlib which returned Z_OK.
- work around a TurboC compiler bug (bad code for b << 0, see infutil.h)
(actually that was not a compiler bug, see 0.81 above)
- gzread no longer reads one extra byte in certain cases
- In gzio destroy(), don't reference a freed structure
- avoid many warnings for MSDOS
- avoid the ERROR symbol which is used by MS Windows
Changes in 0.71 (14 April 95)
- Fixed more MSDOS compilation problems :( There is still a bug with
TurboC large model.
Changes in 0.7 (14 April 95)
- Added full inflate support.
- Simplified the crc32() interface. The pre- and post-conditioning
(one's complement) is now done inside crc32(). WARNING: this is
incompatible with previous versions; see zlib.h for the new usage.
Changes in 0.61 (12 April 95)
- workaround for a bug in TurboC. example and minigzip now work on MSDOS.
Changes in 0.6 (11 April 95)
- added minigzip.c
- added gzdopen to reopen a file descriptor as gzFile
- added transparent reading of non-gzipped files in gzread.
- fixed bug in gzread (don't read crc as data)
- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose).
- don't allocate big arrays in the stack (for MSDOS)
- fix some MSDOS compilation problems
Changes in 0.5:
- do real compression in deflate.c. Z_PARTIAL_FLUSH is supported but
not yet Z_FULL_FLUSH.
- support decompression but only in a single step (forced Z_FINISH)
- added opaque object for zalloc and zfree.
- added deflateReset and inflateReset
- added a variable zlib_version for consistency checking.
- renamed the 'filter' parameter of deflateInit2 as 'strategy'.
Added Z_FILTERED and Z_HUFFMAN_ONLY constants.
Changes in 0.4:
- avoid "zip" everywhere, use zlib instead of ziplib.
- suppress Z_BLOCK_FLUSH, interpret Z_PARTIAL_FLUSH as block flush
if compression method == 8.
- added adler32 and crc32
- renamed deflateOptions as deflateInit2, call one or the other but not both
- added the method parameter for deflateInit2.
- added inflateInit2
- simplified considerably deflateInit and inflateInit by not supporting
user-provided history buffer. This is supported only in deflateInit2
and inflateInit2.
Changes in 0.3:
- prefix all macro names with Z_
- use Z_FINISH instead of deflateEnd to finish compression.
- added Z_HUFFMAN_ONLY
- added gzerror()

View File

@@ -0,0 +1,339 @@
Frequently Asked Questions about zlib
If your question is not there, please check the zlib home page
http://www.zlib.org which may have more recent information.
The latest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
1. Is zlib Y2K-compliant?
Yes. zlib doesn't handle dates.
2. Where can I get a Windows DLL version?
The zlib sources can be compiled without change to produce a DLL.
See the file win32/DLL_FAQ.txt in the zlib distribution.
Pointers to the precompiled DLL are found in the zlib web site at
http://www.zlib.org.
3. Where can I get a Visual Basic interface to zlib?
See
* http://www.dogma.net/markn/articles/zlibtool/zlibtool.htm
* contrib/visual-basic.txt in the zlib distribution
* win32/DLL_FAQ.txt in the zlib distribution
4. compress() returns Z_BUF_ERROR.
Make sure that before the call of compress, the length of the compressed
buffer is equal to the total size of the compressed buffer and not
zero. For Visual Basic, check that this parameter is passed by reference
("as any"), not by value ("as long").
5. deflate() or inflate() returns Z_BUF_ERROR.
Before making the call, make sure that avail_in and avail_out are not
zero. When setting the parameter flush equal to Z_FINISH, also make sure
that avail_out is big enough to allow processing all pending input.
Note that a Z_BUF_ERROR is not fatal--another call to deflate() or
inflate() can be made with more input or output space. A Z_BUF_ERROR
may in fact be unavoidable depending on how the functions are used, since
it is not possible to tell whether or not there is more output pending
when strm.avail_out returns with zero.
6. Where's the zlib documentation (man pages, etc.)?
It's in zlib.h for the moment, and Francis S. Lin has converted it to a
web page zlib.html. Volunteers to transform this to Unix-style man pages,
please contact us (zlib@gzip.org). Examples of zlib usage are in the files
example.c and minigzip.c.
7. Why don't you use GNU autoconf or libtool or ...?
Because we would like to keep zlib as a very small and simple
package. zlib is rather portable and doesn't need much configuration.
8. I found a bug in zlib.
Most of the time, such problems are due to an incorrect usage of
zlib. Please try to reproduce the problem with a small program and send
the corresponding source to us at zlib@gzip.org . Do not send
multi-megabyte data files without prior agreement.
9. Why do I get "undefined reference to gzputc"?
If "make test" produces something like
example.o(.text+0x154): undefined reference to `gzputc'
check that you don't have old files libz.* in /usr/lib, /usr/local/lib or
/usr/X11R6/lib. Remove any old versions, then do "make install".
10. I need a Delphi interface to zlib.
See the contrib/delphi directory in the zlib distribution.
11. Can zlib handle .zip archives?
Not by itself, no. See the directory contrib/minizip in the zlib
distribution.
12. Can zlib handle .Z files?
No, sorry. You have to spawn an uncompress or gunzip subprocess, or adapt
the code of uncompress on your own.
13. How can I make a Unix shared library?
make clean
./configure -s
make
14. How do I install a shared zlib library on Unix?
After the above, then:
make install
However, many flavors of Unix come with a shared zlib already installed.
Before going to the trouble of compiling a shared version of zlib and
trying to install it, you may want to check if it's already there! If you
can #include <zlib.h>, it's there. The -lz option will probably link to it.
15. I have a question about OttoPDF.
We are not the authors of OttoPDF. The real author is on the OttoPDF web
site: Joel Hainley, jhainley@myndkryme.com.
16. Can zlib decode Flate data in an Adobe PDF file?
Yes. See http://www.fastio.com/ (ClibPDF), or http://www.pdflib.com/ .
To modify PDF forms, see http://sourceforge.net/projects/acroformtool/ .
17. Why am I getting this "register_frame_info not found" error on Solaris?
After installing zlib 1.1.4 on Solaris 2.6, running applications using zlib
generates an error such as:
ld.so.1: rpm: fatal: relocation error: file /usr/local/lib/libz.so:
symbol __register_frame_info: referenced symbol not found
The symbol __register_frame_info is not part of zlib, it is generated by
the C compiler (cc or gcc). You must recompile applications using zlib
which have this problem. This problem is specific to Solaris. See
http://www.sunfreeware.com for Solaris versions of zlib and applications
using zlib.
18. Why does gzip give an error on a file I make with compress/deflate?
The compress and deflate functions produce data in the zlib format, which
is different and incompatible with the gzip format. The gz* functions in
zlib on the other hand use the gzip format. Both the zlib and gzip
formats use the same compressed data format internally, but have different
headers and trailers around the compressed data.
19. Ok, so why are there two different formats?
The gzip format was designed to retain the directory information about
a single file, such as the name and last modification date. The zlib
format on the other hand was designed for in-memory and communication
channel applications, and has a much more compact header and trailer and
uses a faster integrity check than gzip.
20. Well that's nice, but how do I make a gzip file in memory?
You can request that deflate write the gzip format instead of the zlib
format using deflateInit2(). You can also request that inflate decode
the gzip format using inflateInit2(). Read zlib.h for more details.
21. Is zlib thread-safe?
Yes. However any library routines that zlib uses and any application-
provided memory allocation routines must also be thread-safe. zlib's gz*
functions use stdio library routines, and most of zlib's functions use the
library memory allocation routines by default. zlib's Init functions allow
for the application to provide custom memory allocation routines.
Of course, you should only operate on any given zlib or gzip stream from a
single thread at a time.
22. Can I use zlib in my commercial application?
Yes. Please read the license in zlib.h.
23. Is zlib under the GNU license?
No. Please read the license in zlib.h.
24. The license says that altered source versions must be "plainly marked". So
what exactly do I need to do to meet that requirement?
You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h. In
particular, the final version number needs to be changed to "f", and an
identification string should be appended to ZLIB_VERSION. Version numbers
x.x.x.f are reserved for modifications to zlib by others than the zlib
maintainers. For example, if the version of the base zlib you are altering
is "1.2.3.4", then in zlib.h you should change ZLIB_VERNUM to 0x123f, and
ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3". You can also
update the version strings in deflate.c and inftrees.c.
For altered source distributions, you should also note the origin and
nature of the changes in zlib.h, as well as in ChangeLog and README, along
with the dates of the alterations. The origin should include at least your
name (or your company's name), and an email address to contact for help or
issues with the library.
Note that distributing a compiled zlib library along with zlib.h and
zconf.h is also a source distribution, and so you should change
ZLIB_VERSION and ZLIB_VERNUM and note the origin and nature of the changes
in zlib.h as you would for a full source distribution.
25. Will zlib work on a big-endian or little-endian architecture, and can I
exchange compressed data between them?
Yes and yes.
26. Will zlib work on a 64-bit machine?
It should. It has been tested on 64-bit machines, and has no dependence
on any data types being limited to 32 bits in length. If you have any
difficulties, please provide a complete problem report to zlib@gzip.org.
27. Will zlib decompress data from the PKWare Data Compression Library?
No. The PKWare DCL uses a completely different compressed data format
than does PKZIP and zlib. However, you can look in zlib's contrib/blast
directory for a possible solution to your problem.
28. Can I access data randomly in a compressed stream?
No, not without some preparation. If when compressing you periodically
use Z_FULL_FLUSH, carefully write all the pending data at those points,
and keep an index of those locations, then you can start decompression
at those points. You have to be careful to not use Z_FULL_FLUSH too
often, since it can significantly degrade compression.
29. Does zlib work on MVS, OS/390, CICS, etc.?
We don't know for sure. We have heard occasional reports of success on
these systems. If you do use it on one of these, please provide us with
a report, instructions, and patches that we can reference when we get
these questions. Thanks.
30. Is there some simpler, easier to read version of inflate I can look at
to understand the deflate format?
First off, you should read RFC 1951. Second, yes. Look in zlib's
contrib/puff directory.
31. Does zlib infringe on any patents?
As far as we know, no. In fact, that was originally the whole point behind
zlib. Look here for some more information:
http://www.gzip.org/#faq11
32. Can zlib work with greater than 4 GB of data?
Yes. inflate() and deflate() will process any amount of data correctly.
Each call of inflate() or deflate() is limited to input and output chunks
of the maximum value that can be stored in the compiler's "unsigned int"
type, but there is no limit to the number of chunks. Note however that the
strm.total_in and strm.total_out counters may be limited to 4 GB. These
counters are provided as a convenience and are not used internally by
inflate() or deflate(). The application can easily set up its own counters
updated after each call of inflate() or deflate() to count beyond 4 GB.
compress() and uncompress() may be limited to 4 GB, since they operate in a
single call. gzseek() and gztell() may be limited to 4 GB depending on how
zlib is compiled. See the zlibCompileFlags() function in zlib.h.
The word "may" appears several times above since there is a 4 GB limit
only if the compiler's "long" type is 32 bits. If the compiler's "long"
type is 64 bits, then the limit is 16 exabytes.
33. Does zlib have any security vulnerabilities?
The only one that we are aware of is potentially in gzprintf(). If zlib
is compiled to use sprintf() or vsprintf(), then there is no protection
against a buffer overflow of a 4K string space, other than the caller of
gzprintf() assuring that the output will not exceed 4K. On the other
hand, if zlib is compiled to use snprintf() or vsnprintf(), which should
normally be the case, then there is no vulnerability. The ./configure
script will display warnings if an insecure variation of sprintf() will
be used by gzprintf(). Also the zlibCompileFlags() function will return
information on what variant of sprintf() is used by gzprintf().
If you don't have snprintf() or vsnprintf() and would like one, you can
find a portable implementation here:
http://www.ijs.si/software/snprintf/
Note that you should be using the most recent version of zlib. Versions
1.1.3 and before were subject to a double-free vulnerability.
34. Is there a Java version of zlib?
Probably what you want is to use zlib in Java. zlib is already included
as part of the Java SDK in the java.util.zip package. If you really want
a version of zlib written in the Java language, look on the zlib home
page for links: http://www.zlib.org/
35. I get this or that compiler or source-code scanner warning when I crank it
up to maximally-pedantic. Can't you guys write proper code?
Many years ago, we gave up attempting to avoid warnings on every compiler
in the universe. It just got to be a waste of time, and some compilers
were downright silly. So now, we simply make sure that the code always
works.
36. Valgrind (or some similar memory access checker) says that deflate is
performing a conditional jump that depends on an uninitialized value.
Isn't that a bug?
No. That is intentional for performance reasons, and the output of
deflate is not affected. This only started showing up recently since
zlib 1.2.x uses malloc() by default for allocations, whereas earlier
versions used calloc(), which zeros out the allocated memory.
37. Will zlib read the (insert any ancient or arcane format here) compressed
data format?
Probably not. Look in the comp.compression FAQ for pointers to various
formats and associated software.
38. How can I encrypt/decrypt zip files with zlib?
zlib doesn't support encryption. The original PKZIP encryption is very weak
and can be broken with freely available programs. To get strong encryption,
use GnuPG, http://www.gnupg.org/ , which already includes zlib compression.
For PKZIP compatible "encryption", look at http://www.info-zip.org/
39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings?
"gzip" is the gzip format, and "deflate" is the zlib format. They should
probably have called the second one "zlib" instead to avoid confusion
with the raw deflate compressed data format. While the HTTP 1.1 RFC 2616
correctly points to the zlib specification in RFC 1950 for the "deflate"
transfer encoding, there have been reports of servers and browsers that
incorrectly produce or expect raw deflate data per the deflate
specification in RFC 1951, most notably Microsoft. So even though the
"deflate" transfer encoding using the zlib format would be the more
efficient approach (and in fact exactly what the zlib format was designed
for), using the "gzip" transfer encoding is probably more reliable due to
an unfortunate choice of name on the part of the HTTP 1.1 authors.
Bottom line: use the gzip format for HTTP 1.1 encoding.
40. Does zlib support the new "Deflate64" format introduced by PKWare?
No. PKWare has apparently decided to keep that format proprietary, since
they have not documented it as they have previous compression formats.
In any case, the compression improvements are so modest compared to other
more modern approaches, that it's not worth the effort to implement.
41. Can you please sign these lengthy legal documents and fax them back to us
so that we can use your software in our product?
No. Go away. Shoo.

View File

@@ -0,0 +1,51 @@
ChangeLog history of changes
FAQ Frequently Asked Questions about zlib
INDEX this file
Makefile makefile for Unix (generated by configure)
Makefile.in makefile for Unix (template for configure)
README guess what
algorithm.txt description of the (de)compression algorithm
configure configure script for Unix
zconf.in.h template for zconf.h (used by configure)
amiga/ makefiles for Amiga SAS C
as400/ makefiles for IBM AS/400
msdos/ makefiles for MSDOS
old/ makefiles for various architectures and zlib documentation
files that have not yet been updated for zlib 1.2.x
projects/ projects for various Integrated Development Environments
qnx/ makefiles for QNX
win32/ makefiles for Windows
zlib public header files (must be kept):
zconf.h
zlib.h
private source files used to build the zlib library:
adler32.c
compress.c
crc32.c
crc32.h
deflate.c
deflate.h
gzio.c
infback.c
inffast.c
inffast.h
inffixed.h
inflate.c
inflate.h
inftrees.c
inftrees.h
trees.c
trees.h
uncompr.c
zutil.c
zutil.h
source files for sample programs:
example.c
minigzip.c
unsupported contribution by third parties
See contrib/README.contrib

View File

@@ -0,0 +1,154 @@
# Makefile for zlib
# Copyright (C) 1995-2005 Jean-loup Gailly.
# For conditions of distribution and use, see copyright notice in zlib.h
# To compile and test, type:
# ./configure; make test
# The call of configure is optional if you don't have special requirements
# If you wish to build zlib as a shared library, use: ./configure -s
# To use the asm code, type:
# cp contrib/asm?86/match.S ./match.S
# make LOC=-DASMV OBJA=match.o
# To install /usr/local/lib/libz.* and /usr/local/include/zlib.h, type:
# make install
# To install in $HOME instead of /usr/local, use:
# make install prefix=$HOME
# Toolchain: C compiler and default compile flags.  The commented-out CFLAGS
# lines are alternative settings kept for reference.
CC=cc
CFLAGS=-O
#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7
#CFLAGS=-g -DDEBUG
#CFLAGS=-O3 -Wall -Wwrite-strings -Wpointer-arith -Wconversion \
# -Wstrict-prototypes -Wmissing-prototypes
# LDFLAGS is appended to the example/minigzip link commands; LIBS is the
# library those programs and the install target depend on.
LDFLAGS=libz.a
LDSHARED=$(CC)
CPP=$(CC) -E
LIBS=libz.a
# Shared-library file name, fully versioned name, and major-version name.
SHAREDLIB=libz.so
SHAREDLIBV=libz.so.1.2.3
SHAREDLIBM=libz.so.1
# Archive and utility commands.
AR=ar rc
RANLIB=ranlib
TAR=tar
SHELL=/bin/sh
# Suffix appended to the example and minigzip executable names (empty here).
EXE=
# Installation directories, all derived from prefix.
prefix = /usr/local
exec_prefix = ${prefix}
libdir = ${exec_prefix}/lib
includedir = ${prefix}/include
mandir = ${prefix}/share/man
man3dir = ${mandir}/man3
# Objects archived into libz.a and linked into the shared library.
OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \
zutil.o inflate.o infback.o inftrees.o inffast.o
# Optional extra object archived into libz.a; empty unless overridden.
OBJA =
# to use the asm code: make OBJA=match.o
TEST_OBJS = example.o minigzip.o
# Default target: build both sample programs (each depends on $(LIBS)).
all: example$(EXE) minigzip$(EXE)
check: test
# Self-test: round-trip "hello world" through minigzip, then run ./example
# and print an OK/FAILED message.  The recipe's last command is the if/fi
# whose branches only echo, so failures are reported by message only.
test: all
@LD_LIBRARY_PATH=.:$(LD_LIBRARY_PATH) ; export LD_LIBRARY_PATH; \
echo hello world | ./minigzip | ./minigzip -d || \
echo ' *** minigzip test FAILED ***' ; \
if ./example; then \
echo ' *** zlib test OK ***'; \
else \
echo ' *** zlib test FAILED ***'; \
fi
# Static library: archive $(OBJS) and $(OBJA); a ranlib failure is ignored.
libz.a: $(OBJS) $(OBJA)
$(AR) $@ $(OBJS) $(OBJA)
-@ ($(RANLIB) $@ || true) >/dev/null 2>&1
# Optional assembler object: preprocess match.S with $(CPP), compile the
# result, then rename the object and delete the temporary source.
match.o: match.S
$(CPP) match.S > _match.s
$(CC) -c _match.s
mv _match.o match.o
rm -f _match.s
# Shared library, plus the two symlinks that point at the versioned file.
$(SHAREDLIBV): $(OBJS)
$(LDSHARED) -o $@ $(OBJS)
rm -f $(SHAREDLIB) $(SHAREDLIBM)
ln -s $@ $(SHAREDLIB)
ln -s $@ $(SHAREDLIBM)
# Sample programs, linked with $(LDFLAGS).
example$(EXE): example.o $(LIBS)
$(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS)
minigzip$(EXE): minigzip.o $(LIBS)
$(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS)
# Install zlib.h and zconf.h into $(includedir), $(LIBS) into $(libdir) and
# zlib.3 into $(man3dir).  The directory-creation lines start with "-@", so
# they are not echoed and their errors are ignored.  If $(SHAREDLIBV) exists
# in $(libdir) after the copy, its symlinks are recreated and ldconfig is run.
install: $(LIBS)
-@if [ ! -d $(exec_prefix) ]; then mkdir -p $(exec_prefix); fi
-@if [ ! -d $(includedir) ]; then mkdir -p $(includedir); fi
-@if [ ! -d $(libdir) ]; then mkdir -p $(libdir); fi
-@if [ ! -d $(man3dir) ]; then mkdir -p $(man3dir); fi
cp zlib.h zconf.h $(includedir)
chmod 644 $(includedir)/zlib.h $(includedir)/zconf.h
cp $(LIBS) $(libdir)
cd $(libdir); chmod 755 $(LIBS)
-@(cd $(libdir); $(RANLIB) libz.a || true) >/dev/null 2>&1
cd $(libdir); if test -f $(SHAREDLIBV); then \
rm -f $(SHAREDLIB) $(SHAREDLIBM); \
ln -s $(SHAREDLIBV) $(SHAREDLIB); \
ln -s $(SHAREDLIBV) $(SHAREDLIBM); \
(ldconfig || true) >/dev/null 2>&1; \
fi
cp zlib.3 $(man3dir)
chmod 644 $(man3dir)/zlib.3
# The ranlib in install is needed on NeXTSTEP which checks file times
# ldconfig is for Linux
# Remove what the install target copied: the public headers, the static and
# shared libraries (with their symlinks) and the man page.
# The header removal uses "&&" so that rm never runs in the build directory
# when $(includedir) is missing; the leading "-" keeps uninstall best-effort.
uninstall:
	-cd $(includedir) && rm -f zlib.h zconf.h
	cd $(libdir); rm -f libz.a; \
	if test -f $(SHAREDLIBV); then \
	  rm -f $(SHAREDLIBV) $(SHAREDLIB) $(SHAREDLIBM); \
	fi
	cd $(man3dir); rm -f zlib.3
# Cleanup targets.  clean removes build products; distclean additionally
# restores Makefile and zconf.h from Makefile.in and zconf.in.h.
mostlyclean: clean
clean:
rm -f *.o *~ example$(EXE) minigzip$(EXE) \
libz.* foo.gz so_locations \
_match.s maketree contrib/infback9/*.o
maintainer-clean: distclean
distclean: clean
cp -p Makefile.in Makefile
cp -p zconf.in.h zconf.h
rm -f .DS_Store
# Editor tags file and header-dependency regeneration.
tags:
etags *.[ch]
depend:
makedepend -- $(CFLAGS) -- *.[ch]
# DO NOT DELETE THIS LINE -- make depend depends on it.
adler32.o: zlib.h zconf.h
compress.o: zlib.h zconf.h
crc32.o: crc32.h zlib.h zconf.h
deflate.o: deflate.h zutil.h zlib.h zconf.h
example.o: zlib.h zconf.h
gzio.o: zutil.h zlib.h zconf.h
inffast.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
inflate.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
infback.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
inftrees.o: zutil.h zlib.h zconf.h inftrees.h
minigzip.o: zlib.h zconf.h
trees.o: deflate.h zutil.h zlib.h zconf.h trees.h
uncompr.o: zlib.h zconf.h
zutil.o: zutil.h zlib.h zconf.h

View File

@@ -0,0 +1,154 @@
# Makefile for zlib
# Copyright (C) 1995-2005 Jean-loup Gailly.
# For conditions of distribution and use, see copyright notice in zlib.h
# To compile and test, type:
# ./configure; make test
# The call of configure is optional if you don't have special requirements
# If you wish to build zlib as a shared library, use: ./configure -s
# To use the asm code, type:
# cp contrib/asm?86/match.S ./match.S
# make LOC=-DASMV OBJA=match.o
# To install /usr/local/lib/libz.* and /usr/local/include/zlib.h, type:
# make install
# To install in $HOME instead of /usr/local, use:
# make install prefix=$HOME
# Toolchain: C compiler and default compile flags.  The commented-out CFLAGS
# lines are alternative settings kept for reference.
CC=cc
CFLAGS=-O
#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7
#CFLAGS=-g -DDEBUG
#CFLAGS=-O3 -Wall -Wwrite-strings -Wpointer-arith -Wconversion \
# -Wstrict-prototypes -Wmissing-prototypes
# LDFLAGS is appended to the example/minigzip link commands; LIBS is the
# library those programs and the install target depend on.
LDFLAGS=libz.a
LDSHARED=$(CC)
CPP=$(CC) -E
LIBS=libz.a
# Shared-library file name, fully versioned name, and major-version name.
SHAREDLIB=libz.so
SHAREDLIBV=libz.so.1.2.3
SHAREDLIBM=libz.so.1
# Archive and utility commands.
AR=ar rc
RANLIB=ranlib
TAR=tar
SHELL=/bin/sh
# Suffix appended to the example and minigzip executable names (empty here).
EXE=
# Installation directories, all derived from prefix.
prefix = /usr/local
exec_prefix = ${prefix}
libdir = ${exec_prefix}/lib
includedir = ${prefix}/include
mandir = ${prefix}/share/man
man3dir = ${mandir}/man3
# Objects archived into libz.a and linked into the shared library.
OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \
zutil.o inflate.o infback.o inftrees.o inffast.o
# Optional extra object archived into libz.a; empty unless overridden.
OBJA =
# to use the asm code: make OBJA=match.o
TEST_OBJS = example.o minigzip.o
# Default target: build both sample programs (each depends on $(LIBS)).
all: example$(EXE) minigzip$(EXE)
check: test
# Self-test: round-trip "hello world" through minigzip, then run ./example
# and print an OK/FAILED message.  The recipe's last command is the if/fi
# whose branches only echo, so failures are reported by message only.
test: all
@LD_LIBRARY_PATH=.:$(LD_LIBRARY_PATH) ; export LD_LIBRARY_PATH; \
echo hello world | ./minigzip | ./minigzip -d || \
echo ' *** minigzip test FAILED ***' ; \
if ./example; then \
echo ' *** zlib test OK ***'; \
else \
echo ' *** zlib test FAILED ***'; \
fi
# Static library: archive $(OBJS) and $(OBJA); a ranlib failure is ignored.
libz.a: $(OBJS) $(OBJA)
$(AR) $@ $(OBJS) $(OBJA)
-@ ($(RANLIB) $@ || true) >/dev/null 2>&1
# Optional assembler object: preprocess match.S with $(CPP), compile the
# result, then rename the object and delete the temporary source.
match.o: match.S
$(CPP) match.S > _match.s
$(CC) -c _match.s
mv _match.o match.o
rm -f _match.s
# Shared library, plus the two symlinks that point at the versioned file.
$(SHAREDLIBV): $(OBJS)
$(LDSHARED) -o $@ $(OBJS)
rm -f $(SHAREDLIB) $(SHAREDLIBM)
ln -s $@ $(SHAREDLIB)
ln -s $@ $(SHAREDLIBM)
# Sample programs, linked with $(LDFLAGS).
example$(EXE): example.o $(LIBS)
$(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS)
minigzip$(EXE): minigzip.o $(LIBS)
$(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS)
# Install zlib.h and zconf.h into $(includedir), $(LIBS) into $(libdir) and
# zlib.3 into $(man3dir).  The directory-creation lines start with "-@", so
# they are not echoed and their errors are ignored.  If $(SHAREDLIBV) exists
# in $(libdir) after the copy, its symlinks are recreated and ldconfig is run.
install: $(LIBS)
-@if [ ! -d $(exec_prefix) ]; then mkdir -p $(exec_prefix); fi
-@if [ ! -d $(includedir) ]; then mkdir -p $(includedir); fi
-@if [ ! -d $(libdir) ]; then mkdir -p $(libdir); fi
-@if [ ! -d $(man3dir) ]; then mkdir -p $(man3dir); fi
cp zlib.h zconf.h $(includedir)
chmod 644 $(includedir)/zlib.h $(includedir)/zconf.h
cp $(LIBS) $(libdir)
cd $(libdir); chmod 755 $(LIBS)
-@(cd $(libdir); $(RANLIB) libz.a || true) >/dev/null 2>&1
cd $(libdir); if test -f $(SHAREDLIBV); then \
rm -f $(SHAREDLIB) $(SHAREDLIBM); \
ln -s $(SHAREDLIBV) $(SHAREDLIB); \
ln -s $(SHAREDLIBV) $(SHAREDLIBM); \
(ldconfig || true) >/dev/null 2>&1; \
fi
cp zlib.3 $(man3dir)
chmod 644 $(man3dir)/zlib.3
# The ranlib in install is needed on NeXTSTEP which checks file times
# ldconfig is for Linux
# Remove what the install target copied: the public headers, the static and
# shared libraries (with their symlinks) and the man page.
# The header removal uses "&&" so that rm never runs in the build directory
# when $(includedir) is missing; the leading "-" keeps uninstall best-effort.
uninstall:
	-cd $(includedir) && rm -f zlib.h zconf.h
	cd $(libdir); rm -f libz.a; \
	if test -f $(SHAREDLIBV); then \
	  rm -f $(SHAREDLIBV) $(SHAREDLIB) $(SHAREDLIBM); \
	fi
	cd $(man3dir); rm -f zlib.3
# Cleanup targets.  clean removes build products; distclean additionally
# restores Makefile and zconf.h from Makefile.in and zconf.in.h.
mostlyclean: clean
clean:
rm -f *.o *~ example$(EXE) minigzip$(EXE) \
libz.* foo.gz so_locations \
_match.s maketree contrib/infback9/*.o
maintainer-clean: distclean
distclean: clean
cp -p Makefile.in Makefile
cp -p zconf.in.h zconf.h
rm -f .DS_Store
# Editor tags file and header-dependency regeneration.
tags:
etags *.[ch]
depend:
makedepend -- $(CFLAGS) -- *.[ch]
# DO NOT DELETE THIS LINE -- make depend depends on it.
adler32.o: zlib.h zconf.h
compress.o: zlib.h zconf.h
crc32.o: crc32.h zlib.h zconf.h
deflate.o: deflate.h zutil.h zlib.h zconf.h
example.o: zlib.h zconf.h
gzio.o: zutil.h zlib.h zconf.h
inffast.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
inflate.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
infback.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
inftrees.o: zutil.h zlib.h zconf.h inftrees.h
minigzip.o: zlib.h zconf.h
trees.o: deflate.h zutil.h zlib.h zconf.h trees.h
uncompr.o: zlib.h zconf.h
zutil.o: zutil.h zlib.h zconf.h

View File

@@ -0,0 +1,125 @@
ZLIB DATA COMPRESSION LIBRARY
zlib 1.2.3 is a general purpose data compression library. All the code is
thread safe. The data format used by the zlib library is described by RFCs
(Request for Comments) 1950 to 1952 in the files
http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format)
and rfc1952.txt (gzip format). These documents are also available in other
formats from ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html
All functions of the compression library are documented in the file zlib.h
(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example
of the library is given in the file example.c which also tests that the library
is working correctly. Another example is given in the file minigzip.c. The
compression library itself is composed of all source files except example.c and
minigzip.c.
To compile all files and run the test program, follow the instructions given at
the top of Makefile. In short "make test; make install" should work for most
machines. For Unix: "./configure; make test; make install". For MSDOS, use one
of the special makefiles such as Makefile.msc. For VMS, use make_vms.com.
Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
<info@winimage.com> for the Windows DLL version. The zlib home page is
http://www.zlib.org or http://www.gzip.org/zlib/ Before reporting a problem,
please check this site to verify that you have the latest version of zlib;
otherwise get the latest version and check whether the problem still exists or
not.
PLEASE read the zlib FAQ http://www.gzip.org/zlib/zlib_faq.html before asking
for help.
Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997
issue of Dr. Dobb's Journal; a copy of the article is available in
http://dogma.net/markn/articles/zlibtool/zlibtool.htm
The changes made in version 1.2.3 are documented in the file ChangeLog.
Unsupported third party contributions are provided in directory "contrib".
A Java implementation of zlib is available in the Java Development Kit
http://java.sun.com/j2se/1.4.2/docs/api/java/util/zip/package-summary.html
See the zlib home page http://www.zlib.org for details.
A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is in the
CPAN (Comprehensive Perl Archive Network) sites
http://www.cpan.org/modules/by-module/Compress/
A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
available in Python 1.5 and later versions, see
http://www.python.org/doc/lib/module-zlib.html
A zlib binding for TCL written by Andreas Kupries <a.kupries@westend.com> is
available at http://www.oche.de/~akupries/soft/trf/trf_zip.html
An experimental package to read and write files in .zip format, written on top
of zlib by Gilles Vollant <info@winimage.com>, is available in the
contrib/minizip directory of zlib.
Notes for some targets:
- For Windows DLL versions, please see win32/DLL_FAQ.txt
- For 64-bit Irix, deflate.c must be compiled without any optimization. With
-O, one libpng test fails. The test works in 32 bit mode (with the -n32
compiler flag). The compiler bug has been reported to SGI.
- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1; it works
when compiled with cc.
- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is
necessary to get gzprintf working correctly. This is done by configure.
- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
other compilers. Use "make test" to check your compiler.
- gzdopen is not supported on RISCOS, BEOS and by some Mac compilers.
- For PalmOs, see http://palmzlib.sourceforge.net/
- When building a shared, i.e. dynamic library on Mac OS X, the library must be
installed before testing (do "make install" before "make test"), since the
library location is specified in the library.
Acknowledgments:
The deflate format used by zlib was defined by Phil Katz. The deflate
and zlib specifications were written by L. Peter Deutsch. Thanks to all the
people who reported problems and suggested various improvements in zlib;
they are too numerous to cite here.
Copyright notice:
(C) 1995-2004 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
If you use the zlib library in a product, we would appreciate *not*
receiving lengthy legal documents to sign. The sources are provided
for free but without warranty of any kind. The library has been
entirely written by Jean-loup Gailly and Mark Adler; it does not
include third-party code.
If you redistribute modified sources, we would appreciate that you include
in the file ChangeLog history information documenting your changes. Please
read the FAQ for more information on the distribution of modified source
versions.

View File

@@ -0,0 +1,149 @@
/* adler32.c -- compute the Adler-32 checksum of a data stream
* Copyright (C) 1995-2004 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#define ZLIB_INTERNAL
#include "zlib.h"
/* Modulus used for both Adler-32 component sums. */
#define BASE 65521UL /* largest prime smaller than 65536 */
/* Number of bytes adler32() accumulates between modulo reductions. */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
/* Unrolled accumulation steps.  These expand in place and read and modify
   the caller's local variables named `adler` and `sum2`.  DO16 consumes
   buf[0] through buf[15]; it does not advance buf. */
#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;}
#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
#define DO16(buf) DO8(buf,0); DO8(buf,8);
/* use NO_DIVIDE if your processor does not do division in hardware */
/* MOD(a) reduces the lvalue a modulo BASE in place.  With NO_DIVIDE it does
   so by conditionally subtracting BASE << k for k = 16 down to 0; otherwise
   it uses the % operator.  MOD4(a) is the short form that starts at
   BASE << 4, which is enough when a is known to be below BASE << 5.  The
   argument is expanded many times, so pass a plain variable. */
#ifdef NO_DIVIDE
# define MOD(a) \
do { \
if (a >= (BASE << 16)) a -= (BASE << 16); \
if (a >= (BASE << 15)) a -= (BASE << 15); \
if (a >= (BASE << 14)) a -= (BASE << 14); \
if (a >= (BASE << 13)) a -= (BASE << 13); \
if (a >= (BASE << 12)) a -= (BASE << 12); \
if (a >= (BASE << 11)) a -= (BASE << 11); \
if (a >= (BASE << 10)) a -= (BASE << 10); \
if (a >= (BASE << 9)) a -= (BASE << 9); \
if (a >= (BASE << 8)) a -= (BASE << 8); \
if (a >= (BASE << 7)) a -= (BASE << 7); \
if (a >= (BASE << 6)) a -= (BASE << 6); \
if (a >= (BASE << 5)) a -= (BASE << 5); \
if (a >= (BASE << 4)) a -= (BASE << 4); \
if (a >= (BASE << 3)) a -= (BASE << 3); \
if (a >= (BASE << 2)) a -= (BASE << 2); \
if (a >= (BASE << 1)) a -= (BASE << 1); \
if (a >= BASE) a -= BASE; \
} while (0)
# define MOD4(a) \
do { \
if (a >= (BASE << 4)) a -= (BASE << 4); \
if (a >= (BASE << 3)) a -= (BASE << 3); \
if (a >= (BASE << 2)) a -= (BASE << 2); \
if (a >= (BASE << 1)) a -= (BASE << 1); \
if (a >= BASE) a -= BASE; \
} while (0)
#else
# define MOD(a) a %= BASE
# define MOD4(a) a %= BASE
#endif
/* ========================================================================= */
/*
 * Update a running Adler-32 checksum with len bytes from buf and return the
 * new checksum.  The low 16 bits of the value hold the byte sum and the high
 * 16 bits hold the sum of the successive low sums, both kept modulo BASE.
 * When buf is Z_NULL (and len is not 1) the initial value 1 is returned.
 */
uLong ZEXPORT adler32(adler, buf, len)
    uLong adler;
    const Bytef *buf;
    uInt len;
{
    unsigned long sum2;     /* high half: running sum of the low half */
    unsigned n;             /* 16-byte groups left in the current block */

    /* unpack the two 16-bit component sums from the incoming checksum */
    sum2 = (adler >> 16) & 0xffff;
    adler &= 0xffff;

    /* single-byte fast path: one conditional subtraction per sum is enough */
    if (len == 1) {
        adler += buf[0];
        if (adler >= BASE)
            adler -= BASE;
        sum2 += adler;
        if (sum2 >= BASE)
            sum2 -= BASE;
        return (sum2 << 16) | adler;
    }

    /* a null buffer asks for the initial value; this is tested only after
       the len == 1 path so that path stays as short as possible */
    if (buf == Z_NULL)
        return 1L;

    /* short input: plain byte loop followed by a cheap reduction */
    if (len < 16) {
        for (; len != 0; len--) {
            adler += *buf++;
            sum2 += adler;
        }
        if (adler >= BASE)
            adler -= BASE;
        MOD4(sum2);             /* only added so many BASE's */
        return (sum2 << 16) | adler;
    }

    /* whole NMAX-byte blocks: the sums are reduced once per block */
    for (; len >= NMAX; len -= NMAX) {
        /* NMAX is divisible by 16, so the block is exactly NMAX/16 groups */
        for (n = NMAX / 16; n != 0; n--) {
            DO16(buf);          /* 16 sums unrolled */
            buf += 16;
        }
        MOD(adler);
        MOD(sum2);
    }

    /* tail shorter than NMAX: 16-byte groups, then single bytes, followed
       by one reduction; skipped entirely when nothing is left */
    if (len != 0) {
        for (; len >= 16; len -= 16) {
            DO16(buf);
            buf += 16;
        }
        for (; len != 0; len--) {
            adler += *buf++;
            sum2 += adler;
        }
        MOD(adler);
        MOD(sum2);
    }

    /* recombine the two halves */
    return (sum2 << 16) | adler;
}
/* ========================================================================= */
/*
 * Combine two Adler-32 checksums.  adler1 is the checksum of a first byte
 * sequence, adler2 the checksum of a second sequence of len2 bytes; the
 * return value is the checksum of the two sequences concatenated, computed
 * without access to the data.
 *
 * Both halves of the result must end up in the range 0..BASE-1.  The final
 * reductions therefore compare with >=, not >: a sum that lands exactly on
 * BASE (or on 2*BASE) has to become 0.  For example, low halves 2 and 65520
 * give sum1 == 2*BASE, which must reduce to 0 rather than stay at BASE.
 */
uLong ZEXPORT adler32_combine(adler1, adler2, len2)
    uLong adler1;
    uLong adler2;
    z_off_t len2;
{
    unsigned long sum1;
    unsigned long sum2;
    unsigned rem;

    /* the derivation of this formula is left as an exercise for the reader */
    rem = (unsigned)(len2 % BASE);
    sum1 = adler1 & 0xffff;
    sum2 = rem * sum1;
    MOD(sum2);
    /* BASE is added to each sum so the "- 1" and "- rem" terms cannot take
       the unsigned values below zero */
    sum1 += (adler2 & 0xffff) + BASE - 1;
    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
    /* for component sums below BASE, sum1 is below 3*BASE here: two
       conditional subtractions bring it under BASE */
    if (sum1 >= BASE) sum1 -= BASE;
    if (sum1 >= BASE) sum1 -= BASE;
    /* likewise sum2 is below 4*BASE: subtract 2*BASE, then BASE, as needed */
    if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1);
    if (sum2 >= BASE) sum2 -= BASE;
    return sum1 | (sum2 << 16);
}

View File

@@ -0,0 +1,209 @@
1. Compression algorithm (deflate)
The deflation algorithm used by gzip (also zip and zlib) is a variation of
LZ77 (Lempel-Ziv 1977, see reference below). It finds duplicated strings in
the input data. The second occurrence of a string is replaced by a
pointer to the previous string, in the form of a pair (distance,
length). Distances are limited to 32K bytes, and lengths are limited
to 258 bytes. When a string does not occur anywhere in the previous
32K bytes, it is emitted as a sequence of literal bytes. (In this
description, `string' must be taken as an arbitrary sequence of bytes,
and is not restricted to printable characters.)
Literals or match lengths are compressed with one Huffman tree, and
match distances are compressed with another tree. The trees are stored
in a compact form at the start of each block. The blocks can have any
size (except that the compressed data for one block must fit in
available memory). A block is terminated when deflate() determines that
it would be useful to start another block with fresh trees. (This is
somewhat similar to the behavior of LZW-based _compress_.)
Duplicated strings are found using a hash table. All input strings of
length 3 are inserted in the hash table. A hash index is computed for
the next 3 bytes. If the hash chain for this index is not empty, all
strings in the chain are compared with the current input string, and
the longest match is selected.
The hash chains are searched starting with the most recent strings, to
favor small distances and thus take advantage of the Huffman encoding.
The hash chains are singly linked. There are no deletions from the
hash chains, the algorithm simply discards matches that are too old.
To avoid a worst-case situation, very long hash chains are arbitrarily
truncated at a certain length, determined by a runtime option (level
parameter of deflateInit). So deflate() does not always find the longest
possible match but generally finds a match which is long enough.
deflate() also defers the selection of matches with a lazy evaluation
mechanism. After a match of length N has been found, deflate() searches for
a longer match at the next input byte. If a longer match is found, the
previous match is truncated to a length of one (thus producing a single
literal byte) and the process of lazy evaluation begins again. Otherwise,
the original match is kept, and the next match search is attempted only N
steps later.
The lazy match evaluation is also subject to a runtime parameter. If
the current match is long enough, deflate() reduces the search for a longer
match, thus speeding up the whole process. If compression ratio is more
important than speed, deflate() attempts a complete second search even if
the first match is already long enough.
The lazy match evaluation is not performed for the fastest compression
modes (level parameter 1 to 3). For these fast modes, new strings
are inserted in the hash table only when no match was found, or
when the match is not too long. This degrades the compression ratio
but saves time since there are both fewer insertions and fewer searches.
2. Decompression algorithm (inflate)
2.1 Introduction
The key question is how to represent a Huffman code (or any prefix code) so
that you can decode fast. The most important characteristic is that shorter
codes are much more common than longer codes, so pay attention to decoding the
short codes fast, and let the long codes take longer to decode.
inflate() sets up a first level table that covers some number of bits of
input less than the length of longest code. It gets that many bits from the
stream, and looks it up in the table. The table will tell if the next
code is that many bits or less and how many, and if it is, it will tell
the value, else it will point to the next level table for which inflate()
grabs more bits and tries to decode a longer code.
How many bits to make the first lookup is a tradeoff between the time it
takes to decode and the time it takes to build the table. If building the
table took no time (and if you had infinite memory), then there would only
be a first level table to cover all the way to the longest code. However,
building the table ends up taking a lot longer for more bits since short
codes are replicated many times in such a table. What inflate() does is
simply to make the number of bits in the first table a variable, and then
to set that variable for the maximum speed.
For inflate, which has 286 possible codes for the literal/length tree, the size
of the first table is nine bits. Also the distance trees have 30 possible
values, and the size of the first table is six bits. Note that for each of
those cases, the table ended up one bit longer than the ``average'' code
length, i.e. the code length of an approximately flat code which would be a
little more than eight bits for 286 symbols and a little less than five bits
for 30 symbols.
2.2 More details on the inflate table lookup
Ok, you want to know what this cleverly obfuscated inflate tree actually
looks like. You are correct that it's not a Huffman tree. It is simply a
lookup table for the first, let's say, nine bits of a Huffman symbol. The
symbol could be as short as one bit or as long as 15 bits. If a particular
symbol is shorter than nine bits, then that symbol's translation is duplicated
in all those entries that start with that symbol's bits. For example, if the
symbol is four bits, then it's duplicated 32 times in a nine-bit table. If a
symbol is nine bits long, it appears in the table once.
If the symbol is longer than nine bits, then that entry in the table points
to another similar table for the remaining bits. Again, there are duplicated
entries as needed. The idea is that most of the time the symbol will be short
and there will only be one table look up. (That's the whole idea behind data
compression in the first place.) For the less frequent long symbols, there
will be two lookups. If you had a compression method with really long
symbols, you could have as many levels of lookups as is efficient. For
inflate, two is enough.
So a table entry either points to another table (in which case nine bits in
the above example are gobbled), or it contains the translation for the symbol
and the number of bits to gobble. Then you start again with the next
ungobbled bit.
You may wonder: why not just have one lookup table for how ever many bits the
longest symbol is? The reason is that if you do that, you end up spending
more time filling in duplicate symbol entries than you do actually decoding.
At least for deflate's output that generates new trees every several 10's of
kbytes. You can imagine that filling in a 2^15 entry table for a 15-bit code
would take too long if you're only decoding several thousand symbols. At the
other extreme, you could make a new table for every bit in the code. In fact,
that's essentially a Huffman tree. But then you spend too much time
traversing the tree while decoding, even for short symbols.
So the number of bits for the first lookup table is a trade-off between the time
to fill out the table and the time spent looking at the second level and above
of the table.
Here is an example, scaled down:
The code being decoded, with 10 symbols, from 1 to 6 bits long:
A: 0
B: 10
C: 1100
D: 11010
E: 11011
F: 11100
G: 11101
H: 11110
I: 111110
J: 111111
Let's make the first table three bits long (eight entries):
000: A,1
001: A,1
010: A,1
011: A,1
100: B,2
101: B,2
110: -> table X (gobble 3 bits)
111: -> table Y (gobble 3 bits)
Each entry is what the bits decode as and how many bits that is, i.e. how
many bits to gobble. Or the entry points to another table, with the number of
bits to gobble implicit in the size of the table.
Table X is two bits long since the longest code starting with 110 is five bits
long:
00: C,1
01: C,1
10: D,2
11: E,2
Table Y is three bits long since the longest code starting with 111 is six
bits long:
000: F,2
001: F,2
010: G,2
011: G,2
100: H,2
101: H,2
110: I,3
111: J,3
So what we have here are three tables with a total of 20 entries that had to
be constructed. That's compared to 64 entries for a single table. Or
compared to 16 entries for a Huffman tree (six two entry tables and one four
entry table). Assuming that the code ideally represents the probability of
the symbols, it takes on the average 1.25 lookups per symbol. That's compared
to one lookup for the single table, or 1.66 lookups per symbol for the
Huffman tree.
There, I think that gives you a picture of what's going on. For inflate, the
meaning of a particular symbol is often more than just a letter. It can be a
byte (a "literal"), or it can be either a length or a distance which
indicates a base value and a number of bits to fetch after the code that is
added to the base value. Or it might be the special end-of-block code. The
data structures created in inftrees.c try to encode all that information
compactly in the tables.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
References:
[LZ77] Ziv J., Lempel A., ``A Universal Algorithm for Sequential Data
Compression,'' IEEE Transactions on Information Theory, Vol. 23, No. 3,
pp. 337-343.
``DEFLATE Compressed Data Format Specification'' available in
http://www.ietf.org/rfc/rfc1951.txt

View File

@@ -0,0 +1,66 @@
# Amiga powerUP (TM) Makefile
# makefile for zlib and SAS C V6.58/7.00 PPC compiler
# Copyright (C) 1998 by Andreas R. Kleinert
# Name of the static library archive built from $(OBJS).
LIBNAME = libzip.a
# Compiler and its option keywords.
CC = scppc
CFLAGS = NOSTKCHK NOSINT OPTIMIZE OPTGO OPTPEEP OPTINLOCAL OPTINL \
OPTLOOP OPTRDEP=8 OPTDEP=8 OPTCOMP=8 NOVER
# Archiver, ranlib and linker commands; LDFLAGS is just "-o", so the next
# word on each link line is the output file name.
AR = ppc-amigaos-ar cr
RANLIB = ppc-amigaos-ranlib
LD = ppc-amigaos-ld -r
LDFLAGS = -o
LDLIBS = LIB:scppc.a LIB:end.o
RM = delete quiet
# Objects archived into $(LIBNAME).
OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \
zutil.o inflate.o infback.o inftrees.o inffast.o
TEST_OBJS = example.o minigzip.o
# Default target: build both sample programs.
all: example minigzip
check: test
# Self-test: run example, then round-trip "hello world" through minigzip.
test: all
example
echo hello world | minigzip | minigzip -d
# Static library; a ranlib failure is ignored (leading "-").
$(LIBNAME): $(OBJS)
$(AR) $@ $(OBJS)
-$(RANLIB) $@
# Sample programs: LIB:c_ppc.o, the program's own object, the library and
# $(LDLIBS) are passed to the linker in that order.
example: example.o $(LIBNAME)
$(LD) $(LDFLAGS) $@ LIB:c_ppc.o $@.o $(LIBNAME) $(LDLIBS)
minigzip: minigzip.o $(LIBNAME)
$(LD) $(LDFLAGS) $@ LIB:c_ppc.o $@.o $(LIBNAME) $(LDLIBS)
# Remove objects, sample programs, the library and the test output file.
mostlyclean: clean
clean:
$(RM) *.o example minigzip $(LIBNAME) foo.gz
# Source packaging targets: a zip archive, and a gzipped tar built from the
# parent directory.
zip:
zip -ul9 zlib README ChangeLog Makefile Make????.??? Makefile.?? \
descrip.mms *.[ch]
tgz:
cd ..; tar cfz zlib/zlib.tgz zlib/README zlib/ChangeLog zlib/Makefile \
zlib/Make????.??? zlib/Makefile.?? zlib/descrip.mms zlib/*.[ch]
# DO NOT DELETE THIS LINE -- make depend depends on it.
adler32.o: zlib.h zconf.h
compress.o: zlib.h zconf.h
crc32.o: crc32.h zlib.h zconf.h
deflate.o: deflate.h zutil.h zlib.h zconf.h
example.o: zlib.h zconf.h
gzio.o: zutil.h zlib.h zconf.h
inffast.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
inflate.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
infback.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
inftrees.o: zutil.h zlib.h zconf.h inftrees.h
minigzip.o: zlib.h zconf.h
trees.o: deflate.h zutil.h zlib.h zconf.h trees.h
uncompr.o: zlib.h zconf.h
zutil.o: zutil.h zlib.h zconf.h

View File

@@ -0,0 +1,65 @@
# SMakefile for zlib
# Modified from the standard UNIX Makefile Copyright Jean-loup Gailly
# Osma Ahvenlampi <Osma.Ahvenlampi@hut.fi>
# Amiga, SAS/C 6.56 & Smake
# Compiler and default flags; the commented-out CFLAGS lines are alternatives.
CC=sc
CFLAGS=OPT
#CFLAGS=OPT CPU=68030
#CFLAGS=DEBUG=LINE
# Link arguments used for the sample programs.
LDFLAGS=LIB z.lib
# Option keywords written to the SCOPTIONS file by the SCOPTIONS rule below.
SCOPTIONS=OPTSCHED OPTINLINE OPTALIAS OPTTIME OPTINLOCAL STRMERGE \
NOICONS PARMS=BOTH NOSTACKCHECK UTILLIB NOVERSION ERRORREXX \
DEF=POSTINC
# Objects placed in z.lib.
OBJS = adler32.o compress.o crc32.o gzio.o uncompr.o deflate.o trees.o \
zutil.o inflate.o infback.o inftrees.o inffast.o
TEST_OBJS = example.o minigzip.o
# Default target: create the SCOPTIONS file first, then build both samples.
all: SCOPTIONS example minigzip
check: test
# Self-test: run example, then round-trip "hello world" through minigzip.
test: all
example
echo hello world | minigzip | minigzip -d
# Copy the public headers to INCLUDE: and the library to LIB:.
install: z.lib
copy clone zlib.h zconf.h INCLUDE:
copy clone z.lib LIB:
# Build the library from $(OBJS) with oml.
z.lib: $(OBJS)
oml z.lib r $(OBJS)
# Sample programs, compiled and linked against z.lib via $(LDFLAGS).
example: example.o z.lib
$(CC) $(CFLAGS) LINK TO $@ example.o $(LDFLAGS)
minigzip: minigzip.o z.lib
$(CC) $(CFLAGS) LINK TO $@ minigzip.o $(LDFLAGS)
# Remove build products, including the generated SCOPTIONS file; errors are
# ignored (leading "-").
mostlyclean: clean
clean:
-delete force quiet example minigzip *.o z.lib foo.gz *.lnk SCOPTIONS
# Regenerate the SCOPTIONS file whenever Makefile.sas changes.
# NOTE(review): the "<from <" ... "<" lines appear to be Smake's inline-file
# syntax feeding the $(SCOPTIONS) text to copy -- confirm against Smake docs.
SCOPTIONS: Makefile.sas
copy to $@ <from <
$(SCOPTIONS)
<
# DO NOT DELETE THIS LINE -- make depend depends on it.
adler32.o: zlib.h zconf.h
compress.o: zlib.h zconf.h
crc32.o: crc32.h zlib.h zconf.h
deflate.o: deflate.h zutil.h zlib.h zconf.h
example.o: zlib.h zconf.h
gzio.o: zutil.h zlib.h zconf.h
inffast.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
inflate.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
infback.o: zutil.h zlib.h zconf.h inftrees.h inflate.h inffast.h
inftrees.o: zutil.h zlib.h zconf.h inftrees.h
minigzip.o: zlib.h zconf.h
trees.o: deflate.h zutil.h zlib.h zconf.h trees.h
uncompr.o: zlib.h zconf.h
zutil.o: zutil.h zlib.h zconf.h

View File

@@ -0,0 +1,132 @@
STRPGMEXP PGMLVL(*CURRENT) SIGNATURE('ZLIB')
/* Export list of the ZLIB service program: each EXPORT SYMBOL line */
/* names one procedure made visible to callers of the service program. */
/* NOTE(review): because the signature is fixed to 'ZLIB', existing */
/* entries presumably have to keep their order and new entries be */
/* appended at the end -- confirm against the binder language manual. */
/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
/* Version 1.1.3 entry points. */
/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
/********************************************************************/
/* *MODULE ADLER32 ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("adler32")
/********************************************************************/
/* *MODULE COMPRESS ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("compress")
EXPORT SYMBOL("compress2")
/********************************************************************/
/* *MODULE CRC32 ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("crc32")
EXPORT SYMBOL("get_crc_table")
/********************************************************************/
/* *MODULE DEFLATE ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("deflate")
EXPORT SYMBOL("deflateEnd")
EXPORT SYMBOL("deflateSetDictionary")
EXPORT SYMBOL("deflateCopy")
EXPORT SYMBOL("deflateReset")
EXPORT SYMBOL("deflateParams")
EXPORT SYMBOL("deflatePrime")
EXPORT SYMBOL("deflateInit_")
EXPORT SYMBOL("deflateInit2_")
/********************************************************************/
/* *MODULE GZIO ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("gzopen")
EXPORT SYMBOL("gzdopen")
EXPORT SYMBOL("gzsetparams")
EXPORT SYMBOL("gzread")
EXPORT SYMBOL("gzwrite")
EXPORT SYMBOL("gzprintf")
EXPORT SYMBOL("gzputs")
EXPORT SYMBOL("gzgets")
EXPORT SYMBOL("gzputc")
EXPORT SYMBOL("gzgetc")
EXPORT SYMBOL("gzflush")
EXPORT SYMBOL("gzseek")
EXPORT SYMBOL("gzrewind")
EXPORT SYMBOL("gztell")
EXPORT SYMBOL("gzeof")
EXPORT SYMBOL("gzclose")
EXPORT SYMBOL("gzerror")
/********************************************************************/
/* *MODULE INFLATE ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("inflate")
EXPORT SYMBOL("inflateEnd")
EXPORT SYMBOL("inflateSetDictionary")
EXPORT SYMBOL("inflateSync")
EXPORT SYMBOL("inflateReset")
EXPORT SYMBOL("inflateInit_")
EXPORT SYMBOL("inflateInit2_")
EXPORT SYMBOL("inflateSyncPoint")
/********************************************************************/
/* *MODULE UNCOMPR ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("uncompress")
/********************************************************************/
/* *MODULE ZUTIL ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("zlibVersion")
EXPORT SYMBOL("zError")
/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
/* Version 1.2.1 additional entry points. */
/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
/********************************************************************/
/* *MODULE COMPRESS ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("compressBound")
/********************************************************************/
/* *MODULE DEFLATE ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("deflateBound")
/********************************************************************/
/* *MODULE GZIO ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("gzungetc")
EXPORT SYMBOL("gzclearerr")
/********************************************************************/
/* *MODULE INFBACK ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("inflateBack")
EXPORT SYMBOL("inflateBackEnd")
EXPORT SYMBOL("inflateBackInit_")
/********************************************************************/
/* *MODULE INFLATE ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("inflateCopy")
/********************************************************************/
/* *MODULE ZUTIL ZLIB 01/02/01 00:15:09 */
/********************************************************************/
EXPORT SYMBOL("zlibCompileFlags")
ENDPGMEXP

View File

@@ -0,0 +1,123 @@
/******************************************************************************/
/* */
/* ZLIB */
/* */
/* Compile sources into modules and link them into a service program. */
/* */
/******************************************************************************/
PGM
/* Overview: for each zlib source member a CRTCMOD command string is */
/* assembled in &CMD and run through QCMDEXC; the resulting modules */
/* are then bound into service program &SRVLIB/ZLIB. */
/* Configuration adjustable parameters. */
DCL VAR(&SRCLIB) TYPE(*CHAR) LEN(10) +
VALUE('ZLIB') /* Source library. */
DCL VAR(&SRCFILE) TYPE(*CHAR) LEN(10) +
VALUE('SOURCES') /* Source member file. */
DCL VAR(&CTLFILE) TYPE(*CHAR) LEN(10) +
VALUE('TOOLS') /* Control member file. */
DCL VAR(&MODLIB) TYPE(*CHAR) LEN(10) +
VALUE('ZLIB') /* Module library. */
DCL VAR(&SRVLIB) TYPE(*CHAR) LEN(10) +
VALUE('LGPL') /* Service program library. */
DCL VAR(&CFLAGS) TYPE(*CHAR) +
VALUE('OPTIMIZE(40)') /* Compile options. */
/* Working storage. */
/* &CMDLEN is the length handed to QCMDEXC together with &CMD. */
/* NOTE(review): it is 300 while &CMD is 512 bytes long; presumably */
/* QCMDEXC only looks at the first 300 characters, so generated */
/* commands must stay below that size -- confirm. */
DCL VAR(&CMDLEN) TYPE(*DEC) LEN(15 5) VALUE(300) /* Command length. */
DCL VAR(&CMD) TYPE(*CHAR) LEN(512)
/* Compile sources into modules. */
/* Every block below builds the same command, only the member name */
/* changes: */
/* CRTCMOD MODULE(modlib/NAME) SRCFILE(srclib/srcfile) */
/* SYSIFCOPT(*IFSIO) cflags */
/* *TCAT joins the pieces without the trailing blanks of the */
/* variables; *BCAT puts a single blank before &CFLAGS. */
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/ADLER32) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/COMPRESS) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/CRC32) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/DEFLATE) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/GZIO) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/INFBACK) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/INFFAST) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/INFLATE) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/INFTREES) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/TREES) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/UNCOMPR) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
CHGVAR VAR(&CMD) VALUE('CRTCMOD MODULE(' *TCAT &MODLIB *TCAT +
'/ZUTIL) SRCFILE(' *TCAT +
&SRCLIB *TCAT '/' *TCAT &SRCFILE *TCAT +
') SYSIFCOPT(*IFSIO)' *BCAT &CFLAGS)
CALL PGM(QCMDEXC) PARM(&CMD &CMDLEN)
/* Link modules into a service program. */
/* The export list is taken from member BNDSRC of the control file */
/* (&SRCLIB/&CTLFILE); the module list must name every module */
/* compiled above. */
CRTSRVPGM SRVPGM(&SRVLIB/ZLIB) +
MODULE(&MODLIB/ADLER32 &MODLIB/COMPRESS +
&MODLIB/CRC32 &MODLIB/DEFLATE +
&MODLIB/GZIO &MODLIB/INFBACK +
&MODLIB/INFFAST &MODLIB/INFLATE +
&MODLIB/INFTREES &MODLIB/TREES +
&MODLIB/UNCOMPR &MODLIB/ZUTIL) +
SRCFILE(&SRCLIB/&CTLFILE) SRCMBR(BNDSRC) +
TEXT('ZLIB 1.2.3') TGTRLS(V4R4M0)
ENDPGM

View File

@@ -0,0 +1,111 @@
ZLIB version 1.2.3 for AS400 installation instructions
I) From an AS400 *SAVF file:
1) Unpacking archive to an AS400 save file
On the AS400:
_ Create the ZLIB AS400 library:
CRTLIB LIB(ZLIB) TYPE(PROD) TEXT('ZLIB compression API library')
_ Create a work save file, for example:
CRTSAVF FILE(ZLIB/ZLIBSAVF)
On a PC connected to the target AS400:
_ Unpack the save file image to a PC file "ZLIBSAVF"
_ Upload this file into the save file on the AS400, for example
using ftp in BINARY mode.
2) Populating the ZLIB AS400 source library
On the AS400:
_ Extract the saved objects into the ZLIB AS400 library using:
RSTOBJ OBJ(*ALL) SAVLIB(ZLIB) DEV(*SAVF) SAVF(ZLIB/ZLIBSAVF) RSTLIB(ZLIB)
3) Customize installation:
_ Edit CL member ZLIB/TOOLS(COMPILE) and change parameters if needed,
according to the comments.
_ Compile this member with:
CRTCLPGM PGM(ZLIB/COMPILE) SRCFILE(ZLIB/TOOLS) SRCMBR(COMPILE)
4) Compile and generate the service program:
_ This can now be done by executing:
CALL PGM(ZLIB/COMPILE)
II) From the original source distribution:
1) On the AS400, create the source library:
CRTLIB LIB(ZLIB) TYPE(PROD) TEXT('ZLIB compression API library')
2) Create the source files:
CRTSRCPF FILE(ZLIB/SOURCES) RCDLEN(112) TEXT('ZLIB library modules')
CRTSRCPF FILE(ZLIB/H) RCDLEN(112) TEXT('ZLIB library includes')
CRTSRCPF FILE(ZLIB/TOOLS) RCDLEN(112) TEXT('ZLIB library control utilities')
3) From the machine hosting the distribution files, upload them (with
FTP in text mode, for example) according to the following table:
Original AS400 AS400 AS400 AS400
file file member type description
SOURCES Original ZLIB C subprogram sources
adler32.c ADLER32 C ZLIB - Compute the Adler-32 checksum of a dta strm
compress.c COMPRESS C ZLIB - Compress a memory buffer
crc32.c CRC32 C ZLIB - Compute the CRC-32 of a data stream
deflate.c DEFLATE C ZLIB - Compress data using the deflation algorithm
gzio.c GZIO C ZLIB - IO on .gz files
infback.c INFBACK C ZLIB - Inflate using a callback interface
inffast.c INFFAST C ZLIB - Fast proc. literals & length/distance pairs
inflate.c INFLATE C ZLIB - Interface to inflate modules
inftrees.c INFTREES C ZLIB - Generate Huffman trees for efficient decode
trees.c TREES C ZLIB - Output deflated data using Huffman coding
uncompr.c UNCOMPR C ZLIB - Decompress a memory buffer
zutil.c ZUTIL C ZLIB - Target dependent utility functions
H Original ZLIB C and ILE/RPG include files
crc32.h CRC32 C ZLIB - CRC32 tables
deflate.h DEFLATE C ZLIB - Internal compression state
inffast.h INFFAST C ZLIB - Header to use inffast.c
inffixed.h INFFIXED C ZLIB - Table for decoding fixed codes
inflate.h INFLATE C ZLIB - Internal inflate state definitions
inftrees.h INFTREES C ZLIB - Header to use inftrees.c
trees.h TREES C ZLIB - Created automatically with -DGEN_TREES_H
zconf.h ZCONF C ZLIB - Compression library configuration
zlib.h ZLIB C ZLIB - Compression library C user interface
as400/zlib.inc ZLIB.INC RPGLE ZLIB - Compression library ILE RPG user interface
zutil.h ZUTIL C ZLIB - Internal interface and configuration
TOOLS Building source software & AS/400 README
as400/bndsrc BNDSRC Entry point exportation list
as400/compile.clp COMPILE CLP Compile sources & generate service program
as400/readme.txt README TXT Installation instructions
4) Continue as in I)3).
Notes: For AS400 ILE RPG programmers, a /copy member defining the ZLIB
API prototypes for ILE RPG can be found in ZLIB/H(ZLIB.INC).
Please read comments in this member for more information.
Remember that most foreign textual data are ASCII coded: this
implementation does not handle conversion from/to ASCII, so
text data code conversions must be done explicitly.
Always open zipped files in binary mode.

View File

@@ -0,0 +1,331 @@
* ZLIB.INC - Interface to the general purpose compression library
*
* ILE RPG400 version by Patrick Monnerat, DATASPHERE.
* Version 1.2.3
*
*
* WARNING:
* Procedures inflateInit(), inflateInit2(), deflateInit(),
* deflateInit2() and inflateBackInit() need to be called with
* two additional arguments:
* the package version string and the stream control structure size.
* This is needed because RPG lacks some macro feature.
* Call these procedures as:
* inflateInit(...: ZLIB_VERSION: %size(z_stream))
*
/if not defined(ZLIB_H_)
/define ZLIB_H_
*
**************************************************************************
* Constants
**************************************************************************
*
* Versioning information.
* ZLIB_VERSION is the string to pass as "version" to the *Init procedures.
*
D ZLIB_VERSION C '1.2.3'
D ZLIB_VERNUM C X'1230'
*
* Other equates.
*
* Flush modes (the "flush" parameter of deflate(), inflate(), gzflush()).
*
D Z_NO_FLUSH C 0
D Z_SYNC_FLUSH C 2
D Z_FULL_FLUSH C 3
D Z_FINISH C 4
D Z_BLOCK C 5
*
* Return codes: zero or positive values are normal events, negative
* values are errors.
*
D Z_OK C 0
D Z_STREAM_END C 1
D Z_NEED_DICT C 2
D Z_ERRNO C -1
D Z_STREAM_ERROR C -2
D Z_DATA_ERROR C -3
D Z_MEM_ERROR C -4
D Z_BUF_ERROR C -5
DZ_VERSION_ERROR C -6
*
* Compression levels (the "level" parameters).
*
D Z_NO_COMPRESSION...
D C 0
D Z_BEST_SPEED C 1
D Z_BEST_COMPRESSION...
D C 9
D Z_DEFAULT_COMPRESSION...
D C -1
*
* Compression strategies (the "strategy" parameters).
*
D Z_FILTERED C 1
D Z_HUFFMAN_ONLY C 2
D Z_RLE C 3
D Z_DEFAULT_STRATEGY...
D C 0
*
* Data type guesses stored in zs_data_type.
*
D Z_BINARY C 0
D Z_ASCII C 1
D Z_UNKNOWN C 2
*
* Compression method (the "method" parameter of deflateInit2()).
*
D Z_DEFLATED C 8
*
* Null value for pointer-like fields.
*
D Z_NULL C 0
*
**************************************************************************
* Types
**************************************************************************
*
* gzFile and z_off_t are used as like() templates by the prototypes of
* this member; z_streamp is the basing pointer of z_stream.
*
D z_streamp S * Stream struct ptr
D gzFile S * File pointer
D z_off_t S 10i 0 Stream offsets
*
**************************************************************************
* Structures
**************************************************************************
*
* The GZIP encode/decode stream support structure.
* It is declared based(z_streamp): the fields map the storage that
* z_streamp currently addresses, so z_streamp must be set before any
* zs_* field is used.
* The two unnamed fields at the end are padding (see their comments).
*
D z_stream DS align based(z_streamp)
D zs_next_in * Next input byte
D zs_avail_in 10U 0 Byte cnt at next_in
D zs_total_in 10U 0 Total bytes read
D zs_next_out * Output buffer ptr
D zs_avail_out 10U 0 Room left @ next_out
D zs_total_out 10U 0 Total bytes written
D zs_msg * Last errmsg or null
D zs_state * Internal state
D zs_zalloc * procptr Int. state allocator
D zs_free * procptr Int. state dealloc.
D zs_opaque * Private alloc. data
D zs_data_type 10i 0 ASC/BIN best guess
D zs_adler 10u 0 Uncompr. adler32 val
D 10U 0 Reserved
D 10U 0 Ptr. alignment
*
**************************************************************************
* Utility function prototypes
**************************************************************************
*
* Conventions used by the prototypes below:
* - data buffers are declared 32767 bytes long with options(*varsize);
* their real length is given by a separate length parameter;
* - parameters without the "value" keyword (destLen, errnum, ...) are
* passed by reference;
* - pathname, mode and string parameters are pointers passed by value
* with options(*string).
* NOTE(review): RPG presumably passes a null-terminated copy of a
* character expression for these -- confirm in the ILE RPG reference.
*
D compress PR 10I 0 extproc('compress')
D dest 32767 options(*varsize) Destination buffer
D destLen 10U 0 Destination length
D source 32767 const options(*varsize) Source buffer
D sourceLen 10u 0 value Source length
*
D compress2 PR 10I 0 extproc('compress2')
D dest 32767 options(*varsize) Destination buffer
D destLen 10U 0 Destination length
D source 32767 const options(*varsize) Source buffer
D sourceLen 10U 0 value Source length
D level 10I 0 value Compression level
*
D compressBound PR 10U 0 extproc('compressBound')
D sourceLen 10U 0 value
*
D uncompress PR 10I 0 extproc('uncompress')
D dest 32767 options(*varsize) Destination buffer
D destLen 10U 0 Destination length
D source 32767 const options(*varsize) Source buffer
D sourceLen 10U 0 value Source length
*
* The procedures below work on a gzFile value; gzopen and gzdopen
* return one.
*
D gzopen PR extproc('gzopen')
D like(gzFile)
D path * value options(*string) File pathname
D mode * value options(*string) Open mode
*
D gzdopen PR extproc('gzdopen')
D like(gzFile)
D fd 10i 0 value File descriptor
D mode * value options(*string) Open mode
*
D gzsetparams PR 10I 0 extproc('gzsetparams')
D file value like(gzFile) File pointer
D level 10I 0 value
D strategy 10i 0 value
*
D gzread PR 10I 0 extproc('gzread')
D file value like(gzFile) File pointer
D buf 32767 options(*varsize) Buffer
D len 10u 0 value Buffer length
*
D gzwrite PR 10I 0 extproc('gzwrite')
D file value like(gzFile) File pointer
D buf 32767 const options(*varsize) Buffer
D len 10u 0 value Buffer length
*
D gzputs PR 10I 0 extproc('gzputs')
D file value like(gzFile) File pointer
D s * value options(*string) String to output
*
D gzgets PR * extproc('gzgets')
D file value like(gzFile) File pointer
D buf 32767 options(*varsize) Read buffer
D len 10i 0 value Buffer length
*
D gzflush PR 10i 0 extproc('gzflush')
D file value like(gzFile) File pointer
D flush 10I 0 value Type of flush
*
D gzseek PR extproc('gzseek')
D like(z_off_t)
D file value like(gzFile) File pointer
D offset value like(z_off_t) Offset
D whence 10i 0 value Origin
*
D gzrewind PR 10i 0 extproc('gzrewind')
D file value like(gzFile) File pointer
*
D gztell PR extproc('gztell')
D like(z_off_t)
D file value like(gzFile) File pointer
*
D gzeof PR 10i 0 extproc('gzeof')
D file value like(gzFile) File pointer
*
D gzclose PR 10i 0 extproc('gzclose')
D file value like(gzFile) File pointer
*
D gzerror PR * extproc('gzerror') Error string
D file value like(gzFile) File pointer
D errnum 10I 0 Error code
*
D gzclearerr PR extproc('gzclearerr')
D file value like(gzFile) File pointer
*
**************************************************************************
* Basic function prototypes
**************************************************************************
*
* deflateInit and inflateInit are bound to the C entry points
* deflateInit_ and inflateInit_, so the caller has to supply the last
* two arguments itself: ZLIB_VERSION and %size(z_stream).
* The strm parameter is passed by reference in every prototype.
*
D zlibVersion PR * extproc('zlibVersion') Version string
*
D deflateInit PR 10I 0 extproc('deflateInit_') Init. compression
D strm like(z_stream) Compression stream
D level 10I 0 value Compression level
D version * value options(*string) Version string
D stream_size 10i 0 value Stream struct. size
*
D deflate PR 10I 0 extproc('deflate') Compress data
D strm like(z_stream) Compression stream
D flush 10I 0 value Flush type required
*
D deflateEnd PR 10I 0 extproc('deflateEnd') Termin. compression
D strm like(z_stream) Compression stream
*
D inflateInit PR 10I 0 extproc('inflateInit_') Init. expansion
D strm like(z_stream) Expansion stream
D version * value options(*string) Version string
D stream_size 10i 0 value Stream struct. size
*
D inflate PR 10I 0 extproc('inflate') Expand data
D strm like(z_stream) Expansion stream
D flush 10I 0 value Flush type required
*
D inflateEnd PR 10I 0 extproc('inflateEnd') Termin. expansion
D strm like(z_stream) Expansion stream
*
**************************************************************************
* Advanced function prototypes
**************************************************************************
*
* deflateInit2, inflateInit2 and inflateBackInit are bound to the C
* entry points with a trailing underscore, so the caller has to supply
* the last two arguments itself: ZLIB_VERSION and %size(z_stream).
*
D deflateInit2 PR 10I 0 extproc('deflateInit2_') Init. compression
D strm like(z_stream) Compression stream
D level 10I 0 value Compression level
D method 10I 0 value Compression method
D windowBits 10I 0 value log2(window size)
D memLevel 10I 0 value Mem/cmpress tradeoff
D strategy 10I 0 value Compression strategy
D version * value options(*string) Version string
D stream_size 10i 0 value Stream struct. size
*
D deflateSetDictionary...
D PR 10I 0 extproc('deflateSetDictionary') Init. dictionary
D strm like(z_stream) Compression stream
D dictionary 32767 const options(*varsize) Dictionary bytes
D dictLength 10U 0 value Dictionary length
*
D deflateCopy PR 10I 0 extproc('deflateCopy') Compress strm 2 strm
D dest like(z_stream) Destination stream
D source like(z_stream) Source stream
*
D deflateReset PR 10I 0 extproc('deflateReset') End and init. stream
D strm like(z_stream) Compression stream
*
D deflateParams PR 10I 0 extproc('deflateParams') Change level & strat
D strm like(z_stream) Compression stream
D level 10I 0 value Compression level
D strategy 10I 0 value Compression strategy
*
D deflateBound PR 10U 0 extproc('deflateBound') Max compressed size
D strm like(z_stream) Compression stream
D sourcelen 10U 0 value Source length
*
D deflatePrime PR 10I 0 extproc('deflatePrime') Insert bits in strm
D strm like(z_stream) Compression stream
D bits 10I 0 value Number of bits to insert
D value 10I 0 value Bits to insert
*
D inflateInit2 PR 10I 0 extproc('inflateInit2_') Init. expansion
D strm like(z_stream) Expansion stream
D windowBits 10I 0 value log2(window size)
D version * value options(*string) Version string
D stream_size 10i 0 value Stream struct. size
*
D inflateSetDictionary...
D PR 10I 0 extproc('inflateSetDictionary') Init. dictionary
D strm like(z_stream) Expansion stream
D dictionary 32767 const options(*varsize) Dictionary bytes
D dictLength 10U 0 value Dictionary length
*
D inflateSync PR 10I 0 extproc('inflateSync') Sync. expansion
D strm like(z_stream) Expansion stream
*
D inflateCopy PR 10I 0 extproc('inflateCopy')
D dest like(z_stream) Destination stream
D source like(z_stream) Source stream
*
D inflateReset PR 10I 0 extproc('inflateReset') End and init. stream
D strm like(z_stream) Expansion stream
*
D inflateBackInit...
D PR 10I 0 extproc('inflateBackInit_')
D strm like(z_stream) Expansion stream
D windowBits 10I 0 value Log2(buffer size)
D window 32767 options(*varsize) Buffer
D version * value options(*string) Version string
D stream_size 10i 0 value Stream struct. size
*
* The in and out parameters of inflateBack are procedure pointers; each
* is followed by a descriptor pointer.
* NOTE(review): the descriptor is presumably handed back to the matching
* callback on each call -- confirm against the zlib manual.
*
D inflateBack PR 10I 0 extproc('inflateBack')
D strm like(z_stream) Expansion stream
D in * value procptr Input function
D in_desc * value Input descriptor
D out * value procptr Output function
D out_desc * value Output descriptor
*
D inflateBackEnd PR 10I 0 extproc('inflateBackEnd')
D strm like(z_stream) Expansion stream
*
D zlibCompileFlags...
D PR 10U 0 extproc('zlibCompileFlags')
*
**************************************************************************
* Checksum function prototypes
**************************************************************************
*
* Both procedures take the checksum computed so far by value and return
* the checksum updated with the len bytes found in buf.
*
D adler32 PR 10U 0 extproc('adler32') New checksum
D adler 10U 0 value Old checksum
D buf 32767 const options(*varsize) Bytes to accumulate
D len 10U 0 value Buffer length
*
D crc32 PR 10U 0 extproc('crc32') New checksum
D crc 10U 0 value Old checksum
D buf 32767 const options(*varsize) Bytes to accumulate
D len 10U 0 value Buffer length
*
**************************************************************************
* Miscellaneous function prototypes
**************************************************************************
*
* zError and get_crc_table return pointers; the caller has to
* dereference them itself (for instance with a based variable).
*
D zError PR * extproc('zError') Error string
D err 10I 0 value Error code
*
D inflateSyncPoint...
D PR 10I 0 extproc('inflateSyncPoint')
D strm like(z_stream) Expansion stream
*
D get_crc_table PR * extproc('get_crc_table') Ptr to ulongs
*
/endif

View File

@@ -0,0 +1,79 @@
/* compress.c -- compress a memory buffer
* Copyright (C) 1995-2003 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#define ZLIB_INTERNAL
#include "zlib.h"
/* ===========================================================================
     Compresses the source buffer into the destination buffer. The level
   parameter has the same meaning as in deflateInit.  sourceLen is the byte
   length of the source buffer. Upon entry, destLen is the total size of the
   destination buffer, which must be at least 0.1% larger than sourceLen plus
   12 bytes (see also compressBound() below). Upon exit, destLen is the
   actual size of the compressed buffer; it is left unchanged when an error
   is returned.

     The whole input is handed to deflate() in a single call, so both
   sourceLen and *destLen have to fit in the uInt counters of z_stream.

     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
   memory, Z_BUF_ERROR if there was not enough room in the output buffer or
   if sourceLen or *destLen cannot be represented as a uInt,
   Z_STREAM_ERROR if the level parameter is invalid.
*/
int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
    Bytef *dest;
    uLongf *destLen;
    const Bytef *source;
    uLong sourceLen;
    int level;
{
    z_stream stream;
    int err;

    stream.next_in = (Bytef*)source;
    stream.avail_in = (uInt)sourceLen;
    /* Reject a source length that was truncated by the conversion to uInt.
     * This used to be checked for MAXSEG_64K builds only, which let any
     * platform where uLong is wider than uInt compress just a part of the
     * input and still report Z_OK.
     */
    if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;

    stream.next_out = dest;
    stream.avail_out = (uInt)*destLen;
    /* Same check for the size of the destination buffer. */
    if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;

    /* Null allocator hooks: the caller supplies no custom allocator. */
    stream.zalloc = (alloc_func)0;
    stream.zfree = (free_func)0;
    stream.opaque = (voidpf)0;

    err = deflateInit(&stream, level);
    if (err != Z_OK) return err;

    /* One-shot compression: anything but Z_STREAM_END is a failure. */
    err = deflate(&stream, Z_FINISH);
    if (err != Z_STREAM_END) {
        deflateEnd(&stream);
        /* Z_OK here means deflate() stopped before the end of the stream:
         * the output buffer was too small.
         */
        return err == Z_OK ? Z_BUF_ERROR : err;
    }
    *destLen = stream.total_out;

    err = deflateEnd(&stream);
    return err;
}
/* ===========================================================================
     Compresses the source buffer into the destination buffer using the
   default compression level. Arguments and return codes are those of
   compress2(), to which the work is delegated.
*/
int ZEXPORT compress (dest, destLen, source, sourceLen)
    Bytef *dest;
    uLongf *destLen;
    const Bytef *source;
    uLong sourceLen;
{
    int status;

    status = compress2(dest, destLen, source, sourceLen,
                       Z_DEFAULT_COMPRESSION);
    return status;
}
/* ===========================================================================
     Returns the value sourceLen + sourceLen/4096 + sourceLen/16384 + 11.

     If the default memLevel or windowBits for deflateInit() is changed, then
   this function needs to be updated.
*/
uLong ZEXPORT compressBound (sourceLen)
    uLong sourceLen;
{
    uLong bound = sourceLen;

    bound += sourceLen >> 12;
    bound += sourceLen >> 14;
    bound += 11;
    return bound;
}

View File

@@ -0,0 +1,459 @@
#!/bin/sh
# configure script for zlib. This script is needed only if
# you wish to build a shared library and your system supports them,
# or if you need a special compiler, flags or install directory.
# Otherwise, you can just use directly "make test; make install"
#
# To create a shared library, use "configure --shared"; by default a static
# library is created. If the primitive shared library support provided here
# does not work, use ftp://prep.ai.mit.edu/pub/gnu/libtool-*.tar.gz
#
# To impose specific compiler or flags or install directory, use for example:
# prefix=$HOME CC=cc CFLAGS="-O4" ./configure
# or for csh/tcsh users:
# (setenv prefix $HOME; setenv CC cc; setenv CFLAGS "-O4"; ./configure)
# LDSHARED is the command to be used to create a shared library
# Incorrect settings of CC or CFLAGS may prevent creating a shared library.
# If you have problems, try without defining CC and CFLAGS before reporting
# an error.
# Defaults: a static library is built unless the shared-library check
# further down replaces LIBS and LDFLAGS.
LIBS=libz.a
LDFLAGS="-L. ${LIBS}"
# Version strings taken from the VERSION "..." line of zlib.h:
# VER is the complete string, VER2 is "major.minor", VER1 is "major".
VER=`sed -n -e '/VERSION "/s/.*"\(.*\)".*/\1/p' < zlib.h`
VER2=`sed -n -e '/VERSION "/s/.*"\([0-9]*\\.[0-9]*\)\\..*/\1/p' < zlib.h`
VER1=`sed -n -e '/VERSION "/s/.*"\([0-9]*\)\\..*/\1/p' < zlib.h`
# ${name-default} keeps a value found in the environment (even an empty one)
# and falls back to the default only when the variable is unset.
AR=${AR-"ar rc"}
RANLIB=${RANLIB-"ranlib"}
prefix=${prefix-/usr/local}
# The defaults below are single-quoted, so they hold the literal text
# ${prefix} / ${exec_prefix} rather than an expanded path.
# NOTE(review): presumably this text is expanded later by make once it has
# been written to the Makefile -- confirm in the part of the script that
# generates the Makefile.
exec_prefix=${exec_prefix-'${prefix}'}
libdir=${libdir-'${exec_prefix}/lib'}
includedir=${includedir-'${prefix}/include'}
mandir=${mandir-'${prefix}/share/man'}
shared_ext='.so'
# Flags: shared=1 is set by the --shared option, gcc=1 when the compiler
# name contains "gcc".
shared=0
gcc=0
# Remember what the user supplied; used to pick the message printed when
# shared library support is not available.
old_cc="$CC"
old_cflags="$CFLAGS"
# Parse the command line.
# Options are recognised by their first letter(s) and accept both the
# "--opt=VALUE" and the "--opt VALUE" forms:
#   -h/--help            print usage and exit 0
#   -p/--prefix          sets prefix
#   -e/--exec_prefix     sets exec_prefix
#   -l/--libdir          sets libdir
#   -i/--includedir      sets includedir
#   -s/--shared          sets shared=1
# Anything else prints an error and exits with status 1.
while test $# -ge 1
do
  case "$1" in
    -h* | --h*)
      echo 'usage:'
      echo ' configure [--shared] [--prefix=PREFIX] [--exec_prefix=EXPREFIX]'
      echo ' [--libdir=LIBDIR] [--includedir=INCLUDEDIR]'
      exit 0;;
    # "--opt=VALUE" forms: strip everything up to and including the first
    # "=". "$1" is quoted so that a value containing blanks or wildcard
    # characters reaches sed unchanged instead of being split or expanded
    # by the shell.
    -p*=* | --p*=*) prefix=`echo "$1" | sed 's/[-a-z_]*=//'`; shift;;
    -e*=* | --e*=*) exec_prefix=`echo "$1" | sed 's/[-a-z_]*=//'`; shift;;
    -l*=* | --libdir=*) libdir=`echo "$1" | sed 's/[-a-z_]*=//'`; shift;;
    -i*=* | --includedir=*) includedir=`echo "$1" | sed 's/[-a-z_]*=//'`;shift;;
    # "--opt VALUE" forms: the value is the next argument.
    -p* | --p*) prefix="$2"; shift; shift;;
    -e* | --e*) exec_prefix="$2"; shift; shift;;
    -l* | --l*) libdir="$2"; shift; shift;;
    -i* | --i*) includedir="$2"; shift; shift;;
    -s* | --s*) shared=1; shift;;
    *) echo "unknown option: $1"; echo "$0 --help for help"; exit 1;;
  esac
done
# Base name of the scratch files used by the compiler checks; $$ (the shell
# process id) keeps concurrent runs apart.
test=ztest$$
# Minimal translation unit used to find out whether the compiler works.
cat > $test.c <<EOF
extern int getchar();
int hello() {return getchar();}
EOF
# gcc is only looked for when the user did not choose a compiler.
test -z "$CC" && echo Checking for gcc...
# Candidate compiler and flags; CC and CFLAGS themselves are assigned by the
# detection step that follows.
cc=${CC-gcc}
cflags=${CFLAGS-"-O3"}
# to force the asm version use: CFLAGS="-O3 -DASMV" ./configure
# Any compiler whose name contains "gcc" is treated as gcc.
case "$cc" in
*gcc*) gcc=1;;
esac
# Select the compiler, the flags for static (CFLAGS) and shared (SFLAGS)
# objects and the command that creates a shared library (LDSHARED).
#
# First branch: the compiler is gcc and is able to compile the probe file;
# only LDSHARED and the shared library names depend on the system.
# Second branch: the native compiler is used and the flags are chosen from
# the system name and release reported by uname.
#
# Values already present in the environment win over every default, because
# all assignments use the ${name-default} form.
if test "$gcc" -eq 1 && ($cc -c $cflags $test.c) 2>/dev/null; then
  CC="$cc"
  SFLAGS=${CFLAGS-"-fPIC -O3"}
  CFLAGS="$cflags"
  case `(uname -s || echo unknown) 2>/dev/null` in
  Linux | linux | GNU | GNU/*) LDSHARED=${LDSHARED-"$cc -shared -Wl,-soname,libz.so.1"};;
  CYGWIN* | Cygwin* | cygwin* | OS/2* )
    EXE='.exe';;
  QNX*) # This is for QNX6. I suppose that the QNX rule below is for QNX2,QNX4
    # (alain.bonnefoy@icbt.com)
    LDSHARED=${LDSHARED-"$cc -shared -Wl,-hlibz.so.1"};;
  HP-UX*)
    LDSHARED=${LDSHARED-"$cc -shared $SFLAGS"}
    # The shared library suffix depends on the machine type.
    case `(uname -m || echo unknown) 2>/dev/null` in
    ia64)
      shared_ext='.so'
      SHAREDLIB='libz.so';;
    *)
      shared_ext='.sl'
      SHAREDLIB='libz.sl';;
    esac;;
  Darwin*) shared_ext='.dylib'
    SHAREDLIB=libz$shared_ext
    SHAREDLIBV=libz.$VER$shared_ext
    SHAREDLIBM=libz.$VER1$shared_ext
    LDSHARED=${LDSHARED-"$cc -dynamiclib -install_name $libdir/$SHAREDLIBM -compatibility_version $VER1 -current_version $VER"};;
  *) LDSHARED=${LDSHARED-"$cc -shared"};;
  esac
else
  # find system name and corresponding cc options
  CC=${CC-cc}
  case `(uname -sr || echo unknown) 2>/dev/null` in
  HP-UX*) SFLAGS=${CFLAGS-"-O +z"}
    CFLAGS=${CFLAGS-"-O"}
    # LDSHARED=${LDSHARED-"ld -b +vnocompatwarnings"}
    LDSHARED=${LDSHARED-"ld -b"}
    case `(uname -m || echo unknown) 2>/dev/null` in
    ia64)
      shared_ext='.so'
      SHAREDLIB='libz.so';;
    *)
      shared_ext='.sl'
      SHAREDLIB='libz.sl';;
    esac;;
  IRIX*) SFLAGS=${CFLAGS-"-ansi -O2 -rpath ."}
    CFLAGS=${CFLAGS-"-ansi -O2"}
    LDSHARED=${LDSHARED-"cc -shared"};;
  OSF1\ V4*) SFLAGS=${CFLAGS-"-O -std1"}
    CFLAGS=${CFLAGS-"-O -std1"}
    # The "$" of $(libdir) is escaped: inside double quotes a POSIX shell
    # would otherwise run a command named "libdir" (command substitution)
    # and leave the rpath empty. The escaped form keeps the literal text
    # $(libdir) in LDSHARED.
    LDSHARED=${LDSHARED-"cc -shared -Wl,-soname,libz.so -Wl,-msym -Wl,-rpath,\$(libdir) -Wl,-set_version,${VER}:1.0"};;
  OSF1*) SFLAGS=${CFLAGS-"-O -std1"}
    CFLAGS=${CFLAGS-"-O -std1"}
    LDSHARED=${LDSHARED-"cc -shared"};;
  QNX*) SFLAGS=${CFLAGS-"-4 -O"}
    CFLAGS=${CFLAGS-"-4 -O"}
    LDSHARED=${LDSHARED-"cc"}
    RANLIB=${RANLIB-"true"}
    AR="cc -A";;
  SCO_SV\ 3.2*) SFLAGS=${CFLAGS-"-O3 -dy -KPIC "}
    CFLAGS=${CFLAGS-"-O3"}
    LDSHARED=${LDSHARED-"cc -dy -KPIC -G"};;
  SunOS\ 5*) SFLAGS=${CFLAGS-"-fast -xcg89 -KPIC -R."}
    CFLAGS=${CFLAGS-"-fast -xcg89"}
    LDSHARED=${LDSHARED-"cc -G"};;
  SunOS\ 4*) SFLAGS=${CFLAGS-"-O2 -PIC"}
    CFLAGS=${CFLAGS-"-O2"}
    LDSHARED=${LDSHARED-"ld"};;
  SunStudio\ 9*) SFLAGS=${CFLAGS-"-DUSE_MMAP -fast -xcode=pic32 -xtarget=ultra3 -xarch=v9b"}
    CFLAGS=${CFLAGS-"-DUSE_MMAP -fast -xtarget=ultra3 -xarch=v9b"}
    LDSHARED=${LDSHARED-"cc -xarch=v9b"};;
  UNIX_System_V\ 4.2.0)
    SFLAGS=${CFLAGS-"-KPIC -O"}
    CFLAGS=${CFLAGS-"-O"}
    LDSHARED=${LDSHARED-"cc -G"};;
  UNIX_SV\ 4.2MP)
    SFLAGS=${CFLAGS-"-Kconform_pic -O"}
    CFLAGS=${CFLAGS-"-O"}
    LDSHARED=${LDSHARED-"cc -G"};;
  OpenUNIX\ 5)
    SFLAGS=${CFLAGS-"-KPIC -O"}
    CFLAGS=${CFLAGS-"-O"}
    LDSHARED=${LDSHARED-"cc -G"};;
  AIX*) # Courtesy of dbakker@arrayasolutions.com
    SFLAGS=${CFLAGS-"-O -qmaxmem=8192"}
    CFLAGS=${CFLAGS-"-O -qmaxmem=8192"}
    LDSHARED=${LDSHARED-"xlc -G"};;
  # send working options for other systems to support@gzip.org
  *) SFLAGS=${CFLAGS-"-O"}
    CFLAGS=${CFLAGS-"-O"}
    LDSHARED=${LDSHARED-"cc -shared"};;
  esac
fi
# Shared library file names, unless the detection step already set them:
# plain name, name with the full version, name with the major version.
SHAREDLIB=${SHAREDLIB-"libz$shared_ext"}
SHAREDLIBV=${SHAREDLIBV-"libz$shared_ext.$VER"}
SHAREDLIBM=${SHAREDLIBM-"libz$shared_ext.$VER1"}
# When --shared was given, verify that a shared object can really be built.
# A step counts as successful only if the command prints nothing at all
# (stdout and stderr are both captured), so a mere warning is a failure.
if test $shared -eq 1; then
echo Checking for shared library support...
# we must test in two steps (cc then ld), required at least on SunOS 4.x
if test "`($CC -c $SFLAGS $test.c) 2>&1`" = "" &&
test "`($LDSHARED -o $test$shared_ext $test.o) 2>&1`" = ""; then
CFLAGS="$SFLAGS"
LIBS="$SHAREDLIBV"
echo Building shared library $SHAREDLIBV with $CC.
elif test -z "$old_cc" -a -z "$old_cflags"; then
echo No shared library support.
shared=0;
else
echo 'No shared library support; try without defining CC and CFLAGS'
shared=0;
fi
fi
# Static build (requested, or fallback after a failed check): the compiler
# doubles as LDSHARED. Shared build: link against the versioned library.
if test $shared -eq 0; then
LDSHARED="$CC"
echo Building static library $LIBS version $VER with $CC.
else
LDFLAGS="-L. ${SHAREDLIBV}"
fi
# Check for <unistd.h> and create zconf.h from zconf.in.h accordingly.
cat > $test.c <<EOF
#include <unistd.h>
int main() { return 0; }
EOF
# The header is considered present only when the compiler prints nothing.
# If so, the first 0 on each line of zconf.in.h mentioning HAVE_UNISTD_H is
# changed to 1; otherwise zconf.in.h is copied unchanged.
if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then
sed < zconf.in.h "/HAVE_UNISTD_H/s%0%1%" > zconf.h
echo "Checking for unistd.h... Yes."
else
cp -p zconf.in.h zconf.h
echo "Checking for unistd.h... No."
fi
# Decide which printf-family functions the library may use and record any
# limitation as a -D flag appended to CFLAGS.  Every probe below uses the
# same convention: the compiler must print nothing at all for the probe to
# count as a success, so warnings are treated as failures.
#
# Step 1: the test program includes <stdarg.h> and the zconf.h generated
# above, and deliberately fails to compile ("choke me") unless STDC is
# defined.  A silent compile selects the vs[n]printf() path, anything else
# selects the s[n]printf() path.
cat > $test.c <<EOF
#include <stdio.h>
#include <stdarg.h>
#include "zconf.h"
int main()
{
#ifndef STDC
choke me
#endif
return 0;
}
EOF
if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then
echo "Checking whether to use vs[n]printf() or s[n]printf()... using vs[n]printf()"
# Step 2a: is vsnprintf() available?  This probe compiles AND links (-o),
# so a missing library symbol is detected as well.
cat > $test.c <<EOF
#include <stdio.h>
#include <stdarg.h>
int mytest(char *fmt, ...)
{
char buf[20];
va_list ap;
va_start(ap, fmt);
vsnprintf(buf, sizeof(buf), fmt, ap);
va_end(ap);
return 0;
}
int main()
{
return (mytest("Hello%d\n", 1));
}
EOF
if test "`($CC $CFLAGS -o $test $test.c) 2>&1`" = ""; then
echo "Checking for vsnprintf() in stdio.h... Yes."
# Step 3a: does vsnprintf() return a value that can be assigned to an int?
# Compile-only check.
cat >$test.c <<EOF
#include <stdio.h>
#include <stdarg.h>
int mytest(char *fmt, ...)
{
int n;
char buf[20];
va_list ap;
va_start(ap, fmt);
n = vsnprintf(buf, sizeof(buf), fmt, ap);
va_end(ap);
return n;
}
int main()
{
return (mytest("Hello%d\n", 1));
}
EOF
if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then
echo "Checking for return value of vsnprintf()... Yes."
else
CFLAGS="$CFLAGS -DHAS_vsnprintf_void"
echo "Checking for return value of vsnprintf()... No."
echo " WARNING: apparently vsnprintf() does not return a value. zlib"
echo " can build but will be open to possible string-format security"
echo " vulnerabilities."
fi
else
# vsnprintf() is unavailable: record that and fall back to the unbounded
# vsprintf(), then check whether that one returns a value.
CFLAGS="$CFLAGS -DNO_vsnprintf"
echo "Checking for vsnprintf() in stdio.h... No."
echo " WARNING: vsnprintf() not found, falling back to vsprintf(). zlib"
echo " can build but will be open to possible buffer-overflow security"
echo " vulnerabilities."
cat >$test.c <<EOF
#include <stdio.h>
#include <stdarg.h>
int mytest(char *fmt, ...)
{
int n;
char buf[20];
va_list ap;
va_start(ap, fmt);
n = vsprintf(buf, fmt, ap);
va_end(ap);
return n;
}
int main()
{
return (mytest("Hello%d\n", 1));
}
EOF
if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then
echo "Checking for return value of vsprintf()... Yes."
else
CFLAGS="$CFLAGS -DHAS_vsprintf_void"
echo "Checking for return value of vsprintf()... No."
echo " WARNING: apparently vsprintf() does not return a value. zlib"
echo " can build but will be open to possible string-format security"
echo " vulnerabilities."
fi
fi
else
echo "Checking whether to use vs[n]printf() or s[n]printf()... using s[n]printf()"
# Step 2b: the same sequence of probes for the non-varargs functions.
# First: is snprintf() available (compile and link)?
cat >$test.c <<EOF
#include <stdio.h>
int mytest()
{
char buf[20];
snprintf(buf, sizeof(buf), "%s", "foo");
return 0;
}
int main()
{
return (mytest());
}
EOF
if test "`($CC $CFLAGS -o $test $test.c) 2>&1`" = ""; then
echo "Checking for snprintf() in stdio.h... Yes."
# Step 3b: can the result of snprintf() be returned as an int?
cat >$test.c <<EOF
#include <stdio.h>
int mytest()
{
char buf[20];
return snprintf(buf, sizeof(buf), "%s", "foo");
}
int main()
{
return (mytest());
}
EOF
if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then
echo "Checking for return value of snprintf()... Yes."
else
CFLAGS="$CFLAGS -DHAS_snprintf_void"
echo "Checking for return value of snprintf()... No."
echo " WARNING: apparently snprintf() does not return a value. zlib"
echo " can build but will be open to possible string-format security"
echo " vulnerabilities."
fi
else
# snprintf() is unavailable: record that and fall back to the unbounded
# sprintf(), then check whether that one returns a value.
CFLAGS="$CFLAGS -DNO_snprintf"
echo "Checking for snprintf() in stdio.h... No."
echo " WARNING: snprintf() not found, falling back to sprintf(). zlib"
echo " can build but will be open to possible buffer-overflow security"
echo " vulnerabilities."
cat >$test.c <<EOF
#include <stdio.h>
int mytest()
{
char buf[20];
return sprintf(buf, "%s", "foo");
}
int main()
{
return (mytest());
}
EOF
if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then
echo "Checking for return value of sprintf()... Yes."
else
CFLAGS="$CFLAGS -DHAS_sprintf_void"
echo "Checking for return value of sprintf()... No."
echo " WARNING: apparently sprintf() does not return a value. zlib"
echo " can build but will be open to possible string-format security"
echo " vulnerabilities."
fi
fi
fi
# Probe for <errno.h>.  If a program including it does not compile silently,
# -DNO_ERRNO_H is appended to CFLAGS.
cat >$test.c <<EOF
#include <errno.h>
int main() { return 0; }
EOF
if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then
echo "Checking for errno.h... Yes."
else
echo "Checking for errno.h... No."
CFLAGS="$CFLAGS -DNO_ERRNO_H"
fi
# Probe for mmap(): compile (without linking) a function that calls it with
# the usual headers.  A silent compile appends -DUSE_MMAP to CFLAGS.
# Note: the test source defines hello() and has no main(), and its object
# file is what the later "underline in external names" check inspects.
cat > $test.c <<EOF
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
caddr_t hello() {
return mmap((caddr_t)0, (off_t)0, PROT_READ, MAP_SHARED, 0, (off_t)0);
}
EOF
if test "`($CC -c $CFLAGS $test.c) 2>&1`" = ""; then
CFLAGS="$CFLAGS -DUSE_MMAP"
echo Checking for mmap support... Yes.
else
echo Checking for mmap support... No.
fi
# Preprocessor command; a CPP value already set by the caller takes precedence.
CPP=${CPP-"$CC -E"}
# Only when CFLAGS mentions ASMV: find out whether the C compiler prepends an
# underscore to external symbol names and, if it does not, add
# -DNO_UNDERLINE to the preprocessor command.
# NOTE(review): presumably CPP is later used to preprocess the assembler
# sources selected by ASMV - confirm against Makefile.in.
case $CFLAGS in
*ASMV*)
  # Build a dedicated object that is guaranteed to define hello().  The
  # previous version inspected whatever $test.o the preceding mmap probe left
  # behind; when that compile failed the object was missing or stale, so the
  # answer was always "No" regardless of the platform's real convention.
  rm -f $test.o
  cat > $test.c <<EOF
int hello() { return 0; }
EOF
  ($CC -c $CFLAGS $test.c) > /dev/null 2>&1
  # nm diagnostics (for example a missing object file) are discarded; an
  # empty result still selects the NO_UNDERLINE branch, as before.
  if test "`nm $test.o 2>/dev/null | grep _hello`" = ""; then
    CPP="$CPP -DNO_UNDERLINE"
    echo Checking for underline in external names... No.
  else
    echo Checking for underline in external names... Yes.
  fi;;
esac
# Remove the temporary source, object, executable and shared-object files
# used by the probes.
rm -f $test.[co] $test $test$shared_ext
# update Makefile
# Generate Makefile from Makefile.in.  Each sed command below selects the
# line that assigns the named variable and replaces everything from "=" to
# the end of the line with the value determined by this script.  "#" is the
# delimiter of the s command, so a value containing "#" would break the
# substitution.
sed < Makefile.in "
/^CC *=/s#=.*#=$CC#
/^CFLAGS *=/s#=.*#=$CFLAGS#
/^CPP *=/s#=.*#=$CPP#
/^LDSHARED *=/s#=.*#=$LDSHARED#
/^LIBS *=/s#=.*#=$LIBS#
/^SHAREDLIB *=/s#=.*#=$SHAREDLIB#
/^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV#
/^SHAREDLIBM *=/s#=.*#=$SHAREDLIBM#
/^AR *=/s#=.*#=$AR#
/^RANLIB *=/s#=.*#=$RANLIB#
/^EXE *=/s#=.*#=$EXE#
/^prefix *=/s#=.*#=$prefix#
/^exec_prefix *=/s#=.*#=$exec_prefix#
/^libdir *=/s#=.*#=$libdir#
/^includedir *=/s#=.*#=$includedir#
/^mandir *=/s#=.*#=$mandir#
/^LDFLAGS *=/s#=.*#=$LDFLAGS#
" > Makefile

View File

@@ -0,0 +1,71 @@
All files under this contrib directory are UNSUPPORTED. They were
provided by users of zlib and were not tested by the authors of zlib.
Use at your own risk. Please contact the authors of the contributions
for help about these, not the zlib authors. Thanks.
ada/ by Dmitriy Anisimkov <anisimkov@yahoo.com>
Support for Ada
See http://zlib-ada.sourceforge.net/
asm586/
asm686/ by Brian Raiter <breadbox@muppetlabs.com>
asm code for Pentium and PPro/PII, using the AT&T (GNU as) syntax
See http://www.muppetlabs.com/~breadbox/software/assembly.html
blast/ by Mark Adler <madler@alumni.caltech.edu>
Decompressor for output of PKWare Data Compression Library (DCL)
delphi/ by Cosmin Truta <cosmint@cs.ubbcluj.ro>
Support for Delphi and C++ Builder
dotzlib/ by Henrik Ravn <henrik@ravn.com>
Support for Microsoft .Net and Visual C++ .Net
infback9/ by Mark Adler <madler@alumni.caltech.edu>
Unsupported diffs to infback to decode the deflate64 format
inflate86/ by Chris Anderson <christop@charm.net>
Tuned x86 gcc asm code to replace inflate_fast()
iostream/ by Kevin Ruland <kevin@rodin.wustl.edu>
A C++ I/O streams interface to the zlib gz* functions
iostream2/ by Tyge Løvset <Tyge.Lovset@cmr.no>
Another C++ I/O streams interface
iostream3/ by Ludwig Schwardt <schwardt@sun.ac.za>
and Kevin Ruland <kevin@rodin.wustl.edu>
Yet another C++ I/O streams interface
masm686/ by Dan Higdon <hdan@kinesoft.com>
and Chuck Walbourn <chuckw@kinesoft.com>
asm code for Pentium Pro/PII, using the MASM syntax
masmx64/ by Gilles Vollant <info@winimage.com>
x86 64-bit (AMD64 and Intel EM64t) code for x64 assembler to
replace longest_match() and inflate_fast()
masmx86/ by Gilles Vollant <info@winimage.com>
x86 asm code to replace longest_match() and inflate_fast(),
for Visual C++ and MASM
minizip/ by Gilles Vollant <info@winimage.com>
Mini zip and unzip based on zlib
See http://www.winimage.com/zLibDll/unzip.html
pascal/ by Bob Dellaca <bobdl@xtra.co.nz> et al.
Support for Pascal
puff/ by Mark Adler <madler@alumni.caltech.edu>
Small, low memory usage inflate. Also serves to provide an
unambiguous description of the deflate format.
testzlib/ by Gilles Vollant <info@winimage.com>
Example of the use of zlib
untgz/ by Pedro A. Aranda Gutierrez <paag@tid.es>
A very simple tar.gz file extractor using zlib
vstudio/ by Gilles Vollant <info@winimage.com>
Building a minizip-enhanced zlib with Microsoft Visual Studio

View File

@@ -0,0 +1,106 @@
----------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2004 Dmitriy Anisimkov --
-- --
-- Open source license information is in the zlib.ads file. --
----------------------------------------------------------------
--
-- $Id: buffer_demo.adb,v 1.3 2004/09/06 06:55:35 vagul Exp $
-- This demo program provided by Dr Steve Sangwine <sjs@essex.ac.uk>
--
-- Demonstration of a problem with Zlib-Ada (already fixed) when a buffer
-- of exactly the correct size is used for decompressed data, and the last
-- few bytes passed in to Zlib are checksum bytes.
-- This program compresses a string of text, and then decompresses the
-- compressed text into a buffer of the same size as the original text.
with Ada.Streams; use Ada.Streams;
with Ada.Text_IO;
with ZLib; use ZLib;
procedure Buffer_Demo is
   --  Compresses a fixed text in one call, then decompresses it in small
   --  input blocks into a buffer of exactly the original size, printing the
   --  running totals and finally whether the round trip matched.

   package TIO renames Ada.Text_IO;

   Text : constant String :=
     "Four score and seven years ago our fathers brought forth," & ASCII.LF &
     "upon this continent, a new nation, conceived in liberty," & ASCII.LF &
     "and dedicated to the proposition that `all men are created equal'.";

   --  Stream-element view of Text: the array is overlaid on the storage of
   --  the string by the address clause below.
   Source : Stream_Element_Array (1 .. Text'Length);
   for Source'Address use Text'Address;

begin
   TIO.Put (Text);
   TIO.New_Line;
   TIO.Put_Line
     ("Uncompressed size : " & Positive'Image (Text'Length) & " bytes");

   declare
      Compressed_Data : Stream_Element_Array (1 .. Text'Length);
      Packed_Last     : Stream_Element_Offset;
      --  Index of the last compressed element in Compressed_Data.
   begin
      Packing : declare
         Compressor  : Filter_Type;
         Source_Last : Stream_Element_Offset;
      begin
         Deflate_Init (Compressor);

         --  The complete source is handed over in a single call, with
         --  Finish requested.
         Translate
           (Compressor,
            Source,
            Source_Last,
            Compressed_Data,
            Packed_Last,
            Finish);
         pragma Assert (Source_Last = Source'Last);

         Close (Compressor);

         TIO.Put_Line
           ("Compressed size : "
            & Stream_Element_Offset'Image (Packed_Last) & " bytes");
      end Packing;

      --  Decompression is fed with short blocks on purpose: this is what
      --  demonstrates the problem - the last block passed will contain
      --  checksum information and there will be no output, only a check
      --  inside Zlib that the checksum is correct.
      Unpacking : declare
         Decompressor      : Filter_Type;
         Uncompressed_Data : Stream_Element_Array (1 .. Text'Length);

         Block_Size : constant := 4;
         --  This makes sure that the last block contains
         --  only Adler checksum data.

         Fed_Last : Stream_Element_Offset := Compressed_Data'First - 1;
         --  Last compressed element already consumed by the filter.

         Out_Last : Stream_Element_Offset;
      begin
         Inflate_Init (Decompressor);

         loop
            declare
               Block_Last : constant Stream_Element_Offset :=
                 Stream_Element_Offset'Min
                   (Fed_Last + Block_Size, Packed_Last);
            begin
               --  Output goes to the not yet filled tail of the result
               --  buffer.
               Translate
                 (Decompressor,
                  Compressed_Data (Fed_Last + 1 .. Block_Last),
                  Fed_Last,
                  Uncompressed_Data
                    (Total_Out (Decompressor) + 1 .. Uncompressed_Data'Last),
                  Out_Last,
                  No_Flush);
            end;

            TIO.Put_Line
              ("Total in : " & Count'Image (Total_In (Decompressor)) &
               ", out : " & Count'Image (Total_Out (Decompressor)));

            if Fed_Last = Packed_Last then
               exit;
            end if;
         end loop;

         TIO.New_Line;
         TIO.Put_Line
           ("Decompressed text matches original text : "
            & Boolean'Image (Uncompressed_Data = Source));
      end Unpacking;
   end;
end Buffer_Demo;

View File

@@ -0,0 +1,156 @@
----------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2003 Dmitriy Anisimkov --
-- --
-- Open source license information is in the zlib.ads file. --
----------------------------------------------------------------
-- Continuous test for ZLib multithreading. If the test would fail
-- we should provide thread safe allocation routines for the Z_Stream.
--
-- $Id: mtest.adb,v 1.4 2004/07/23 07:49:54 vagul Exp $
with ZLib;
with Ada.Streams;
with Ada.Numerics.Discrete_Random;
with Ada.Text_IO;
with Ada.Exceptions;
with Ada.Task_Identification;
procedure MTest is
-- Starts four tasks that each compress and decompress a private buffer in an
-- endless loop, comparing the result with the original data. The program
-- runs until a key is pressed or until one of the tasks raises an exception.
use Ada.Streams;
use ZLib;
-- Stop request shared by all tasks. It is set by the main procedure after a
-- key press and by the exception handler of any task; each task checks it
-- at the end of every round.
Stop : Boolean := False;
pragma Atomic (Stop);
subtype Visible_Symbols is Stream_Element range 16#20# .. 16#7E#;
package Random_Elements is
new Ada.Numerics.Discrete_Random (Visible_Symbols);
task type Test_Task;
task body Test_Task is
-- Data to be compressed; private to each task.
Buffer : Stream_Element_Array (1 .. 100_000);
Gen : Random_Elements.Generator;
-- Index of the next Buffer element to hand to the compressor.
Buffer_First : Stream_Element_Offset;
-- Index of the next Buffer element to compare with decompressed output.
Compare_First : Stream_Element_Offset;
Deflate : Filter_Type;
Inflate : Filter_Type;
-- Receives compressed output and pushes it through Inflate.
procedure Further (Item : in Stream_Element_Array);
-- Supplies the next piece of Buffer as compressor input.
procedure Read_Buffer
(Item : out Ada.Streams.Stream_Element_Array;
Last : out Ada.Streams.Stream_Element_Offset);
-------------
-- Further --
-------------
procedure Further (Item : in Stream_Element_Array) is
procedure Compare (Item : in Stream_Element_Array);
-------------
-- Compare --
-------------
-- Checks a piece of decompressed data against the matching slice of
-- Buffer and advances Compare_First; raises Program_Error on mismatch.
procedure Compare (Item : in Stream_Element_Array) is
Next_First : Stream_Element_Offset := Compare_First + Item'Length;
begin
if Buffer (Compare_First .. Next_First - 1) /= Item then
raise Program_Error;
end if;
Compare_First := Next_First;
end Compare;
procedure Compare_Write is new ZLib.Write (Write => Compare);
begin
Compare_Write (Inflate, Item, No_Flush);
end Further;
-----------------
-- Read_Buffer --
-----------------
procedure Read_Buffer
(Item : out Ada.Streams.Stream_Element_Array;
Last : out Ada.Streams.Stream_Element_Offset)
is
-- Number of unread elements minus one (-1 when nothing is left).
Buff_Diff : Stream_Element_Offset := Buffer'Last - Buffer_First;
Next_First : Stream_Element_Offset;
begin
if Item'Length <= Buff_Diff then
-- More data remains than Item can hold: fill Item completely.
Last := Item'Last;
Next_First := Buffer_First + Item'Length;
Item := Buffer (Buffer_First .. Next_First - 1);
Buffer_First := Next_First;
else
-- Copy whatever is left. When nothing is left the slices are empty
-- and Last becomes Item'First - 1.
Last := Item'First + Buff_Diff;
Item (Item'First .. Last) := Buffer (Buffer_First .. Buffer'Last);
Buffer_First := Buffer'Last + 1;
end if;
end Read_Buffer;
-- Instance that takes its input from Read_Buffer and passes its output to
-- Further.
procedure Translate is new Generic_Translate
(Data_In => Read_Buffer,
Data_Out => Further);
begin
Random_Elements.Reset (Gen);
-- Initial fill value is decimal 20.
Buffer := (others => 20);
Main : loop
for J in Buffer'Range loop
-- Each round replaces one more element with a random visible symbol
-- and then runs the whole buffer through Deflate and Inflate.
Buffer (J) := Random_Elements.Random (Gen);
Deflate_Init (Deflate);
Inflate_Init (Inflate);
Buffer_First := Buffer'First;
Compare_First := Buffer'First;
Translate (Deflate);
-- Every element of Buffer must have been compared by now.
if Compare_First /= Buffer'Last + 1 then
raise Program_Error;
end if;
-- Progress line: task image, round index and compressed size.
Ada.Text_IO.Put_Line
(Ada.Task_Identification.Image
(Ada.Task_Identification.Current_Task)
& Stream_Element_Offset'Image (J)
& ZLib.Count'Image (Total_Out (Deflate)));
Close (Deflate);
Close (Inflate);
exit Main when Stop;
end loop;
end loop Main;
exception
when E : others =>
-- Report the failure and ask the other tasks to stop as well.
Ada.Text_IO.Put_Line (Ada.Exceptions.Exception_Information (E));
Stop := True;
end Test_Task;
-- Declaring the array starts the four test tasks.
Test : array (1 .. 4) of Test_Task;
pragma Unreferenced (Test);
Dummy : Character;
begin
-- Wait for a key press, then request all tasks to stop.
Ada.Text_IO.Get_Immediate (Dummy);
Stop := True;
end MTest;

View File

@@ -0,0 +1,156 @@
----------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2003 Dmitriy Anisimkov --
-- --
-- Open source license information is in the zlib.ads file. --
----------------------------------------------------------------
-- $Id: read.adb,v 1.8 2004/05/31 10:53:40 vagul Exp $
-- Test/demo program for the generic read interface.
with Ada.Numerics.Discrete_Random;
with Ada.Streams;
with Ada.Text_IO;
with ZLib;
procedure Read is
   --  For every compression level, pulls generated pseudo-random data
   --  through the generic ZLib.Read interface and prints the level, the
   --  uncompressed size and the resulting compressed size.

   use Ada.Streams;

   package TIO renames Ada.Text_IO;

   ------------------------------------
   -- Test configuration parameters  --
   ------------------------------------

   File_Size : Stream_Element_Offset := 100_000;
   --  Number of uncompressed elements generated per compression run.

   Continuous : constant Boolean := False;
   --  When True the test is repeated again and again, with File_Size
   --  incremented for every iteration.

   Header : constant ZLib.Header_Type := ZLib.Default;
   --  Do not use Header other than Default in ZLib versions 1.1.4 and older.

   Init_Random : constant := 8;
   --  Fixed seed: the same random sequence is used on every run, so that a
   --  bug that has been caught can be reproduced.

   -- End --

   Pack_Size : Stream_Element_Offset;
   --  Running count of compressed elements produced in the current run.

   Offset : Stream_Element_Offset;
   --  One-based position of the next element to generate.

   Filter : ZLib.Filter_Type;

   subtype Visible_Symbols is Stream_Element range 16#20# .. 16#7E#;

   package Random_Elements is
     new Ada.Numerics.Discrete_Random (Visible_Symbols);

   Gen : Random_Elements.Generator;

   Period : constant Stream_Element_Offset := 200;
   --  Within one generated piece only the first Period elements are random;
   --  the rest repeat the element Period positions earlier. A bigger period
   --  therefore gives "more random" data.

   Read_Buffer : Stream_Element_Array (1 .. 2048);
   Read_First  : Stream_Element_Offset;
   Read_Last   : Stream_Element_Offset;

   procedure Reset;
   --  Restores the generator and all counters to their start-of-run state.

   procedure Generate
     (Item : out Stream_Element_Array;
      Last : out Stream_Element_Offset);
   --  Data source for the generic instantiation of ZLib.Read below: it
   --  produces the uncompressed test data itself (nothing is read from a
   --  file) until File_Size elements have been delivered.

   procedure Packed_Read is new ZLib.Read
     (Generate,
      Read_Buffer,
      Rest_First => Read_First,
      Rest_Last  => Read_Last);

   procedure Run_Level (Level : in ZLib.Compression_Level);
   --  Performs and reports one complete compression run at the given level.

   --------------
   -- Generate --
   --------------

   procedure Generate
     (Item : out Stream_Element_Array;
      Last : out Stream_Element_Offset) is
   begin
      --  Deliver a full Item unless fewer elements remain to be generated.
      Last := Stream_Element_Offset'Min
                (Item'Last,
                 Item'First + File_Size - Offset);

      for Pos in Item'First .. Last loop
         if Pos >= Item'First + Period then
            Item (Pos) := Item (Pos - Period);
         else
            Item (Pos) := Random_Elements.Random (Gen);
         end if;

         Offset := Offset + 1;
      end loop;
   end Generate;

   -----------
   -- Reset --
   -----------

   procedure Reset is
   begin
      Random_Elements.Reset (Gen, Init_Random);
      Pack_Size := 0;
      Offset    := 1;

      --  Rest_First beyond Rest_Last: no pending data in Read_Buffer.
      Read_First := Read_Buffer'Last + 1;
      Read_Last  := Read_Buffer'Last;
   end Reset;

   ---------------
   -- Run_Level --
   ---------------

   procedure Run_Level (Level : in ZLib.Compression_Level) is
      Chunk      : Stream_Element_Array (1 .. 1024);
      Chunk_Last : Stream_Element_Offset;
   begin
      TIO.Put ("Level ="
               & ZLib.Compression_Level'Image (Level));

      --  Deflate using generic instantiation.
      ZLib.Deflate_Init
        (Filter,
         Level,
         Header => Header);

      Reset;

      TIO.Put
        (Stream_Element_Offset'Image (File_Size) & " ->");

      loop
         Packed_Read (Filter, Chunk, Chunk_Last);
         Pack_Size := Pack_Size + Chunk_Last - Chunk'First + 1;

         --  A chunk that is not completely filled ends the run.
         if Chunk_Last < Chunk'Last then
            exit;
         end if;
      end loop;

      TIO.Put_Line (Stream_Element_Offset'Image (Pack_Size));

      ZLib.Close (Filter);
   end Run_Level;

begin
   TIO.Put_Line ("ZLib " & ZLib.Version);

   loop
      for Level in ZLib.Compression_Level'Range loop
         Run_Level (Level);
      end loop;

      if not Continuous then
         exit;
      end if;

      File_Size := File_Size + 1;
   end loop;
end Read;

View File

@@ -0,0 +1,65 @@
ZLib for Ada thick binding (ZLib.Ada)
Release 1.3
ZLib.Ada is a thick binding interface to the popular ZLib data
compression library, available at http://www.gzip.org/zlib/.
It provides Ada-style access to the ZLib C library.
Here are the main changes since ZLib.Ada 1.2:
- Attention: the ZLib.Read generic routine now has an initialization
requirement for the Read_Last parameter. This is slightly incompatible with
the previous version, but it extends functionality: the new parameters
Allow_Read_Some and Flush can now be used.
- Added Is_Open routines to ZLib and ZLib.Streams packages.
- Add pragma Assert to check Stream_Element is 8 bit.
- Fix extraction to buffer with exact known decompressed size. Error reported by
Steve Sangwine.
- Fix definition of ULong (changed to unsigned_long), fix regression on 64-bit
computers. Patch provided by Pascal Obry.
- Add Status_Error exception definition.
- Add pragma Assertion that Ada.Streams.Stream_Element size is 8 bit.
How to build ZLib.Ada under GNAT
You should have the ZLib library already built on your computer before
building ZLib.Ada. Make the directory of ZLib.Ada sources current and
issue the command:
gnatmake test -largs -L<directory where libz.a is> -lz
Or use the GNAT project file build for GNAT 3.15 or later:
gnatmake -Pzlib.gpr -L<directory where libz.a is>
How to build ZLib.Ada under Aonix ObjectAda for Win32 7.2.2
1. Make a project with all *.ads and *.adb files from the distribution.
2. Build the libz.a library from the ZLib C sources.
3. Rename libz.a to z.lib.
4. Add the library z.lib to the project.
5. Add the libc.lib library from the ObjectAda distribution to the project.
6. Build the executable using test.adb as a main procedure.
How to use ZLib.Ada
The source files test.adb and read.adb are small demo programs that show
the main functionality of ZLib.Ada.
The routines from the package specifications are commented.
Homepage: http://zlib-ada.sourceforge.net/
Author: Dmitriy Anisimkov <anisimkov@yahoo.com>
Contributors: Pascal Obry <pascal@obry.org>, Steve Sangwine <sjs@essex.ac.uk>

View File

@@ -0,0 +1,463 @@
----------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2003 Dmitriy Anisimkov --
-- --
-- Open source license information is in the zlib.ads file. --
----------------------------------------------------------------
-- $Id: test.adb,v 1.17 2003/08/12 12:13:30 vagul Exp $
-- The program has a few aims.
-- 1. Test ZLib.Ada95 thick binding functionality.
-- 2. Show an example of using the main functionality of the ZLib.Ada95 binding.
-- 3. Building this program automatically compiles all ZLib.Ada95 packages
-- under the GNAT Ada95 compiler.
with ZLib.Streams;
with Ada.Streams.Stream_IO;
with Ada.Numerics.Discrete_Random;
with Ada.Text_IO;
with Ada.Calendar;
procedure Test is
-- Generates a test file and, for every compression level, round-trips it
-- through the generic translate interface and the stream interface, timing
-- each phase and comparing the final output with the original file.
use Ada.Streams;
use Stream_IO;
------------------------------------
-- Test configuration parameters --
------------------------------------
-- Size of the generated input file.
File_Size : Count := 100_000;
-- When True the whole test repeats with File_Size incremented each time.
Continuous : constant Boolean := False;
Header : constant ZLib.Header_Type := ZLib.Default;
-- ZLib.None;
-- ZLib.Auto;
-- ZLib.GZip;
-- Do not use Header other than Default in ZLib versions 1.1.4
-- and older.
Strategy : constant ZLib.Strategy_Type := ZLib.Default_Strategy;
-- Fixed seed for the random generator used by Generate_File.
Init_Random : constant := 10;
-- End --
In_File_Name : constant String := "testzlib.in";
-- Name of the input file
Z_File_Name : constant String := "testzlib.zlb";
-- Name of the compressed file.
Out_File_Name : constant String := "testzlib.out";
-- Name of the decompressed file.
File_In : File_Type;
File_Out : File_Type;
File_Back : File_Type;
File_Z : ZLib.Streams.Stream_Type;
Filter : ZLib.Filter_Type;
Time_Stamp : Ada.Calendar.Time;
procedure Generate_File;
-- Generate file of specified size with some random data.
-- The random data is repeatable, for the good compression.
procedure Compare_Streams
(Left, Right : in out Root_Stream_Type'Class);
-- The procedure comparing data in 2 streams.
-- It is for compare data before and after compression/decompression.
procedure Compare_Files (Left, Right : String);
-- Compare files. Based on the Compare_Streams.
procedure Copy_Streams
(Source, Target : in out Root_Stream_Type'Class;
Buffer_Size : in Stream_Element_Offset := 1024);
-- Copying data from one stream to another. It is for test stream
-- interface of the library.
procedure Data_In
(Item : out Stream_Element_Array;
Last : out Stream_Element_Offset);
-- this procedure is for generic instantiation of
-- ZLib.Generic_Translate.
-- reading data from the File_In.
procedure Data_Out (Item : in Stream_Element_Array);
-- this procedure is for generic instantiation of
-- ZLib.Generic_Translate.
-- writing data to the File_Out.
procedure Stamp;
-- Store the timestamp to the local variable.
procedure Print_Statistic (Msg : String; Data_Size : ZLib.Count);
-- Print the time statistic with the message.
procedure Translate is new ZLib.Generic_Translate
(Data_In => Data_In,
Data_Out => Data_Out);
-- This procedure is moving data from File_In to File_Out
-- with compression or decompression, depend on initialization of
-- Filter parameter.
-------------------
-- Compare_Files --
-------------------
procedure Compare_Files (Left, Right : String) is
Left_File, Right_File : File_Type;
begin
Open (Left_File, In_File, Left);
Open (Right_File, In_File, Right);
Compare_Streams (Stream (Left_File).all, Stream (Right_File).all);
Close (Left_File);
Close (Right_File);
end Compare_Files;
---------------------
-- Compare_Streams --
---------------------
procedure Compare_Streams
(Left, Right : in out Ada.Streams.Root_Stream_Type'Class)
is
-- Both streams are read in pieces of 16#1000# elements.
Left_Buffer, Right_Buffer : Stream_Element_Array (0 .. 16#FFF#);
Left_Last, Right_Last : Stream_Element_Offset;
begin
loop
Read (Left, Left_Buffer, Left_Last);
Read (Right, Right_Buffer, Right_Last);
-- A difference in the amount read or in the content read is reported
-- and raises Constraint_Error.
if Left_Last /= Right_Last then
Ada.Text_IO.Put_Line ("Compare error :"
& Stream_Element_Offset'Image (Left_Last)
& " /= "
& Stream_Element_Offset'Image (Right_Last));
raise Constraint_Error;
elsif Left_Buffer (0 .. Left_Last)
/= Right_Buffer (0 .. Right_Last)
then
Ada.Text_IO.Put_Line ("ERROR: IN and OUT files is not equal.");
raise Constraint_Error;
end if;
-- A partially filled buffer ends the comparison.
exit when Left_Last < Left_Buffer'Last;
end loop;
end Compare_Streams;
------------------
-- Copy_Streams --
------------------
procedure Copy_Streams
(Source, Target : in out Ada.Streams.Root_Stream_Type'Class;
Buffer_Size : in Stream_Element_Offset := 1024)
is
Buffer : Stream_Element_Array (1 .. Buffer_Size);
Last : Stream_Element_Offset;
begin
loop
Read (Source, Buffer, Last);
Write (Target, Buffer (1 .. Last));
-- A partially filled buffer ends the copy.
exit when Last < Buffer'Last;
end loop;
end Copy_Streams;
-------------
-- Data_In --
-------------
procedure Data_In
(Item : out Stream_Element_Array;
Last : out Stream_Element_Offset) is
begin
Read (File_In, Item, Last);
end Data_In;
--------------
-- Data_Out --
--------------
procedure Data_Out (Item : in Stream_Element_Array) is
begin
Write (File_Out, Item);
end Data_Out;
-------------------
-- Generate_File --
-------------------
procedure Generate_File is
subtype Visible_Symbols is Stream_Element range 16#20# .. 16#7E#;
package Random_Elements is
new Ada.Numerics.Discrete_Random (Visible_Symbols);
Gen : Random_Elements.Generator;
-- One text line: 77 spaces followed by a line feed (10).
Buffer : Stream_Element_Array := (1 .. 77 => 16#20#) & 10;
Buffer_Count : constant Count := File_Size / Buffer'Length;
-- Number of same buffers in the packet.
Density : constant Count := 30; -- from 0 to Buffer'Length - 2;
procedure Fill_Buffer (J, D : in Count);
-- Change the part of the buffer.
-----------------
-- Fill_Buffer --
-----------------
procedure Fill_Buffer (J, D : in Count) is
begin
-- Replaces D + 1 elements with random symbols, starting at an index
-- derived from J and wrapping around. The index stays within
-- 1 .. Buffer'Length - 1, so the final line feed is never overwritten.
for K in 0 .. D loop
Buffer
(Stream_Element_Offset ((J + K) mod (Buffer'Length - 1) + 1))
:= Random_Elements.Random (Gen);
end loop;
end Fill_Buffer;
begin
Random_Elements.Reset (Gen, Init_Random);
Create (File_In, Out_File, In_File_Name);
-- Randomize the whole line once, then change only a part of it between
-- consecutive writes.
Fill_Buffer (1, Buffer'Length - 2);
for J in 1 .. Buffer_Count loop
Write (File_In, Buffer);
Fill_Buffer (J, Density);
end loop;
-- fill remain size.
Write
(File_In,
Buffer
(1 .. Stream_Element_Offset
(File_Size - Buffer'Length * Buffer_Count)));
Flush (File_In);
Close (File_In);
end Generate_File;
---------------------
-- Print_Statistic --
---------------------
procedure Print_Statistic (Msg : String; Data_Size : ZLib.Count) is
use Ada.Calendar;
use Ada.Text_IO;
package Count_IO is new Integer_IO (ZLib.Count);
-- Time elapsed since the last call of Stamp.
Curr_Dur : Duration := Clock - Time_Stamp;
begin
Put (Msg);
Set_Col (20);
Ada.Text_IO.Put ("size =");
-- The field width is the length of the image of File_Size.
Count_IO.Put
(Data_Size,
Width => Stream_IO.Count'Image (File_Size)'Length);
Put_Line (" duration =" & Duration'Image (Curr_Dur));
end Print_Statistic;
-----------
-- Stamp --
-----------
procedure Stamp is
begin
Time_Stamp := Ada.Calendar.Clock;
end Stamp;
begin
Ada.Text_IO.Put_Line ("ZLib " & ZLib.Version);
loop
Generate_File;
for Level in ZLib.Compression_Level'Range loop
Ada.Text_IO.Put_Line ("Level ="
& ZLib.Compression_Level'Image (Level));
-- Test generic interface.
Open (File_In, In_File, In_File_Name);
Create (File_Out, Out_File, Z_File_Name);
Stamp;
-- Deflate using generic instantiation.
ZLib.Deflate_Init
(Filter => Filter,
Level => Level,
Strategy => Strategy,
Header => Header);
Translate (Filter);
Print_Statistic ("Generic compress", ZLib.Total_Out (Filter));
ZLib.Close (Filter);
Close (File_In);
Close (File_Out);
Open (File_In, In_File, Z_File_Name);
Create (File_Out, Out_File, Out_File_Name);
Stamp;
-- Inflate using generic instantiation.
ZLib.Inflate_Init (Filter, Header => Header);
Translate (Filter);
Print_Statistic ("Generic decompress", ZLib.Total_Out (Filter));
ZLib.Close (Filter);
Close (File_In);
Close (File_Out);
Compare_Files (In_File_Name, Out_File_Name);
-- Test stream interface.
-- Compress to the back stream.
Open (File_In, In_File, In_File_Name);
Create (File_Back, Out_File, Z_File_Name);
Stamp;
ZLib.Streams.Create
(Stream => File_Z,
Mode => ZLib.Streams.Out_Stream,
Back => ZLib.Streams.Stream_Access
(Stream (File_Back)),
Back_Compressed => True,
Level => Level,
Strategy => Strategy,
Header => Header);
Copy_Streams
(Source => Stream (File_In).all,
Target => File_Z);
-- Flushing internal buffers to the back stream.
ZLib.Streams.Flush (File_Z, ZLib.Finish);
Print_Statistic ("Write compress",
ZLib.Streams.Write_Total_Out (File_Z));
ZLib.Streams.Close (File_Z);
Close (File_In);
Close (File_Back);
-- Compare reading from original file and from
-- decompression stream.
Open (File_In, In_File, In_File_Name);
Open (File_Back, In_File, Z_File_Name);
ZLib.Streams.Create
(Stream => File_Z,
Mode => ZLib.Streams.In_Stream,
Back => ZLib.Streams.Stream_Access
(Stream (File_Back)),
Back_Compressed => True,
Header => Header);
Stamp;
Compare_Streams (Stream (File_In).all, File_Z);
Print_Statistic ("Read decompress",
ZLib.Streams.Read_Total_Out (File_Z));
ZLib.Streams.Close (File_Z);
Close (File_In);
Close (File_Back);
-- Compress by reading from compression stream.
Open (File_Back, In_File, In_File_Name);
Create (File_Out, Out_File, Z_File_Name);
ZLib.Streams.Create
(Stream => File_Z,
Mode => ZLib.Streams.In_Stream,
Back => ZLib.Streams.Stream_Access
(Stream (File_Back)),
Back_Compressed => False,
Level => Level,
Strategy => Strategy,
Header => Header);
Stamp;
Copy_Streams
(Source => File_Z,
Target => Stream (File_Out).all);
Print_Statistic ("Read compress",
ZLib.Streams.Read_Total_Out (File_Z));
ZLib.Streams.Close (File_Z);
Close (File_Out);
Close (File_Back);
-- Decompress to decompression stream.
Open (File_In, In_File, Z_File_Name);
Create (File_Back, Out_File, Out_File_Name);
ZLib.Streams.Create
(Stream => File_Z,
Mode => ZLib.Streams.Out_Stream,
Back => ZLib.Streams.Stream_Access
(Stream (File_Back)),
Back_Compressed => False,
Header => Header);
Stamp;
Copy_Streams
(Source => Stream (File_In).all,
Target => File_Z);
Print_Statistic ("Write decompress",
ZLib.Streams.Write_Total_Out (File_Z));
ZLib.Streams.Close (File_Z);
Close (File_In);
Close (File_Back);
-- The file written through the decompression stream must equal the
-- original input.
Compare_Files (In_File_Name, Out_File_Name);
end loop;
Ada.Text_IO.Put_Line (Count'Image (File_Size) & " Ok.");
exit when not Continuous;
File_Size := File_Size + 1;
end loop;
end Test;

View File

@@ -0,0 +1,225 @@
----------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2003 Dmitriy Anisimkov --
-- --
-- Open source license information is in the zlib.ads file. --
----------------------------------------------------------------
-- $Id: zlib-streams.adb,v 1.10 2004/05/31 10:53:40 vagul Exp $
with Ada.Unchecked_Deallocation;
package body ZLib.Streams is
-----------
-- Close --
-----------
procedure Close (Stream : in out Stream_Type) is
procedure Free is new Ada.Unchecked_Deallocation
(Stream_Element_Array, Buffer_Access);
begin
if Stream.Mode = Out_Stream or Stream.Mode = Duplex then
-- We should flush the data written by the writer.
Flush (Stream, Finish);
Close (Stream.Writer);
end if;
if Stream.Mode = In_Stream or Stream.Mode = Duplex then
Close (Stream.Reader);
Free (Stream.Buffer);
end if;
end Close;
------------
-- Create --
------------
procedure Create
(Stream : out Stream_Type;
Mode : in Stream_Mode;
Back : in Stream_Access;
Back_Compressed : in Boolean;
Level : in Compression_Level := Default_Compression;
Strategy : in Strategy_Type := Default_Strategy;
Header : in Header_Type := Default;
Read_Buffer_Size : in Ada.Streams.Stream_Element_Offset
:= Default_Buffer_Size;
Write_Buffer_Size : in Ada.Streams.Stream_Element_Offset
:= Default_Buffer_Size)
is
subtype Buffer_Subtype is Stream_Element_Array (1 .. Read_Buffer_Size);
procedure Init_Filter
(Filter : in out Filter_Type;
Compress : in Boolean);
-----------------
-- Init_Filter --
-----------------
procedure Init_Filter
(Filter : in out Filter_Type;
Compress : in Boolean) is
begin
if Compress then
Deflate_Init
(Filter, Level, Strategy, Header => Header);
else
Inflate_Init (Filter, Header => Header);
end if;
end Init_Filter;
begin
Stream.Back := Back;
Stream.Mode := Mode;
if Mode = Out_Stream or Mode = Duplex then
Init_Filter (Stream.Writer, Back_Compressed);
Stream.Buffer_Size := Write_Buffer_Size;
else
Stream.Buffer_Size := 0;
end if;
if Mode = In_Stream or Mode = Duplex then
Init_Filter (Stream.Reader, not Back_Compressed);
Stream.Buffer := new Buffer_Subtype;
Stream.Rest_First := Stream.Buffer'Last + 1;
Stream.Rest_Last := Stream.Buffer'Last;
end if;
end Create;
-----------
-- Flush --
-----------
procedure Flush
(Stream : in out Stream_Type;
Mode : in Flush_Mode := Sync_Flush)
is
Buffer : Stream_Element_Array (1 .. Stream.Buffer_Size);
Last : Stream_Element_Offset;
begin
loop
Flush (Stream.Writer, Buffer, Last, Mode);
Ada.Streams.Write (Stream.Back.all, Buffer (1 .. Last));
exit when Last < Buffer'Last;
end loop;
end Flush;
-------------
-- Is_Open --
-------------
function Is_Open (Stream : Stream_Type) return Boolean is
begin
   --  The stream counts as open while either of its filters is open.
   if Is_Open (Stream.Reader) then
      return True;
   end if;

   return Is_Open (Stream.Writer);
end Is_Open;
----------
-- Read --
----------
procedure Read
(Stream : in out Stream_Type;
Item : out Stream_Element_Array;
Last : out Stream_Element_Offset)
is
-- Fill Item with data obtained by passing back-stream data through
-- Stream.Reader. Last is set by the ZLib.Read instance below.
procedure Read
(Item : out Stream_Element_Array;
Last : out Stream_Element_Offset);
-- Raw data supplier for the ZLib.Read instance: reads directly from
-- the back stream.
----------
-- Read --
----------
procedure Read
(Item : out Stream_Element_Array;
Last : out Stream_Element_Offset) is
begin
Ada.Streams.Read (Stream.Back.all, Item, Last);
end Read;
-- Instantiate the generic reader over the stream's own buffer and
-- Rest_First/Rest_Last fields, which are stored in the stream record
-- rather than in local variables.
procedure Read is new ZLib.Read
(Read => Read,
Buffer => Stream.Buffer.all,
Rest_First => Stream.Rest_First,
Rest_Last => Stream.Rest_Last);
begin
Read (Stream.Reader, Item, Last);
end Read;
-------------------
-- Read_Total_In --
-------------------
function Read_Total_In (Stream : in Stream_Type) return Count is
   --  Input counter of the reader filter.
   Consumed : constant Count := Total_In (Stream.Reader);
begin
   return Consumed;
end Read_Total_In;
--------------------
-- Read_Total_Out --
--------------------
function Read_Total_Out (Stream : in Stream_Type) return Count is
   --  Output counter of the reader filter.
   Produced : constant Count := Total_Out (Stream.Reader);
begin
   return Produced;
end Read_Total_Out;
-----------
-- Write --
-----------
procedure Write
(Stream : in out Stream_Type;
Item : in Stream_Element_Array)
is
-- Pass Item through Stream.Writer; whatever the filter produces is
-- written to the back stream. No flush is requested (No_Flush).
procedure Write (Item : in Stream_Element_Array);
-- Output sink for the ZLib.Write instance: writes directly to the
-- back stream.
-----------
-- Write --
-----------
procedure Write (Item : in Stream_Element_Array) is
begin
Ada.Streams.Write (Stream.Back.all, Item);
end Write;
-- Instantiate the generic writer with the write buffer size that was
-- stored in the stream record by Create.
procedure Write is new ZLib.Write
(Write => Write,
Buffer_Size => Stream.Buffer_Size);
begin
Write (Stream.Writer, Item, No_Flush);
end Write;
--------------------
-- Write_Total_In --
--------------------
function Write_Total_In (Stream : in Stream_Type) return Count is
   --  Input counter of the writer filter.
   Accepted : constant Count := Total_In (Stream.Writer);
begin
   return Accepted;
end Write_Total_In;
---------------------
-- Write_Total_Out --
---------------------
function Write_Total_Out (Stream : in Stream_Type) return Count is
   --  Output counter of the writer filter.
   Emitted : constant Count := Total_Out (Stream.Writer);
begin
   return Emitted;
end Write_Total_Out;
end ZLib.Streams;

View File

@@ -0,0 +1,114 @@
----------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2003 Dmitriy Anisimkov --
-- --
-- Open source license information is in the zlib.ads file. --
----------------------------------------------------------------
-- $Id: zlib-streams.ads,v 1.12 2004/05/31 10:53:40 vagul Exp $
package ZLib.Streams is
-- Stream interface on top of the ZLib filters: a Stream_Type wraps another
-- ("back") stream and compresses or decompresses the data passing through.
type Stream_Mode is (In_Stream, Out_Stream, Duplex);
-- In_Stream: read only, Out_Stream: write only, Duplex: both directions.
type Stream_Access is access all Ada.Streams.Root_Stream_Type'Class;
type Stream_Type is
new Ada.Streams.Root_Stream_Type with private;
procedure Read
(Stream : in out Stream_Type;
Item : out Ada.Streams.Stream_Element_Array;
Last : out Ada.Streams.Stream_Element_Offset);
procedure Write
(Stream : in out Stream_Type;
Item : in Ada.Streams.Stream_Element_Array);
procedure Flush
(Stream : in out Stream_Type;
Mode : in Flush_Mode := Sync_Flush);
-- Flush the written data to the back stream:
-- all data held by the writer filter is flushed to the Back stream.
-- Should not be used unless necessary, because it degrades
-- compression.
function Read_Total_In (Stream : in Stream_Type) return Count;
pragma Inline (Read_Total_In);
-- Return total number of bytes read from back stream so far.
function Read_Total_Out (Stream : in Stream_Type) return Count;
pragma Inline (Read_Total_Out);
-- Return total number of bytes read so far.
function Write_Total_In (Stream : in Stream_Type) return Count;
pragma Inline (Write_Total_In);
-- Return total number of bytes written so far.
function Write_Total_Out (Stream : in Stream_Type) return Count;
pragma Inline (Write_Total_Out);
-- Return total number of bytes written to the back stream.
procedure Create
(Stream : out Stream_Type;
Mode : in Stream_Mode;
Back : in Stream_Access;
Back_Compressed : in Boolean;
Level : in Compression_Level := Default_Compression;
Strategy : in Strategy_Type := Default_Strategy;
Header : in Header_Type := Default;
Read_Buffer_Size : in Ada.Streams.Stream_Element_Offset
:= Default_Buffer_Size;
Write_Buffer_Size : in Ada.Streams.Stream_Element_Offset
:= Default_Buffer_Size);
-- Create the compression/decompression stream.
-- If Mode is In_Stream then the Write operation is disabled.
-- If Mode is Out_Stream then the Read operation is disabled.
-- If Back_Compressed is True then
-- data written to the Stream is compressed into the Back stream
-- and data read from the Stream is decompressed data from the Back stream.
-- If Back_Compressed is False then
-- data written to the Stream is decompressed into the Back stream
-- and data read from the Stream is compressed data from the Back stream.
-- !!! When the Need_Header is False, ZLib-Ada uses undocumented
-- ZLib 1.1.4 functionality so that ZLib headers are neither created nor
-- expected.
-- NOTE(review): Create has no Need_Header parameter; the remark above
-- presumably refers to Header = None -- confirm.
function Is_Open (Stream : Stream_Type) return Boolean;
-- True while the reader filter or the writer filter is open.
procedure Close (Stream : in out Stream_Type);
private
use Ada.Streams;
type Buffer_Access is access all Stream_Element_Array;
type Stream_Type
is new Root_Stream_Type with
record
Mode : Stream_Mode;
Buffer : Buffer_Access;
Rest_First : Stream_Element_Offset;
Rest_Last : Stream_Element_Offset;
-- Buffer for the Read operation.
-- We need to keep this buffer in the record
-- because not all data read from the back stream
-- can necessarily be processed during one read operation.
Buffer_Size : Stream_Element_Offset;
-- Buffer size for the write operation.
-- We do not need to keep this buffer
-- in the record because all data can be
-- processed within the write operation.
Back : Stream_Access;
Reader : Filter_Type;
Writer : Filter_Type;
end record;
end ZLib.Streams;

View File

@@ -0,0 +1,141 @@
----------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2003 Dmitriy Anisimkov --
-- --
-- Open source license information is in the zlib.ads file. --
----------------------------------------------------------------
-- $Id: zlib-thin.adb,v 1.8 2003/12/14 18:27:31 vagul Exp $
package body ZLib.Thin is

   --  Version string reported by the linked zlib library; passed to the
   --  *Init2 entry points.
   ZLIB_VERSION  : constant Chars_Ptr := zlibVersion;

   --  Size of Z_Stream in storage elements; passed to the *Init2 entry
   --  points.
   Z_Stream_Size : constant Int := Z_Stream'Size / System.Storage_Unit;

   --------------
   -- Avail_In --
   --------------

   function Avail_In (Strm : in Z_Stream) return UInt is
      Pending : constant UInt := Strm.Avail_In;
   begin
      return Pending;
   end Avail_In;

   ---------------
   -- Avail_Out --
   ---------------

   function Avail_Out (Strm : in Z_Stream) return UInt is
      Free_Space : constant UInt := Strm.Avail_Out;
   begin
      return Free_Space;
   end Avail_Out;

   ------------------
   -- Deflate_Init --
   ------------------

   function Deflate_Init
     (strm       : Z_Streamp;
      level      : Int;
      method     : Int;
      windowBits : Int;
      memLevel   : Int;
      strategy   : Int)
      return       Int is
   begin
      --  Forward to deflateInit2, supplying the version and stream size.
      return deflateInit2
        (strm        => strm,
         level       => level,
         method      => method,
         windowBits  => windowBits,
         memLevel    => memLevel,
         strategy    => strategy,
         version     => ZLIB_VERSION,
         stream_size => Z_Stream_Size);
   end Deflate_Init;

   ------------------
   -- Inflate_Init --
   ------------------

   function Inflate_Init (strm : Z_Streamp; windowBits : Int) return Int is
   begin
      --  Forward to inflateInit2, supplying the version and stream size.
      return inflateInit2
        (strm        => strm,
         windowBits  => windowBits,
         version     => ZLIB_VERSION,
         stream_size => Z_Stream_Size);
   end Inflate_Init;

   ------------------------
   -- Last_Error_Message --
   ------------------------

   function Last_Error_Message (Strm : in Z_Stream) return String is
      use Interfaces.C.Strings;
   begin
      --  A null msg pointer yields the empty string.
      if Strm.msg /= Null_Ptr then
         return Value (Strm.msg);
      end if;

      return "";
   end Last_Error_Message;

   ------------
   -- Set_In --
   ------------

   procedure Set_In
     (Strm   : in out Z_Stream;
      Buffer : in     Voidp;
      Size   : in     UInt) is
   begin
      Strm.Avail_In := Size;
      Strm.Next_In  := Buffer;
   end Set_In;

   ------------------
   -- Set_Mem_Func --
   ------------------

   procedure Set_Mem_Func
     (Strm   : in out Z_Stream;
      Opaque : in     Voidp;
      Alloc  : in     alloc_func;
      Free   : in     free_func) is
   begin
      Strm.zalloc := Alloc;
      Strm.zfree  := Free;
      Strm.opaque := Opaque;
   end Set_Mem_Func;

   -------------
   -- Set_Out --
   -------------

   procedure Set_Out
     (Strm   : in out Z_Stream;
      Buffer : in     Voidp;
      Size   : in     UInt) is
   begin
      Strm.Avail_Out := Size;
      Strm.Next_Out  := Buffer;
   end Set_Out;

   --------------
   -- Total_In --
   --------------

   function Total_In (Strm : in Z_Stream) return ULong is
      Consumed : constant ULong := Strm.Total_In;
   begin
      return Consumed;
   end Total_In;

   ---------------
   -- Total_Out --
   ---------------

   function Total_Out (Strm : in Z_Stream) return ULong is
      Produced : constant ULong := Strm.Total_Out;
   begin
      return Produced;
   end Total_Out;

end ZLib.Thin;

View File

@@ -0,0 +1,450 @@
----------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2003 Dmitriy Anisimkov --
-- --
-- Open source license information is in the zlib.ads file. --
----------------------------------------------------------------
-- $Id: zlib-thin.ads,v 1.11 2004/07/23 06:33:11 vagul Exp $
with Interfaces.C.Strings;
with System;
private package ZLib.Thin is
-- Thin binding: C-level constants, types and imported subprograms of the
-- zlib library, plus a few Ada accessors for the private Z_Stream record.
-- Being a private child, it is usable only inside the ZLib hierarchy.
-- From zconf.h
MAX_MEM_LEVEL : constant := 9; -- zconf.h:105
-- zconf.h:105
MAX_WBITS : constant := 15; -- zconf.h:115
-- 32K LZ77 window
-- zconf.h:115
SEEK_SET : constant := 8#0000#; -- zconf.h:244
-- Seek from beginning of file.
-- zconf.h:244
SEEK_CUR : constant := 1; -- zconf.h:245
-- Seek from current position.
-- zconf.h:245
SEEK_END : constant := 2; -- zconf.h:246
-- Set file pointer to EOF plus "offset"
-- zconf.h:246
type Byte is new Interfaces.C.unsigned_char; -- 8 bits
-- zconf.h:214
type UInt is new Interfaces.C.unsigned; -- 16 bits or more
-- zconf.h:216
type Int is new Interfaces.C.int;
type ULong is new Interfaces.C.unsigned_long; -- 32 bits or more
-- zconf.h:217
subtype Chars_Ptr is Interfaces.C.Strings.chars_ptr;
type ULong_Access is access ULong;
type Int_Access is access Int;
subtype Voidp is System.Address; -- zconf.h:232
subtype Byte_Access is Voidp;
Nul : constant Voidp := System.Null_Address;
-- end from zconf
-- Flush modes (values for the flush argument of Deflate/Inflate).
Z_NO_FLUSH : constant := 8#0000#; -- zlib.h:125
-- zlib.h:125
Z_PARTIAL_FLUSH : constant := 1; -- zlib.h:126
-- will be removed, use
-- Z_SYNC_FLUSH instead
-- zlib.h:126
Z_SYNC_FLUSH : constant := 2; -- zlib.h:127
-- zlib.h:127
Z_FULL_FLUSH : constant := 3; -- zlib.h:128
-- zlib.h:128
Z_FINISH : constant := 4; -- zlib.h:129
-- zlib.h:129
-- Return codes of the zlib functions.
Z_OK : constant := 8#0000#; -- zlib.h:132
-- zlib.h:132
Z_STREAM_END : constant := 1; -- zlib.h:133
-- zlib.h:133
Z_NEED_DICT : constant := 2; -- zlib.h:134
-- zlib.h:134
Z_ERRNO : constant := -1; -- zlib.h:135
-- zlib.h:135
Z_STREAM_ERROR : constant := -2; -- zlib.h:136
-- zlib.h:136
Z_DATA_ERROR : constant := -3; -- zlib.h:137
-- zlib.h:137
Z_MEM_ERROR : constant := -4; -- zlib.h:138
-- zlib.h:138
Z_BUF_ERROR : constant := -5; -- zlib.h:139
-- zlib.h:139
Z_VERSION_ERROR : constant := -6; -- zlib.h:140
-- zlib.h:140
-- Compression levels.
Z_NO_COMPRESSION : constant := 8#0000#; -- zlib.h:145
-- zlib.h:145
Z_BEST_SPEED : constant := 1; -- zlib.h:146
-- zlib.h:146
Z_BEST_COMPRESSION : constant := 9; -- zlib.h:147
-- zlib.h:147
Z_DEFAULT_COMPRESSION : constant := -1; -- zlib.h:148
-- zlib.h:148
-- Compression strategies.
Z_FILTERED : constant := 1; -- zlib.h:151
-- zlib.h:151
Z_HUFFMAN_ONLY : constant := 2; -- zlib.h:152
-- zlib.h:152
Z_DEFAULT_STRATEGY : constant := 8#0000#; -- zlib.h:153
-- zlib.h:153
-- Possible values of the data_type field.
Z_BINARY : constant := 8#0000#; -- zlib.h:156
-- zlib.h:156
Z_ASCII : constant := 1; -- zlib.h:157
-- zlib.h:157
Z_UNKNOWN : constant := 2; -- zlib.h:158
-- zlib.h:158
-- The deflate compression method.
Z_DEFLATED : constant := 8; -- zlib.h:161
-- zlib.h:161
Z_NULL : constant := 8#0000#; -- zlib.h:164
-- for initializing zalloc, zfree, opaque
-- zlib.h:164
type gzFile is new Voidp; -- zlib.h:646
type Z_Stream is private;
type Z_Streamp is access all Z_Stream; -- zlib.h:89
type alloc_func is access function
(Opaque : Voidp;
Items : UInt;
Size : UInt)
return Voidp; -- zlib.h:63
type free_func is access procedure (opaque : Voidp; address : Voidp);
-- Imported C entry points; the external names are given by the
-- pragma Import list in the private part.
function zlibVersion return Chars_Ptr;
function Deflate (strm : Z_Streamp; flush : Int) return Int;
function DeflateEnd (strm : Z_Streamp) return Int;
function Inflate (strm : Z_Streamp; flush : Int) return Int;
function InflateEnd (strm : Z_Streamp) return Int;
function deflateSetDictionary
(strm : Z_Streamp;
dictionary : Byte_Access;
dictLength : UInt)
return Int;
function deflateCopy (dest : Z_Streamp; source : Z_Streamp) return Int;
-- zlib.h:478
function deflateReset (strm : Z_Streamp) return Int; -- zlib.h:495
function deflateParams
(strm : Z_Streamp;
level : Int;
strategy : Int)
return Int; -- zlib.h:506
function inflateSetDictionary
(strm : Z_Streamp;
dictionary : Byte_Access;
dictLength : UInt)
return Int; -- zlib.h:548
function inflateSync (strm : Z_Streamp) return Int; -- zlib.h:565
function inflateReset (strm : Z_Streamp) return Int; -- zlib.h:580
function compress
(dest : Byte_Access;
destLen : ULong_Access;
source : Byte_Access;
sourceLen : ULong)
return Int; -- zlib.h:601
function compress2
(dest : Byte_Access;
destLen : ULong_Access;
source : Byte_Access;
sourceLen : ULong;
level : Int)
return Int; -- zlib.h:615
function uncompress
(dest : Byte_Access;
destLen : ULong_Access;
source : Byte_Access;
sourceLen : ULong)
return Int;
-- gz* file interface.
function gzopen (path : Chars_Ptr; mode : Chars_Ptr) return gzFile;
function gzdopen (fd : Int; mode : Chars_Ptr) return gzFile;
function gzsetparams
(file : gzFile;
level : Int;
strategy : Int)
return Int;
function gzread
(file : gzFile;
buf : Voidp;
len : UInt)
return Int;
function gzwrite
(file : in gzFile;
buf : in Voidp;
len : in UInt)
return Int;
function gzprintf (file : in gzFile; format : in Chars_Ptr) return Int;
function gzputs (file : in gzFile; s : in Chars_Ptr) return Int;
function gzgets
(file : gzFile;
buf : Chars_Ptr;
len : Int)
return Chars_Ptr;
function gzputc (file : gzFile; char : Int) return Int;
function gzgetc (file : gzFile) return Int;
function gzflush (file : gzFile; flush : Int) return Int;
function gzseek
(file : gzFile;
offset : Int;
whence : Int)
return Int;
function gzrewind (file : gzFile) return Int;
function gztell (file : gzFile) return Int;
function gzeof (file : gzFile) return Int;
function gzclose (file : gzFile) return Int;
function gzerror (file : gzFile; errnum : Int_Access) return Chars_Ptr;
-- Checksums.
function adler32
(adler : ULong;
buf : Byte_Access;
len : UInt)
return ULong;
function crc32
(crc : ULong;
buf : Byte_Access;
len : UInt)
return ULong;
-- Initialization entry points. They are imported under their
-- underscore-suffixed C names (see the private part) and take explicit
-- version and stream_size arguments.
function deflateInit
(strm : Z_Streamp;
level : Int;
version : Chars_Ptr;
stream_size : Int)
return Int;
function deflateInit2
(strm : Z_Streamp;
level : Int;
method : Int;
windowBits : Int;
memLevel : Int;
strategy : Int;
version : Chars_Ptr;
stream_size : Int)
return Int;
function Deflate_Init
(strm : Z_Streamp;
level : Int;
method : Int;
windowBits : Int;
memLevel : Int;
strategy : Int)
return Int;
pragma Inline (Deflate_Init);
-- Ada wrapper (implemented in the body) that calls deflateInit2 with the
-- library version string and the size of Z_Stream.
function inflateInit
(strm : Z_Streamp;
version : Chars_Ptr;
stream_size : Int)
return Int;
function inflateInit2
(strm : in Z_Streamp;
windowBits : in Int;
version : in Chars_Ptr;
stream_size : in Int)
return Int;
function inflateBackInit
(strm : in Z_Streamp;
windowBits : in Int;
window : in Byte_Access;
version : in Chars_Ptr;
stream_size : in Int)
return Int;
-- The size of the window has to be 2**windowBits.
function Inflate_Init (strm : Z_Streamp; windowBits : Int) return Int;
pragma Inline (Inflate_Init);
-- Ada wrapper (implemented in the body) that calls inflateInit2 with the
-- library version string and the size of Z_Stream.
function zError (err : Int) return Chars_Ptr;
function inflateSyncPoint (z : Z_Streamp) return Int;
function get_crc_table return ULong_Access;
-- Interface to the available fields of the z_stream structure.
-- The application must update next_in and avail_in when avail_in has
-- dropped to zero. It must update next_out and avail_out when avail_out
-- has dropped to zero. The application must initialize zalloc, zfree and
-- opaque before calling the init function.
procedure Set_In
(Strm : in out Z_Stream;
Buffer : in Voidp;
Size : in UInt);
pragma Inline (Set_In);
procedure Set_Out
(Strm : in out Z_Stream;
Buffer : in Voidp;
Size : in UInt);
pragma Inline (Set_Out);
procedure Set_Mem_Func
(Strm : in out Z_Stream;
Opaque : in Voidp;
Alloc : in alloc_func;
Free : in free_func);
pragma Inline (Set_Mem_Func);
function Last_Error_Message (Strm : in Z_Stream) return String;
pragma Inline (Last_Error_Message);
-- Text of Strm.msg, or the empty string when msg is null.
function Avail_Out (Strm : in Z_Stream) return UInt;
pragma Inline (Avail_Out);
function Avail_In (Strm : in Z_Stream) return UInt;
pragma Inline (Avail_In);
function Total_In (Strm : in Z_Stream) return ULong;
pragma Inline (Total_In);
function Total_Out (Strm : in Z_Stream) return ULong;
pragma Inline (Total_Out);
function inflateCopy
(dest : in Z_Streamp;
Source : in Z_Streamp)
return Int;
function compressBound (Source_Len : in ULong) return ULong;
function deflateBound
(Strm : in Z_Streamp;
Source_Len : in ULong)
return ULong;
function gzungetc (C : in Int; File : in gzFile) return Int;
function zlibCompileFlags return ULong;
private
-- NOTE(review): the component order and types below presumably mirror
-- the C z_stream structure (zlib.h:68) and must stay in sync with the
-- zlib headers actually linked -- confirm when upgrading zlib.
type Z_Stream is record -- zlib.h:68
Next_In : Voidp := Nul; -- next input byte
Avail_In : UInt := 0; -- number of bytes available at next_in
Total_In : ULong := 0; -- total nb of input bytes read so far
Next_Out : Voidp := Nul; -- next output byte should be put there
Avail_Out : UInt := 0; -- remaining free space at next_out
Total_Out : ULong := 0; -- total nb of bytes output so far
msg : Chars_Ptr; -- last error message, NULL if no error
state : Voidp; -- not visible by applications
zalloc : alloc_func := null; -- used to allocate the internal state
zfree : free_func := null; -- used to free the internal state
opaque : Voidp; -- private data object passed to
-- zalloc and zfree
data_type : Int; -- best guess about the data type:
-- ascii or binary
adler : ULong; -- adler32 value of the uncompressed
-- data
reserved : ULong; -- reserved for future use
end record;
pragma Convention (C, Z_Stream);
-- External (C) names of the subprograms declared in the visible part.
pragma Import (C, zlibVersion, "zlibVersion");
pragma Import (C, Deflate, "deflate");
pragma Import (C, DeflateEnd, "deflateEnd");
pragma Import (C, Inflate, "inflate");
pragma Import (C, InflateEnd, "inflateEnd");
pragma Import (C, deflateSetDictionary, "deflateSetDictionary");
pragma Import (C, deflateCopy, "deflateCopy");
pragma Import (C, deflateReset, "deflateReset");
pragma Import (C, deflateParams, "deflateParams");
pragma Import (C, inflateSetDictionary, "inflateSetDictionary");
pragma Import (C, inflateSync, "inflateSync");
pragma Import (C, inflateReset, "inflateReset");
pragma Import (C, compress, "compress");
pragma Import (C, compress2, "compress2");
pragma Import (C, uncompress, "uncompress");
pragma Import (C, gzopen, "gzopen");
pragma Import (C, gzdopen, "gzdopen");
pragma Import (C, gzsetparams, "gzsetparams");
pragma Import (C, gzread, "gzread");
pragma Import (C, gzwrite, "gzwrite");
pragma Import (C, gzprintf, "gzprintf");
pragma Import (C, gzputs, "gzputs");
pragma Import (C, gzgets, "gzgets");
pragma Import (C, gzputc, "gzputc");
pragma Import (C, gzgetc, "gzgetc");
pragma Import (C, gzflush, "gzflush");
pragma Import (C, gzseek, "gzseek");
pragma Import (C, gzrewind, "gzrewind");
pragma Import (C, gztell, "gztell");
pragma Import (C, gzeof, "gzeof");
pragma Import (C, gzclose, "gzclose");
pragma Import (C, gzerror, "gzerror");
pragma Import (C, adler32, "adler32");
pragma Import (C, crc32, "crc32");
pragma Import (C, deflateInit, "deflateInit_");
pragma Import (C, inflateInit, "inflateInit_");
pragma Import (C, deflateInit2, "deflateInit2_");
pragma Import (C, inflateInit2, "inflateInit2_");
pragma Import (C, zError, "zError");
pragma Import (C, inflateSyncPoint, "inflateSyncPoint");
pragma Import (C, get_crc_table, "get_crc_table");
-- since zlib 1.2.0:
pragma Import (C, inflateCopy, "inflateCopy");
pragma Import (C, compressBound, "compressBound");
pragma Import (C, deflateBound, "deflateBound");
pragma Import (C, gzungetc, "gzungetc");
pragma Import (C, zlibCompileFlags, "zlibCompileFlags");
pragma Import (C, inflateBackInit, "inflateBackInit_");
-- I stopped binding the inflateBack routines because I realized that
-- they do not support zlib and gzip headers for now, and have no
-- symmetric deflateBack routines.
-- ZLib-Ada is symmetric regarding deflate/inflate data transformation
-- and has a similar generic callback interface for the
-- deflate/inflate transformation based on the regular Deflate/Inflate
-- routines.
-- pragma Import (C, inflateBack, "inflateBack");
-- pragma Import (C, inflateBackEnd, "inflateBackEnd");
end ZLib.Thin;

View File

@@ -0,0 +1,701 @@
----------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2004 Dmitriy Anisimkov --
-- --
-- Open source license information is in the zlib.ads file. --
----------------------------------------------------------------
-- $Id: zlib.adb,v 1.31 2004/09/06 06:53:19 vagul Exp $
with Ada.Exceptions;
with Ada.Unchecked_Conversion;
with Ada.Unchecked_Deallocation;
with Interfaces.C.Strings;
with ZLib.Thin;
package body ZLib is
use type Thin.Int;
-- Local derivation of the thin-binding stream record; it inherits the
-- primitive operations declared for Thin.Z_Stream.
type Z_Stream is new Thin.Z_Stream;
-- Ada-side names of the zlib return codes (see Return_Code below).
type Return_Code_Enum is
(OK,
STREAM_END,
NEED_DICT,
ERRNO,
STREAM_ERROR,
DATA_ERROR,
MEM_ERROR,
BUF_ERROR,
VERSION_ERROR);
-- Profile shared by Thin.Deflate and Thin.Inflate.
type Flate_Step_Function is access
function (Strm : in Thin.Z_Streamp; Flush : in Thin.Int) return Thin.Int;
pragma Convention (C, Flate_Step_Function);
-- Profile shared by Thin.DeflateEnd and Thin.InflateEnd.
type Flate_End_Function is access
function (Ctrm : in Thin.Z_Streamp) return Thin.Int;
pragma Convention (C, Flate_End_Function);
-- Pair of routines driving one direction of the transformation.
type Flate_Type is record
Step : Flate_Step_Function;
Done : Flate_End_Function;
end record;
subtype Footer_Array is Stream_Element_Array (1 .. 8);
Simple_GZip_Header : constant Stream_Element_Array (1 .. 10)
:= (16#1f#, 16#8b#, -- Magic header
16#08#, -- Z_DEFLATED
16#00#, -- Flags
16#00#, 16#00#, 16#00#, 16#00#, -- Time
16#00#, -- XFlags
16#03# -- OS code
);
-- The simplest gzip header; it carries no information and exists only
-- for gzip format compatibility.
-- Note that some code below relies on the assumption
-- Simple_GZip_Header'Last > Footer_Array'Last, so do not make
-- Simple_GZip_Header'Last <= Footer_Array'Last.
-- Maps a zlib return code to its enumeration name. The index range is
-- -6 .. 2, so indexing with any other code fails the index check.
Return_Code : constant array (Thin.Int range <>) of Return_Code_Enum
:= (0 => OK,
1 => STREAM_END,
2 => NEED_DICT,
-1 => ERRNO,
-2 => STREAM_ERROR,
-3 => DATA_ERROR,
-4 => MEM_ERROR,
-5 => BUF_ERROR,
-6 => VERSION_ERROR);
-- Routine table indexed by "is compressing": True selects the deflate
-- routines, False the inflate routines.
Flate : constant array (Boolean) of Flate_Type
:= (True => (Step => Thin.Deflate'Access,
Done => Thin.DeflateEnd'Access),
False => (Step => Thin.Inflate'Access,
Done => Thin.InflateEnd'Access));
-- Flush mode to use depending on whether the end of input was reached.
Flush_Finish : constant array (Boolean) of Flush_Mode
:= (True => Finish, False => No_Flush);
procedure Raise_Error (Stream : in Z_Stream);
pragma Inline (Raise_Error);
-- Raise ZLib_Error with the last error message stored in Stream.
procedure Raise_Error (Message : in String);
pragma Inline (Raise_Error);
-- Raise ZLib_Error with the given message.
procedure Check_Error (Stream : in Z_Stream; Code : in Thin.Int);
-- Raise ZLib_Error unless Code is Thin.Z_OK.
procedure Free is new Ada.Unchecked_Deallocation
(Z_Stream, Z_Stream_Access);
function To_Thin_Access is new Ada.Unchecked_Conversion
(Z_Stream_Access, Thin.Z_Streamp);
procedure Translate_GZip
(Filter : in out Filter_Type;
In_Data : in Ada.Streams.Stream_Element_Array;
In_Last : out Ada.Streams.Stream_Element_Offset;
Out_Data : out Ada.Streams.Stream_Element_Array;
Out_Last : out Ada.Streams.Stream_Element_Offset;
Flush : in Flush_Mode);
-- Separate translate routine that also produces the gzip header.
procedure Translate_Auto
(Filter : in out Filter_Type;
In_Data : in Ada.Streams.Stream_Element_Array;
In_Last : out Ada.Streams.Stream_Element_Offset;
Out_Data : out Ada.Streams.Stream_Element_Array;
Out_Last : out Ada.Streams.Stream_Element_Offset;
Flush : in Flush_Mode);
-- Translate routine without additional headers.
-----------------
-- Check_Error --
-----------------
procedure Check_Error (Stream : in Z_Stream; Code : in Thin.Int) is
   use type Thin.Int;
begin
   --  Z_OK is the only code treated as success here.
   if Code = Thin.Z_OK then
      return;
   end if;

   --  Report "<CODE NAME>: <zlib message>".
   Raise_Error
     (Return_Code_Enum'Image (Return_Code (Code))
      & ": "
      & Last_Error_Message (Stream));
end Check_Error;
-----------
-- Close --
-----------
procedure Close
  (Filter       : in out Filter_Type;
   Ignore_Error : in     Boolean := False)
is
   --  Release the zlib state of Filter and deallocate its stream record.
   --
   --  Filter       : filter to close; Filter.Strm is null afterwards.
   --  Ignore_Error : when True, no exception is raised, neither for a
   --                 filter that is not open nor for a failing end routine.
   --
   --  Raises Status_Error if the filter is not open and Ignore_Error is
   --  False; raises ZLib_Error if the zlib end routine reports an error and
   --  Ignore_Error is False (the stream record is still deallocated).

   Code : Thin.Int;
begin
   if not Is_Open (Filter) then
      if Ignore_Error then
         --  Nothing to release. Returning here avoids indexing Flate with
         --  Filter.Compression, which may never have been assigned, and
         --  avoids calling the C end routine with a null stream.
         return;
      end if;

      raise Status_Error;
   end if;

   Code := Flate (Filter.Compression).Done (To_Thin_Access (Filter.Strm));

   if Ignore_Error or else Code = Thin.Z_OK then
      Free (Filter.Strm);
   else
      declare
         --  Fetch the message before the stream record is deallocated.
         Error_Message : constant String
           := Last_Error_Message (Filter.Strm.all);
      begin
         Free (Filter.Strm);
         Ada.Exceptions.Raise_Exception
           (ZLib_Error'Identity,
            Return_Code_Enum'Image (Return_Code (Code))
            & ": " & Error_Message);
      end;
   end if;
end Close;
-----------
-- CRC32 --
-----------
function CRC32
  (CRC  : in Unsigned_32;
   Data : in Ada.Streams.Stream_Element_Array)
   return Unsigned_32
is
   --  Delegate to the C crc32 routine, passing the array by address.
   Updated : constant Thin.ULong :=
     Thin.crc32
       (crc => Thin.ULong (CRC),
        buf => Data'Address,
        len => Data'Length);
begin
   return Unsigned_32 (Updated);
end CRC32;

procedure CRC32
  (CRC  : in out Unsigned_32;
   Data : in     Ada.Streams.Stream_Element_Array)
is
   --  In-place variant: CRC is replaced by the checksum updated with Data.
   Previous : constant Unsigned_32 := CRC;
begin
   CRC := CRC32 (Previous, Data);
end CRC32;
------------------
-- Deflate_Init --
------------------
procedure Deflate_Init
  (Filter       : in out Filter_Type;
   Level        : in     Compression_Level  := Default_Compression;
   Strategy     : in     Strategy_Type      := Default_Strategy;
   Method       : in     Compression_Method := Deflated;
   Window_Bits  : in     Window_Bits_Type   := Default_Window_Bits;
   Memory_Level : in     Memory_Level_Type  := Default_Memory_Level;
   Header       : in     Header_Type        := Default)
is
   --  Open Filter as a compressor.
   --
   --  Raises Status_Error if Filter is already open, and ZLib_Error if
   --  the zlib initialization fails. After a failed initialization the
   --  filter is left closed (Filter.Strm is null) and the stream record
   --  allocated here is released.

   use type Thin.Int;
   Win_Bits : Thin.Int := Thin.Int (Window_Bits);
   Code     : Thin.Int;
begin
   if Is_Open (Filter) then
      raise Status_Error;
   end if;

   --  We allow ZLib to make the header only in case of the default header
   --  type. Otherwise we either make the header ourselves, or make no
   --  header at all.

   if Header = None or else Header = GZip then
      Win_Bits := -Win_Bits;
   end if;

   --  For the GZip CRC calculation and header generation.

   if Header = GZip then
      Filter.CRC    := 0;
      Filter.Offset := Simple_GZip_Header'First;
   else
      Filter.Offset := Simple_GZip_Header'Last + 1;
   end if;

   Filter.Strm        := new Z_Stream;
   Filter.Compression := True;
   Filter.Stream_End  := False;
   Filter.Header      := Header;

   Code := Thin.Deflate_Init
     (To_Thin_Access (Filter.Strm),
      Level      => Thin.Int (Level),
      method     => Thin.Int (Method),
      windowBits => Win_Bits,
      memLevel   => Thin.Int (Memory_Level),
      strategy   => Thin.Int (Strategy));

   if Code /= Thin.Z_OK then
      declare
         --  Fetch the message before the stream record is deallocated.
         Message : constant String := Last_Error_Message (Filter.Strm.all);
      begin
         --  Do not leave a half-initialized stream attached: it would
         --  leak and make Is_Open report True for an unusable filter.
         Free (Filter.Strm);
         Raise_Error (Message);
      end;
   end if;
end Deflate_Init;
-----------
-- Flush --
-----------
procedure Flush
  (Filter   : in out Filter_Type;
   Out_Data :    out Ada.Streams.Stream_Element_Array;
   Out_Last :    out Ada.Streams.Stream_Element_Offset;
   Flush    : in     Flush_Mode)
is
   --  Flushing is a translation step that is fed no input at all.
   Empty_Input     : constant Stream_Element_Array := (1 .. 0 => 0);
   Ignored_In_Last : Stream_Element_Offset;
begin
   Translate
     (Filter   => Filter,
      In_Data  => Empty_Input,
      In_Last  => Ignored_In_Last,
      Out_Data => Out_Data,
      Out_Last => Out_Last,
      Flush    => Flush);
end Flush;
-----------------------
-- Generic_Translate --
-----------------------
procedure Generic_Translate
(Filter : in out ZLib.Filter_Type;
In_Buffer_Size : in Integer := Default_Buffer_Size;
Out_Buffer_Size : in Integer := Default_Buffer_Size)
is
-- Run the whole transformation: repeatedly obtain input with Data_In,
-- translate it through Filter and hand the output to Data_Out until
-- the filter reports the end of the stream.
-- NOTE(review): Data_In and Data_Out are not declared in this body;
-- presumably they are generic formal subprograms from the spec.
In_Buffer : Stream_Element_Array
(1 .. Stream_Element_Offset (In_Buffer_Size));
Out_Buffer : Stream_Element_Array
(1 .. Stream_Element_Offset (Out_Buffer_Size));
Last : Stream_Element_Offset;
In_Last : Stream_Element_Offset;
In_First : Stream_Element_Offset;
Out_Last : Stream_Element_Offset;
begin
Main : loop
Data_In (In_Buffer, Last);
In_First := In_Buffer'First;
-- Inner loop: keep translating the current input chunk until it is
-- fully consumed, emitting every non-empty output chunk.
loop
-- Finish is requested once Data_In delivered no data
-- (Last < In_Buffer'First); otherwise No_Flush is used.
Translate
(Filter => Filter,
In_Data => In_Buffer (In_First .. Last),
In_Last => In_Last,
Out_Data => Out_Buffer,
Out_Last => Out_Last,
Flush => Flush_Finish (Last < In_Buffer'First));
if Out_Buffer'First <= Out_Last then
Data_Out (Out_Buffer (Out_Buffer'First .. Out_Last));
end if;
exit Main when Stream_End (Filter);
-- The end of in buffer.
exit when In_Last = Last;
-- Continue with the part of the input not consumed yet.
In_First := In_Last + 1;
end loop;
end loop Main;
end Generic_Translate;
------------------
-- Inflate_Init --
------------------
procedure Inflate_Init
  (Filter      : in out Filter_Type;
   Window_Bits : in     Window_Bits_Type := Default_Window_Bits;
   Header      : in     Header_Type      := Default)
is
   --  Open Filter as a decompressor.
   --
   --  Raises Status_Error if Filter is already open, and ZLib_Error if
   --  the requested header type is not supported by the linked zlib
   --  version or if the zlib initialization fails. After a failed
   --  initialization the filter is left closed (Filter.Strm is null) and
   --  the stream record allocated here is released.

   use type Thin.Int;
   Win_Bits : Thin.Int := Thin.Int (Window_Bits);

   procedure Check_Version;
   --  Check that the zlib version supports the non-default header types.

   -------------------
   -- Check_Version --
   -------------------

   procedure Check_Version is
   begin
      if Version <= "1.1.4" then
         Raise_Error
           ("Inflate header type " & Header_Type'Image (Header)
            & " incompatible with ZLib version " & Version);
      end if;
   end Check_Version;

begin
   if Is_Open (Filter) then
      raise Status_Error;
   end if;

   case Header is
      when None =>
         Check_Version;

         --  Inflate data without headers is requested
         --  by a negative Win_Bits.

         Win_Bits := -Win_Bits;

      when GZip =>
         Check_Version;

         --  Inflate gzip data is requested by flag 16.

         Win_Bits := Win_Bits + 16;

      when Auto =>
         Check_Version;

         --  Inflate with automatic detection
         --  of gzip or native header is requested by flag 32.

         Win_Bits := Win_Bits + 32;

      when Default =>
         null;
   end case;

   Filter.Strm        := new Z_Stream;
   Filter.Compression := False;
   Filter.Stream_End  := False;
   Filter.Header      := Header;

   if Thin.Inflate_Init
        (To_Thin_Access (Filter.Strm), Win_Bits) /= Thin.Z_OK
   then
      declare
         --  Fetch the message before the stream record is deallocated.
         Message : constant String := Last_Error_Message (Filter.Strm.all);
      begin
         --  Do not leave a half-initialized stream attached: it would
         --  leak and make Is_Open report True for an unusable filter.
         Free (Filter.Strm);
         Raise_Error (Message);
      end;
   end if;
end Inflate_Init;
-------------
-- Is_Open --
-------------
function Is_Open (Filter : in Filter_Type) return Boolean is
   --  A filter is open exactly while it owns a stream record.
   Has_Stream : constant Boolean := Filter.Strm /= null;
begin
   return Has_Stream;
end Is_Open;
-----------------
-- Raise_Error --
-----------------
procedure Raise_Error (Message : in String) is
begin
   --  Every ZLib failure is reported through ZLib_Error.
   Ada.Exceptions.Raise_Exception
     (E       => ZLib_Error'Identity,
      Message => Message);
end Raise_Error;

procedure Raise_Error (Stream : in Z_Stream) is
   --  Use the last message recorded in the stream as the exception text.
   Text : constant String := Last_Error_Message (Stream);
begin
   Raise_Error (Text);
end Raise_Error;
----------
-- Read --
----------
procedure Read
(Filter : in out Filter_Type;
Item : out Ada.Streams.Stream_Element_Array;
Last : out Ada.Streams.Stream_Element_Offset;
Flush : in Flush_Mode := No_Flush)
is
-- Fill Item with translated data, refilling Buffer through Read
-- whenever the pending input Buffer (Rest_First .. Rest_Last) is
-- exhausted. Last is the index of the last element stored in Item.
-- NOTE(review): Read, Buffer, Rest_First, Rest_Last and Allow_Read_Some
-- are not declared in this body; presumably they are generic formals
-- from the spec.
In_Last : Stream_Element_Offset;
Item_First : Ada.Streams.Stream_Element_Offset := Item'First;
V_Flush : Flush_Mode := Flush;
begin
pragma Assert (Rest_First in Buffer'First .. Buffer'Last + 1);
pragma Assert (Rest_Last in Buffer'First - 1 .. Buffer'Last);
loop
if Rest_Last = Buffer'First - 1 then
-- Rest_Last is just below the buffer start: no input is left and
-- no refill is attempted, so ask the filter to finish.
V_Flush := Finish;
elsif Rest_First > Rest_Last then
-- All pending input was consumed: refill the buffer.
Read (Buffer, Rest_Last);
Rest_First := Buffer'First;
if Rest_Last < Buffer'First then
-- The refill delivered nothing: ask the filter to finish.
V_Flush := Finish;
end if;
end if;
Translate
(Filter => Filter,
In_Data => Buffer (Rest_First .. Rest_Last),
In_Last => In_Last,
Out_Data => Item (Item_First .. Item'Last),
Out_Last => Last,
Flush => V_Flush);
-- Remember where the unconsumed input starts for the next step.
Rest_First := In_Last + 1;
-- Stop at end of stream, when Item is full, or, if Allow_Read_Some
-- is set, as soon as at least one element has been produced.
exit when Stream_End (Filter)
or else Last = Item'Last
or else (Last >= Item'First and then Allow_Read_Some);
Item_First := Last + 1;
end loop;
end Read;
----------------
-- Stream_End --
----------------
function Stream_End (Filter : in Filter_Type) return Boolean is
   --  True when this filter is a compressor writing gzip framing.
   GZip_Output : constant Boolean :=
     Filter.Header = GZip and Filter.Compression;
begin
   if not GZip_Output then
      return Filter.Stream_End;
   end if;

   --  For gzip output the stream has ended only once Filter.Offset has
   --  moved past the last footer position.
   return Filter.Stream_End
     and then Filter.Offset = Footer_Array'Last + 1;
end Stream_End;
--------------
-- Total_In --
--------------
function Total_In (Filter : in Filter_Type) return Count is
   --  Raw input counter kept in the underlying stream record.
   Raw : constant Thin.ULong :=
     Thin.Total_In (To_Thin_Access (Filter.Strm).all);
begin
   return Count (Raw);
end Total_In;
---------------
-- Total_Out --
---------------
function Total_Out (Filter : in Filter_Type) return Count is
   --  Raw output counter kept in the underlying stream record.
   Raw : constant Thin.ULong :=
     Thin.Total_Out (To_Thin_Access (Filter.Strm).all);
begin
   return Count (Raw);
end Total_Out;
---------------
-- Translate --
---------------
procedure Translate
  (Filter   : in out Filter_Type;
   In_Data  : in     Ada.Streams.Stream_Element_Array;
   In_Last  :    out Ada.Streams.Stream_Element_Offset;
   Out_Data :    out Ada.Streams.Stream_Element_Array;
   Out_Last :    out Ada.Streams.Stream_Element_Offset;
   Flush    : in     Flush_Mode) is
begin
   --  Only a compressor producing gzip output takes the GZip route;
   --  every other combination goes through Translate_Auto.
   if Filter.Header /= GZip or else not Filter.Compression then
      Translate_Auto
        (Filter, In_Data, In_Last, Out_Data, Out_Last, Flush);
   else
      Translate_GZip
        (Filter, In_Data, In_Last, Out_Data, Out_Last, Flush);
   end if;
end Translate;
--------------------
-- Translate_Auto --
--------------------
procedure Translate_Auto
(Filter : in out Filter_Type;
In_Data : in Ada.Streams.Stream_Element_Array;
In_Last : out Ada.Streams.Stream_Element_Offset;
Out_Data : out Ada.Streams.Stream_Element_Array;
Out_Last : out Ada.Streams.Stream_Element_Offset;
Flush : in Flush_Mode)
is
-- Perform one deflate/inflate step on In_Data, writing to Out_Data.
-- In_Last and Out_Last receive the index of the last input element
-- consumed and of the last output element produced.
-- Raises Status_Error if Filter is not open, Constraint_Error if both
-- arrays are empty, and ZLib_Error (through Check_Error) for any step
-- result other than Z_OK or Z_STREAM_END.
use type Thin.Int;
Code : Thin.Int;
begin
if not Is_Open (Filter) then
raise Status_Error;
end if;
if Out_Data'Length = 0 and then In_Data'Length = 0 then
raise Constraint_Error;
end if;
-- Point the stream record at the caller's arrays.
Set_Out (Filter.Strm.all, Out_Data'Address, Out_Data'Length);
Set_In (Filter.Strm.all, In_Data'Address, In_Data'Length);
-- Run the step routine selected by the filter direction.
Code := Flate (Filter.Compression).Step
(To_Thin_Access (Filter.Strm),
Thin.Int (Flush));
if Code = Thin.Z_STREAM_END then
Filter.Stream_End := True;
else
Check_Error (Filter.Strm.all, Code);
end if;
-- Whatever is still reported as available was not used, so the last
-- used index is the array's last index minus that remainder.
In_Last := In_Data'Last
- Stream_Element_Offset (Avail_In (Filter.Strm.all));
Out_Last := Out_Data'Last
- Stream_Element_Offset (Avail_Out (Filter.Strm.all));
end Translate_Auto;
--------------------
-- Translate_GZip --
--------------------
procedure Translate_GZip
  (Filter   : in out Filter_Type;
   In_Data  : in     Ada.Streams.Stream_Element_Array;
   In_Last  :    out Ada.Streams.Stream_Element_Offset;
   Out_Data :    out Ada.Streams.Stream_Element_Array;
   Out_Last :    out Ada.Streams.Stream_Element_Offset;
   Flush    : in     Flush_Mode)
is
   --  Compression step for filters with a GZip header: emits the pending
   --  part of Simple_GZip_Header, lets Translate_Auto compress In_Data
   --  into the rest of Out_Data while accumulating the CRC of the consumed
   --  input, and, once the compressed stream has ended, emits the footer
   --  (CRC followed by the input size). Filter.Offset remembers how far
   --  the header/footer has been written, so both may be spread over
   --  several calls when Out_Data is small.

   Out_First : Stream_Element_Offset;
   --  First free position in Out_Data

   procedure Add_Data (Data : in Stream_Element_Array);
   --  Add data to stream from the Filter.Offset till necessary,
   --  used for adding the gzip header/footer.

   procedure Put_32
     (Item : in out Stream_Element_Array;
      Data : in     Unsigned_32);
   pragma Inline (Put_32);
   --  Store Data into the first four elements of Item, least significant
   --  byte first.

   --------------
   -- Add_Data --
   --------------

   procedure Add_Data (Data : in Stream_Element_Array) is
      Data_First : Stream_Element_Offset renames Filter.Offset;
      Data_Last  : Stream_Element_Offset;
      Data_Len   : Stream_Element_Offset; --  remaining data length - 1
      Out_Len    : Stream_Element_Offset; --  free output length - 1
   begin
      Out_First := Out_Last + 1;

      --  Everything from Data has already been written
      if Data_First > Data'Last then
         return;
      end if;

      Data_Len := Data'Last - Data_First;
      Out_Len  := Out_Data'Last - Out_First;

      if Data_Len <= Out_Len then
         --  The rest of Data fits completely
         Out_Last  := Out_First + Data_Len;
         Data_Last := Data'Last;
      else
         --  Fill what is left of Out_Data, the remainder of Data is
         --  written by a later call.
         Out_Last  := Out_Data'Last;
         Data_Last := Data_First + Out_Len;
      end if;

      Out_Data (Out_First .. Out_Last) := Data (Data_First .. Data_Last);

      Data_First := Data_Last + 1;
      Out_First  := Out_Last + 1;
   end Add_Data;

   ------------
   -- Put_32 --
   ------------

   procedure Put_32
     (Item : in out Stream_Element_Array;
      Data : in     Unsigned_32)
   is
      D : Unsigned_32 := Data;
   begin
      for J in Item'First .. Item'First + 3 loop
         Item (J) := Stream_Element (D and 16#FF#);
         D := Shift_Right (D, 8);
      end loop;
   end Put_32;

begin
   Out_Last := Out_Data'First - 1;

   if not Filter.Stream_End then
      Add_Data (Simple_GZip_Header);

      Translate_Auto
        (Filter   => Filter,
         In_Data  => In_Data,
         In_Last  => In_Last,
         Out_Data => Out_Data (Out_First .. Out_Data'Last),
         Out_Last => Out_Last,
         Flush    => Flush);

      CRC32 (Filter.CRC, In_Data (In_Data'First .. In_Last));
   else
      --  The compressed stream is already complete and only footer bytes
      --  are left to write: no input is consumed. In_Last is an out
      --  parameter and has to be defined on this path too, otherwise the
      --  caller gets an undefined value back.
      In_Last := In_Data'First - 1;
   end if;

   if Filter.Stream_End and then Out_Last <= Out_Data'Last then
      --  This detection method would work only when
      --  Simple_GZip_Header'Last > Footer_Array'Last

      if Filter.Offset = Simple_GZip_Header'Last + 1 then
         Filter.Offset := Footer_Array'First;
      end if;

      declare
         use type Interfaces.Unsigned_64;

         Footer : Footer_Array;

         Size : constant Interfaces.Unsigned_64 :=
           Interfaces.Unsigned_64 (Total_In (Filter));
      begin
         Put_32 (Footer, Filter.CRC);

         --  The gzip size field holds the input size modulo 2**32;
         --  a plain conversion would raise Constraint_Error for inputs
         --  of 4 GiB and more.
         Put_32
           (Footer (Footer'First + 4 .. Footer'Last),
            Unsigned_32 (Size mod 2 ** 32));

         Add_Data (Footer);
      end;
   end if;
end Translate_GZip;
-------------
-- Version --
-------------
function Version return String is
   --  Version text obtained from the thin binding's zlibVersion and
   --  converted from a C string to an Ada String.
   C_Version : constant Interfaces.C.Strings.chars_ptr := Thin.zlibVersion;
begin
   return Interfaces.C.Strings.Value (C_Version);
end Version;
-----------
-- Write --
-----------
procedure Write
  (Filter : in out Filter_Type;
   Item   : in     Ada.Streams.Stream_Element_Array;
   Flush  : in     Flush_Mode := No_Flush)
is
   --  Pushes Item through the filter and forwards every produced chunk to
   --  the generic formal Write. Stops once all of Item has been accepted
   --  or the filter reports the end of the stream.

   Chunk      : Stream_Element_Array (1 .. Buffer_Size);
   --  Output of one translation step

   Chunk_Last : Stream_Element_Offset;
   Taken_Last : Stream_Element_Offset;
   Next_In    : Stream_Element_Offset := Item'First;
begin
   --  Nothing to compress and nothing to flush
   if Flush = No_Flush and then Item'Length = 0 then
      return;
   end if;

   loop
      Translate
        (Filter   => Filter,
         In_Data  => Item (Next_In .. Item'Last),
         In_Last  => Taken_Last,
         Out_Data => Chunk,
         Out_Last => Chunk_Last,
         Flush    => Flush);

      if Chunk_Last >= Chunk'First then
         Write (Chunk (Chunk'First .. Chunk_Last));
      end if;

      exit when Taken_Last = Item'Last or else Stream_End (Filter);

      Next_In := Taken_Last + 1;
   end loop;
end Write;
end ZLib;

View File

@@ -0,0 +1,328 @@
------------------------------------------------------------------------------
-- ZLib for Ada thick binding. --
-- --
-- Copyright (C) 2002-2004 Dmitriy Anisimkov --
-- --
-- This library is free software; you can redistribute it and/or modify --
-- it under the terms of the GNU General Public License as published by --
-- the Free Software Foundation; either version 2 of the License, or (at --
-- your option) any later version. --
-- --
-- This library is distributed in the hope that it will be useful, but --
-- WITHOUT ANY WARRANTY; without even the implied warranty of --
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU --
-- General Public License for more details. --
-- --
-- You should have received a copy of the GNU General Public License --
-- along with this library; if not, write to the Free Software Foundation, --
-- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. --
-- --
-- As a special exception, if other files instantiate generics from this --
-- unit, or you link this unit with other files to produce an executable, --
-- this unit does not by itself cause the resulting executable to be --
-- covered by the GNU General Public License. This exception does not --
-- however invalidate any other reasons why the executable file might be --
-- covered by the GNU Public License. --
------------------------------------------------------------------------------
-- $Id: zlib.ads,v 1.26 2004/09/06 06:53:19 vagul Exp $
with Ada.Streams;
with Interfaces;
package ZLib is
-- Thick Ada binding to zlib: a Filter_Type object is initialized either
-- for compression (Deflate_Init) or decompression (Inflate_Init) and data
-- is then pushed through it with Translate, Generic_Translate, Read or
-- Write.
ZLib_Error : exception;
-- NOTE(review): presumably raised when the underlying zlib call reports
-- an error; confirm against Check_Error in the package body.
Status_Error : exception;
-- Raised when a translation is requested on a filter that is not open.
type Compression_Level is new Integer range -1 .. 9;
type Flush_Mode is private;
type Compression_Method is private;
type Window_Bits_Type is new Integer range 8 .. 15;
type Memory_Level_Type is new Integer range 1 .. 9;
type Unsigned_32 is new Interfaces.Unsigned_32;
type Strategy_Type is private;
type Header_Type is (None, Auto, Default, GZip);
-- Header type usage has some limitations for inflate.
-- See the comment for Inflate_Init.
subtype Count is Ada.Streams.Stream_Element_Count;
Default_Memory_Level : constant Memory_Level_Type := 8;
Default_Window_Bits : constant Window_Bits_Type := 15;
----------------------------------
-- Compression method constants --
----------------------------------
Deflated : constant Compression_Method;
-- Only one method allowed in this ZLib version
---------------------------------
-- Compression level constants --
---------------------------------
No_Compression : constant Compression_Level := 0;
Best_Speed : constant Compression_Level := 1;
Best_Compression : constant Compression_Level := 9;
Default_Compression : constant Compression_Level := -1;
--------------------------
-- Flush mode constants --
--------------------------
No_Flush : constant Flush_Mode;
-- Regular way for compression, no flush
Partial_Flush : constant Flush_Mode;
-- Will be removed, use Sync_Flush (Z_SYNC_FLUSH) instead
Sync_Flush : constant Flush_Mode;
-- All pending output is flushed to the output buffer and the output
-- is aligned on a byte boundary, so that the decompressor can get all
-- input data available so far. (In particular avail_in is zero after the
-- call if enough output space has been provided before the call.)
-- Flushing may degrade compression for some compression algorithms and so
-- it should be used only when necessary.
Block_Flush : constant Flush_Mode;
-- Z_BLOCK requests that inflate() stop
-- if and when it gets to the next deflate block boundary. When decoding
-- the zlib or gzip format, this will cause inflate() to return immediately
-- after the header and before the first block. When doing a raw inflate,
-- inflate() will go ahead and process the first block, and will return
-- when it gets to the end of that block, or when it runs out of data.
Full_Flush : constant Flush_Mode;
-- All output is flushed as with SYNC_FLUSH, and the compression state
-- is reset so that decompression can restart from this point if previous
-- compressed data has been damaged or if random access is desired. Using
-- Full_Flush too often can seriously degrade the compression.
Finish : constant Flush_Mode;
-- Tells the compressor that the input data is complete.
------------------------------------
-- Compression strategy constants --
------------------------------------
-- The RLE strategy can be used only in version 1.2.0 and later.
Filtered : constant Strategy_Type;
Huffman_Only : constant Strategy_Type;
RLE : constant Strategy_Type;
Default_Strategy : constant Strategy_Type;
Default_Buffer_Size : constant := 4096;
type Filter_Type is tagged limited private;
-- The filter is used both for compression and for decompression.
-- Which of the two it performs depends on how it was initialized.
function Version return String;
pragma Inline (Version);
-- Return string representation of the ZLib version.
procedure Deflate_Init
(Filter : in out Filter_Type;
Level : in Compression_Level := Default_Compression;
Strategy : in Strategy_Type := Default_Strategy;
Method : in Compression_Method := Deflated;
Window_Bits : in Window_Bits_Type := Default_Window_Bits;
Memory_Level : in Memory_Level_Type := Default_Memory_Level;
Header : in Header_Type := Default);
-- Compressor initialization.
-- When the Header parameter is Auto or Default, the default zlib header
-- is provided for the compressed data.
-- When Header is GZip, a gzip header is set instead of the
-- default header.
-- When Header is None, no header is set for the compressed data.
procedure Inflate_Init
(Filter : in out Filter_Type;
Window_Bits : in Window_Bits_Type := Default_Window_Bits;
Header : in Header_Type := Default);
-- Decompressor initialization.
-- The Default header type means that the ZLib default header is expected
-- in the input compressed stream.
-- Header type None means that no header is expected in the input stream.
-- The GZip header type means that a GZip header is expected in the
-- input compressed stream.
-- The Auto header type means that the header type (GZip or Native) is
-- detected automatically in the input stream.
-- Note that the header type parameter values None, GZip and Auto are
-- supported for the inflate routine only in ZLib versions 1.2.0.2 and
-- later. Deflate_Init supports all header types.
function Is_Open (Filter : in Filter_Type) return Boolean;
pragma Inline (Is_Open);
-- Is the filter opened for compression or decompression.
procedure Close
(Filter : in out Filter_Type;
Ignore_Error : in Boolean := False);
-- Closes the compressor or decompressor.
-- If the stream is closed before it is complete and Ignore_Error is
-- False, an exception is raised.
generic
with procedure Data_In
(Item : out Ada.Streams.Stream_Element_Array;
Last : out Ada.Streams.Stream_Element_Offset);
with procedure Data_Out
(Item : in Ada.Streams.Stream_Element_Array);
procedure Generic_Translate
(Filter : in out Filter_Type;
In_Buffer_Size : in Integer := Default_Buffer_Size;
Out_Buffer_Size : in Integer := Default_Buffer_Size);
-- Compress/decompress data fetched from the Data_In routine and pass the
-- result to the Data_Out routine. The user should provide Data_In and
-- Data_Out for the compression/decompression data flow.
-- Compression or decompression depends on the Filter initialization.
function Total_In (Filter : in Filter_Type) return Count;
pragma Inline (Total_In);
-- Returns total number of input bytes read so far
function Total_Out (Filter : in Filter_Type) return Count;
pragma Inline (Total_Out);
-- Returns total number of bytes output so far
function CRC32
(CRC : in Unsigned_32;
Data : in Ada.Streams.Stream_Element_Array)
return Unsigned_32;
pragma Inline (CRC32);
-- Compute CRC32; it may be needed to produce the gzip format
procedure CRC32
(CRC : in out Unsigned_32;
Data : in Ada.Streams.Stream_Element_Array);
pragma Inline (CRC32);
-- Compute CRC32 in place; it may be needed to produce the gzip format
-------------------------------------------------
-- Below are more complex low-level routines. --
-------------------------------------------------
procedure Translate
(Filter : in out Filter_Type;
In_Data : in Ada.Streams.Stream_Element_Array;
In_Last : out Ada.Streams.Stream_Element_Offset;
Out_Data : out Ada.Streams.Stream_Element_Array;
Out_Last : out Ada.Streams.Stream_Element_Offset;
Flush : in Flush_Mode);
-- Compress/decompress the In_Data buffer and place the result into
-- Out_Data. In_Last is the index of the last element from In_Data accepted
-- by the Filter. Out_Last is the index of the last element of Out_Data
-- written by the Filter. To tell the filter that the incoming data is
-- complete, set the Flush parameter to Finish.
function Stream_End (Filter : in Filter_Type) return Boolean;
pragma Inline (Stream_End);
-- Return True when the stream is complete.
procedure Flush
(Filter : in out Filter_Type;
Out_Data : out Ada.Streams.Stream_Element_Array;
Out_Last : out Ada.Streams.Stream_Element_Offset;
Flush : in Flush_Mode);
pragma Inline (Flush);
-- Flushes the data from the compressor.
generic
with procedure Write
(Item : in Ada.Streams.Stream_Element_Array);
-- The user should provide this routine to accept the
-- compressed/decompressed data.
Buffer_Size : in Ada.Streams.Stream_Element_Offset
:= Default_Buffer_Size;
-- Buffer size for Write user routine.
procedure Write
(Filter : in out Filter_Type;
Item : in Ada.Streams.Stream_Element_Array;
Flush : in Flush_Mode := No_Flush);
-- Compress/Decompress data from Item to the generic parameter procedure
-- Write. Output buffer size could be set in Buffer_Size generic parameter.
generic
with procedure Read
(Item : out Ada.Streams.Stream_Element_Array;
Last : out Ada.Streams.Stream_Element_Offset);
-- The user should provide the data for compression/decompression
-- through this routine.
Buffer : in out Ada.Streams.Stream_Element_Array;
-- Buffer keeping the data remaining from the previous
-- back read.
Rest_First, Rest_Last : in out Ada.Streams.Stream_Element_Offset;
-- Rest_First has to be initialized to Buffer'Last + 1
-- Rest_Last has to be initialized to Buffer'Last
-- before usage.
Allow_Read_Some : in Boolean := False;
-- Is it allowed to return Last < Item'Last before end of data.
procedure Read
(Filter : in out Filter_Type;
Item : out Ada.Streams.Stream_Element_Array;
Last : out Ada.Streams.Stream_Element_Offset;
Flush : in Flush_Mode := No_Flush);
-- Compress/Decompress data from generic parameter procedure Read to the
-- Item. User should provide Buffer and initialized Rest_First, Rest_Last
-- indicators. If Allow_Read_Some is False, Read returns
-- Last < Item'Last only at end of stream; if it is True, Read may return
-- as soon as at least one element has been placed into Item.
private
use Ada.Streams;
-- The binding relies on stream elements being 8-bit bytes.
pragma Assert (Ada.Streams.Stream_Element'Size = 8);
pragma Assert (Ada.Streams.Stream_Element'Modulus = 2**8);
type Flush_Mode is new Integer range 0 .. 5;
type Compression_Method is new Integer range 8 .. 8;
type Strategy_Type is new Integer range 0 .. 3;
-- NOTE(review): the values below are presumably the numeric codes of the
-- corresponding zlib C constants; the flush mode is passed to the thin
-- binding as a plain integer. Confirm against zlib.h.
No_Flush : constant Flush_Mode := 0;
Partial_Flush : constant Flush_Mode := 1;
Sync_Flush : constant Flush_Mode := 2;
Full_Flush : constant Flush_Mode := 3;
Finish : constant Flush_Mode := 4;
Block_Flush : constant Flush_Mode := 5;
Filtered : constant Strategy_Type := 1;
Huffman_Only : constant Strategy_Type := 2;
RLE : constant Strategy_Type := 3;
Default_Strategy : constant Strategy_Type := 0;
Deflated : constant Compression_Method := 8;
type Z_Stream;
-- Completed in the package body.
type Z_Stream_Access is access all Z_Stream;
type Filter_Type is tagged limited record
Strm : Z_Stream_Access;
-- Underlying stream state.
Compression : Boolean;
-- Selects between the deflate and inflate routines.
Stream_End : Boolean;
-- Set once a translation step reports the end of the stream.
Header : Header_Type;
CRC : Unsigned_32;
-- Running CRC32 of the input consumed when compressing with a GZip
-- header.
Offset : Stream_Element_Offset;
-- Offset for gzip header/footer output.
end record;
end ZLib;

View File

@@ -0,0 +1,20 @@
-- GNAT project file for the ZLib Ada binding and its test/demo programs.
project Zlib is
for Languages use ("Ada");
-- Sources and object files both live in the project directory.
for Source_Dirs use (".");
for Object_Dir use ".";
-- Main programs built by this project.
-- NOTE(review): "buffer_demo" is listed without the ".adb" suffix used by
-- the other entries; confirm this is intended.
for Main use ("test.adb", "mtest.adb", "read.adb", "buffer_demo");
package Compiler is
-- GNAT warning (-gnatw...), validity checking (-gnatV...) and style
-- checking (-gnaty...) options applied to every Ada source.
for Default_Switches ("ada") use ("-gnatwcfilopru", "-gnatVcdfimorst", "-gnatyabcefhiklmnoprst");
end Compiler;
package Linker is
-- Link the executables against the zlib C library.
for Default_Switches ("ada") use ("-lz");
end Linker;
package Builder is
-- Switches passed to the builder; see the GNAT User's Guide for their
-- exact meaning.
for Default_Switches ("ada") use ("-s", "-gnatQ");
end Builder;
end Zlib;

Some files were not shown because too many files have changed in this diff Show More