summary refs log tree commit diff
path: root/graphics/tesseract
diff options
context:
space:
mode:
author wiz <wiz@pkgsrc.org> 2007-05-18 06:39:27 +0000
committer wiz <wiz@pkgsrc.org> 2007-05-18 06:39:27 +0000
commit e899e6021c1a45f4ad336cccb543f4abcbe88ab1 (patch)
tree a705c8f6b7adc55c4b7c0be896b56cd6c86a9c90 /graphics/tesseract
parent 539cc301cceb240b57c43baf494b8023f1d7ffe3 (diff)
download pkgsrc-e899e6021c1a45f4ad336cccb543f4abcbe88ab1.tar.gz
Initial import of tesseract-1.04b from pkgsrc-wip (packaged by heinz@
and myself): This code is a raw OCR engine. It has NO PAGE LAYOUT ANALYSIS, NO OUTPUT FORMATTING, and NO UI. It can only process an image of a single column and create text from it. It can detect fixed pitch vs proportional text. Having said that, in 1995, this engine was in the top 3 in terms of character accuracy, and it compiles and runs on both Linux and Windows. Another current limitation is that it only recognizes English and its character set is only US-ASCII. Training code IS included in the open source release, however; see the cntraining and mftraining programs installed by this package.
Diffstat (limited to 'graphics/tesseract')
-rw-r--r-- graphics/tesseract/DESCR 9
-rw-r--r-- graphics/tesseract/Makefile 32
-rw-r--r-- graphics/tesseract/PLIST 286
-rw-r--r-- graphics/tesseract/distinfo 9
-rw-r--r-- graphics/tesseract/files/tesseract.sh 2
-rw-r--r-- graphics/tesseract/patches/patch-ae 16
-rw-r--r-- graphics/tesseract/patches/patch-ag 15
-rw-r--r-- graphics/tesseract/patches/patch-ah 13
-rw-r--r-- graphics/tesseract/patches/patch-ai 14
9 files changed, 396 insertions, 0 deletions
diff --git a/graphics/tesseract/DESCR b/graphics/tesseract/DESCR
new file mode 100644
index 00000000000..dcc8fb3daad
--- /dev/null
+++ b/graphics/tesseract/DESCR
@@ -0,0 +1,9 @@
+This code is a raw OCR engine. It has NO PAGE LAYOUT ANALYSIS, NO
+OUTPUT FORMATTING, and NO UI. It can only process an image of a
+single column and create text from it. It can detect fixed pitch
+vs proportional text. Having said that, in 1995, this engine was
+in the top 3 in terms of character accuracy, and it compiles and
+runs on both Linux and Windows. Another current limitation is that
+it only recognizes English and its character set is only US-ASCII.
+Training code IS included in the open source release, however; see
+the cntraining and mftraining programs installed by this package.
diff --git a/graphics/tesseract/Makefile b/graphics/tesseract/Makefile
new file mode 100644
index 00000000000..3102c7a1479
--- /dev/null
+++ b/graphics/tesseract/Makefile
@@ -0,0 +1,32 @@
+# $NetBSD: Makefile,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
+#
+
+DISTNAME= tesseract-1.04b
+CATEGORIES= graphics
+MASTER_SITES= http://tesseract-ocr.googlecode.com/files/
+
+MAINTAINER= pkgsrc-users@NetBSD.org
+HOMEPAGE= http://code.google.com/p/tesseract-ocr/
+COMMENT= Commercial quality open source OCR engine
+
+PKG_DESTDIR_SUPPORT=user-destdir
+
+GNU_CONFIGURE= yes
+USE_LANGUAGES= c c++
+WRKSRC= ${WRKDIR}/tesseract-1.04
+
+post-build:
+ ${SED} -e "s,@PREFIX@,${PREFIX}," ${FILESDIR}/tesseract.sh \
+ > ${WRKSRC}/tesseract.sh
+
+post-install:
+ ${INSTALL_LIB_DIR} ${DESTDIR}${PREFIX}/libexec
+ ${MV} ${DESTDIR}${PREFIX}/bin/tesseract ${DESTDIR}${PREFIX}/libexec
+ ${INSTALL_SCRIPT} ${WRKSRC}/tesseract.sh ${DESTDIR}${PREFIX}/bin/tesseract
+ ${INSTALL_DATA_DIR} ${DESTDIR}${PREFIX}/share/doc/tesseract
+ ${INSTALL_DATA} ${WRKSRC}/README ${DESTDIR}${PREFIX}/share/doc/tesseract
+ ${INSTALL_DATA_DIR} ${DESTDIR}${PREFIX}/share/tesseract
+ ${INSTALL_DATA} ${WRKSRC}/phototest.tif ${DESTDIR}${PREFIX}/share/tesseract
+
+.include "../../graphics/tiff/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/graphics/tesseract/PLIST b/graphics/tesseract/PLIST
new file mode 100644
index 00000000000..29f995cfb63
--- /dev/null
+++ b/graphics/tesseract/PLIST
@@ -0,0 +1,286 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
+bin/cntraining
+bin/mftraining
+bin/tesseract
+include/tesseract/adaptions.h
+include/tesseract/adaptive.h
+include/tesseract/adaptmatch.h
+include/tesseract/applybox.h
+include/tesseract/associate.h
+include/tesseract/badwords.h
+include/tesseract/baseapi.h
+include/tesseract/basedir.h
+include/tesseract/baseline.h
+include/tesseract/bestfirst.h
+include/tesseract/bits16.h
+include/tesseract/bitstrm.h
+include/tesseract/bitvec.h
+include/tesseract/blckerr.h
+include/tesseract/blkocc.h
+include/tesseract/blobbox.h
+include/tesseract/blobclass.h
+include/tesseract/blobcmp.h
+include/tesseract/blobcmpl.h
+include/tesseract/blobs.h
+include/tesseract/blread.h
+include/tesseract/callcpp.h
+include/tesseract/callnet.h
+include/tesseract/charcut.h
+include/tesseract/charsample.h
+include/tesseract/chartoname.h
+include/tesseract/choicearr.h
+include/tesseract/choices.h
+include/tesseract/chop.h
+include/tesseract/chopper.h
+include/tesseract/closed.h
+include/tesseract/clst.h
+include/tesseract/cluster.h
+include/tesseract/clusttool.h
+include/tesseract/cmndwin.h
+include/tesseract/cnTraining.dsp
+include/tesseract/const.h
+include/tesseract/context.h
+include/tesseract/control.h
+include/tesseract/coutln.h
+include/tesseract/crakedge.h
+include/tesseract/cutil.h
+include/tesseract/cutoffs.h
+include/tesseract/danerror.h
+include/tesseract/dawg.h
+include/tesseract/debug.h
+include/tesseract/debugwin.h
+include/tesseract/djmenus.h
+include/tesseract/dlltest.cpp
+include/tesseract/dlltest.dsp
+include/tesseract/docqual.h
+include/tesseract/drawedg.h
+include/tesseract/drawfx.h
+include/tesseract/drawtord.h
+include/tesseract/edgblob.h
+include/tesseract/edgloop.h
+include/tesseract/efio.h
+include/tesseract/elst.h
+include/tesseract/elst2.h
+include/tesseract/emalloc.h
+include/tesseract/errcode.h
+include/tesseract/evntlst.h
+include/tesseract/evnts.h
+include/tesseract/expandblob.h
+include/tesseract/extern.h
+include/tesseract/extract.h
+include/tesseract/featdefs.h
+include/tesseract/fileerr.h
+include/tesseract/findseam.h
+include/tesseract/fixspace.h
+include/tesseract/fixxht.h
+include/tesseract/flexfx.h
+include/tesseract/float2int.h
+include/tesseract/fpchop.h
+include/tesseract/fpoint.h
+include/tesseract/freelist.h
+include/tesseract/funcdefs.h
+include/tesseract/fxdefs.h
+include/tesseract/fxid.h
+include/tesseract/gap_map.h
+include/tesseract/genblob.h
+include/tesseract/general.h
+include/tesseract/globaloc.h
+include/tesseract/globals.h
+include/tesseract/gradechop.h
+include/tesseract/grphics.h
+include/tesseract/grphshm.h
+include/tesseract/hashfn.h
+include/tesseract/heuristic.h
+include/tesseract/hideedge.h
+include/tesseract/host.h
+include/tesseract/hosthplb.h
+include/tesseract/hpddef.h
+include/tesseract/hpdsizes.h
+include/tesseract/hyphen.h
+include/tesseract/img.h
+include/tesseract/imgbmp.h
+include/tesseract/imgerrs.h
+include/tesseract/imgio.h
+include/tesseract/imgs.h
+include/tesseract/imgscale.h
+include/tesseract/imgtiff.h
+include/tesseract/imgunpk.h
+include/tesseract/intfx.h
+include/tesseract/intmatcher.h
+include/tesseract/intproto.h
+include/tesseract/ipoints.h
+include/tesseract/kdtree.h
+include/tesseract/labls.h
+include/tesseract/linlsq.h
+include/tesseract/listio.h
+include/tesseract/lmedsq.h
+include/tesseract/lsterr.h
+include/tesseract/mainblk.h
+include/tesseract/makechop.h
+include/tesseract/makerow.h
+include/tesseract/matchdefs.h
+include/tesseract/matchtab.h
+include/tesseract/matmatch.h
+include/tesseract/matrix.h
+include/tesseract/measure.h
+include/tesseract/memblk.h
+include/tesseract/memry.h
+include/tesseract/memryerr.h
+include/tesseract/mergenf.h
+include/tesseract/metrics.h
+include/tesseract/mf.h
+include/tesseract/mfTraining.dsp
+include/tesseract/mfcpch.cpp
+include/tesseract/mfcpch.h
+include/tesseract/mfdefs.h
+include/tesseract/mfoutline.h
+include/tesseract/mfvars.h
+include/tesseract/mfx.h
+include/tesseract/minmax.h
+include/tesseract/mod128.h
+include/tesseract/msmenus.h
+include/tesseract/name2char.h
+include/tesseract/ndminx.h
+include/tesseract/normalis.h
+include/tesseract/normfeat.h
+include/tesseract/normmatch.h
+include/tesseract/notdll.h
+include/tesseract/nwmain.h
+include/tesseract/ocrblock.h
+include/tesseract/ocrclass.h
+include/tesseract/ocrfeatures.h
+include/tesseract/ocrrow.h
+include/tesseract/ocrshell.h
+include/tesseract/oldbasel.h
+include/tesseract/oldheap.h
+include/tesseract/oldlist.h
+include/tesseract/olutil.h
+include/tesseract/outfeat.h
+include/tesseract/outlines.h
+include/tesseract/output.h
+include/tesseract/pageblk.h
+include/tesseract/pageres.h
+include/tesseract/pagewalk.h
+include/tesseract/paircmp.h
+include/tesseract/pdblock.h
+include/tesseract/pdclass.h
+include/tesseract/permdawg.h
+include/tesseract/permnum.h
+include/tesseract/permute.h
+include/tesseract/pgedit.h
+include/tesseract/pgeditx.h
+include/tesseract/picofeat.h
+include/tesseract/pieces.h
+include/tesseract/pithsync.h
+include/tesseract/pitsync1.h
+include/tesseract/platform.h
+include/tesseract/plotedges.h
+include/tesseract/plotseg.h
+include/tesseract/points.h
+include/tesseract/polyaprx.h
+include/tesseract/polyblk.h
+include/tesseract/polyblob.h
+include/tesseract/polyvert.h
+include/tesseract/poutline.h
+include/tesseract/protos.h
+include/tesseract/quadlsq.h
+include/tesseract/quadratc.h
+include/tesseract/quspline.h
+include/tesseract/ratngs.h
+include/tesseract/rect.h
+include/tesseract/rejctmap.h
+include/tesseract/reject.h
+include/tesseract/render.h
+include/tesseract/rwpoly.h
+include/tesseract/sbdmenu.h
+include/tesseract/sbgconst.h
+include/tesseract/sbgdefs.h
+include/tesseract/sbgtypes.h
+include/tesseract/scaleimg.h
+include/tesseract/scanedg.h
+include/tesseract/scanutils.cpp
+include/tesseract/scanutils.h
+include/tesseract/seam.h
+include/tesseract/secname.h
+include/tesseract/serialis.h
+include/tesseract/showim.h
+include/tesseract/sigmenu.h
+include/tesseract/sortflts.h
+include/tesseract/speckle.h
+include/tesseract/split.h
+include/tesseract/states.h
+include/tesseract/statistc.h
+include/tesseract/stderr.h
+include/tesseract/stepblob.h
+include/tesseract/stopper.h
+include/tesseract/strngs.h
+include/tesseract/structures.h
+include/tesseract/submen.h
+include/tesseract/tally.h
+include/tesseract/tessarray.h
+include/tesseract/tessbox.h
+include/tesseract/tessclas.h
+include/tesseract/tessedit.h
+include/tesseract/tessembedded.h
+include/tesseract/tesseractmain.h
+include/tesseract/tessinit.h
+include/tesseract/tessio.h
+include/tesseract/tessopt.h
+include/tesseract/tessout.h
+include/tesseract/tessvars.h
+include/tesseract/tface.h
+include/tesseract/tfacep.h
+include/tesseract/tfacepp.h
+include/tesseract/topitch.h
+include/tesseract/tordmain.h
+include/tesseract/tordvars.h
+include/tesseract/tospace.h
+include/tesseract/tovars.h
+include/tesseract/tprintf.h
+include/tesseract/training.h
+include/tesseract/trie.h
+include/tesseract/tstruct.h
+include/tesseract/txtregn.h
+include/tesseract/underlin.h
+include/tesseract/unichar.h
+include/tesseract/unicharmap.h
+include/tesseract/unicharset.h
+include/tesseract/varable.h
+include/tesseract/varabled.h
+include/tesseract/varblmen.h
+include/tesseract/varblwin.h
+include/tesseract/variables.h
+include/tesseract/vecfuncs.h
+include/tesseract/werd.h
+include/tesseract/werdit.h
+include/tesseract/wordclass.h
+include/tesseract/wordseg.h
+include/tesseract/xform2d.h
+lib/libtesseract_ccstruct.a
+lib/libtesseract_ccutil.a
+lib/libtesseract_classify.a
+lib/libtesseract_cutil.a
+lib/libtesseract_dict.a
+lib/libtesseract_display.a
+lib/libtesseract_image.a
+lib/libtesseract_main.a
+lib/libtesseract_textord.a
+lib/libtesseract_training.a
+lib/libtesseract_viewer.a
+lib/libtesseract_wordrec.a
+libexec/tesseract
+share/doc/tesseract/README
+share/tessdata/confsets
+share/tessdata/eng.DangAmbigs
+share/tessdata/eng.freq-dawg
+share/tessdata/eng.inttemp
+share/tessdata/eng.normproto
+share/tessdata/eng.pffmtable
+share/tessdata/eng.unicharset
+share/tessdata/eng.user-words
+share/tessdata/eng.word-dawg
+share/tesseract/phototest.tif
+@dirrm share/doc/tesseract
+@dirrm share/tesseract
+@dirrm share/tessdata
+@dirrm include/tesseract
diff --git a/graphics/tesseract/distinfo b/graphics/tesseract/distinfo
new file mode 100644
index 00000000000..3e8ed942a24
--- /dev/null
+++ b/graphics/tesseract/distinfo
@@ -0,0 +1,9 @@
+$NetBSD: distinfo,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
+
+SHA1 (tesseract-1.04b.tar.gz) = 263a65e462ed864c4da115cdcb3f3e78613de485
+RMD160 (tesseract-1.04b.tar.gz) = 5e9c70d4435a59157f0af6503a57b02a4a74350e
+Size (tesseract-1.04b.tar.gz) = 2899276 bytes
+SHA1 (patch-ae) = c22f254b73fb9bbd02cf8ef7b4ccbea475afd5df
+SHA1 (patch-ag) = 581ec7ac0528bb28fddb3fbaa35a87bb1835a82e
+SHA1 (patch-ah) = 22987d8523631c5c6e8b2fb5096ff87c5bc13124
+SHA1 (patch-ai) = e219077d2acf0652a9bf6418d3f8ce4e11782ed5
diff --git a/graphics/tesseract/files/tesseract.sh b/graphics/tesseract/files/tesseract.sh
new file mode 100644
index 00000000000..3871ab7bdcf
--- /dev/null
+++ b/graphics/tesseract/files/tesseract.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+exec @PREFIX@/libexec/tesseract "$@"
diff --git a/graphics/tesseract/patches/patch-ae b/graphics/tesseract/patches/patch-ae
new file mode 100644
index 00000000000..04ad0e37cfb
--- /dev/null
+++ b/graphics/tesseract/patches/patch-ae
@@ -0,0 +1,16 @@
+$NetBSD: patch-ae,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
+
+--- cutil/globals.h.orig 2006-06-17 00:17:07.000000000 +0200
++++ cutil/globals.h
+@@ -43,9 +43,11 @@ extern int acts[MAXPROC]; /*actio
+ extern int debugs[MAXPROC]; /*debug flags */
+ extern int plots[MAXPROC]; /*plot flags */
+ extern int corners[4]; /*corners of scan window */
++extern "C" {
+ extern int optind; /*option index */
+ extern char *optarg; /*option argument */
+ /*image file name */
++}
+ extern char imagefile[FILENAMESIZE];
+ /* main directory */
+ extern char directory[FILENAMESIZE];
diff --git a/graphics/tesseract/patches/patch-ag b/graphics/tesseract/patches/patch-ag
new file mode 100644
index 00000000000..280764380a6
--- /dev/null
+++ b/graphics/tesseract/patches/patch-ag
@@ -0,0 +1,15 @@
+$NetBSD: patch-ag,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
+
+--- cutil/tordvars.h.orig 2007-05-16 21:33:53.000000000 +0000
++++ cutil/tordvars.h
+@@ -45,8 +45,8 @@ extern int similarity_enable;
+ extern int similarity_debug; /* Level of debug output */
+ extern int write_raw_output; /* Text before context */
+ extern int write_output; /* Text file output */
+-//extern "C" { extern int display_ratings; } /* Show the ratings */
+-extern int display_ratings; /* Show the ratings */
++extern "C" { extern int display_ratings; } /* Show the ratings */
++//extern int display_ratings; /* Show the ratings */
+ extern int show_bold; /* Use bold text */
+ extern int display_text; /* Show word text */
+ extern int display_blocks; /* Show word as boxes */
diff --git a/graphics/tesseract/patches/patch-ah b/graphics/tesseract/patches/patch-ah
new file mode 100644
index 00000000000..ced73824249
--- /dev/null
+++ b/graphics/tesseract/patches/patch-ah
@@ -0,0 +1,13 @@
+$NetBSD: patch-ah,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
+
+--- ccutil/debugwin.cpp.orig 2006-06-16 22:17:04.000000000 +0000
++++ ccutil/debugwin.cpp
+@@ -229,7 +229,7 @@ DEBUG_WIN::DEBUG_WIN(
+ length += sprintf (command + length, "trap \"\" 1 2 3 13 15\n");
+ length +=
+ sprintf (command + length,
+- "/usr/bin/X11/xterm -sb -sl " INT32FORMAT " -geometry "
++ "/usr/X11R6/bin/xterm -sb -sl " INT32FORMAT " -geometry "
+ INT32FORMAT "x" INT32FORMAT "", buflines, xsize / 8, ysize / 16);
+ if (xpos >= 0)
+ command[length++] = '+';
diff --git a/graphics/tesseract/patches/patch-ai b/graphics/tesseract/patches/patch-ai
new file mode 100644
index 00000000000..45150d3da32
--- /dev/null
+++ b/graphics/tesseract/patches/patch-ai
@@ -0,0 +1,14 @@
+$NetBSD: patch-ai,v 1.1.1.1 2007/05/18 06:39:27 wiz Exp $
+
+--- configure.orig 2007-02-02 21:37:43.000000000 +0100
++++ configure
+@@ -7083,7 +7083,8 @@ else
+ if test "$cross_compiling" = yes; then
+ ac_cv_func_fork_works=cross
+ else
+- cat >conftest.$ac_ext <<_ACEOF
++ cat confdefs.h >conftest.$ac_ext
++ cat >>conftest.$ac_ext <<_ACEOF
+ /* By Ruediger Kuhlmann. */
+ #include <sys/types.h>
+ #if HAVE_UNISTD_H