summary | refs | log | tree | commit | diff
path: root/graphics/tesseract
diff options
context:
space:
mode:
author fhajny <fhajny@pkgsrc.org> 2017-02-21 17:51:18 +0000
committer fhajny <fhajny@pkgsrc.org> 2017-02-21 17:51:18 +0000
commit 57ccbb913f15701b0d8dec62f7b78991a1638389 (patch)
tree 59016f6836a475e0d9ad77cc2f9cad7e976ac036 /graphics/tesseract
parent 8b616233616884854c9ffcfe094725094ccb36a8 (diff)
download pkgsrc-57ccbb913f15701b0d8dec62f7b78991a1638389.tar.gz
Update graphics/tesseract to 3.05.00
- Fine-tuned the hOCR output. - Added TSV as another optional output format. - Fixed ABI break introduced in 3.04.00 with the AnalyseLayout() method. - text2image tool: Enable all OpenType ligatures available in a font. This feature requires Pango 1.38 or newer. - Training tools: Replaced asserts with tprintf() and exit(1). - Fixed Cygwin compatibility. - Improved multipage TIFF processing. - Improved the embedded PDF font (pdf.ttf). - Enable selection of OCR engine mode from command line. - Changed tesseract command line parameter '-psm' to '--psm'. - Added new C API for orientation and script detection, removed the old one. - Increased minimum autoconf version to 2.59. - Removed dead code. - Fixed many compiler warnings. - Fixed memory and resource leaks. - Fixed some issues with the 'Cube' OCR engine. - Fixed some OpenCL issues. - Added option to build Tesseract with CMake build system. - Implemented CPPAN support for easy Windows building.
Diffstat (limited to 'graphics/tesseract')
-rw-r--r-- graphics/tesseract/Makefile | 11
-rw-r--r-- graphics/tesseract/PLIST | 3
-rw-r--r-- graphics/tesseract/distinfo | 12
-rw-r--r-- graphics/tesseract/patches/patch-tessdata_Makefile.am | 34
-rw-r--r-- graphics/tesseract/patches/patch-viewer_svutil_cpp | 13
5 files changed, 49 insertions, 24 deletions
diff --git a/graphics/tesseract/Makefile b/graphics/tesseract/Makefile
index 0fe9713c7c0..8462e94eca4 100644
--- a/graphics/tesseract/Makefile
+++ b/graphics/tesseract/Makefile
@@ -1,7 +1,6 @@
-# $NetBSD: Makefile,v 1.24 2017/02/12 06:25:31 ryoon Exp $
+# $NetBSD: Makefile,v 1.25 2017/02/21 17:51:18 fhajny Exp $
-DISTNAME= tesseract-3.04.01
-PKGREVISION= 4
+DISTNAME= tesseract-3.05.00
CATEGORIES= graphics
MASTER_SITES= ${MASTER_SITE_GITHUB:=tesseract-ocr/}
DISTFILES= ${DEFAULT_DISTFILES}
@@ -17,7 +16,7 @@ SITES.tessdata-${LANGVER}.tar.gz= -${MASTER_SITES:Q}tessdata/archive/${LANGVER}.
USE_LANGUAGES= c c++
USE_LIBTOOL= yes
-USE_TOOLS+= gmake pax pkg-config
+USE_TOOLS+= autoconf automake make pax pkg-config
GNU_CONFIGURE= yes
CONFIGURE_ENV+= LIBLEPT_HEADERSDIR=${BUILDLINK_PREFIX.leptonica}/include
@@ -29,6 +28,10 @@ INSTALLATION_DIRS= libexec share/doc/tesseract share/tesseract
post-extract:
${MV} ${WRKDIR}/tessdata-${LANGVER}/* ${WRKSRC}/tessdata
+# For some reason this release tarball lacks the generated autotools files (configure etc.); regenerate them
+pre-configure:
+ cd ${WRKSRC} && ${SETENV} ${CONFIGURE_ENV} ${SH} ./autogen.sh
+
# Building the training tools requires that the base build is done
post-build:
cd ${WRKSRC} && ${BUILD_MAKE_CMD} training
diff --git a/graphics/tesseract/PLIST b/graphics/tesseract/PLIST
index 4e1b486824a..4a53239f7a6 100644
--- a/graphics/tesseract/PLIST
+++ b/graphics/tesseract/PLIST
@@ -1,4 +1,4 @@
-@comment $NetBSD: PLIST,v 1.8 2016/03/17 12:51:14 fhajny Exp $
+@comment $NetBSD: PLIST,v 1.9 2017/02/21 17:51:18 fhajny Exp $
bin/ambiguous_words
bin/classifier_tester
bin/cntraining
@@ -91,6 +91,7 @@ share/tessdata/configs/pdf
share/tessdata/configs/quiet
share/tessdata/configs/rebox
share/tessdata/configs/strokewidth
+share/tessdata/configs/tsv
share/tessdata/configs/txt
share/tessdata/configs/unlv
share/tessdata/cym.traineddata
diff --git a/graphics/tesseract/distinfo b/graphics/tesseract/distinfo
index 2cb1e430f90..a06b960db72 100644
--- a/graphics/tesseract/distinfo
+++ b/graphics/tesseract/distinfo
@@ -1,12 +1,12 @@
-$NetBSD: distinfo,v 1.13 2016/03/17 12:51:14 fhajny Exp $
+$NetBSD: distinfo,v 1.14 2017/02/21 17:51:18 fhajny Exp $
SHA1 (tessdata-3.04.00.tar.gz) = 6ea24cccf0e823da98589ccc75d51f0950618236
RMD160 (tessdata-3.04.00.tar.gz) = 0a3c3b3c127b6031e2e037d78e3a6f159fb9e869
SHA512 (tessdata-3.04.00.tar.gz) = 4fbb66137c729e16c7a9e35b09916a45c1bb5ec5a7002a22647e0b10975362cb44c6d6c0c997baf25866f78749ec2d4a86317ec3fb664bd963243e230516d162
Size (tessdata-3.04.00.tar.gz) = 499088801 bytes
-SHA1 (tesseract-3.04.01.tar.gz) = 359ffc1925f0270ca100a2b4c1d3b41f4b23701d
-RMD160 (tesseract-3.04.01.tar.gz) = 5e754411afa74cfc4e6b601fe2c770ba93a25f23
-SHA512 (tesseract-3.04.01.tar.gz) = 816f6c29f8efeaf157891ddb022bd0daf8eb27c0cb8b6df5542e745d209f0ce617245e4583c59a561ed3216044d3afb38f8620ee94cdea656364923dcb140b5b
-Size (tesseract-3.04.01.tar.gz) = 2269105 bytes
+SHA1 (tesseract-3.05.00.tar.gz) = 93c036ce510a6ae40db4d2f30844d3dba1e68faa
+RMD160 (tesseract-3.05.00.tar.gz) = 39705f462aaa160582d9f2a7886e692712523f88
+SHA512 (tesseract-3.05.00.tar.gz) = 54ba96bd1b0d19883e8a0f717ecfddd459945f9dda0fd340e29e2dab3af94d0eceb7f3d207f34685ee97d1ec763efac5d94dec7e0498a7ca85fec35da4ed76b1
+Size (tesseract-3.05.00.tar.gz) = 3581853 bytes
+SHA1 (patch-tessdata_Makefile.am) = 013c9b4bbf64a0948a362d334e6b86a240aa944f
SHA1 (patch-viewer_scrollview.cpp) = 05a9ff5d2a9e302b3a682144db54c612fd4eccc2
-SHA1 (patch-viewer_svutil_cpp) = ec6f8b79c66993e5272a4f2a13aa92f2432cbea0
diff --git a/graphics/tesseract/patches/patch-tessdata_Makefile.am b/graphics/tesseract/patches/patch-tessdata_Makefile.am
new file mode 100644
index 00000000000..922206f961d
--- /dev/null
+++ b/graphics/tesseract/patches/patch-tessdata_Makefile.am
@@ -0,0 +1,34 @@
+$NetBSD: patch-tessdata_Makefile.am,v 1.1 2017/02/21 17:51:18 fhajny Exp $
+
+Revert a trunk commit that broke install-lang for tesseract<4.
+
+--- tessdata/Makefile.am.orig 2017-02-16 17:59:48.000000000 +0000
++++ tessdata/Makefile.am
+@@ -44,6 +44,27 @@ langdata = bul.traineddata mlt.trainedda
+ ita.cube.nn fra.cube.size eng.cube.bigrams ara.cube.lm \
+ rus.cube.nn spa.cube.nn hin.cube.bigrams
+
++.PHONY: install-langs
++install-langs:
++ @if [ ! -d $(DESTDIR)$(datadir) ]; then mkdir -p $(DESTDIR)$(datadir); fi;
++ @if test "${LANGS}" != ""; then \
++ for lang_code in ${LANGS}; do \
++ echo "installing data for $$lang_code"; \
++ $(INSTALL) -m 644 $(srcdir)/$$lang_code.* $(DESTDIR)$(datadir); \
++ done; \
++ else \
++ for l in ./*.traineddata; do \
++ filename=`basename $$l`; \
++ lang_code=$${filename%.*}; \
++ if test "$$lang_code" == "*"; then \
++ echo "No lang present."; \
++ break; \
++ fi; \
++ echo "installing data for $$lang_code"; \
++ $(INSTALL) -m 644 $(srcdir)/$$lang_code.* $(DESTDIR)$(datadir); \
++ done; \
++ fi;
++
+ uninstall-local:
+ cd $(DESTDIR)$(datadir); \
+ rm -f $(langdata)
diff --git a/graphics/tesseract/patches/patch-viewer_svutil_cpp b/graphics/tesseract/patches/patch-viewer_svutil_cpp
deleted file mode 100644
index addfb8535ee..00000000000
--- a/graphics/tesseract/patches/patch-viewer_svutil_cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-$NetBSD: patch-viewer_svutil_cpp,v 1.2 2012/11/23 23:52:33 marino Exp $
-
---- viewer/svutil.cpp~ 2009-06-03 16:29:38.000000000 +0000
-+++ viewer/svutil.cpp
-@@ -33,6 +33,8 @@
- #include <pthread.h>
- #include <semaphore.h>
- #include <signal.h>
-+#include <stdio.h>
-+#include <unistd.h>
- #include <stdlib.h>
- #include <string.h>
- #include <netdb.h>