summaryrefslogtreecommitdiff
path: root/converters
diff options
context:
space:
mode:
Diffstat (limited to 'converters')
-rw-r--r--converters/doc2html/Makefile31
-rw-r--r--converters/doc2html/distinfo5
-rw-r--r--converters/doc2html/patches/patch-aa65
-rw-r--r--converters/doc2html/pkg/DESCR17
-rw-r--r--converters/doc2html/pkg/MESSAGE15
-rw-r--r--converters/doc2html/pkg/PLIST2
6 files changed, 135 insertions, 0 deletions
diff --git a/converters/doc2html/Makefile b/converters/doc2html/Makefile
new file mode 100644
index 00000000000..fa49d9d49e9
--- /dev/null
+++ b/converters/doc2html/Makefile
@@ -0,0 +1,31 @@
+# $NetBSD: Makefile,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+# doc2html: Perl external filter for htdig; tool paths are filled in by do-build.
+
+DISTNAME= doc2html
+PKGNAME= doc2html-2.1
+CATEGORIES= converters
+MASTER_SITES= http://www.htdig.org/files/contrib/parsers/
+
+MAINTAINER= packages@netbsd.org
+HOMEPAGE= http://www.htdig.org
+COMMENT= PERL external filter for htdig to convert numerous doc formats to HTML
+# Packages supplying the converter programs whose paths patches/patch-aa sets.
+DEPENDS+= xlHtml-*:../../converters/xlHtml
+DEPENDS+= rtf2html-*:../../converters/rtf2html
+DEPENDS+= ghostscript{,-nox11}-[6-9]*:../../print/ghostscript
+DEPENDS+= xpdf-*:../../graphics/xpdf
+DEPENDS+= catdoc-*:../../textproc/catdoc
+
+USE_PERL5= YES
+# Write ${WRKSRC}/doc2html: doc2html.pl with placeholders and tool paths filled in.
+do-build:
+ ${SED} -e "s%@@LOCALBASE@@%${LOCALBASE}%g" \
+ -e "s%/usr/bin/perl%${PERL5}%g" \
+ -e "s%/bin/sed%${SED}%g" \
+ -e "s%@@X11BASE@@%${X11BASE}%g" < ${WRKSRC}/doc2html.pl \
+ > ${WRKSRC}/doc2html
+# Install the script generated by do-build as ${PREFIX}/bin/doc2html.
+do-install:
+ ${INSTALL_SCRIPT} ${WRKSRC}/doc2html ${PREFIX}/bin/doc2html
+
+.include "../../mk/bsd.pkg.mk"
diff --git a/converters/doc2html/distinfo b/converters/doc2html/distinfo
new file mode 100644
index 00000000000..d1fb850c8b7
--- /dev/null
+++ b/converters/doc2html/distinfo
@@ -0,0 +1,5 @@
+$NetBSD: distinfo,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+
+SHA1 (doc2html.tar.gz) = 78f78950a87f9134dc871d68e897857c7fa76dbc
+Size (doc2html.tar.gz) = 12444 bytes
+SHA1 (patch-aa) = 86ca749c41251845b06bab5fe59cdddcde01ab63
diff --git a/converters/doc2html/patches/patch-aa b/converters/doc2html/patches/patch-aa
new file mode 100644
index 00000000000..1bd2b4476b2
--- /dev/null
+++ b/converters/doc2html/patches/patch-aa
@@ -0,0 +1,65 @@
+$NetBSD: patch-aa,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+
+--- doc2html.pl.orig Mon Sep 11 05:29:20 2000
++++ doc2html.pl
+@@ -20,48 +20,48 @@
+ # If all else fails, attempts to read file without conversion.
+
+ # wp2html binary
+-$WP2HTML = "/opt/local/wp2html-3.2/bin/wp2html";
++$WP2HTML = "";
+
+ # rtf2html converts Rich Text Font documents to HTML
+ # (get it from: http://www.res.bbsrc.ac.uk/wp2html/):
+-$RTF2HTML = "/opt/local/rtf2html-1.1/bin/rtf2html";
++$RTF2HTML = "@@LOCALBASE@@/bin/rtf2html";
+
+ # Catdoc converts MS Word to plain text
+ # (get it from: http://www.fe.msk.ru/~vitus/catdoc/):
+
+ #version of catdoc for Word6, Word7 & Word97 files:
+-$CATDOC = "/opt/local/catdoc-0.91.4/bin/catdoc";
++$CATDOC = "@@LOCALBASE@@/bin/catdoc";
+
+ #version of catdoc for Word2 files:
+-$CATDOC2 = "/opt/local/catdoc-0.91.4/bin/catdoc";
++$CATDOC2 = "@@LOCALBASE@@/bin/catdoc";
+
+ #version of catdoc for Word 5.1 for MAC:
+-$CATDOCM = "/opt/local/catdoc-0.91.4/bin/catdoc";
++$CATDOCM = "@@LOCALBASE@@/bin/catdoc";
+
+ # PostScript to text converter
+ # (get it from the ghostscript 3.33 (or later) package):
+-$CATPS = "/usr/freeware/bin/ps2ascii";
++$CATPS = "@@LOCALBASE@@/bin/ps2ascii";
+
+ # add to search path the directory which contains gs:
+-$ENV{PATH} .= ":/usr/freeware/bin";
++# $ENV{PATH} .= ":/usr/freeware/bin";
+
+ # PDF to text converter and pdfinfo tool
+ # (get them from the xpdf package at http://www.foolabs.com/xpdf/):
+-$CATPDF = "/opt/local/xpdf-0.9/bin/pdftotext";
+-$PDFINFO = "/opt/local/xpdf-0.9/bin/pdfinfo";
++$CATPDF = "@@X11BASE@@/bin/pdftotext";
++$PDFINFO = "@@X11BASE@@/bin/pdfinfo";
+
+ #Microsoft Excel to HTML converter
+ # (get it from www.xlHtml.org)
+-$XLS2HTML = "/opt/local/xlHtml-0.2.7.2/bin/xlHtml";
++$XLS2HTML = "@@LOCALBASE@@/bin/xlHtml";
+
+ #Microsoft Powerpoint to HTML converter
+ # (get it from www.xlHtml.org)
+-$PPT2HTML = "/opt/local/xlHtml-0.2.7.2/bin/pptHtml";
++$PPT2HTML = "@@LOCALBASE@@/bin/pptHtml";
+
+ #MicroSoft Excel to .CSV converter
+ # (you don't need this if you have xlHtml)
+ # (get it with catdoc)
+-$CATXLS = "/opt/local/catdoc-0.91.2/bin/xls2csv";
++$CATXLS = "";
+
+ ########################################################################################
+ # Written by David Adams <d.j.adams@soton.ac.uk>.
diff --git a/converters/doc2html/pkg/DESCR b/converters/doc2html/pkg/DESCR
new file mode 100644
index 00000000000..4e1130a0505
--- /dev/null
+++ b/converters/doc2html/pkg/DESCR
@@ -0,0 +1,17 @@
+External converter script for ht://Dig (version 3.1.4 and later), that
+converts Microsoft Word, Excel and PowerPoint files, and PDF,
+PostScript, RTF, and WordPerfect files to text (in HTML form) so they
+can be indexed. Uses a variety of conversion programs:
+
+ wp2html - to convert WordPerfect and Word7 & 97 documents to HTML
+ catdoc - to extract text from Word documents
+ rtf2html - to convert RTF documents to HTML
+ pdftotext - to extract text from Adobe PDFs
+ ps2ascii - to extract text from PostScript
+ pptHtml - to convert PowerPoint files to HTML
+ xlHtml - to convert Excel spreadsheets to HTML
+or
+ xls2csv - to obtain data from Excel spreadsheets.
+
+Written by David Adams (University of Southampton), and based on the
+conv_doc.pl script by Gilles Detillieux.
diff --git a/converters/doc2html/pkg/MESSAGE b/converters/doc2html/pkg/MESSAGE
new file mode 100644
index 00000000000..2092040533e
--- /dev/null
+++ b/converters/doc2html/pkg/MESSAGE
@@ -0,0 +1,15 @@
+==========================================================================
+$NetBSD: MESSAGE,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+
+To use with htdig, add the following to your htdig.conf file:
+
+external_parsers: application/rtf->text/html ${PREFIX}/bin/doc2html \
+ text/rtf->text/html ${PREFIX}/bin/doc2html \
+ application/pdf->text/html ${PREFIX}/bin/doc2html \
+ application/postscript->text/html ${PREFIX}/bin/doc2html \
+ application/msword->text/html ${PREFIX}/bin/doc2html \
+ application/msexcel->text/html ${PREFIX}/bin/doc2html \
+ application/vnd.ms-excel->text/html ${PREFIX}/bin/doc2html \
+ application/vnd.ms-powerpoint->text/html ${PREFIX}/bin/doc2html
+
+==========================================================================
diff --git a/converters/doc2html/pkg/PLIST b/converters/doc2html/pkg/PLIST
new file mode 100644
index 00000000000..d2928613c09
--- /dev/null
+++ b/converters/doc2html/pkg/PLIST
@@ -0,0 +1,2 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+bin/doc2html