diff options
author | agc <agc@pkgsrc.org> | 2001-05-14 14:03:20 +0000 |
---|---|---|
committer | agc <agc@pkgsrc.org> | 2001-05-14 14:03:20 +0000 |
commit | 9204924f9dc55716ae0b7c86089afe9369b07025 (patch) | |
tree | 21443ead9fdef073932ff126438e7e2241ba04a6 /converters | |
parent | e870d601510254c33660b6b4855a3fbeec019da0 (diff) | |
download | pkgsrc-9204924f9dc55716ae0b7c86089afe9369b07025.tar.gz |
Initial import of doc2html-2.1 into the packages collection.
Provided in PR 12884 by Jesse Off (joff@newmonics.com)
"External converter script for ht://Dig (version 3.1.4 and later), that
converts Microsoft Word, Excel and PowerPoint files, and PDF,
PostScript, RTF, and WordPerfect files to text (in HTML form) so they
can be indexed. Uses a variety of conversion programs:
wp2html - to convert WordPerfect and Word7 & 97 documents to HTML
catdoc - to extract text from Word documents
rtf2html - to convert RTF documents to HTML
pdftotext - to extract text from Adobe PDFs
ps2ascii - to extract text from PostScript
pptHtml - to convert PowerPoint files to HTML
xlHtml - to convert Excel spreadsheets to HTML
or
xls2csv - to obtain data from Excel spreadsheets.
Written by David Adams (University of Southampton), and based on the
conv_doc.pl script by Gilles Detillieux."
Diffstat (limited to 'converters')
-rw-r--r-- | converters/doc2html/Makefile | 31 | ||||
-rw-r--r-- | converters/doc2html/distinfo | 5 | ||||
-rw-r--r-- | converters/doc2html/patches/patch-aa | 65 | ||||
-rw-r--r-- | converters/doc2html/pkg/DESCR | 17 | ||||
-rw-r--r-- | converters/doc2html/pkg/MESSAGE | 15 | ||||
-rw-r--r-- | converters/doc2html/pkg/PLIST | 2 |
6 files changed, 135 insertions, 0 deletions
diff --git a/converters/doc2html/Makefile b/converters/doc2html/Makefile new file mode 100644 index 00000000000..fa49d9d49e9 --- /dev/null +++ b/converters/doc2html/Makefile @@ -0,0 +1,31 @@ +# $NetBSD: Makefile,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $ +# + +DISTNAME= doc2html +PKGNAME= doc2html-2.1 +CATEGORIES= converters +MASTER_SITES= http://www.htdig.org/files/contrib/parsers/ + +MAINTAINER= packages@netbsd.org +HOMEPAGE= http://www.htdig.org +COMMENT= PERL external filter for htdig to convert numerous doc formats to HTML + +DEPENDS+= xlHtml-*:../../converters/xlHtml +DEPENDS+= rtf2html-*:../../converters/rtf2html +DEPENDS+= ghostscript{,-nox11}-[6-9]*:../../print/ghostscript +DEPENDS+= xpdf-*:../../graphics/xpdf +DEPENDS+= catdoc-*:../../textproc/catdoc + +USE_PERL5= YES + +do-build: + ${SED} -e "s%@@LOCALBASE@@%${LOCALBASE}%g" \ + -e "s%/usr/bin/perl%${PERL5}%g" \ + -e "s%/bin/sed%${SED}%g" \ + -e "s%@@X11BASE@@%${X11BASE}%g" < ${WRKSRC}/doc2html.pl \ + > ${WRKSRC}/doc2html + +do-install: + ${INSTALL_SCRIPT} ${WRKSRC}/doc2html ${PREFIX}/bin/doc2html + +.include "../../mk/bsd.pkg.mk" diff --git a/converters/doc2html/distinfo b/converters/doc2html/distinfo new file mode 100644 index 00000000000..d1fb850c8b7 --- /dev/null +++ b/converters/doc2html/distinfo @@ -0,0 +1,5 @@ +$NetBSD: distinfo,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $ + +SHA1 (doc2html.tar.gz) = 78f78950a87f9134dc871d68e897857c7fa76dbc +Size (doc2html.tar.gz) = 12444 bytes +SHA1 (patch-aa) = 86ca749c41251845b06bab5fe59cdddcde01ab63 diff --git a/converters/doc2html/patches/patch-aa b/converters/doc2html/patches/patch-aa new file mode 100644 index 00000000000..1bd2b4476b2 --- /dev/null +++ b/converters/doc2html/patches/patch-aa @@ -0,0 +1,65 @@ +$NetBSD: patch-aa,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $ + +--- doc2html.pl.orig Mon Sep 11 05:29:20 2000 ++++ doc2html.pl +@@ -20,48 +20,48 @@ + # If all else fails, attempts to read file without conversion. 
+ + # wp2html binary +-$WP2HTML = "/opt/local/wp2html-3.2/bin/wp2html"; ++$WP2HTML = ""; + + # rtf2html converts Rich Text Font documents to HTML + # (get it from: http://www.res.bbsrc.ac.uk/wp2html/): +-$RTF2HTML = "/opt/local/rtf2html-1.1/bin/rtf2html"; ++$RTF2HTML = "@@LOCALBASE@@/bin/rtf2html"; + + # Catdoc converts MS Word to plain text + # (get it from: http://www.fe.msk.ru/~vitus/catdoc/): + + #version of catdoc for Word6, Word7 & Word97 files: +-$CATDOC = "/opt/local/catdoc-0.91.4/bin/catdoc"; ++$CATDOC = "@@LOCALBASE@@/bin/catdoc"; + + #version of catdoc for Word2 files: +-$CATDOC2 = "/opt/local/catdoc-0.91.4/bin/catdoc"; ++$CATDOC2 = "@@LOCALBASE@@/bin/catdoc"; + + #version of catdoc for Word 5.1 for MAC: +-$CATDOCM = "/opt/local/catdoc-0.91.4/bin/catdoc"; ++$CATDOCM = "@@LOCALBASE@@/bin/catdoc"; + + # PostScript to text converter + # (get it from the ghostscript 3.33 (or later) package): +-$CATPS = "/usr/freeware/bin/ps2ascii"; ++$CATPS = "@@LOCALBASE@@/bin/ps2ascii"; + + # add to search path the directory which contains gs: +-$ENV{PATH} .= ":/usr/freeware/bin"; ++# $ENV{PATH} .= ":/usr/freeware/bin"; + + # PDF to text converter and pdfinfo tool + # (get them from the xpdf package at http://www.foolabs.com/xpdf/): +-$CATPDF = "/opt/local/xpdf-0.9/bin/pdftotext"; +-$PDFINFO = "/opt/local/xpdf-0.9/bin/pdfinfo"; ++$CATPDF = "@@X11BASE@@/bin/pdftotext"; ++$PDFINFO = "@@X11BASE@@/bin/pdfinfo"; + + #Microsoft Excel to HTML converter + # (get it from www.xlHtml.org) +-$XLS2HTML = "/opt/local/xlHtml-0.2.7.2/bin/xlHtml"; ++$XLS2HTML = "@@LOCALBASE@@/bin/xlHtml"; + + #Microsoft Powerpoint to HTML converter + # (get it from www.xlHtml.org) +-$PPT2HTML = "/opt/local/xlHtml-0.2.7.2/bin/pptHtml"; ++$PPT2HTML = "@@LOCALBASE@@/bin/pptHtml"; + + #MicroSoft Excel to .CSV converter + # (you don't need this if you have xlHtml) + # (get it with catdoc) +-$CATXLS = "/opt/local/catdoc-0.91.2/bin/xls2csv"; ++$CATXLS = ""; + + 
######################################################################################## + # Written by David Adams <d.j.adams@soton.ac.uk>. diff --git a/converters/doc2html/pkg/DESCR b/converters/doc2html/pkg/DESCR new file mode 100644 index 00000000000..4e1130a0505 --- /dev/null +++ b/converters/doc2html/pkg/DESCR @@ -0,0 +1,17 @@ +External converter script for ht://Dig (version 3.1.4 and later), that +converts Microsoft Word, Excel and Powerpoint files, and PDF, +PostScript, RTF, and WordPerfect files to text (in HTML form) so they +can be indexed. Uses a variety of conversion programs: + + wp2html - to convert Wordperfect and Word7 & 97 documents to HTML + catdoc - to extract text from Word documents + rtf2html - to convert RTF documents to HTML + pdftotext - to extract text from Adobe PDFs + ps2ascii - to extract text from PostScript + pptHtml - to convert Powerpoint files to HTML + xlHtml - to convert Excel spreadsheets to HTML +or + xls2csv - to obtain data from Excel spreadsheets. + +Written by David Adams (University of Southampton), and based on the +conv_doc.pl script by Gilles Detillieux. 
diff --git a/converters/doc2html/pkg/MESSAGE b/converters/doc2html/pkg/MESSAGE new file mode 100644 index 00000000000..2092040533e --- /dev/null +++ b/converters/doc2html/pkg/MESSAGE @@ -0,0 +1,15 @@ +========================================================================== +$NetBSD: MESSAGE,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $ + +To use with htdig, add the following to your htdig.conf file: + +external_parsers: application/rtf->text/html ${PREFIX}/bin/doc2html \ + text/rtf->text/html ${PREFIX}/bin/doc2html \ + application/pdf->text/html ${PREFIX}/bin/doc2html \ + application/postscript->text/html ${PREFIX}/bin/doc2html \ + application/msword->text/html ${PREFIX}/bin/doc2html \ + application/msexcel->text/html ${PREFIX}/bin/doc2html \ + application/vnd.ms-excel->text/html ${PREFIX}/bin/doc2html \ + application/vnd.ms-powerpoint->text/html ${PREFIX}/bin/doc2html + +========================================================================== diff --git a/converters/doc2html/pkg/PLIST b/converters/doc2html/pkg/PLIST new file mode 100644 index 00000000000..d2928613c09 --- /dev/null +++ b/converters/doc2html/pkg/PLIST @@ -0,0 +1,2 @@ +@comment $NetBSD: PLIST,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $ +bin/doc2html |