From 9204924f9dc55716ae0b7c86089afe9369b07025 Mon Sep 17 00:00:00 2001
From: agc
Date: Mon, 14 May 2001 14:03:20 +0000
Subject: Initial import of doc2html-2.1 into the packages collection. Provided in PR 12884 by Jesse Off (joff@newmonics.com) "External converter script for ht://Dig (version 3.1.4 and later), that converts Microsoft Word, Excel and Powerpoint files, and PDF, PostScript, RTF, and WordPerfect files to text (in HTML form) so they can be indexed. Uses a variety of conversion programs: wp2html - to convert Wordperfect and Word7 & 97 documents to HTML catdoc - to extract text from Word documents rtf2html - to convert RTF documents to HTML pdftotext - to extract text from Adobe PDFs ps2ascii - to extract text from PostScript pptHtml - to convert Powerpoint files to HTML xlHtml - to convert Excel spreadsheets to HTML or xls2csv - to obtain data from Excel spreadsheets. Written by David Adams (University of Southampton), and based on the conv_doc.pl script by Gilles Detillieux."
---
converters/doc2html/Makefile | 31 +++++++++++++++++
converters/doc2html/distinfo | 5 +++
converters/doc2html/patches/patch-aa | 65 ++++++++++++++++++++++++++++++++++++
converters/doc2html/pkg/DESCR | 17 ++++++++++
converters/doc2html/pkg/MESSAGE | 15 +++++++++
converters/doc2html/pkg/PLIST | 2 ++
6 files changed, 135 insertions(+)
create mode 100644 converters/doc2html/Makefile
create mode 100644 converters/doc2html/distinfo
create mode 100644 converters/doc2html/patches/patch-aa
create mode 100644 converters/doc2html/pkg/DESCR
create mode 100644 converters/doc2html/pkg/MESSAGE
create mode 100644 converters/doc2html/pkg/PLIST
(limited to 'converters')
diff --git a/converters/doc2html/Makefile b/converters/doc2html/Makefile
new file mode 100644
index 00000000000..fa49d9d49e9
--- /dev/null
+++ b/converters/doc2html/Makefile
@@ -0,0 +1,31 @@
+# $NetBSD: Makefile,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+#
+
+DISTNAME= doc2html
+PKGNAME= doc2html-2.1
+CATEGORIES= converters
+MASTER_SITES= http://www.htdig.org/files/contrib/parsers/
+
+MAINTAINER= packages@netbsd.org
+HOMEPAGE= http://www.htdig.org
+COMMENT= PERL external filter for htdig to convert numerous doc formats to HTML
+
+DEPENDS+= xlHtml-*:../../converters/xlHtml
+DEPENDS+= rtf2html-*:../../converters/rtf2html
+DEPENDS+= ghostscript{,-nox11}-[6-9]*:../../print/ghostscript
+DEPENDS+= xpdf-*:../../graphics/xpdf
+DEPENDS+= catdoc-*:../../textproc/catdoc
+
+USE_PERL5= YES
+
+do-build:
+ ${SED} -e "s%@@LOCALBASE@@%${LOCALBASE}%g" \
+ -e "s%/usr/bin/perl%${PERL5}%g" \
+ -e "s%/bin/sed%${SED}%g" \
+ -e "s%@@X11BASE@@%${X11BASE}%g" < ${WRKSRC}/doc2html.pl \
+ > ${WRKSRC}/doc2html
+
+do-install:
+ ${INSTALL_SCRIPT} ${WRKSRC}/doc2html ${PREFIX}/bin/doc2html
+
+.include "../../mk/bsd.pkg.mk"
diff --git a/converters/doc2html/distinfo b/converters/doc2html/distinfo
new file mode 100644
index 00000000000..d1fb850c8b7
--- /dev/null
+++ b/converters/doc2html/distinfo
@@ -0,0 +1,5 @@
+$NetBSD: distinfo,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+
+SHA1 (doc2html.tar.gz) = 78f78950a87f9134dc871d68e897857c7fa76dbc
+Size (doc2html.tar.gz) = 12444 bytes
+SHA1 (patch-aa) = 86ca749c41251845b06bab5fe59cdddcde01ab63
diff --git a/converters/doc2html/patches/patch-aa b/converters/doc2html/patches/patch-aa
new file mode 100644
index 00000000000..1bd2b4476b2
--- /dev/null
+++ b/converters/doc2html/patches/patch-aa
@@ -0,0 +1,65 @@
+$NetBSD: patch-aa,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+
+--- doc2html.pl.orig Mon Sep 11 05:29:20 2000
++++ doc2html.pl
+@@ -20,48 +20,48 @@
+ # If all else fails, attempts to read file without conversion.
+
+ # wp2html binary
+-$WP2HTML = "/opt/local/wp2html-3.2/bin/wp2html";
++$WP2HTML = "";
+
+ # rtf2html converts Rich Text Font documents to HTML
+ # (get it from: http://www.res.bbsrc.ac.uk/wp2html/):
+-$RTF2HTML = "/opt/local/rtf2html-1.1/bin/rtf2html";
++$RTF2HTML = "@@LOCALBASE@@/bin/rtf2html";
+
+ # Catdoc converts MS Word to plain text
+ # (get it from: http://www.fe.msk.ru/~vitus/catdoc/):
+
+ #version of catdoc for Word6, Word7 & Word97 files:
+-$CATDOC = "/opt/local/catdoc-0.91.4/bin/catdoc";
++$CATDOC = "@@LOCALBASE@@/bin/catdoc";
+
+ #version of catdoc for Word2 files:
+-$CATDOC2 = "/opt/local/catdoc-0.91.4/bin/catdoc";
++$CATDOC2 = "@@LOCALBASE@@/bin/catdoc";
+
+ #version of catdoc for Word 5.1 for MAC:
+-$CATDOCM = "/opt/local/catdoc-0.91.4/bin/catdoc";
++$CATDOCM = "@@LOCALBASE@@/bin/catdoc";
+
+ # PostScript to text converter
+ # (get it from the ghostscript 3.33 (or later) package):
+-$CATPS = "/usr/freeware/bin/ps2ascii";
++$CATPS = "@@LOCALBASE@@/bin/ps2ascii";
+
+ # add to search path the directory which contains gs:
+-$ENV{PATH} .= ":/usr/freeware/bin";
++# $ENV{PATH} .= ":/usr/freeware/bin";
+
+ # PDF to text converter and pdfinfo tool
+ # (get them from the xpdf package at http://www.foolabs.com/xpdf/):
+-$CATPDF = "/opt/local/xpdf-0.9/bin/pdftotext";
+-$PDFINFO = "/opt/local/xpdf-0.9/bin/pdfinfo";
++$CATPDF = "@@X11BASE@@/bin/pdftotext";
++$PDFINFO = "@@X11BASE@@/bin/pdfinfo";
+
+ #Microsoft Excel to HTML converter
+ # (get it from www.xlHtml.org)
+-$XLS2HTML = "/opt/local/xlHtml-0.2.7.2/bin/xlHtml";
++$XLS2HTML = "@@LOCALBASE@@/bin/xlHtml";
+
+ #Microsoft Powerpoint to HTML converter
+ # (get it from www.xlHtml.org)
+-$PPT2HTML = "/opt/local/xlHtml-0.2.7.2/bin/pptHtml";
++$PPT2HTML = "@@LOCALBASE@@/bin/pptHtml";
+
+ #MicroSoft Excel to .CSV converter
+ # (you don't need this if you have xlHtml)
+ # (get it with catdoc)
+-$CATXLS = "/opt/local/catdoc-0.91.2/bin/xls2csv";
++$CATXLS = "";
+
+ ########################################################################################
+ # Written by David Adams .
diff --git a/converters/doc2html/pkg/DESCR b/converters/doc2html/pkg/DESCR
new file mode 100644
index 00000000000..4e1130a0505
--- /dev/null
+++ b/converters/doc2html/pkg/DESCR
@@ -0,0 +1,17 @@
+External converter script for ht://Dig (version 3.1.4 and later), that
+converts Microsoft Word, Excel and Powerpoint files, and PDF,
+PostScript, RTF, and WordPerfect files to text (in HTML form) so they
+can be indexed. Uses a variety of conversion programs:
+
+ wp2html - to convert Wordperfect and Word7 & 97 documents to HTML
+ catdoc - to extract text from Word documents
+ rtf2html - to convert RTF documents to HTML
+ pdftotext - to extract text from Adobe PDFs
+ ps2ascii - to extract text from PostScript
+ pptHtml - to convert Powerpoint files to HTML
+ xlHtml - to convert Excel spreadsheets to HTML
+or
+ xls2csv - to obtain data from Excel spreadsheets.
+
+Written by David Adams (University of Southampton), and based on the
+conv_doc.pl script by Gilles Detillieux.
diff --git a/converters/doc2html/pkg/MESSAGE b/converters/doc2html/pkg/MESSAGE
new file mode 100644
index 00000000000..2092040533e
--- /dev/null
+++ b/converters/doc2html/pkg/MESSAGE
@@ -0,0 +1,15 @@
+==========================================================================
+$NetBSD: MESSAGE,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+
+To use with htdig, add the following to your htdig.conf file:
+
+external_parsers: application/rtf->text/html ${PREFIX}/bin/doc2html \
+ text/rtf->text/html ${PREFIX}/bin/doc2html \
+ application/pdf->text/html ${PREFIX}/bin/doc2html \
+ application/postscript->text/html ${PREFIX}/bin/doc2html \
+ application/msword->text/html ${PREFIX}/bin/doc2html \
+ application/msexcel->text/html ${PREFIX}/bin/doc2html \
+ application/vnd.ms-excel->text/html ${PREFIX}/bin/doc2html \
+ application/vnd.ms-powerpoint->text/html ${PREFIX}/bin/doc2html
+
+==========================================================================
diff --git a/converters/doc2html/pkg/PLIST b/converters/doc2html/pkg/PLIST
new file mode 100644
index 00000000000..d2928613c09
--- /dev/null
+++ b/converters/doc2html/pkg/PLIST
@@ -0,0 +1,2 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+bin/doc2html
-- cgit v1.2.3