diff options
Diffstat (limited to 'converters')
-rw-r--r-- | converters/doc2html/Makefile | 31 | ||||
-rw-r--r-- | converters/doc2html/distinfo | 5 | ||||
-rw-r--r-- | converters/doc2html/patches/patch-aa | 65 | ||||
-rw-r--r-- | converters/doc2html/pkg/DESCR | 17 | ||||
-rw-r--r-- | converters/doc2html/pkg/MESSAGE | 15 | ||||
-rw-r--r-- | converters/doc2html/pkg/PLIST | 2 |
6 files changed, 135 insertions, 0 deletions
diff --git a/converters/doc2html/Makefile b/converters/doc2html/Makefile
new file mode 100644
index 00000000000..fa49d9d49e9
--- /dev/null
+++ b/converters/doc2html/Makefile
@@ -0,0 +1,31 @@
+# $NetBSD: Makefile,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+#
+
+DISTNAME= doc2html
+PKGNAME= doc2html-2.1
+CATEGORIES= converters
+MASTER_SITES= http://www.htdig.org/files/contrib/parsers/
+
+MAINTAINER= packages@netbsd.org
+HOMEPAGE= http://www.htdig.org
+COMMENT= PERL external filter for htdig to convert numerous doc formats to HTML
+
+DEPENDS+= xlHtml-*:../../converters/xlHtml
+DEPENDS+= rtf2html-*:../../converters/rtf2html
+DEPENDS+= ghostscript{,-nox11}-[6-9]*:../../print/ghostscript
+DEPENDS+= xpdf-*:../../graphics/xpdf
+DEPENDS+= catdoc-*:../../textproc/catdoc
+
+USE_PERL5= YES
+
+do-build:
+	${SED} -e "s%@@LOCALBASE@@%${LOCALBASE}%g" \
+		-e "s%/usr/bin/perl%${PERL5}%g" \
+		-e "s%/bin/sed%${SED}%g" \
+		-e "s%@@X11BASE@@%${X11BASE}%g" < ${WRKSRC}/doc2html.pl \
+		> ${WRKSRC}/doc2html
+
+do-install:
+	${INSTALL_SCRIPT} ${WRKSRC}/doc2html ${PREFIX}/bin/doc2html
+
+.include "../../mk/bsd.pkg.mk"
diff --git a/converters/doc2html/distinfo b/converters/doc2html/distinfo
new file mode 100644
index 00000000000..d1fb850c8b7
--- /dev/null
+++ b/converters/doc2html/distinfo
@@ -0,0 +1,5 @@
+$NetBSD: distinfo,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+
+SHA1 (doc2html.tar.gz) = 78f78950a87f9134dc871d68e897857c7fa76dbc
+Size (doc2html.tar.gz) = 12444 bytes
+SHA1 (patch-aa) = 86ca749c41251845b06bab5fe59cdddcde01ab63
diff --git a/converters/doc2html/patches/patch-aa b/converters/doc2html/patches/patch-aa
new file mode 100644
index 00000000000..1bd2b4476b2
--- /dev/null
+++ b/converters/doc2html/patches/patch-aa
@@ -0,0 +1,65 @@
+$NetBSD: patch-aa,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+
+--- doc2html.pl.orig Mon Sep 11 05:29:20 2000
++++ doc2html.pl
+@@ -20,48 +20,48 @@
+ # If all else fails, attempts to read file without conversion.
+ 
+ # wp2html binary
+-$WP2HTML = "/opt/local/wp2html-3.2/bin/wp2html";
++$WP2HTML = "";
+ 
+ # rtf2html converts Rich Text Font documents to HTML
+ # (get it from: http://www.res.bbsrc.ac.uk/wp2html/):
+-$RTF2HTML = "/opt/local/rtf2html-1.1/bin/rtf2html";
++$RTF2HTML = "@@LOCALBASE@@/bin/rtf2html";
+ 
+ # Catdoc converts MS Word to plain text
+ # (get it from: http://www.fe.msk.ru/~vitus/catdoc/):
+ 
+ #version of catdoc for Word6, Word7 & Word97 files:
+-$CATDOC = "/opt/local/catdoc-0.91.4/bin/catdoc";
++$CATDOC = "@@LOCALBASE@@/bin/catdoc";
+ 
+ #version of catdoc for Word2 files:
+-$CATDOC2 = "/opt/local/catdoc-0.91.4/bin/catdoc";
++$CATDOC2 = "@@LOCALBASE@@/bin/catdoc";
+ 
+ #version of catdoc for Word 5.1 for MAC:
+-$CATDOCM = "/opt/local/catdoc-0.91.4/bin/catdoc";
++$CATDOCM = "@@LOCALBASE@@/bin/catdoc";
+ 
+ # PostScript to text converter
+ # (get it from the ghostscript 3.33 (or later) package):
+-$CATPS = "/usr/freeware/bin/ps2ascii";
++$CATPS = "@@LOCALBASE@@/bin/ps2ascii";
+ 
+ # add to search path the directory which contains gs:
+-$ENV{PATH} .= ":/usr/freeware/bin";
++# $ENV{PATH} .= ":/usr/freeware/bin";
+ 
+ # PDF to text converter and pdfinfo tool
+ # (get them from the xpdf package at http://www.foolabs.com/xpdf/):
+-$CATPDF = "/opt/local/xpdf-0.9/bin/pdftotext";
+-$PDFINFO = "/opt/local/xpdf-0.9/bin/pdfinfo";
++$CATPDF = "@@X11BASE@@/bin/pdftotext";
++$PDFINFO = "@@X11BASE@@/bin/pdfinfo";
+ 
+ #Microsoft Excel to HTML converter
+ # (get it from www.xlHtml.org)
+-$XLS2HTML = "/opt/local/xlHtml-0.2.7.2/bin/xlHtml";
++$XLS2HTML = "@@LOCALBASE@@/bin/xlHtml";
+ 
+ #Microsoft Powerpoint to HTML converter
+ # (get it from www.xlHtml.org)
+-$PPT2HTML = "/opt/local/xlHtml-0.2.7.2/bin/pptHtml";
++$PPT2HTML = "@@LOCALBASE@@/bin/pptHtml";
+ 
+ #MicroSoft Excel to .CSV converter
+ # (you don't need this if you have xlHtml)
+ # (get it with catdoc)
+-$CATXLS = "/opt/local/catdoc-0.91.2/bin/xls2csv";
++$CATXLS = "";
+ 
+ ########################################################################################
+ # Written by David Adams <d.j.adams@soton.ac.uk>.
diff --git a/converters/doc2html/pkg/DESCR b/converters/doc2html/pkg/DESCR
new file mode 100644
index 00000000000..4e1130a0505
--- /dev/null
+++ b/converters/doc2html/pkg/DESCR
@@ -0,0 +1,17 @@
+External converter script for ht://Dig (version 3.1.4 and later), that
+converts Microsoft Word, Excel and Powerpoint files, and PDF,
+PostScript, RTF, and WordPerfect files to text (in HTML form) so they
+can be indexed. Uses a variety of conversion programs:
+
+	wp2html - to convert Wordperfect and Word7 & 97 documents to HTML
+	catdoc - to extract text from Word documents
+	rtf2html - to convert RTF documents to HTML
+	pdftotext - to extract text from Adobe PDFs
+	ps2ascii - to extract text from PostScript
+	pptHtml - to convert Powerpoint files to HTML
+	xlHtml - to convert Excel spreadsheets to HTML
+or
+	xls2csv - to obtain data from Excel spreadsheets.
+
+Written by David Adams (University of Southampton), and based on the
+conv_doc.pl script by Gilles Detillieux.
diff --git a/converters/doc2html/pkg/MESSAGE b/converters/doc2html/pkg/MESSAGE
new file mode 100644
index 00000000000..2092040533e
--- /dev/null
+++ b/converters/doc2html/pkg/MESSAGE
@@ -0,0 +1,15 @@
+==========================================================================
+$NetBSD: MESSAGE,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+
+To use with htdig, add the following to your htdig.conf file:
+
+external_parsers: application/rtf->text/html ${PREFIX}/bin/doc2html \
+	text/rtf->text/html ${PREFIX}/bin/doc2html \
+	application/pdf->text/html ${PREFIX}/bin/doc2html \
+	application/postscript->text/html ${PREFIX}/bin/doc2html \
+	application/msword->text/html ${PREFIX}/bin/doc2html \
+	application/msexcel->text/html ${PREFIX}/bin/doc2html \
+	application/vnd.ms-excel->text/html ${PREFIX}/bin/doc2html \
+	application/vnd.ms-powerpoint->text/html ${PREFIX}/bin/doc2html
+
+==========================================================================
diff --git a/converters/doc2html/pkg/PLIST b/converters/doc2html/pkg/PLIST
new file mode 100644
index 00000000000..d2928613c09
--- /dev/null
+++ b/converters/doc2html/pkg/PLIST
@@ -0,0 +1,2 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2001/05/14 14:03:20 agc Exp $
+bin/doc2html