diff options
Diffstat (limited to 'www')
-rw-r--r-- | www/crawl/DESCR | 16 | ||||
-rw-r--r-- | www/crawl/Makefile | 30 | ||||
-rw-r--r-- | www/crawl/PLIST | 5 | ||||
-rw-r--r-- | www/crawl/distinfo | 7 | ||||
-rw-r--r-- | www/crawl/patches/patch-aa | 19 | ||||
-rw-r--r-- | www/crawl/patches/patch-ab | 14 | ||||
-rw-r--r-- | www/crawl/patches/patch-ac | 13 |
7 files changed, 104 insertions, 0 deletions
diff --git a/www/crawl/DESCR b/www/crawl/DESCR new file mode 100644 index 00000000000..854815c3ab0 --- /dev/null +++ b/www/crawl/DESCR @@ -0,0 +1,16 @@ +The crawl utility starts a depth-first traversal of the web at the specified +URLs. It stores all JPEG images that match the configured constraints. +Crawl is fairly fast and allows for graceful termination. After terminating +crawl, it is possible to restart it at exactly the same spot where it was +terminated. Crawl keeps a persistent database that allows multiple crawls +without revisiting sites. + +The main features of crawl are: + + * Saves encountered images or other media types + * Media selection based on regular expressions and size constraints + * Resume previous crawl after graceful termination + * Persistent database of visited URLs + * Very small and efficient code + * Asynchronous DNS lookups + * Supports robots.txt diff --git a/www/crawl/Makefile b/www/crawl/Makefile new file mode 100644 index 00000000000..17166b1a98e --- /dev/null +++ b/www/crawl/Makefile @@ -0,0 +1,30 @@ +# $NetBSD: Makefile,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $ + +DISTNAME= crawl-0.4 +CATEGORIES= www +MASTER_SITES= http://monkey.org/~provos/ + +MAINTAINER= peter@pointless.nl +HOMEPAGE= http://monkey.org/~provos/crawl/ +COMMENT= Small and efficient HTTP crawler + +GNU_CONFIGURE= yes +USE_PKGINSTALL= yes +USE_BUILDLINK3= yes +USE_DB185= yes + +CONF_FILES= ${PREFIX}/share/examples/${PKGBASE}/crawl.conf ${PKG_SYSCONFDIR}/crawl.conf + +post-install: + ${INSTALL_DATA_DIR} ${PREFIX}/share/examples/${PKGBASE} + ${INSTALL_DATA} ${WRKSRC}/crawl.conf ${PREFIX}/share/examples/${PKGBASE}/crawl.conf + +SUBST_CLASSES= path +SUBST_STAGE.path= post-patch +SUBST_FILES.path= cfg.h +SUBST_SED.path= -e 's,crawl.conf,${PKG_SYSCONFDIR}/crawl.conf,g' +SUBST_MESSAGE.path= "Fixing hardcoded path." 
+ +.include "../../devel/libevent/buildlink3.mk" +.include "../../mk/bdb.buildlink3.mk" +.include "../../mk/bsd.pkg.mk" diff --git a/www/crawl/PLIST b/www/crawl/PLIST new file mode 100644 index 00000000000..af0eb30128f --- /dev/null +++ b/www/crawl/PLIST @@ -0,0 +1,5 @@ +@comment $NetBSD: PLIST,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $ +bin/crawl +man/man1/crawl.1 +share/examples/${PKGBASE}/crawl.conf +@dirrm share/examples/${PKGBASE} diff --git a/www/crawl/distinfo b/www/crawl/distinfo new file mode 100644 index 00000000000..a5fd8ee0979 --- /dev/null +++ b/www/crawl/distinfo @@ -0,0 +1,7 @@ +$NetBSD: distinfo,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $ + +SHA1 (crawl-0.4.tar.gz) = b53be27b572ba6a88ab80243b177873aed0b314b +Size (crawl-0.4.tar.gz) = 111084 bytes +SHA1 (patch-aa) = 874cb3b73cbc56e320c58039ecc9fd98ab258a0b +SHA1 (patch-ab) = 9c934c5c7f03e4acbd02222a30267aded4d01e26 +SHA1 (patch-ac) = 079c792e55fa3e60dead7ff9c1c46132d01a00d4 diff --git a/www/crawl/patches/patch-aa b/www/crawl/patches/patch-aa new file mode 100644 index 00000000000..01fbfe2b1bf --- /dev/null +++ b/www/crawl/patches/patch-aa @@ -0,0 +1,19 @@ +$NetBSD: patch-aa,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $ + +--- configure.orig 2003-05-18 03:50:55.000000000 +0200 ++++ configure 2004-06-11 23:51:00.000000000 +0200 +@@ -2669,6 +2669,14 @@ + DBINC="-I/usr/include/db2" + DBLIB="-ldb2" + have_db=yes ++ elif test -f /usr/include/db1/db.h; then ++ ++cat >>confdefs.h <<\_ACEOF ++#define HAVE_DB1_H 1 ++_ACEOF ++ ++ DBLIB="-ldb" ++ have_db=yes + elif test -f /usr/include/db_185.h; then + + cat >>confdefs.h <<\_ACEOF diff --git a/www/crawl/patches/patch-ab b/www/crawl/patches/patch-ab new file mode 100644 index 00000000000..c3ba1e7ce79 --- /dev/null +++ b/www/crawl/patches/patch-ab @@ -0,0 +1,14 @@ +$NetBSD: patch-ab,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $ + +--- config.h.in.orig 2003-05-18 02:54:45.000000000 +0200 ++++ config.h.in 2004-06-12 00:06:58.000000000 +0200 +@@ -42,6 +42,9 @@ + /* Define if 
your system has libdb */ + #undef HAVE_DB_H + ++/* Define if your system has libdb */ ++#undef HAVE_DB1_H ++ + /* Define to 1 if you have the `dirname' function. */ + #undef HAVE_DIRNAME + diff --git a/www/crawl/patches/patch-ac b/www/crawl/patches/patch-ac new file mode 100644 index 00000000000..6812a77a969 --- /dev/null +++ b/www/crawl/patches/patch-ac @@ -0,0 +1,13 @@ +$NetBSD: patch-ac,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $ + +--- crawldb.c.orig 2003-05-17 18:59:51.000000000 +0200 ++++ crawldb.c 2004-06-11 23:56:47.000000000 +0200 +@@ -44,6 +44,8 @@ + #include <db_185.h> + #elif HAVE_DB_H + #include <db.h> ++#elif HAVE_DB1_H ++#include <db1/db.h> + #endif + #include <compat/md5.h> + |