summary | refs | log | tree | commit | diff
path: root/www
diff options
context:
space:
mode:
Diffstat (limited to 'www')
-rw-r--r-- www/crawl/DESCR 16
-rw-r--r-- www/crawl/Makefile 30
-rw-r--r-- www/crawl/PLIST 5
-rw-r--r-- www/crawl/distinfo 7
-rw-r--r-- www/crawl/patches/patch-aa 19
-rw-r--r-- www/crawl/patches/patch-ab 14
-rw-r--r-- www/crawl/patches/patch-ac 13
7 files changed, 104 insertions, 0 deletions
diff --git a/www/crawl/DESCR b/www/crawl/DESCR
new file mode 100644
index 00000000000..854815c3ab0
--- /dev/null
+++ b/www/crawl/DESCR
@@ -0,0 +1,16 @@
+The crawl utility starts a depth-first traversal of the web at the specified
+URLs. It stores all JPEG images that match the configured constraints.
+Crawl is fairly fast and allows for graceful termination. After terminating
+crawl, it is possible to restart it at exactly the same spot where it was
+terminated. Crawl keeps a persistent database that allows multiple crawls
+without revisiting sites.
+
+The main features of crawl are:
+
+ * Saves encountered images or other media types
+ * Media selection based on regular expressions and size constraints
+ * Resume previous crawl after graceful termination
+ * Persistent database of visited URLs
+ * Very small and efficient code
+ * Asynchronous DNS lookups
+ * Supports robots.txt
diff --git a/www/crawl/Makefile b/www/crawl/Makefile
new file mode 100644
index 00000000000..17166b1a98e
--- /dev/null
+++ b/www/crawl/Makefile
@@ -0,0 +1,30 @@
+# $NetBSD: Makefile,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $
+
+DISTNAME= crawl-0.4
+CATEGORIES= www
+MASTER_SITES= http://monkey.org/~provos/
+
+MAINTAINER= peter@pointless.nl
+HOMEPAGE= http://monkey.org/~provos/crawl/
+COMMENT= Small and efficient HTTP crawler
+
+GNU_CONFIGURE= yes
+USE_PKGINSTALL= yes
+USE_BUILDLINK3= yes
+USE_DB185= yes
+
+CONF_FILES= ${PREFIX}/share/examples/${PKGBASE}/crawl.conf ${PKG_SYSCONFDIR}/crawl.conf
+
+post-install:
+ ${INSTALL_DATA_DIR} ${PREFIX}/share/examples/${PKGBASE}
+ ${INSTALL_DATA} ${WRKSRC}/crawl.conf ${PREFIX}/share/examples/${PKGBASE}/crawl.conf
+
+SUBST_CLASSES= path
+SUBST_STAGE.path= post-patch
+SUBST_FILES.path= cfg.h
+SUBST_SED.path= -e 's,crawl.conf,${PKG_SYSCONFDIR}/crawl.conf,g'
+SUBST_MESSAGE.path= "Fixing hardcoded path."
+
+.include "../../devel/libevent/buildlink3.mk"
+.include "../../mk/bdb.buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/www/crawl/PLIST b/www/crawl/PLIST
new file mode 100644
index 00000000000..af0eb30128f
--- /dev/null
+++ b/www/crawl/PLIST
@@ -0,0 +1,5 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $
+bin/crawl
+man/man1/crawl.1
+share/examples/${PKGBASE}/crawl.conf
+@dirrm share/examples/${PKGBASE}
diff --git a/www/crawl/distinfo b/www/crawl/distinfo
new file mode 100644
index 00000000000..a5fd8ee0979
--- /dev/null
+++ b/www/crawl/distinfo
@@ -0,0 +1,7 @@
+$NetBSD: distinfo,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $
+
+SHA1 (crawl-0.4.tar.gz) = b53be27b572ba6a88ab80243b177873aed0b314b
+Size (crawl-0.4.tar.gz) = 111084 bytes
+SHA1 (patch-aa) = 874cb3b73cbc56e320c58039ecc9fd98ab258a0b
+SHA1 (patch-ab) = 9c934c5c7f03e4acbd02222a30267aded4d01e26
+SHA1 (patch-ac) = 079c792e55fa3e60dead7ff9c1c46132d01a00d4
diff --git a/www/crawl/patches/patch-aa b/www/crawl/patches/patch-aa
new file mode 100644
index 00000000000..01fbfe2b1bf
--- /dev/null
+++ b/www/crawl/patches/patch-aa
@@ -0,0 +1,19 @@
+$NetBSD: patch-aa,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $
+
+--- configure.orig 2003-05-18 03:50:55.000000000 +0200
++++ configure 2004-06-11 23:51:00.000000000 +0200
+@@ -2669,6 +2669,14 @@
+ DBINC="-I/usr/include/db2"
+ DBLIB="-ldb2"
+ have_db=yes
++ elif test -f /usr/include/db1/db.h; then
++
++cat >>confdefs.h <<\_ACEOF
++#define HAVE_DB1_H 1
++_ACEOF
++
++ DBLIB="-ldb"
++ have_db=yes
+ elif test -f /usr/include/db_185.h; then
+
+ cat >>confdefs.h <<\_ACEOF
diff --git a/www/crawl/patches/patch-ab b/www/crawl/patches/patch-ab
new file mode 100644
index 00000000000..c3ba1e7ce79
--- /dev/null
+++ b/www/crawl/patches/patch-ab
@@ -0,0 +1,14 @@
+$NetBSD: patch-ab,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $
+
+--- config.h.in.orig 2003-05-18 02:54:45.000000000 +0200
++++ config.h.in 2004-06-12 00:06:58.000000000 +0200
+@@ -42,6 +42,9 @@
+ /* Define if your system has libdb */
+ #undef HAVE_DB_H
+
++/* Define if your system has libdb */
++#undef HAVE_DB1_H
++
+ /* Define to 1 if you have the `dirname' function. */
+ #undef HAVE_DIRNAME
+
diff --git a/www/crawl/patches/patch-ac b/www/crawl/patches/patch-ac
new file mode 100644
index 00000000000..6812a77a969
--- /dev/null
+++ b/www/crawl/patches/patch-ac
@@ -0,0 +1,13 @@
+$NetBSD: patch-ac,v 1.1.1.1 2005/01/18 17:46:31 peter Exp $
+
+--- crawldb.c.orig 2003-05-17 18:59:51.000000000 +0200
++++ crawldb.c 2004-06-11 23:56:47.000000000 +0200
+@@ -44,6 +44,8 @@
+ #include <db_185.h>
+ #elif HAVE_DB_H
+ #include <db.h>
++#elif HAVE_DB1_H
++#include <db1/db.h>
+ #endif
+ #include <compat/md5.h>
+