summaryrefslogtreecommitdiff
path: root/mail/OSBF-lua
diff options
context:
space:
mode:
authorshannonjr <shannonjr@pkgsrc.org>2008-10-13 11:29:53 +0000
committershannonjr <shannonjr@pkgsrc.org>2008-10-13 11:29:53 +0000
commit0756a64028e43be62e5d6cbae4d34b3b254ff2c1 (patch)
treeacffac4ce9b9c4dbfce6cfb6348a63a4eb1a158e /mail/OSBF-lua
parentbbeebe2dbf0bd52727b6d2781d7e2dc6ea13da39 (diff)
downloadpkgsrc-0756a64028e43be62e5d6cbae4d34b3b254ff2c1.tar.gz
Rename lua-OSBF to OSBF-lua for consistency with package name
Diffstat (limited to 'mail/OSBF-lua')
-rw-r--r--mail/OSBF-lua/DESCR21
-rw-r--r--mail/OSBF-lua/Makefile37
-rw-r--r--mail/OSBF-lua/PLIST18
-rw-r--r--mail/OSBF-lua/distinfo7
-rw-r--r--mail/OSBF-lua/patches/patch-aa27
-rw-r--r--mail/OSBF-lua/patches/patch-bb13
6 files changed, 123 insertions, 0 deletions
diff --git a/mail/OSBF-lua/DESCR b/mail/OSBF-lua/DESCR
new file mode 100644
index 00000000000..b9a121a32f0
--- /dev/null
+++ b/mail/OSBF-lua/DESCR
@@ -0,0 +1,21 @@
+OSBF-Lua (Orthogonal Sparse Bigrams with confidence Factor) is a Lua C module
+for text classification. It is a port of the OSBF classifier implemented in
+the CRM114 project. This implementation attempts to put focus on the
+classification task itself by using Lua as the scripting language, a powerful
+yet light-weight and fast language, which makes it easier to build and test
+more elaborate filters and training methods.
+
+The OSBF algorithm is a typical Bayesian classifier but enhanced with two
+techniques originally developed for the CRM114 project: Orthogonal Sparse
+Bigrams - OSB, for feature extraction, and Exponential Differential Document
+Count - EDDC (a.k.a. Confidence Factor), for automatic feature selection.
+Combined, these two techniques produce a highly accurate classifier. OSBF
+was designed around two classes, SPAM and NON-SPAM, so its performance
+with more than two classes may not be the same.
+
+spamfilter.lua is an anti-spam filter written in Lua using the OSBF-lua
+module. It takes special advantage of EDDC to introduce TONE-HR, a highly
+effective training method. The combination of OSB, EDDC and TONE-HR to
+enhance a classical Bayesian classifier resulted in the best spam filtering
+performance in TREC's Spam Track 2006 and the CEAS 2008 Live Spam Filter
+Challenge.
diff --git a/mail/OSBF-lua/Makefile b/mail/OSBF-lua/Makefile
new file mode 100644
index 00000000000..1f11f9e5b58
--- /dev/null
+++ b/mail/OSBF-lua/Makefile
@@ -0,0 +1,37 @@
+# $NetBSD: Makefile,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $
+#
+
+DISTNAME= osbf-lua-2.0.4
+CATEGORIES= mail
+MASTER_SITES= http://luaforge.net/frs/download.php/2094/
+
+MAINTAINER= shannonjr@NetBSD.org
+HOMEPAGE= http://osbf-lua.luaforge.net/
+COMMENT= Lua C module for text classification
+#LICENSE= gnu-gpl-v2
+
+USE_TOOLS+= gmake
+
+INSTALL_TARGET= install install_spamfilter
+
+SUBST_CLASSES+= path
+SUBST_STAGE.path= post-patch
+SUBST_MESSAGE.path= Fixing paths in config
+SUBST_FILES.path= config
+SUBST_FILES.path+= spamfilter/cache_report.lua
+SUBST_FILES.path+= spamfilter/classify.sample
+SUBST_FILES.path+= spamfilter/create_databases.lua
+SUBST_FILES.path+= spamfilter/database_status.lua
+SUBST_FILES.path+= spamfilter/getopt.lua
+SUBST_FILES.path+= spamfilter/promailrc.sample
+SUBST_FILES.path+= spamfilter/random.lua
+SUBST_FILES.path+= spamfilter/roc.lua
+SUBST_FILES.path+= spamfilter/spamfilter.lua
+SUBST_FILES.path+= spamfilter/spamfilter_commands.lua
+SUBST_FILES.path+= spamfilter/toer.lua
+SUBST_FILES.path+= spamfilter/train.sample
+SUBST_FILES.path+= docs/index.html
+SUBST_SED.path= -e 's,/usr/local,${PREFIX},g'
+
+.include "../../lang/lua/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/mail/OSBF-lua/PLIST b/mail/OSBF-lua/PLIST
new file mode 100644
index 00000000000..4be9d3472f7
--- /dev/null
+++ b/mail/OSBF-lua/PLIST
@@ -0,0 +1,18 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $
+lib/lua/5.1/libosbf.so.${PKGVERSION}
+lib/lua/5.1/osbf.so
+osbf-lua/cache_report.lua
+osbf-lua/classify.sample
+osbf-lua/create_databases.lua
+osbf-lua/database_status.lua
+osbf-lua/getopt.lua
+osbf-lua/promailrc.sample
+osbf-lua/random.lua
+osbf-lua/roc.lua
+osbf-lua/spamfilter.help
+osbf-lua/spamfilter.lua
+osbf-lua/spamfilter_commands.lua
+osbf-lua/spamfilter_config.lua
+osbf-lua/toer.lua
+osbf-lua/train.sample
+@dirrm osbf-lua
diff --git a/mail/OSBF-lua/distinfo b/mail/OSBF-lua/distinfo
new file mode 100644
index 00000000000..e8b386647cc
--- /dev/null
+++ b/mail/OSBF-lua/distinfo
@@ -0,0 +1,7 @@
+$NetBSD: distinfo,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $
+
+SHA1 (osbf-lua-2.0.4.tar.gz) = 6fd4fb6496c20e9340cdcff4820c50a793e2ea27
+RMD160 (osbf-lua-2.0.4.tar.gz) = ba808072739de2bcb40ce81f0177ef7588508670
+Size (osbf-lua-2.0.4.tar.gz) = 82343 bytes
+SHA1 (patch-aa) = 25fe0abc6543893d88d147e418027cee9a544502
+SHA1 (patch-bb) = a03ceac6e22461359ef3a44564fde12a62056d82
diff --git a/mail/OSBF-lua/patches/patch-aa b/mail/OSBF-lua/patches/patch-aa
new file mode 100644
index 00000000000..a27b1644a28
--- /dev/null
+++ b/mail/OSBF-lua/patches/patch-aa
@@ -0,0 +1,27 @@
+$NetBSD: patch-aa,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $
+
+--- config.orig 2007-01-14 11:57:19.000000000 -0700
++++ config
+@@ -20,20 +20,8 @@ LIB_DIR= /usr/local/lib
+ # OS dependent
+ LIB_EXT= .so
+
+-# if this "autoconf" doesn't work for you, set LIB_OPTION for shared
+-# object manually.
+-LD=$(shell ld -V -o /dev/null 2>&1)
+-ifneq (,$(findstring Solaris,$(LD)))
+- # Solaris - tested with 2.6, gcc 2.95.3 20010315 and Solaris ld
+- LIB_OPTION= -G -dy
+-else
+- ifneq (,$(findstring GNU,$(LD)))
+- # GNU ld
+- LIB_OPTION= -shared -dy
+- else
+- $(error couldn't identify your ld. Please set the shared option manually)
+- endif
+-endif
++# GNU ld
++LIB_OPTION= -shared -dy
+
+ # Choose the PIC option
+ # safest, works on most systems
diff --git a/mail/OSBF-lua/patches/patch-bb b/mail/OSBF-lua/patches/patch-bb
new file mode 100644
index 00000000000..cdad522acd9
--- /dev/null
+++ b/mail/OSBF-lua/patches/patch-bb
@@ -0,0 +1,13 @@
+$NetBSD: patch-bb,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $
+
+--- Makefile.orig 2007-01-14 11:57:19.000000000 -0700
++++ Makefile
+@@ -18,6 +18,8 @@ lib: $(LIBNAME)
+ $(LIBNAME): $(OBJS)
+ $(CC) $(CFLAGS) $(LIB_OPTION) -o $(LIBNAME) $(OBJS) $(LIBS)
+
++all: $(LIBNAME)
++
+ install: $(LIBNAME)
+ mkdir -p $(LUA_LIBDIR)
+ strip $(LIBNAME)