summaryrefslogtreecommitdiff
path: root/mail
diff options
context:
space:
mode:
authorshannonjr <shannonjr>2008-09-08 10:31:23 +0000
committershannonjr <shannonjr>2008-09-08 10:31:23 +0000
commit2838723f28afc263e3b62b9ed5822adcca57bae5 (patch)
tree0c680084d53298baacf335f1b6be186ba6e57e7a /mail
parent5b46cb7ff4c1acb17855c3179d49f0d5b9cd1724 (diff)
downloadpkgsrc-2838723f28afc263e3b62b9ed5822adcca57bae5.tar.gz
OSBF-Lua (Orthogonal Sparse Bigrams with confidence Factor) is a Lua C module
for text classification. It is a port of the OSBF classifier implemented in the CRM114 project. This implementation attempts to put focus on the classification task itself by using Lua as the scripting language, a powerful yet light-weight and fast language, which makes it easier to build and test more elaborate filters and training methods. The OSBF algorithm is a typical Bayesian classifier but enhanced with two techniques originally developed for the CRM114 project: Orthogonal Sparse Bigrams - OSB, for feature extraction, and Exponential Differential Document Count - EDDC (a.k.a. Confidence Factor), for automatic feature selection. Combined, these two techniques produce a highly accurate classifier. OSBF was developed with a focus on two classes, SPAM and NON-SPAM, so the performance for more than two classes may not be the same. spamfilter.lua is an anti-spam filter written in Lua using the OSBF-lua module. It takes special advantage of EDDC to introduce TONE-HR, a highly effective training method. The combination of OSB, EDDC and TONE-HR to enhance a classical Bayesian classifier resulted in the best spam filtering performance in TREC's Spam Track 2006 and the CEAS 2008 Live Spam Filter Challenge.
Diffstat (limited to 'mail')
-rw-r--r--mail/lua-OSBF/DESCR21
-rw-r--r--mail/lua-OSBF/Makefile36
-rw-r--r--mail/lua-OSBF/PLIST20
-rw-r--r--mail/lua-OSBF/distinfo7
-rw-r--r--mail/lua-OSBF/patches/patch-aa27
-rw-r--r--mail/lua-OSBF/patches/patch-bb13
6 files changed, 124 insertions, 0 deletions
diff --git a/mail/lua-OSBF/DESCR b/mail/lua-OSBF/DESCR
new file mode 100644
index 00000000000..b9a121a32f0
--- /dev/null
+++ b/mail/lua-OSBF/DESCR
@@ -0,0 +1,21 @@
+OSBF-Lua (Orthogonal Sparse Bigrams with confidence Factor) is a Lua C module
+for text classification. It is a port of the OSBF classifier implemented in
+the CRM114 project. This implementation attempts to put focus on the
+classification task itself by using Lua as the scripting language, a powerful
+yet light-weight and fast language, which makes it easier to build and test
+more elaborate filters and training methods.
+
+The OSBF algorithm is a typical Bayesian classifier but enhanced with two
+techniques originally developed for the CRM114 project: Orthogonal Sparse
+Bigrams - OSB, for feature extraction, and Exponential Differential Document
+Count - EDDC (a.k.a. Confidence Factor), for automatic feature selection.
+Combined, these two techniques produce a highly accurate classifier. OSBF
+was developed with a focus on two classes, SPAM and NON-SPAM, so the performance
+for more than two classes may not be the same.
+
+spamfilter.lua is an anti-spam filter written in Lua using the OSBF-lua
+module. It takes special advantage of EDDC to introduce TONE-HR, a highly
+effective training method. The combination of OSB, EDDC and TONE-HR to
+enhance a classical Bayesian classifier resulted in the best spam filtering
+performance in TREC's Spam Track 2006 and the CEAS 2008 Live Spam Filter
+Challenge.
diff --git a/mail/lua-OSBF/Makefile b/mail/lua-OSBF/Makefile
new file mode 100644
index 00000000000..03d48e902e6
--- /dev/null
+++ b/mail/lua-OSBF/Makefile
@@ -0,0 +1,36 @@
+# $NetBSD: Makefile,v 1.1.1.1 2008/09/08 10:31:23 shannonjr Exp $
+# OSBF-Lua text classification module; /usr/local is rewritten to ${PREFIX} post-patch.
+
+DISTNAME= osbf-lua-2.0.4
+CATEGORIES= mail
+MASTER_SITES= http://luaforge.net/frs/download.php/2094/
+
+MAINTAINER= shannonjr@NetBSD.org
+HOMEPAGE= http://osbf-lua.luaforge.net/
+COMMENT= Lua C module for text classification
+
+USE_TOOLS+= gmake
+
+INSTALL_TARGET= install install_spamfilter
+
+SUBST_CLASSES+= path
+SUBST_STAGE.path= post-patch
+SUBST_MESSAGE.path= Fixing paths in config
+SUBST_FILES.path= config
+SUBST_FILES.path+= spamfilter/cache_report.lua
+SUBST_FILES.path+= spamfilter/classify.sample
+SUBST_FILES.path+= spamfilter/create_databases.lua
+SUBST_FILES.path+= spamfilter/database_status.lua
+SUBST_FILES.path+= spamfilter/getopt.lua
+SUBST_FILES.path+= spamfilter/promailrc.sample
+SUBST_FILES.path+= spamfilter/random.lua
+SUBST_FILES.path+= spamfilter/roc.lua
+SUBST_FILES.path+= spamfilter/spamfilter.lua
+SUBST_FILES.path+= spamfilter/spamfilter_commands.lua
+SUBST_FILES.path+= spamfilter/toer.lua
+SUBST_FILES.path+= spamfilter/train.sample
+SUBST_FILES.path+= docs/index.html
+SUBST_SED.path= -e 's,/usr/local,${PREFIX},g'
+
+.include "../../lang/lua/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/mail/lua-OSBF/PLIST b/mail/lua-OSBF/PLIST
new file mode 100644
index 00000000000..8c04beb26f6
--- /dev/null
+++ b/mail/lua-OSBF/PLIST
@@ -0,0 +1,20 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2008/09/08 10:31:23 shannonjr Exp $
+lib/lua/5.1/libosbf.so.${PKGVERSION}
+lib/lua/5.1/osbf.so
+osbf-lua/cache_report.lua
+osbf-lua/classify.sample
+osbf-lua/create_databases.lua
+osbf-lua/database_status.lua
+osbf-lua/getopt.lua
+osbf-lua/promailrc.sample
+osbf-lua/random.lua
+osbf-lua/roc.lua
+osbf-lua/spamfilter.help
+osbf-lua/spamfilter.lua
+osbf-lua/spamfilter_commands.lua
+osbf-lua/spamfilter_config.lua
+osbf-lua/toer.lua
+osbf-lua/train.sample
+@dirrm osbf-lua
+@dirrm lib/lua/5.1
+@dirrm lib/lua
diff --git a/mail/lua-OSBF/distinfo b/mail/lua-OSBF/distinfo
new file mode 100644
index 00000000000..f875f18fd05
--- /dev/null
+++ b/mail/lua-OSBF/distinfo
@@ -0,0 +1,7 @@
+$NetBSD: distinfo,v 1.1.1.1 2008/09/08 10:31:23 shannonjr Exp $
+
+SHA1 (osbf-lua-2.0.4.tar.gz) = 6fd4fb6496c20e9340cdcff4820c50a793e2ea27
+RMD160 (osbf-lua-2.0.4.tar.gz) = ba808072739de2bcb40ce81f0177ef7588508670
+Size (osbf-lua-2.0.4.tar.gz) = 82343 bytes
+SHA1 (patch-aa) = 25fe0abc6543893d88d147e418027cee9a544502
+SHA1 (patch-bb) = a03ceac6e22461359ef3a44564fde12a62056d82
diff --git a/mail/lua-OSBF/patches/patch-aa b/mail/lua-OSBF/patches/patch-aa
new file mode 100644
index 00000000000..9e27699c42f
--- /dev/null
+++ b/mail/lua-OSBF/patches/patch-aa
@@ -0,0 +1,27 @@
+$NetBSD: patch-aa,v 1.1.1.1 2008/09/08 10:31:23 shannonjr Exp $
+
+--- config.orig 2007-01-14 11:57:19.000000000 -0700
++++ config
+@@ -20,20 +20,8 @@ LIB_DIR= /usr/local/lib
+ # OS dependent
+ LIB_EXT= .so
+
+-# if this "autoconf" doesn't work for you, set LIB_OPTION for shared
+-# object manually.
+-LD=$(shell ld -V -o /dev/null 2>&1)
+-ifneq (,$(findstring Solaris,$(LD)))
+- # Solaris - tested with 2.6, gcc 2.95.3 20010315 and Solaris ld
+- LIB_OPTION= -G -dy
+-else
+- ifneq (,$(findstring GNU,$(LD)))
+- # GNU ld
+- LIB_OPTION= -shared -dy
+- else
+- $(error couldn't identify your ld. Please set the shared option manually)
+- endif
+-endif
++# GNU ld
++LIB_OPTION= -shared -dy
+
+ # Choose the PIC option
+ # safest, works on most systems
diff --git a/mail/lua-OSBF/patches/patch-bb b/mail/lua-OSBF/patches/patch-bb
new file mode 100644
index 00000000000..ac4aabbb332
--- /dev/null
+++ b/mail/lua-OSBF/patches/patch-bb
@@ -0,0 +1,13 @@
+$NetBSD: patch-bb,v 1.1.1.1 2008/09/08 10:31:23 shannonjr Exp $
+
+--- Makefile.orig 2007-01-14 11:57:19.000000000 -0700
++++ Makefile
+@@ -18,6 +18,8 @@ lib: $(LIBNAME)
+ $(LIBNAME): $(OBJS)
+ $(CC) $(CFLAGS) $(LIB_OPTION) -o $(LIBNAME) $(OBJS) $(LIBS)
+
++all: $(LIBNAME)
++
+ install: $(LIBNAME)
+ mkdir -p $(LUA_LIBDIR)
+ strip $(LIBNAME)