diff options
author | shannonjr <shannonjr> | 2008-10-13 11:29:53 +0000 |
---|---|---|
committer | shannonjr <shannonjr> | 2008-10-13 11:29:53 +0000 |
commit | 90bfda788faa89b1cb2f9c4a2b0afecd0f57f78b (patch) | |
tree | acffac4ce9b9c4dbfce6cfb6348a63a4eb1a158e /mail/OSBF-lua | |
parent | b93590833e1dafaec827a30edc218ec646dd9a5e (diff) | |
download | pkgsrc-90bfda788faa89b1cb2f9c4a2b0afecd0f57f78b.tar.gz |
Rename lua-OSBF to OSBF-lua for consistency with package name
Diffstat (limited to 'mail/OSBF-lua')
-rw-r--r-- | mail/OSBF-lua/DESCR | 21 | ||||
-rw-r--r-- | mail/OSBF-lua/Makefile | 37 | ||||
-rw-r--r-- | mail/OSBF-lua/PLIST | 18 | ||||
-rw-r--r-- | mail/OSBF-lua/distinfo | 7 | ||||
-rw-r--r-- | mail/OSBF-lua/patches/patch-aa | 27 | ||||
-rw-r--r-- | mail/OSBF-lua/patches/patch-bb | 13 |
6 files changed, 123 insertions, 0 deletions
diff --git a/mail/OSBF-lua/DESCR b/mail/OSBF-lua/DESCR new file mode 100644 index 00000000000..b9a121a32f0 --- /dev/null +++ b/mail/OSBF-lua/DESCR @@ -0,0 +1,21 @@ +OSBF-Lua (Orthogonal Sparse Bigrams with confidence Factor) is a Lua C module +for text classification. It is a port of the OSBF classifier implemented in +the CRM114 project. This implementation attempts to put focus on the +classification task itself by using Lua as the scripting language, a powerful +yet light-weight and fast language, which makes it easier to build and test +more elaborate filters and training methods. + +The OSBF algorithm is a typical Bayesian classifier but enhanced with two +techniques originally developed for the CRM114 project: Orthogonal Sparse +Bigrams - OSB, for feature extraction, and Exponential Differential Document +Count - EDDC (a.k.a. Confidence Factor), for automatic feature selection. +Combined, these two techniques produce a highly accurate classifier. OSBF +was developed focused on two classes, SPAM and NON-SPAM, so the performance +for more than two classes may not be the same. + +spamfilter.lua is an anti-spam filter written in Lua using the OSBF-lua +module. It takes special advantage of EDDC to introduce TONE-HR, a highly +effective training method. The combination of OSB, EDDC and TONE-HR to +enhance a classical Bayesian classifier resulted in the best spam filtering +performance in TREC's Spam Track 2006 and the CEAS 2008 Live Spam Filter +Challenge. 
diff --git a/mail/OSBF-lua/Makefile b/mail/OSBF-lua/Makefile new file mode 100644 index 00000000000..1f11f9e5b58 --- /dev/null +++ b/mail/OSBF-lua/Makefile @@ -0,0 +1,37 @@ +# $NetBSD: Makefile,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $ +# + +DISTNAME= osbf-lua-2.0.4 +CATEGORIES= mail +MASTER_SITES= http://luaforge.net/frs/download.php/2094/ + +MAINTAINER= shannonjr@NetBSD.org +HOMEPAGE= http://osbf-lua.luaforge.net/ +COMMENT= Lua C module for text classification +#LICENSE= gnu-gpl-v2 + +USE_TOOLS+= gmake + +INSTALL_TARGET= install install_spamfilter + +SUBST_CLASSES+= path +SUBST_STAGE.path= post-patch +SUBST_MESSAGE.path= Fixing paths in config +SUBST_FILES.path= config +SUBST_FILES.path+= spamfilter/cache_report.lua +SUBST_FILES.path+= spamfilter/classify.sample +SUBST_FILES.path+= spamfilter/create_databases.lua +SUBST_FILES.path+= spamfilter/database_status.lua +SUBST_FILES.path+= spamfilter/getopt.lua +SUBST_FILES.path+= spamfilter/promailrc.sample +SUBST_FILES.path+= spamfilter/random.lua +SUBST_FILES.path+= spamfilter/roc.lua +SUBST_FILES.path+= spamfilter/spamfilter.lua +SUBST_FILES.path+= spamfilter/spamfilter_commands.lua +SUBST_FILES.path+= spamfilter/toer.lua +SUBST_FILES.path+= spamfilter/train.sample +SUBST_FILES.path+= docs/index.html +SUBST_SED.path= -e 's,/usr/local,${PREFIX},g' + +.include "../../lang/lua/buildlink3.mk" +.include "../../mk/bsd.pkg.mk" diff --git a/mail/OSBF-lua/PLIST b/mail/OSBF-lua/PLIST new file mode 100644 index 00000000000..4be9d3472f7 --- /dev/null +++ b/mail/OSBF-lua/PLIST @@ -0,0 +1,18 @@ +@comment $NetBSD: PLIST,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $ +lib/lua/5.1/libosbf.so.${PKGVERSION} +lib/lua/5.1/osbf.so +osbf-lua/cache_report.lua +osbf-lua/classify.sample +osbf-lua/create_databases.lua +osbf-lua/database_status.lua +osbf-lua/getopt.lua +osbf-lua/promailrc.sample +osbf-lua/random.lua +osbf-lua/roc.lua +osbf-lua/spamfilter.help +osbf-lua/spamfilter.lua +osbf-lua/spamfilter_commands.lua 
+osbf-lua/spamfilter_config.lua +osbf-lua/toer.lua +osbf-lua/train.sample +@dirrm osbf-lua diff --git a/mail/OSBF-lua/distinfo b/mail/OSBF-lua/distinfo new file mode 100644 index 00000000000..e8b386647cc --- /dev/null +++ b/mail/OSBF-lua/distinfo @@ -0,0 +1,7 @@ +$NetBSD: distinfo,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $ + +SHA1 (osbf-lua-2.0.4.tar.gz) = 6fd4fb6496c20e9340cdcff4820c50a793e2ea27 +RMD160 (osbf-lua-2.0.4.tar.gz) = ba808072739de2bcb40ce81f0177ef7588508670 +Size (osbf-lua-2.0.4.tar.gz) = 82343 bytes +SHA1 (patch-aa) = 25fe0abc6543893d88d147e418027cee9a544502 +SHA1 (patch-bb) = a03ceac6e22461359ef3a44564fde12a62056d82 diff --git a/mail/OSBF-lua/patches/patch-aa b/mail/OSBF-lua/patches/patch-aa new file mode 100644 index 00000000000..a27b1644a28 --- /dev/null +++ b/mail/OSBF-lua/patches/patch-aa @@ -0,0 +1,27 @@ +$NetBSD: patch-aa,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $ + +--- config.orig 2007-01-14 11:57:19.000000000 -0700 ++++ config +@@ -20,20 +20,8 @@ LIB_DIR= /usr/local/lib + # OS dependent + LIB_EXT= .so + +-# if this "autoconf" doesn't work for you, set LIB_OPTION for shared +-# object manually. +-LD=$(shell ld -V -o /dev/null 2>&1) +-ifneq (,$(findstring Solaris,$(LD))) +- # Solaris - tested with 2.6, gcc 2.95.3 20010315 and Solaris ld +- LIB_OPTION= -G -dy +-else +- ifneq (,$(findstring GNU,$(LD))) +- # GNU ld +- LIB_OPTION= -shared -dy +- else +- $(error couldn't identify your ld. 
Please set the shared option manually) +- endif +-endif ++# GNU ld ++LIB_OPTION= -shared -dy + + # Choose the PIC option + # safest, works on most systems diff --git a/mail/OSBF-lua/patches/patch-bb b/mail/OSBF-lua/patches/patch-bb new file mode 100644 index 00000000000..cdad522acd9 --- /dev/null +++ b/mail/OSBF-lua/patches/patch-bb @@ -0,0 +1,13 @@ +$NetBSD: patch-bb,v 1.1.1.1 2008/10/13 11:29:53 shannonjr Exp $ + +--- Makefile.orig 2007-01-14 11:57:19.000000000 -0700 ++++ Makefile +@@ -18,6 +18,8 @@ lib: $(LIBNAME) + $(LIBNAME): $(OBJS) + $(CC) $(CFLAGS) $(LIB_OPTION) -o $(LIBNAME) $(OBJS) $(LIBS) + ++all: $(LIBNAME) ++ + install: $(LIBNAME) + mkdir -p $(LUA_LIBDIR) + strip $(LIBNAME) |