summaryrefslogtreecommitdiff
path: root/math
diff options
context:
space:
mode:
author cheusov <cheusov@pkgsrc.org> 2014-10-29 17:06:40 +0000
committer cheusov <cheusov@pkgsrc.org> 2014-10-29 17:06:40 +0000
commit 7d3a4376cb48055b7edbf40cf09fd39c287d9665 (patch)
tree b62dcbafd4cb5c9dcac5d2e0603fc7a83078b99d /math
parent 2d883486e69c9e80c3e2b29b8fa7141dbc212cba (diff)
download pkgsrc-7d3a4376cb48055b7edbf40cf09fd39c287d9665.tar.gz
LibShortText is an open source tool for short-text classification and
analysis. It can handle the classification of, for example, titles,
questions, sentences, and short messages. Main features of
LibShortText include
 * It is more efficient than general text-mining packages. On a
   typical computer, processing and training 10 million short texts
   takes only around half an hour.
 * The fast training and testing is built upon the linear classifier
   LIBLINEAR.
 * Default options often work well without tedious tuning.
 * An interactive tool for error analysis is included. Based on the
   property that each short text contains few words, LibShortText
   provides details in predicting each text.
Diffstat (limited to 'math')
-rw-r--r-- math/libshorttext/DESCR | 12
-rw-r--r-- math/libshorttext/Makefile | 62
-rw-r--r-- math/libshorttext/PLIST | 35
-rw-r--r-- math/libshorttext/distinfo | 6
-rw-r--r-- math/libshorttext/patches/patch-text-train.py | 17
5 files changed, 132 insertions, 0 deletions
diff --git a/math/libshorttext/DESCR b/math/libshorttext/DESCR
new file mode 100644
index 00000000000..cdfa5f8087d
--- /dev/null
+++ b/math/libshorttext/DESCR
@@ -0,0 +1,12 @@
+LibShortText is an open source tool for short-text classification and
+analysis. It can handle the classification of, for example, titles,
+questions, sentences, and short messages. Main features of
+LibShortText include
+ * It is more efficient than general text-mining packages. On a
+ typical computer, processing and training 10 million short texts
+ takes only around half an hour.
+ * The fast training and testing is built upon the linear classifier LIBLINEAR
+ * Default options often work well without tedious tuning.
+ * An interactive tool for error analysis is included. Based on the
+ property that each short text contains few words, LibShortText
+ provides details in predicting each text.
diff --git a/math/libshorttext/Makefile b/math/libshorttext/Makefile
new file mode 100644
index 00000000000..7a0e9e8f549
--- /dev/null
+++ b/math/libshorttext/Makefile
@@ -0,0 +1,62 @@
+# $NetBSD: Makefile,v 1.1.1.1 2014/10/29 17:06:40 cheusov Exp $
+
+DISTNAME= libshorttext-1.1
+CATEGORIES= math textproc
+MASTER_SITES= http://www.csie.ntu.edu.tw/~cjlin/libshorttext/
+
+MAINTAINER= cheusov@NetBSD.org
+HOMEPAGE= http://www.csie.ntu.edu.tw/~cjlin/libshorttext/
+COMMENT= Library for short-text classification and analysis
+LICENSE= modified-bsd
+
+DEPENDS+= liblinear-[0-9]*:../../math/liblinear \
+ libsvm-[0-9]*:../../math/libsvm
+
+USE_LANGUAGES= c c++
+
+REPLACE_PYTHON= *.py demo/*.py ${PYUTILS}
+PLIST_SUBST+= PYSITELIB=${PYSITELIB}
+INSTALLATION_DIRS= bin share/examples/libshorttext/demo \
+ share/doc/libshorttext ${ADDITIONAL_FILES:H:S,^,${PYSITELIB}/,:O:u}
+
+PYUTILS+= \
+ libshorttext/classifier/classifier_impl.py \
+ libshorttext/classifier/learner/learner_impl.py \
+ libshorttext/analyzer/analyzer_impl.py \
+ libshorttext/analyzer/selector.py \
+ libshorttext/classifier/learner/liblinear/python/liblinear.py \
+ libshorttext/classifier/learner/liblinear/python/liblinearutil.py \
+ libshorttext/converter/converter_impl.py \
+ libshorttext/converter/stemmer/porter.py \
+ libshorttext/classifier/grid.py
+
+ADDITIONAL_FILES+= \
+ ${PYUTILS} \
+ libshorttext/analyzer/__init__.py \
+ libshorttext/converter/stop-words/stoplist-nsp.regex.pickle \
+ libshorttext/converter/stop-words/stoplist-nsp.regex \
+ libshorttext/converter/stemmer/__init__.py \
+ libshorttext/converter/stemmer/porter.so.1 \
+ libshorttext/converter/__init__.py \
+ libshorttext/__init__.py \
+ libshorttext/classifier/__init__.py \
+ libshorttext/classifier/learner/liblinear/train \
+ libshorttext/classifier/learner/liblinear/predict \
+ libshorttext/classifier/learner/liblinear/liblinear.so.1 \
+ libshorttext/classifier/learner/__init__.py \
+ libshorttext/classifier/learner/util.so.1
+
+do-install:
+ set -e; cd ${WRKSRC}; \
+ ${INSTALL_SCRIPT} text-predict.py text-train.py text2svm.py \
+ ${DESTDIR}${PREFIX}/bin; \
+ ${INSTALL_DATA} README \
+ ${DESTDIR}${PREFIX}/share/doc/${PKGBASE}; \
+ ${CP} -Rp demo ${DESTDIR}${PREFIX}/share/examples/${PKGBASE}
+.for f in ${ADDITIONAL_FILES}
+ ${CP} -p ${WRKSRC}/${f} ${DESTDIR}${PREFIX}/${PYSITELIB}/${f}
+.endfor
+
+.include "../../lang/python/extension.mk"
+.include "../../lang/python/application.mk"
+.include "../../mk/bsd.pkg.mk"
diff --git a/math/libshorttext/PLIST b/math/libshorttext/PLIST
new file mode 100644
index 00000000000..bd4831f4a60
--- /dev/null
+++ b/math/libshorttext/PLIST
@@ -0,0 +1,35 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2014/10/29 17:06:40 cheusov Exp $
+bin/text-predict.py
+bin/text-train.py
+bin/text2svm.py
+share/doc/libshorttext/README
+share/examples/libshorttext/demo/demo.py
+share/examples/libshorttext/demo/demo.sh
+share/examples/libshorttext/demo/test_feats1
+share/examples/libshorttext/demo/test_feats2
+share/examples/libshorttext/demo/test_file
+share/examples/libshorttext/demo/train_feats1
+share/examples/libshorttext/demo/train_feats2
+share/examples/libshorttext/demo/train_file
+${PYSITELIB}/libshorttext/analyzer/analyzer_impl.py
+${PYSITELIB}/libshorttext/__init__.py
+${PYSITELIB}/libshorttext/analyzer/__init__.py
+${PYSITELIB}/libshorttext/analyzer/selector.py
+${PYSITELIB}/libshorttext/classifier/__init__.py
+${PYSITELIB}/libshorttext/classifier/classifier_impl.py
+${PYSITELIB}/libshorttext/classifier/grid.py
+${PYSITELIB}/libshorttext/classifier/learner/__init__.py
+${PYSITELIB}/libshorttext/classifier/learner/learner_impl.py
+${PYSITELIB}/libshorttext/classifier/learner/liblinear/liblinear.so.1
+${PYSITELIB}/libshorttext/classifier/learner/liblinear/predict
+${PYSITELIB}/libshorttext/classifier/learner/liblinear/python/liblinear.py
+${PYSITELIB}/libshorttext/classifier/learner/liblinear/python/liblinearutil.py
+${PYSITELIB}/libshorttext/classifier/learner/liblinear/train
+${PYSITELIB}/libshorttext/classifier/learner/util.so.1
+${PYSITELIB}/libshorttext/converter/__init__.py
+${PYSITELIB}/libshorttext/converter/converter_impl.py
+${PYSITELIB}/libshorttext/converter/stemmer/__init__.py
+${PYSITELIB}/libshorttext/converter/stemmer/porter.py
+${PYSITELIB}/libshorttext/converter/stemmer/porter.so.1
+${PYSITELIB}/libshorttext/converter/stop-words/stoplist-nsp.regex
+${PYSITELIB}/libshorttext/converter/stop-words/stoplist-nsp.regex.pickle
diff --git a/math/libshorttext/distinfo b/math/libshorttext/distinfo
new file mode 100644
index 00000000000..0d8ed751caf
--- /dev/null
+++ b/math/libshorttext/distinfo
@@ -0,0 +1,6 @@
+$NetBSD: distinfo,v 1.1.1.1 2014/10/29 17:06:40 cheusov Exp $
+
+SHA1 (libshorttext-1.1.tar.gz) = 2d9705195682fa1f25de30bd66711685f974a8c0
+RMD160 (libshorttext-1.1.tar.gz) = 569d2f2a64f8fc311766b08cbef7086e1340ce55
+Size (libshorttext-1.1.tar.gz) = 817775 bytes
+SHA1 (patch-text-train.py) = 6b99bdfcfd69a9d7e83cafc852202dc23485dfeb
diff --git a/math/libshorttext/patches/patch-text-train.py b/math/libshorttext/patches/patch-text-train.py
new file mode 100644
index 00000000000..3843f381712
--- /dev/null
+++ b/math/libshorttext/patches/patch-text-train.py
@@ -0,0 +1,17 @@
+$NetBSD: patch-text-train.py,v 1.1.1.1 2014/10/29 17:06:40 cheusov Exp $
+
+# different executables are provided by libsvm and liblinear packages
+--- text-train.py.orig 2013-09-09 08:53:54.000000000 +0000
++++ text-train.py
+@@ -150,9 +150,9 @@ if __name__ == '__main__':
+ extra_svm_files += [value]
+ elif argv[i] == '-x':
+ if value.lower() == 'grid':
+- system(path.dirname(LIBLINEAR_HOME) + '/../grid.py')
++ system('svm-grid')
+ elif value.lower() == 'liblinear':
+- system(LIBLINEAR_HOME + '/train')
++ system('liblinear-train')
+ else:
+ stderr.write('Error: Invalid usage of option -x. No command ' + value + '\n')
+ exit_with_help()