diff options
author | cheusov <cheusov@pkgsrc.org> | 2014-10-29 17:06:40 +0000 |
---|---|---|
committer | cheusov <cheusov@pkgsrc.org> | 2014-10-29 17:06:40 +0000 |
commit | 2ecf8ca2090297aa34ddb3aacbc219670d9efc11 (patch) | |
tree | b62dcbafd4cb5c9dcac5d2e0603fc7a83078b99d /math/libshorttext | |
parent | 33bea8f957d29ab76f94ff21f3221bfe027d6eff (diff) | |
download | pkgsrc-2ecf8ca2090297aa34ddb3aacbc219670d9efc11.tar.gz |
LibShortText is an open source tool for short-text classification and
analysis. It can handle the classification of, for example, titles,
questions, sentences, and short messages. Main features of
LibShortText include:
* It is more efficient than general text-mining packages. On a
typical computer, processing and training 10 million short texts
takes only around half an hour.
* The fast training and testing is built upon the linear classifier
LIBLINEAR.
* Default options often work well without tedious tuning.
* An interactive tool for error analysis is included. Based on the
property that each short text contains few words, LibShortText
provides details in predicting each text.
Diffstat (limited to 'math/libshorttext')
-rw-r--r-- | math/libshorttext/DESCR | 12 | ||||
-rw-r--r-- | math/libshorttext/Makefile | 62 | ||||
-rw-r--r-- | math/libshorttext/PLIST | 35 | ||||
-rw-r--r-- | math/libshorttext/distinfo | 6 | ||||
-rw-r--r-- | math/libshorttext/patches/patch-text-train.py | 17 |
5 files changed, 132 insertions, 0 deletions
diff --git a/math/libshorttext/DESCR b/math/libshorttext/DESCR new file mode 100644 index 00000000000..cdfa5f8087d --- /dev/null +++ b/math/libshorttext/DESCR @@ -0,0 +1,12 @@ +LibShortText is an open source tool for short-text classification and +analysis. It can handle the classification of, for example, titles, +questions, sentences, and short messages. Main features of +LibShortText include + * It is more efficient than general text-mining packages. On a + typical computer, processing and training 10 million short texts + takes only around half an hour. + * The fast training and testing is built upon the linear classifier LIBLINEAR + * Default options often work well without tedious tuning. + * An interactive tool for error analysis is included. Based on the + property that each short text contains few words, LibShortText + provides details in predicting each text. diff --git a/math/libshorttext/Makefile b/math/libshorttext/Makefile new file mode 100644 index 00000000000..7a0e9e8f549 --- /dev/null +++ b/math/libshorttext/Makefile @@ -0,0 +1,62 @@ +# $NetBSD: Makefile,v 1.1.1.1 2014/10/29 17:06:40 cheusov Exp $ + +DISTNAME= libshorttext-1.1 +CATEGORIES= math textproc +MASTER_SITES= http://www.csie.ntu.edu.tw/~cjlin/libshorttext/ + +MAINTAINER= cheusov@NetBSD.org +HOMEPAGE= http://www.csie.ntu.edu.tw/~cjlin/libshorttext/ +COMMENT= Library for short-text classification and analysis +LICENSE= modified-bsd + +DEPENDS+= liblinear-[0-9]*:../../math/liblinear \ + libsvm-[0-9]*:../../math/libsvm + +USE_LANGUAGES= c c++ + +REPLACE_PYTHON= *.py demo/*.py ${PYUTILS} +PLIST_SUBST+= PYSITELIB=${PYSITELIB} +INSTALLATION_DIRS= bin share/examples/libshorttext/demo \ + share/doc/libshorttext ${ADDITIONAL_FILES:H:S,^,${PYSITELIB}/,:O:u} + +PYUTILS+= \ + libshorttext/classifier/classifier_impl.py \ + libshorttext/classifier/learner/learner_impl.py \ + libshorttext/analyzer/analyzer_impl.py \ + libshorttext/analyzer/selector.py \ + 
libshorttext/classifier/learner/liblinear/python/liblinear.py \ + libshorttext/classifier/learner/liblinear/python/liblinearutil.py \ + libshorttext/converter/converter_impl.py \ + libshorttext/converter/stemmer/porter.py \ + libshorttext/classifier/grid.py + +ADDITIONAL_FILES+= \ + ${PYUTILS} \ + libshorttext/analyzer/__init__.py \ + libshorttext/converter/stop-words/stoplist-nsp.regex.pickle \ + libshorttext/converter/stop-words/stoplist-nsp.regex \ + libshorttext/converter/stemmer/__init__.py \ + libshorttext/converter/stemmer/porter.so.1 \ + libshorttext/converter/__init__.py \ + libshorttext/__init__.py \ + libshorttext/classifier/__init__.py \ + libshorttext/classifier/learner/liblinear/train \ + libshorttext/classifier/learner/liblinear/predict \ + libshorttext/classifier/learner/liblinear/liblinear.so.1 \ + libshorttext/classifier/learner/__init__.py \ + libshorttext/classifier/learner/util.so.1 + +do-install: + set -e; cd ${WRKSRC}; \ + ${INSTALL_SCRIPT} text-predict.py text-train.py text2svm.py \ + ${DESTDIR}${PREFIX}/bin; \ + ${INSTALL_DATA} README \ + ${DESTDIR}${PREFIX}/share/doc/${PKGBASE}; \ + ${CP} -Rp demo ${DESTDIR}${PREFIX}/share/examples/${PKGBASE} +.for f in ${ADDITIONAL_FILES} + ${CP} -p ${WRKSRC}/${f} ${DESTDIR}${PREFIX}/${PYSITELIB}/${f} +.endfor + +.include "../../lang/python/extension.mk" +.include "../../lang/python/application.mk" +.include "../../mk/bsd.pkg.mk" diff --git a/math/libshorttext/PLIST b/math/libshorttext/PLIST new file mode 100644 index 00000000000..bd4831f4a60 --- /dev/null +++ b/math/libshorttext/PLIST @@ -0,0 +1,35 @@ +@comment $NetBSD: PLIST,v 1.1.1.1 2014/10/29 17:06:40 cheusov Exp $ +bin/text-predict.py +bin/text-train.py +bin/text2svm.py +share/doc/libshorttext/README +share/examples/libshorttext/demo/demo.py +share/examples/libshorttext/demo/demo.sh +share/examples/libshorttext/demo/test_feats1 +share/examples/libshorttext/demo/test_feats2 +share/examples/libshorttext/demo/test_file 
+share/examples/libshorttext/demo/train_feats1 +share/examples/libshorttext/demo/train_feats2 +share/examples/libshorttext/demo/train_file +${PYSITELIB}/libshorttext/analyzer/analyzer_impl.py +${PYSITELIB}/libshorttext/__init__.py +${PYSITELIB}/libshorttext/analyzer/__init__.py +${PYSITELIB}/libshorttext/analyzer/selector.py +${PYSITELIB}/libshorttext/classifier/__init__.py +${PYSITELIB}/libshorttext/classifier/classifier_impl.py +${PYSITELIB}/libshorttext/classifier/grid.py +${PYSITELIB}/libshorttext/classifier/learner/__init__.py +${PYSITELIB}/libshorttext/classifier/learner/learner_impl.py +${PYSITELIB}/libshorttext/classifier/learner/liblinear/liblinear.so.1 +${PYSITELIB}/libshorttext/classifier/learner/liblinear/predict +${PYSITELIB}/libshorttext/classifier/learner/liblinear/python/liblinear.py +${PYSITELIB}/libshorttext/classifier/learner/liblinear/python/liblinearutil.py +${PYSITELIB}/libshorttext/classifier/learner/liblinear/train +${PYSITELIB}/libshorttext/classifier/learner/util.so.1 +${PYSITELIB}/libshorttext/converter/__init__.py +${PYSITELIB}/libshorttext/converter/converter_impl.py +${PYSITELIB}/libshorttext/converter/stemmer/__init__.py +${PYSITELIB}/libshorttext/converter/stemmer/porter.py +${PYSITELIB}/libshorttext/converter/stemmer/porter.so.1 +${PYSITELIB}/libshorttext/converter/stop-words/stoplist-nsp.regex +${PYSITELIB}/libshorttext/converter/stop-words/stoplist-nsp.regex.pickle diff --git a/math/libshorttext/distinfo b/math/libshorttext/distinfo new file mode 100644 index 00000000000..0d8ed751caf --- /dev/null +++ b/math/libshorttext/distinfo @@ -0,0 +1,6 @@ +$NetBSD: distinfo,v 1.1.1.1 2014/10/29 17:06:40 cheusov Exp $ + +SHA1 (libshorttext-1.1.tar.gz) = 2d9705195682fa1f25de30bd66711685f974a8c0 +RMD160 (libshorttext-1.1.tar.gz) = 569d2f2a64f8fc311766b08cbef7086e1340ce55 +Size (libshorttext-1.1.tar.gz) = 817775 bytes +SHA1 (patch-text-train.py) = 6b99bdfcfd69a9d7e83cafc852202dc23485dfeb diff --git 
a/math/libshorttext/patches/patch-text-train.py b/math/libshorttext/patches/patch-text-train.py new file mode 100644 index 00000000000..3843f381712 --- /dev/null +++ b/math/libshorttext/patches/patch-text-train.py @@ -0,0 +1,17 @@ +$NetBSD: patch-text-train.py,v 1.1.1.1 2014/10/29 17:06:40 cheusov Exp $ + +# differnt executables are provided by libsvm and liblinear packages +--- text-train.py.orig 2013-09-09 08:53:54.000000000 +0000 ++++ text-train.py +@@ -150,9 +150,9 @@ if __name__ == '__main__': + extra_svm_files += [value] + elif argv[i] == '-x': + if value.lower() == 'grid': +- system(path.dirname(LIBLINEAR_HOME) + '/../grid.py') ++ system('svm-grid') + elif value.lower() == 'liblinear': +- system(LIBLINEAR_HOME + '/train') ++ system('liblinear-train') + else: + stderr.write('Error: Invalid usage of option -x. No command ' + value + '\n') + exit_with_help() |