summary | refs | log | tree | commit | diff
path: root/textproc
diff options
context:
space:
mode:
author: scole <scole@pkgsrc.org> 2020-08-15 16:52:28 +0000
committer: scole <scole@pkgsrc.org> 2020-08-15 16:52:28 +0000
commit: 8df841212df4829f12349b3dcc18e0ccb1fc4085 (patch)
tree: 19db3cf9f93b72f3da232eb689ddfbc9886c72a5 /textproc
parent: bed3d4d17b9cc60ace23d59f9d86845290a89d1a (diff)
download: pkgsrc-8df841212df4829f12349b3dcc18e0ccb1fc4085.tar.gz
Update to version 0.3
All changes are for the emacs splitter:
- load custom dictionary first because 'thai-word-table is a defvar
- add count function and return word counts for a few funcs
- add lisp wrapper functions split-thai, split-thai-line which can split thai text in an emacs buffer using 'thai-break-words
Diffstat (limited to 'textproc')
-rw-r--r-- textproc/split-thai/Makefile 4
-rwxr-xr-x textproc/split-thai/files/st-emacs 5
-rw-r--r-- textproc/split-thai/files/thai-utility.el 70
3 files changed, 61 insertions, 18 deletions
diff --git a/textproc/split-thai/Makefile b/textproc/split-thai/Makefile
index 76ae72dd071..738e2c98170 100644
--- a/textproc/split-thai/Makefile
+++ b/textproc/split-thai/Makefile
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.2 2020/08/14 17:31:34 scole Exp $
+# $NetBSD: Makefile,v 1.3 2020/08/15 16:52:28 scole Exp $
-PKGNAME= split-thai-0.2
+PKGNAME= split-thai-0.3
CATEGORIES= textproc
MAINTAINER= pkgsrc-users@NetBSD.org
COMMENT= Utilities to split UTF-8 Thai text into words
diff --git a/textproc/split-thai/files/st-emacs b/textproc/split-thai/files/st-emacs
index 693c90ab117..7b31f12a3d5 100755
--- a/textproc/split-thai/files/st-emacs
+++ b/textproc/split-thai/files/st-emacs
@@ -8,11 +8,10 @@
;;
;;(toggle-debug-on-error) ;; debug
-(require 'thai-word)
-;; load custom dictionary
-(load "ST_SHARE_DIR/thai-utility" nil t)
+;; load custom dictionary first, 'thai-word-table is defvar
(load "ST_SHARE_DIR/thai-dict" nil t)
+(load "ST_SHARE_DIR/thai-utility" nil t)
;; split a thai line by spaces, return new line
(defun process-thai-line(line)
diff --git a/textproc/split-thai/files/thai-utility.el b/textproc/split-thai/files/thai-utility.el
index 61163248260..5ee04a4b9a9 100644
--- a/textproc/split-thai/files/thai-utility.el
+++ b/textproc/split-thai/files/thai-utility.el
@@ -44,10 +44,12 @@ uses recursion"
(defun thai-word-table-save(filename &optional alist)
"save thai words extracted from a nested-alist table to
filename in utf8 format, one word per line. default is to save
-'thai-word-table if no alist argument given."
- (interactive)
+'thai-word-table if no alist argument given. Returns number of
+dictionary words."
+ (interactive "FName of file to save to: \nP")
(let ((thaiwords)
(elem)
+ (line_count)
(coding-system-for-read 'utf-8)
(coding-system-for-write 'utf-8)
(buffer-file-coding-system 'utf-8))
@@ -72,8 +74,29 @@ filename in utf8 format, one word per line. default is to save
(insert elem "\n")))
(sort-lines nil (point-min) (point-max))
+ (setq line_count (count-lines (point-min) (point-max)))
(write-region nil nil filename)
- (buffer-string))))
+ line_count)))
+
+(defun count-words-nested-alist (&optional alist)
+ "Count number of words in a nested alist. if no arg given,
+count 'thai-word-table words"
+ (interactive)
+ (let ((count 0)
+ (elem)
+ (thaiwords))
+ ;; default list or not
+ (setq alist (or alist thai-word-table))
+ (or (nested-alist-p alist)
+ (error "Invalid argument %s" alist))
+ ;; remove 'thai-words from 'thai-word-table
+ (setq alist (cdr alist))
+ (while (setq elem (car alist))
+ (setq alist (cdr alist))
+ (setq thaiwords (extract-thai-na elem ""))
+ (setq count (+ count (length thaiwords))))
+ (message "%d words in nested alist" count)
+ count))
;; 'thai-tis620 is default for emacs <= 28
(defun thai-update-word-table-utf8 (file &optional append)
@@ -99,25 +122,32 @@ is appended instead to the current word list. Does the same as
(defun thai-word-table-save-defvar(dictfile lispfile)
"read a utf8 thai dictionary file and save to a lisp file
suitable for initializing the 'thai-word-table as a \"defvar\".
-Overwrites the lisp file if it exists."
+Overwrites the lisp file if it exists. Returns count of
+dictionary words."
(interactive)
(let ((header)
(footer)
(elem)
+ (line_count)
(coding-system-for-read 'utf-8)
(coding-system-for-write 'utf-8)
(buffer-file-coding-system 'utf-8))
- (setq header (list "(defvar thai-word-table"
- "(let ((table (list 'thai-words)))"
- "(dolist (elt"
- "'(" ))
- (setq footer (list "))"
- "(set-nested-alist elt 1 table))"
- "table)"
- "\"Nested alist of Thai words.\")" ))
+ (setq header (list
+ ";; file auto-generated from thai-word-table-save-defvar"
+ ""
+ "(defvar thai-word-table"
+ "(let ((table (list 'thai-words)))"
+ "(dolist (elt"
+ "'(" ))
+ (setq footer (list
+ "))"
+ "(set-nested-alist elt 1 table))"
+ "table)"
+ "\"Nested alist of Thai words.\")" ))
(with-temp-buffer
(insert-file-contents dictfile)
(goto-char (point-min))
+ (setq line_count (count-lines (point-min) (point-max)))
;; quote each thai word
(while (not (eobp))
(beginning-of-line)
@@ -135,4 +165,18 @@ Overwrites the lisp file if it exists."
(insert elem "\n"))
(lisp-mode)
(indent-region (point-min) (point-max))
- (write-region nil nil lispfile))))
+ (write-region nil nil lispfile))
+ line_count))
+
+(defun split-thai-line(&optional separator)
+ "Break Thai words from point to end of line by inserting a
+separator string at word boundaries. (wrapper for 'thai-break-words)"
+ (interactive)
+ (thai-break-words (or separator " ") (line-end-position)))
+
+(defun split-thai(&optional separator)
+ "Break Thai words from point to end of buffer by inserting a
+separator string at word boundaries. (wrapper for
+'thai-break-words)"
+ (interactive)
+ (thai-break-words (or separator " ") (point-max)))