diff options
author | scole <scole@pkgsrc.org> | 2020-08-15 16:52:28 +0000 |
---|---|---|
committer | scole <scole@pkgsrc.org> | 2020-08-15 16:52:28 +0000 |
commit | 8df841212df4829f12349b3dcc18e0ccb1fc4085 (patch) | |
tree | 19db3cf9f93b72f3da232eb689ddfbc9886c72a5 /textproc | |
parent | bed3d4d17b9cc60ace23d59f9d86845290a89d1a (diff) | |
download | pkgsrc-8df841212df4829f12349b3dcc18e0ccb1fc4085.tar.gz |
Update to version 0.3
all changes for emacs splitter:
- load custom dictionary first because 'thai-word-table is a defvar
- add count function and return word counts for a few funcs
- add lisp wrapper functions split-thai and split-thai-line, which
can split thai text in an emacs buffer using 'thai-break-words
Diffstat (limited to 'textproc')
-rw-r--r-- | textproc/split-thai/Makefile | 4 | ||||
-rwxr-xr-x | textproc/split-thai/files/st-emacs | 5 | ||||
-rw-r--r-- | textproc/split-thai/files/thai-utility.el | 70 |
3 files changed, 61 insertions, 18 deletions
diff --git a/textproc/split-thai/Makefile b/textproc/split-thai/Makefile index 76ae72dd071..738e2c98170 100644 --- a/textproc/split-thai/Makefile +++ b/textproc/split-thai/Makefile @@ -1,6 +1,6 @@ -# $NetBSD: Makefile,v 1.2 2020/08/14 17:31:34 scole Exp $ +# $NetBSD: Makefile,v 1.3 2020/08/15 16:52:28 scole Exp $ -PKGNAME= split-thai-0.2 +PKGNAME= split-thai-0.3 CATEGORIES= textproc MAINTAINER= pkgsrc-users@NetBSD.org COMMENT= Utilities to split UTF-8 Thai text into words diff --git a/textproc/split-thai/files/st-emacs b/textproc/split-thai/files/st-emacs index 693c90ab117..7b31f12a3d5 100755 --- a/textproc/split-thai/files/st-emacs +++ b/textproc/split-thai/files/st-emacs @@ -8,11 +8,10 @@ ;; ;;(toggle-debug-on-error) ;; debug -(require 'thai-word) -;; load custom dictionary -(load "ST_SHARE_DIR/thai-utility" nil t) +;; load custom dictionary first, 'thai-word-table is defvar (load "ST_SHARE_DIR/thai-dict" nil t) +(load "ST_SHARE_DIR/thai-utility" nil t) ;; split a thai line by spaces, return new line (defun process-thai-line(line) diff --git a/textproc/split-thai/files/thai-utility.el b/textproc/split-thai/files/thai-utility.el index 61163248260..5ee04a4b9a9 100644 --- a/textproc/split-thai/files/thai-utility.el +++ b/textproc/split-thai/files/thai-utility.el @@ -44,10 +44,12 @@ uses recursion" (defun thai-word-table-save(filename &optional alist) "save thai words extracted from a nested-alist table to filename in utf8 format, one word per line. default is to save -'thai-word-table if no alist argument given." - (interactive) +'thai-word-table if no alist argument given. Returns number of +dictionary words." + (interactive "FName of file to save to: \nP") (let ((thaiwords) (elem) + (line_count) (coding-system-for-read 'utf-8) (coding-system-for-write 'utf-8) (buffer-file-coding-system 'utf-8)) @@ -72,8 +74,29 @@ filename in utf8 format, one word per line. 
default is to save (insert elem "\n"))) (sort-lines nil (point-min) (point-max)) + (setq line_count (count-lines (point-min) (point-max))) (write-region nil nil filename) - (buffer-string)))) + line_count))) + +(defun count-words-nested-alist (&optional alist) + "Count number of words in a nested alist. if no arg given, +count 'thai-word-table words" + (interactive) + (let ((count 0) + (elem) + (thaiwords)) + ;; default list or not + (setq alist (or alist thai-word-table)) + (or (nested-alist-p alist) + (error "Invalid argument %s" alist)) + ;; remove 'thai-words from 'thai-word-table + (setq alist (cdr alist)) + (while (setq elem (car alist)) + (setq alist (cdr alist)) + (setq thaiwords (extract-thai-na elem "")) + (setq count (+ count (length thaiwords)))) + (message "%d words in nested alist" count) + count)) ;; 'thai-tis620 is default for emacs <= 28 (defun thai-update-word-table-utf8 (file &optional append) @@ -99,25 +122,32 @@ is appended instead to the current word list. Does the same as (defun thai-word-table-save-defvar(dictfile lispfile) "read a utf8 thai dictionary file and save to a lisp file suitable for initializing the 'thai-word-table as a \"defvar\". -Overwrites the lisp file if it exists." +Overwrites the lisp file if it exists. Returns count of +dictionary words." 
(interactive) (let ((header) (footer) (elem) + (line_count) (coding-system-for-read 'utf-8) (coding-system-for-write 'utf-8) (buffer-file-coding-system 'utf-8)) - (setq header (list "(defvar thai-word-table" - "(let ((table (list 'thai-words)))" - "(dolist (elt" - "'(" )) - (setq footer (list "))" - "(set-nested-alist elt 1 table))" - "table)" - "\"Nested alist of Thai words.\")" )) + (setq header (list + ";; file auto-generated from thai-word-table-save-defvar" + "" + "(defvar thai-word-table" + "(let ((table (list 'thai-words)))" + "(dolist (elt" + "'(" )) + (setq footer (list + "))" + "(set-nested-alist elt 1 table))" + "table)" + "\"Nested alist of Thai words.\")" )) (with-temp-buffer (insert-file-contents dictfile) (goto-char (point-min)) + (setq line_count (count-lines (point-min) (point-max))) ;; quote each thai word (while (not (eobp)) (beginning-of-line) @@ -135,4 +165,18 @@ Overwrites the lisp file if it exists." (insert elem "\n")) (lisp-mode) (indent-region (point-min) (point-max)) - (write-region nil nil lispfile)))) + (write-region nil nil lispfile)) + line_count)) + +(defun split-thai-line(&optional separator) + "Break Thai words from point to end of line by inserting a +separator string at word boundaries. (wrapper for 'thai-break-words)" + (interactive) + (thai-break-words (or separator " ") (line-end-position))) + +(defun split-thai(&optional separator) + "Break Thai words from point to end of buffer by inserting a +separator string at word boundaries. (wrapper for +'thai-break-words)" + (interactive) + (thai-break-words (or separator " ") (point-max))) |