summaryrefslogtreecommitdiff
path: root/textproc
diff options
context:
space:
mode:
author cheusov <cheusov@pkgsrc.org> 2011-05-08 18:19:54 +0000
committer cheusov <cheusov@pkgsrc.org> 2011-05-08 18:19:54 +0000
commit af4d9d2d3f55bb9a55d30735b7463af0276fa81f (patch)
tree 11d28bf160d7f46f27ad3bf9f34197988e333dde /textproc
parent 43531543526aac558de9b206b42642f9ba698327 (diff)
download pkgsrc-af4d9d2d3f55bb9a55d30735b7463af0276fa81f.tar.gz
This is the GPL'd 7th edition of the very well known English-Russian
dictionary by V.K.Mueller compiled for dictd (dictionary protocol server).
Diffstat (limited to 'textproc')
-rw-r--r-- textproc/dict-mueller7/DESCR 2
-rw-r--r-- textproc/dict-mueller7/Makefile 46
-rw-r--r-- textproc/dict-mueller7/PLIST 3
-rw-r--r-- textproc/dict-mueller7/distinfo 5
-rwxr-xr-x textproc/dict-mueller7/files/mueller2utf8 125
-rwxr-xr-x textproc/dict-mueller7/files/to-dict 194
6 files changed, 375 insertions, 0 deletions
diff --git a/textproc/dict-mueller7/DESCR b/textproc/dict-mueller7/DESCR
new file mode 100644
index 00000000000..11b17ff0c02
--- /dev/null
+++ b/textproc/dict-mueller7/DESCR
@@ -0,0 +1,2 @@
+This is the GPL'd 7th edition of the very well known English-Russian
+dictionary by V.K.Mueller compiled for dictd (dictionary protocol server).
diff --git a/textproc/dict-mueller7/Makefile b/textproc/dict-mueller7/Makefile
new file mode 100644
index 00000000000..180a83d9828
--- /dev/null
+++ b/textproc/dict-mueller7/Makefile
@@ -0,0 +1,46 @@
+# $NetBSD: Makefile,v 1.1.1.1 2011/05/08 18:19:54 cheusov Exp $
+
+DISTNAME= Mueller7GPL
+PKGNAME= dict-mueller7-1.2
+CATEGORIES= textproc
+MASTER_SITES= http://dict.osdn.org.ua/ \
+ http://mova.org/~cheusov/pub/dict-mueller/
+EXTRACT_SUFX= .tgz
+
+MAINTAINER= cheusov@tut.by
+HOMEPAGE= http://mueller-dic.chat.ru/
+COMMENT= English-Russian dictionary by Mueller for dictd
+LICENSE= gnu-gpl-v2
+
+DEPENDS+= dict-server>=1.9.14:../../textproc/dict-server
+
+BUILD_DEPENDS+= dict-server>=1.10.2:../../textproc/dict-server
+BUILD_DEPENDS+= p5-Unicode-Map8-[0-9]*:../../converters/p5-Unicode-Map8
+BUILD_DEPENDS+= coreutils-[0-9]*:../../sysutils/coreutils # we need GNU fmt
+
+WRKSRC= ${WRKDIR}/usr/local/share/dict
+
+PKG_DESTDIR_SUPPORT= user-destdir
+
+USE_TOOLS+= gsed perl tar gzip
+
+INSTALLATION_DIRS+= share/dictd
+
+# Copy the two conversion helpers shipped with the package into the
+# work directory, where do-build runs them next to the extracted data.
+# ${FILESDIR} is used instead of a relative "files/" path so the copy
+# does not depend on the directory make happens to be running in.
+pre-configure:
+ cp ${FILESDIR}/mueller2utf8 ${FILESDIR}/to-dict ${WRKSRC}
+
+# Build the dictionary inside ${WRKSRC}; every step must succeed (&&):
+#   1. to-dict --src-data turns Mueller7GPL.koi into mueller7.data
+#      (its progress messages are discarded);
+#   2. mueller2utf8 filters mueller7.data into tmp_1;
+#   3. dictfmt reads tmp_1 and is given the base name "mueller7"
+#      (PLIST expects mueller7.index and mueller7.dict.dz);
+#   4. dictzip is run on the resulting *.dict file(s).
+do-build:
+ cd ${WRKSRC} && \
+ sh to-dict --src-data Mueller7GPL.koi mueller7.data > /dev/null && \
+ perl mueller2utf8 < mueller7.data > tmp_1 && \
+ ${PREFIX}/bin/dictfmt --utf8 -p --columns 0 \
+ -s 'Mueller English-Russian Dictionary' \
+ -u 'http://www.chat.ru/~mueller_dic' \
+ --headword-separator ', ' mueller7 < tmp_1 && \
+ ${PREFIX}/bin/dictzip *.dict
+
+# Install the compressed dictionary and its index into share/dictd
+# (the directory is declared in INSTALLATION_DIRS above).
+do-install:
+ ${INSTALL_DATA} ${WRKSRC}/*.dict.dz ${WRKSRC}/*.index \
+ ${DESTDIR}${PREFIX}/share/dictd
+
+.include "../../mk/bsd.pkg.mk"
diff --git a/textproc/dict-mueller7/PLIST b/textproc/dict-mueller7/PLIST
new file mode 100644
index 00000000000..fe12897cb0d
--- /dev/null
+++ b/textproc/dict-mueller7/PLIST
@@ -0,0 +1,3 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2011/05/08 18:19:54 cheusov Exp $
+share/dictd/mueller7.dict.dz
+share/dictd/mueller7.index
diff --git a/textproc/dict-mueller7/distinfo b/textproc/dict-mueller7/distinfo
new file mode 100644
index 00000000000..a36c61806a0
--- /dev/null
+++ b/textproc/dict-mueller7/distinfo
@@ -0,0 +1,5 @@
+$NetBSD: distinfo,v 1.1.1.1 2011/05/08 18:19:54 cheusov Exp $
+
+SHA1 (Mueller7GPL.tgz) = 5d2566e4a312d21c2cf4231771f4e405d54affc1
+RMD160 (Mueller7GPL.tgz) = 1d1a674313d871482f326a898e3d1189e461baf2
+Size (Mueller7GPL.tgz) = 2386052 bytes
diff --git a/textproc/dict-mueller7/files/mueller2utf8 b/textproc/dict-mueller7/files/mueller2utf8
new file mode 100755
index 00000000000..eb27dc9d7c4
--- /dev/null
+++ b/textproc/dict-mueller7/files/mueller2utf8
@@ -0,0 +1,125 @@
+#!/usr/bin/perl
+# Written by Alexey Dyachenko <alexd@altlinux.ru> (c) 2002
+# Licensed under GNU GPL
+
+# Conversion tables used by the subs below: the stock KOI8-R table for
+# ordinary text and a hand-built 8-bit table for the transcription that
+# appears between square brackets.
+require Unicode::Map8;
+my $koi_map = Unicode::Map8->new("koi8-r") || die;
+
+my $ipa_map = Unicode::Map8->new();
+
+# 0x20 (space) is the default 16-bit character for bytes with no pair.
+$ipa_map->default_to16( 0x20 );
+
+# Lower-case ASCII letters a..z map to themselves.
+for my $letter ( 0x61 .. 0x7a ) {
+    $ipa_map->addpair( $letter, $letter );
+}
+
+# Remaining pairs as [ 8-bit code, Unicode code point ].  They are added
+# in the original order in case addpair() is order-sensitive.
+for my $pair (
+    [ 0x2d, 0x2e ],      # '-' -> '.'
+    [ 0x2e, 0x2e ],      # '.' -> '.'
+
+    [ 0x54, 0x03b8 ],    # U+03B8 GREEK SMALL LETTER THETA
+    [ 0x53, 0x0283 ],    # U+0283 LATIN SMALL LETTER ESH
+    [ 0x4e, 0x014b ],    # U+014B LATIN SMALL LETTER ENG
+    [ 0xd7, 0x02a7 ],    # U+02A7 LATIN SMALL LETTER TESH DIGRAPH
+    [ 0x44, 0x00F0 ],    # U+00F0 LATIN SMALL LETTER ETH
+
+    [ 0x5a, 0x0292 ],    # U+0292 LATIN SMALL LETTER EZH
+
+    [ 0x51, 0x00e6 ],    # U+00E6 LATIN SMALL LETTER AE
+    [ 0x49, 0x0131 ],    # U+0131 LATIN SMALL LETTER DOTLESS I
+    [ 0xc3, 0x028c ],    # U+028C LATIN SMALL LETTER TURNED V
+    [ 0x55, 0x028a ],    # U+028A LATIN SMALL LETTER UPSILON
+    [ 0x81, 0x0252 ],    # U+0252 LATIN SMALL LETTER TURNED ALPHA
+    [ 0x45, 0x025b ],    # U+025B LATIN SMALL LETTER OPEN E
+    [ 0xab, 0x0259 ],    # U+0259 LATIN SMALL LETTER SCHWA
+
+    [ 0x41, 0x0251 ],    # U+0251 LATIN SMALL LETTER ALPHA
+    [ 0xce, 0x025c ],    # U+025C LATIN SMALL LETTER REVERSED OPEN E
+    [ 0x8d, 0x0254 ],    # U+0254 LATIN SMALL LETTER OPEN O
+
+    [ 0xc7, 0x02cc ],    # U+02CC MODIFIER LETTER LOW VERTICAL LINE
+    [ 0xc8, 0x02c8 ],    # U+02C8 MODIFIER LETTER VERTICAL LINE
+
+    [ 0xf9, 0x02d0 ],    # U+02D0 MODIFIER LETTER TRIANGULAR COLON
+    [ 0x3e, 0x02d1 ],    # U+02D1 MODIFIER LETTER HALF TRIANGULAR COLON
+) {
+    $ipa_map->addpair( @$pair );
+}
+
+# dump_map( $ipa_map );
+#
+# exit;
+
+# Convert a string through the KOI8-R table ($koi_map) and return the
+# result as UTF-8.
+sub to_utf {
+    my ($koi_text) = @_;
+    my $unicode = $koi_map->tou($koi_text);
+    return $unicode->utf8;
+}
+
+# Convert a transcription string through the custom table ($ipa_map)
+# and return the result as UTF-8.
+sub ipa_code {
+    my ($transcription) = @_;
+    my $unicode = $ipa_map->tou($transcription);
+    return $unicode->utf8;
+}
+
+
+# Main filter: read the dictionary from STDIN (or the files named on
+# the command line) and write it to STDOUT as UTF-8.
+#
+# Lines with no "[...]" group, or that cannot be split into two parts,
+# are converted as a whole with to_utf().  Otherwise the line is split
+# by the pattern below (greedy, so at the last place the separator
+# matches): the left part goes through to_utf(), and in the right part
+# every "[...]" group goes through ipa_code() while the text around
+# the groups goes through to_utf().
+while( <> ) {
+    if( /\[\S+\]/ ) {
+        if( /(.*) (.*)/ ){
+            my $leftpart = $1;
+            my $rightpart = $2;
+            print to_utf($leftpart)." ";
+
+            # Text preceding the first "[" is not seen by the loop
+            # below, so emit it here.  This must be a string test: the
+            # former "!=" compared both operands as numbers (0 != 0 for
+            # nearly every input) and silently dropped that text.
+            if( substr( $rightpart, 0, 1 ) ne "[" ) {
+                if( $rightpart =~ /([^[]+)/ ){
+                    print to_utf($1);
+                }
+            }
+
+            # Each "[transcription]" followed by the plain text up to
+            # the next "[".
+            while( $rightpart =~ /\[([^]]+)\]([^[]*)/g ){
+                print "[".ipa_code($1)."]".to_utf($2);
+            }
+
+            # "." never matches the newline, so it was not captured.
+            print "\n";
+        }else{
+            print to_utf($_);
+        }
+    }else{
+        print to_utf($_);
+    }
+}
+
+# Debugging aid (only referenced from the commented-out call above):
+# print both directions of a Unicode::Map8 table.
+#
+#   $m - the map object to dump
+#
+# First every 8-bit code that has a 16-bit mapping is printed, then,
+# for each non-empty block of 256 code points, every 16-bit code that
+# has an 8-bit mapping.
+sub dump_map
+{
+    my $m = shift;
+
+    # The module is loaded with "require", so nothing is imported.  A
+    # bare NOCHAR would be the string "NOCHAR" (0 in a numeric test)
+    # and the "no mapping" checks would look at the wrong value.
+    my $nochar = Unicode::Map8::NOCHAR();
+
+    for my $i (0 .. 255) {
+        my $u = $m->to_char16($i);
+        next if $u == $nochar;
+        printf "0x%02X 0x%04X\n", $i, $u;
+    }
+    for my $block (0 .. 255) {
+        next if $m->_empty_block($block);
+        print "# BLOCK $block\n";
+        for my $i (0 .. 255) {
+            my $u = $block*256 + $i;
+            my $c = $m->to_char8($u);
+            next if $c == $nochar;
+            printf "0x%04X 0x%02X\n", $u, $c;
+        }
+    }
+}
diff --git a/textproc/dict-mueller7/files/to-dict b/textproc/dict-mueller7/files/to-dict
new file mode 100755
index 00000000000..b77d3dc7640
--- /dev/null
+++ b/textproc/dict-mueller7/files/to-dict
@@ -0,0 +1,194 @@
+#!/bin/sh
+# A shell script for conversion of MOVA MuellerXX.koi dictionaries
+# into DICT format.
+# Written by Andrew Comech <comech@math.sunysb.edu>
+# GNU GPL (2000)
+# The latest version is available from
+# http://www.math.sunysb.edu/~comech/tools/to-dict
+
+version="0.1"
+versiondate="November 11, 2000"
+
+# We need the following binaries.  "command -v" is the POSIX way to
+# locate them: it prints nothing when the program is missing, whereas
+# some "which" implementations print a "no ... in PATH" message on
+# stdout, which would defeat the empty-string check further down.
+DICTFMT=`command -v dictfmt 2>/dev/null`
+DICTZIP=`command -v dictzip 2>/dev/null`
+
+# Print the version banner, framed by one blank line above and below.
+INFO () {
+    cat <<EOF
+
+to-dict, version $version ($versiondate).
+Conversion of MOVA MuellerXX.koi dictionaries into DICT format.
+Written by Andrew Comech <comech@math.sunysb.edu>. GNU GPL (2000)
+
+The latest version is available from
+http://www.math.sunysb.edu/~comech/tools/to-dict
+
+EOF
+}
+
+# Explain which external binaries are required and where to get them.
+# The here-document is quoted, so its text is printed literally.
+REQUIREMENTS () {
+    cat <<'EOF'
+
+REQUIREMENTS: you need the binaries `dictfmt' and `dictzip'.
+
+dictzip.c can be found in dictd-1.5.0.tar.gz (or later version) at
+ftp://ftp.cs.unc.edu/pub/users/faith/dict/
+
+dictfmt.c can be found in Debian/GNU Linux package dict-elements at
+ftp://ftp.debian.org/debian/dists/potato/main/source/text/
+
+Compiled binaries (dictfmt and dictzip) could be downloaded from
+http://www.wh9.tu-dresden.de/~heinrich/dict/dict_leo_ftp/static-binaries/
+or
+http://iris.ltas.ulg.ac.be/download/apps/dict/
+
+EOF
+}
+
+# Print the help text: the options, worked examples for the
+# Mueller7GPL and Mueller24 sources, and a warning about the disk
+# space taken by temporary files.  The text is a single double-quoted
+# string, so the \" sequences inside it are printed as plain quotes.
+USAGE () {
+ echo "
+USAGE:
+ -version: show version
+ -h, --help, or no parameters: show this help
+
+(*) To make DICT database from Mueller7GPL.koi available from
+http://www.chat.ru/~mueller_dic/Mueller7GPL.tgz
+
+# Remove transcription:
+./to-dict --no-trans Mueller7GPL.koi mueller7.notr
+# Convert <source> into <data> (a file with %h, %d-headers):
+./to-dict --src-data mueller7.notr mueller7.data && rm -i mueller7.notr
+# Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
+./to-dict --data-dict mueller7.data mueller7 && rm -i mueller7.data
+# Expand index file (to be able to access lines like \"A, a\" by \"A\" and \"a\"):
+./to-dict --expand-index mueller7.index mueller7.index.exp
+# Install a new dictionary with expanded index (RUN AS ROOT).
+# The location of files may depend on your distribution!!!
+cp mueller7.dict.dz /usr/share/dictd/mueller7.dict.dz
+cp mueller7.index.exp /usr/share/dictd/mueller7.index
+dictdconfig -w && (killall dictd; dictd)
+
+(*) To make DICT database from Mueller24.koi available from
+http://www.chat.ru/~mueller_dic/Mueller24.tgz (this one is preferred)
+
+# Convert <source> into <data> (a file with %h, %d):
+./to-dict --src-data Mueller24.koi mueller24.data
+# Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
+./to-dict --data-dict mueller24.data mueller24 && rm -i mueller24.data
+# Install a new dictionary with expanded index (RUN AS ROOT).
+# The location of files may depend on your distribution!!!
+cp mueller24.dict.dz /usr/share/dictd/mueller24.dict.dz
+cp mueller24.index /usr/share/dictd/mueller24.index
+dictdconfig -w && (killall dictd; dictd)
+
+(*) To re-convert <dict> into <data> (a file with %h, %d-headers):
+
+./to-dict --dict-data <dict> <data>
+
+ *************************************************************
+ !!WARNING!! !!WARNING!! !!WARNING!! !!WARNING!!
+
+ Temporary files created by this script occupy a lot of drive space!
+ 15 MB for Mueller7GPL.koi (have to strip off transcription first)
+ 12 MB for Mueller24.koi
+ *************************************************************
+"
+}
+
+# Filter (stdin -> stdout) that removes the bracketed transcription,
+# except for [r], [l] and [ju:], which occur in the running text.
+# This procedure should not change Mueller24.koi if applied to it.
+#
+# How the single sed program works:
+#   1. the groups to keep are first rewritten to quoted forms
+#      ("ju:", "l", "r") so the removal rules cannot match them;
+#   2. adjacent groups separated by ", " or "; _xx. " are merged, then
+#      "[...]" groups are deleted together with the space or the
+#      punctuation next to them;
+#   3. the quoted forms are turned back into "[...]".
+# NOTE(review): the non-ASCII literals are presumably raw KOI8-R bytes
+# meant to match the .koi input; do not re-encode this line.
+NO_TRANS () {
+sed 's/ËÁË\ \[juù\]/ËÁË\ "ju:"/; s/\[l\],/"l",/g; s/\[r\]/"r"/g; s/\], \[/A/g; s/\]\; _ÁÍ\. \[/A/g; s/\]\; _pl\. \[/A/g; s/\[[^]]*\]\ (ÐÏÌÎ.. ÆÏÒÍ.). \[[^]]*\] (ÒÅÄÕÃÉÒÏ×ÁÎÎ[^)]*)\ //g; s/\[[^]]*\]\ //g; s/\[[^]]*\],\ //; s/\ \[[^]]*\],/,/g; s/\ \[[^]]*\])/)/g; s/\ \[[^]]*\]:/:/g; s/\ \[[^]]*\];/;/g; s/\ \[[^]]*\]$//g; s/ËÁË\ "ju:"/ËÁË\ \[juù\]/g; s/"l"/\[l\]/g; s/"r"/\[r\]/g '
+}
+
+# Strip the copyright/info
+STRIP () {
+sed -n '/^_[aA]/,$p'
+}
+
+# Format the file
+MK_DATA () {
+sed 's/$/\
+/g; s/[^]]*\ \ /%h&\
+%d/; s/_[IVX][IVX]* /\
+ &/g; s/ [1-9]\. /\
+ &/g; s/[1-9][0-9]*>/\
+ &/g; s/[ÁÂ×ÇÄÅÖÚÉËÌÍÎÏÐÒÓÔÕÆÞÃÞÛÈÝßØÜÀÑ]>/(&>/g; s/>>/)/g; s/\ \_[AISE][a-z]*:/\
+ &/g; s/>/:/g'\
+|sed ' s/%d$/%z/; s/%d/%d\
+ / ; s/%z/%d/; s/%h/%h / ' \
+| gfmt -s -w 74;}
+
+########################################################################
+
+# ---- Command-line validation ----------------------------------------
+
+# "-version": print the banner and stop.
+if [ "$1" = "-version" ]; then INFO; exit 0; fi
+
+# No arguments, or any of the help flags: print usage, exit success.
+if [ "$#" = 0 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ] || [ "$1" = "-help" ]; then
+    USAGE
+    exit 0
+fi
+
+# Every real mode takes exactly: <mode> <input> <output>.
+[ "$#" = 3 ] || { USAGE; exit 1; }
+
+## Will not go further if there are no dictfmt and dictzip binaries:
+if [ -z "$DICTFMT" ] || [ -z "$DICTZIP" ]; then
+    REQUIREMENTS
+    exit 1
+fi
+##
+
+# The input must be an existing regular file.
+if [ ! -f "$2" ]; then
+    echo "No input file: $2"
+    USAGE
+    exit 1
+fi
+
+case $1 in
+ "--no-trans")
+ echo "Removing transcription ($2 -> $3)..";
+ cat $2 | NO_TRANS > $3 || exit 1
+ echo "."; exit 0
+ ;;
+ "--src-data")
+ echo "Writing the header of $3.."
+ echo -e "%h 00-database-info\n%d" > $3
+ cat $2 | sed -n '1p' | sed 's/^/ /' | gfmt -s -w 74 >> $3;
+ cat $2 | sed -n '/^_/,/_ÑÐ. Japan ÑÐÏÎÓËÉÊ/p' | sed 's/^/ /' | gfmt -s -w 74 >> $3;
+ echo "" >> $3
+ echo "Formatting data ($2 -> $3).."
+ cat $2 | sed -n '/^_[aA]/,$p' | MK_DATA >> $3 || exit 1
+ echo "."; exit 0
+ ;;
+ "--data-dict")
+ TITLE="Mueller English-Russian Dictionary"
+ echo "dictfmt: $2 -> $3.dict and $3.index.."
+ dictfmt -p -u "http://www.chat.ru/~mueller_dic" \
+ -s "$TITLE" $3 < $2 || exit 1
+ echo "Compressing $3.dict.."; dictzip $3.dict || exit 1
+# echo -n "Restarting daemons"; killall dictd; dictd
+ echo "."; exit 0
+ ;;
+ "--expand-index")
+# So that the line
+# ``whisky, whiskey a sort of spirit I like''
+# could be found not only by /usr/bin/dict "whisky, whiskey", but also by
+# /usr/bin/dict "whisky" and /usr/bin/dict "whiskey"
+ cat $2 | sed 's/^[^,]*, [^,]*/%TAG1&\
+%REM2&\
+%TAG3&/; s/^%TAG1[^,]*, /&%REM1/; s/, %REM1[^'$'\t'']*//; s/%REM2[^,]*, //; s/%TAG[13]//g' > $3 || exit 1
+ exit 0
+ ;;
+ "--dict-data")
+ if [ "` file $2 | grep gzip`" != "0" ]; then
+ CAT=zcat;
+ else
+ CAT=cat;
+ fi
+ $CAT $2 | sed 's/^[^\ ].*/%h &\
+%d/; s/^[\ ][\ ]*/ /' >$3 || exit 1
+ echo "."; exit 0
+ ;;
+ *) INFO; USAGE; exit 1
+esac
+
+echo "You are not supposed to be here."
+exit 1