This is the GPL'd 7th edition of the very well-known English-Russian
dictionary by V. K. Mueller, compiled for dictd (dictionary protocol server).
author: cheusov <cheusov@pkgsrc.org> 2011-05-08 18:19:54 +0000
committer: cheusov <cheusov@pkgsrc.org> 2011-05-08 18:19:54 +0000
commit: af4d9d2d3f55bb9a55d30735b7463af0276fa81f (patch)
tree: 11d28bf160d7f46f27ad3bf9f34197988e333dde /textproc/dict-mueller7
parent: 43531543526aac558de9b206b42642f9ba698327 (diff)
download: pkgsrc-af4d9d2d3f55bb9a55d30735b7463af0276fa81f.tar.gz
6 files changed, 375 insertions, 0 deletions
diff --git a/textproc/dict-mueller7/DESCR b/textproc/dict-mueller7/DESCR
new file mode 100644
index 00000000000..11b17ff0c02
--- /dev/null
+++ b/textproc/dict-mueller7/DESCR
@@ -0,0 +1,2 @@
+This is the GPL'd 7th edition of the very well known English-Russian
+dictionary by V.K.Mueller compiled for dictd (dictionary protocol server).
diff --git a/textproc/dict-mueller7/Makefile b/textproc/dict-mueller7/Makefile
new file mode 100644
index 00000000000..180a83d9828
--- /dev/null
+++ b/textproc/dict-mueller7/Makefile
@@ -0,0 +1,46 @@
+# $NetBSD: Makefile,v 1.1.1.1 2011/05/08 18:19:54 cheusov Exp $
+
+DISTNAME=	Mueller7GPL
+PKGNAME=	dict-mueller7-1.2
+CATEGORIES=	textproc
+MASTER_SITES=	http://dict.osdn.org.ua/ \
+		http://mova.org/~cheusov/pub/dict-mueller/
+EXTRACT_SUFX=	.tgz
+
+MAINTAINER=	cheusov@tut.by
+HOMEPAGE=	http://mueller-dic.chat.ru/
+COMMENT=	English-Russian dictionary by Mueller for dictd
+LICENSE=	gnu-gpl-v2
+
+DEPENDS+=	dict-server>=1.9.14:../../textproc/dict-server
+
+BUILD_DEPENDS+=	dict-server>=1.10.2:../../textproc/dict-server
+BUILD_DEPENDS+=	p5-Unicode-Map8-[0-9]*:../../converters/p5-Unicode-Map8
+BUILD_DEPENDS+= coreutils-[0-9]*:../../sysutils/coreutils # we need GNU fmt
+
+WRKSRC=		${WRKDIR}/usr/local/share/dict
+
+PKG_DESTDIR_SUPPORT=	user-destdir
+
+USE_TOOLS+=		gsed perl tar gzip
+
+INSTALLATION_DIRS+=	share/dictd
+
+pre-configure:	# stage both helper scripts in ${WRKSRC}, where do-build runs them
+	${CP} ${FILESDIR}/mueller2utf8 ${FILESDIR}/to-dict ${WRKSRC}
+
+do-build:
+	cd ${WRKSRC} && \
+	sh to-dict --src-data Mueller7GPL.koi mueller7.data > /dev/null && \
+	perl mueller2utf8 < mueller7.data > tmp_1 && \
+	${PREFIX}/bin/dictfmt --utf8 -p --columns 0 \
+		-s 'Mueller English-Russian Dictionary' \
+		-u 'http://www.chat.ru/~mueller_dic' \
+		--headword-separator ', ' mueller7 < tmp_1 && \
+	${PREFIX}/bin/dictzip *.dict
+
+do-install:
+	${INSTALL_DATA} ${WRKSRC}/*.dict.dz ${WRKSRC}/*.index \
+		${DESTDIR}${PREFIX}/share/dictd
+
+.include "../../mk/bsd.pkg.mk"
diff --git a/textproc/dict-mueller7/PLIST b/textproc/dict-mueller7/PLIST
new file mode 100644
index 00000000000..fe12897cb0d
--- /dev/null
+++ b/textproc/dict-mueller7/PLIST
@@ -0,0 +1,3 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2011/05/08 18:19:54 cheusov Exp $
+share/dictd/mueller7.dict.dz
+share/dictd/mueller7.index
diff --git a/textproc/dict-mueller7/distinfo b/textproc/dict-mueller7/distinfo
new file mode 100644
index 00000000000..a36c61806a0
--- /dev/null
+++ b/textproc/dict-mueller7/distinfo
@@ -0,0 +1,5 @@
+$NetBSD: distinfo,v 1.1.1.1 2011/05/08 18:19:54 cheusov Exp $
+
+SHA1 (Mueller7GPL.tgz) = 5d2566e4a312d21c2cf4231771f4e405d54affc1
+RMD160 (Mueller7GPL.tgz) = 1d1a674313d871482f326a898e3d1189e461baf2
+Size (Mueller7GPL.tgz) = 2386052 bytes
diff --git a/textproc/dict-mueller7/files/mueller2utf8 b/textproc/dict-mueller7/files/mueller2utf8
new file mode 100755
index 00000000000..eb27dc9d7c4
--- /dev/null
+++ b/textproc/dict-mueller7/files/mueller2utf8
@@ -0,0 +1,125 @@
+#!/usr/bin/perl
+# Written by Alexey Dyachenko <alexd@altlinux.ru> (c) 2002
+# Licensed under GNU GPL
+
+ require Unicode::Map8;	# run-time load; nothing is imported into this file
+ my $koi_map = Unicode::Map8->new("koi8-r")    || die;	# abort if new() gives a false value
+# $ipa_map is created without a charset name; its pairs are all added below.
+ my $ipa_map = Unicode::Map8->new();
+# 16-bit default set to 0x20 (space) -- presumably what unpaired bytes turn into; confirm in the Unicode::Map8 docs.
+ $ipa_map->default_to16( 0x20 );
+# Bytes 0x61-0x7a (ASCII a-z) are paired with the identical code points.
+$ipa_map->addpair( 0x61,  0x61 );
+$ipa_map->addpair( 0x62,  0x62 );
+$ipa_map->addpair( 0x63,  0x63 );
+$ipa_map->addpair( 0x64,  0x64 );
+$ipa_map->addpair( 0x65,  0x65 );
+$ipa_map->addpair( 0x66,  0x66 );
+$ipa_map->addpair( 0x67,  0x67 );
+$ipa_map->addpair( 0x68,  0x68 );
+$ipa_map->addpair( 0x69,  0x69 );
+$ipa_map->addpair( 0x6a,  0x6a );
+$ipa_map->addpair( 0x6b,  0x6b );
+$ipa_map->addpair( 0x6c,  0x6c );
+$ipa_map->addpair( 0x6d,  0x6d );
+$ipa_map->addpair( 0x6e,  0x6e );
+$ipa_map->addpair( 0x6f,  0x6f );
+$ipa_map->addpair( 0x70,  0x70 );
+$ipa_map->addpair( 0x71,  0x71 );
+$ipa_map->addpair( 0x72,  0x72 );
+$ipa_map->addpair( 0x73,  0x73 );
+$ipa_map->addpair( 0x74,  0x74 );
+$ipa_map->addpair( 0x75,  0x75 );
+$ipa_map->addpair( 0x76,  0x76 );
+$ipa_map->addpair( 0x77,  0x77 );
+$ipa_map->addpair( 0x78,  0x78 );
+$ipa_map->addpair( 0x79,  0x79 );
+$ipa_map->addpair( 0x7a,  0x7a );
+# Both '-' (0x2d) and '.' (0x2e) are paired with '.' (U+002E).
+$ipa_map->addpair( 0x2d,  0x2e );
+$ipa_map->addpair( 0x2e,  0x2e );
+
+# Single bytes paired with phonetic symbols (Unicode names at right).
+$ipa_map->addpair( 0x54,  0x03b8 );	# 'T'  -> U+03B8 GREEK SMALL LETTER THETA
+$ipa_map->addpair( 0x53,  0x0283 );	# 'S'  -> U+0283 LATIN SMALL LETTER ESH
+$ipa_map->addpair( 0x4e,  0x014b );	# 'N'  -> U+014B LATIN SMALL LETTER ENG
+$ipa_map->addpair( 0xd7,  0x02a7 );	# 0xd7 -> U+02A7 LATIN SMALL LETTER TESH DIGRAPH
+$ipa_map->addpair( 0x44,  0x00F0 );	# 'D'  -> U+00F0 LATIN SMALL LETTER ETH
+
+$ipa_map->addpair( 0x5a,  0x0292 );	# 'Z'  -> U+0292 LATIN SMALL LETTER EZH
+
+$ipa_map->addpair( 0x51,  0x00e6 );	# 'Q'  -> U+00E6 LATIN SMALL LETTER AE
+$ipa_map->addpair( 0x49,  0x0131 );	# 'I'  -> U+0131 LATIN SMALL LETTER DOTLESS I
+$ipa_map->addpair( 0xc3,  0x028c );	# 0xc3 -> U+028C LATIN SMALL LETTER TURNED V
+$ipa_map->addpair( 0x55,  0x028a );	# 'U'  -> U+028A LATIN SMALL LETTER UPSILON
+$ipa_map->addpair( 0x81,  0x0252 );	# 0x81 -> U+0252 LATIN SMALL LETTER TURNED ALPHA
+$ipa_map->addpair( 0x45,  0x025b );	# 'E'  -> U+025B LATIN SMALL LETTER OPEN E
+$ipa_map->addpair( 0xab,  0x0259 );	# 0xab -> U+0259 LATIN SMALL LETTER SCHWA
+
+$ipa_map->addpair( 0x41,  0x0251 );	# 'A'  -> U+0251 LATIN SMALL LETTER ALPHA
+$ipa_map->addpair( 0xce,  0x025c );	# 0xce -> U+025C LATIN SMALL LETTER REVERSED OPEN E
+$ipa_map->addpair( 0x8d,  0x0254 );	# 0x8d -> U+0254 LATIN SMALL LETTER OPEN O
+
+$ipa_map->addpair( 0xc7,  0x02cc );	# 0xc7 -> U+02CC MODIFIER LETTER LOW VERTICAL LINE
+$ipa_map->addpair( 0xc8,  0x02c8 );	# 0xc8 -> U+02C8 MODIFIER LETTER VERTICAL LINE
+
+$ipa_map->addpair( 0xf9,  0x02d0 );	# 0xf9 -> U+02D0 MODIFIER LETTER TRIANGULAR COLON
+$ipa_map->addpair( 0x3e,  0x02d1 );	# '>'  -> U+02D1 MODIFIER LETTER HALF TRIANGULAR COLON
+
+# dump_map( $ipa_map );
+#
+# exit;
+
+sub to_utf {	# one string in, its UTF-8 form out, mapped through $koi_map
+	my ($koi_text) = @_;
+	return $koi_map->tou($koi_text)->utf8;
+}
+sub ipa_code {	# one bracket-content string in, UTF-8 out, via $ipa_map
+	my ($coded_text) = @_;
+	return $ipa_map->tou($coded_text)->utf8;
+}
+
+# Main filter, stdin/ARGV to stdout.  Lines holding a [bracketed] token are
+# split at their last double space; plain text goes through to_utf() and
+# the contents of every [...] on the right-hand side through ipa_code().
+while( <> ) {
+	if( /\[\S+\]/ && /(.*)  (.*)/ ) {
+		my $leftpart = $1;
+		my $rightpart = $2;
+		print to_utf($leftpart)."  ";
+		# String test: the former "!=" numified both operands (to 0 for
+		# most text), so the words ahead of the first "[" were dropped.
+		if( substr( $rightpart, 0, 1 ) ne "[" ) {
+			if( $rightpart =~ /([^[]+)/ ){
+				print to_utf($1);
+			}
+		}
+		# Each [bracketed] token together with the text that follows it.
+		while( $rightpart =~ /\[([^]]+)\]([^[]*)/g ){
+			print "[".ipa_code($1)."]".to_utf($2);
+		}
+		print "\n";
+	}else{
+		print to_utf($_);
+	}
+}
+
+# Debug helper: print both directions of Map8 object $m as hex pairs.
+sub dump_map {
+  my $m = shift;
+  # Bare NOCHAR was the string "NOCHAR" (0 under ==); it is not imported.
+  my $nochar = Unicode::Map8::NOCHAR();
+  for my $i (0 .. 255) {
+    my $u = $m->to_char16($i);
+    printf "0x%02X 0x%04X\n", $i, $u unless $u == $nochar;
+  }
+  for my $block (0 .. 255) {
+    next if $m->_empty_block($block);
+    print "# BLOCK $block\n";
+    for my $i (0 .. 255) {
+      my $u = $block*256 + $i;
+      my $c = $m->to_char8($u);
+      printf "0x%04X 0x%02X\n", $u, $c unless $c == $nochar;
+    }
+  }
+}
diff --git a/textproc/dict-mueller7/files/to-dict b/textproc/dict-mueller7/files/to-dict
new file mode 100755
index 00000000000..b77d3dc7640
--- /dev/null
+++ b/textproc/dict-mueller7/files/to-dict
@@ -0,0 +1,194 @@
+#!/bin/sh
+# A shell script for conversion of MOVA MuellerXX.koi dictionaries 
+# into DICT format. 
+# Written by Andrew Comech <comech@math.sunysb.edu>
+# GNU GPL (2000)
+# The latest version is available from
+# http://www.math.sunysb.edu/~comech/tools/to-dict
+
+version="0.1"
+versiondate="November 11, 2000"
+
+# We need the following binaries:
+DICTFMT=`which dictfmt`
+DICTZIP=`which dictzip`
+
+INFO () {	# print the version/author banner
+  printf '%s\n' "
+to-dict, version $version ($versiondate).
+Conversion of MOVA MuellerXX.koi dictionaries into DICT format.
+Written by Andrew Comech <comech@math.sunysb.edu>. GNU GPL (2000)
+
+The latest version is available from
+http://www.math.sunysb.edu/~comech/tools/to-dict
+"
+}
+
+REQUIREMENTS () {
+  echo "
+REQUIREMENTS: you need the binaries \`dictfmt' and \`dictzip'. 
+
+dictzip.c can be found in dictd-1.5.0.tar.gz (or later version) at
+ftp://ftp.cs.unc.edu/pub/users/faith/dict/
+
+dictfmt.c can be found in Debian/GNU Linux package dict-elements at
+ftp://ftp.debian.org/debian/dists/potato/main/source/text/
+
+Compiled binaries (dictfmt and dictzip) could be downloaded from
+http://www.wh9.tu-dresden.de/~heinrich/dict/dict_leo_ftp/static-binaries/
+or
+http://iris.ltas.ulg.ac.be/download/apps/dict/
+"
+}
+
+USAGE () {
+    echo "
+USAGE: 
+ -version: show version
+ -h, --help, or no parameters: show this help
+
+(*) To make DICT database from Mueller7GPL.koi available from
+http://www.chat.ru/~mueller_dic/Mueller7GPL.tgz
+
+# Remove transcription:
+./to-dict --no-trans Mueller7GPL.koi mueller7.notr
+# Convert <source> into <data> (a file with %h, %d-headers):
+./to-dict --src-data mueller7.notr mueller7.data && rm -i mueller7.notr
+# Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
+./to-dict --data-dict mueller7.data mueller7 && rm -i mueller7.data
+# Expand index file (to be able to access lines like \"A, a\" by \"A\" and \"a\"):
+./to-dict --expand-index mueller7.index mueller7.index.exp
+# Install a new dictionary with expanded index (RUN AS ROOT).
+# The location of files may depend on your distribution!!!
+cp mueller7.dict.dz /usr/share/dictd/mueller7.dict.dz
+cp mueller7.index.exp /usr/share/dictd/mueller7.index
+dictdconfig -w && (killall dictd; dictd)
+
+(*) To make DICT database from Mueller24.koi available from 
+http://www.chat.ru/~mueller_dic/Mueller24.tgz (this one is preferred)
+
+# Convert <source> into <data> (a file with %h, %d):
+./to-dict --src-data Mueller24.koi mueller24.data
+# Convert <data> into DICT-format (files <name>.dict.dz and <name>.index):
+./to-dict --data-dict mueller24.data mueller24 && rm -i mueller24.data
+# Install a new dictionary with expanded index (RUN AS ROOT).
+# The location of files may depend on your distribution!!!
+cp mueller24.dict.dz /usr/share/dictd/mueller24.dict.dz
+cp mueller24.index /usr/share/dictd/mueller24.index
+dictdconfig -w && (killall dictd; dictd)
+
+(*) To re-convert <dict> into <data> (a file with %h, %d-headers):
+
+./to-dict --dict-data <dict> <data>
+
+ *************************************************************
+    !!WARNING!!    !!WARNING!!    !!WARNING!!    !!WARNING!!   
+
+ Temporary files created by this script occupy a lot of drive space!
+ 15 MB for Mueller7GPL.koi (have to strip off transcription first)
+ 12 MB for Mueller24.koi
+ *************************************************************
+"
+}
+
+# To remove the transcription except for [r] and [ju:] which found in the text. 
+# This procedure should not change Mueller24.koi if applied to it.
+NO_TRANS () {
+sed 's/���\ \[ju�\]/���\ "ju:"/; s/\[l\],/"l",/g; s/\[r\]/"r"/g; s/\], \[/A/g; s/\]\; _��\. \[/A/g; s/\]\; _pl\. \[/A/g; s/\[[^]]*\]\ (����.. ����.). \[[^]]*\] (������������[^)]*)\ //g; s/\[[^]]*\]\ //g; s/\[[^]]*\],\ //; s/\ \[[^]]*\],/,/g; s/\ \[[^]]*\])/)/g; s/\ \[[^]]*\]:/:/g; s/\ \[[^]]*\];/;/g; s/\ \[[^]]*\]$//g; s/���\ "ju:"/���\ \[ju�\]/g; s/"l"/\[l\]/g; s/"r"/\[r\]/g '
+}
+
+# Strip the copyright/info preamble: copy stdin to stdout starting at the
+# first line that begins with "_a" or "_A" and running to the end of input.
+# Nothing in the visible script calls this; --src-data inlines the same sed.
+STRIP () { sed -n '/^_[aA]/,$p'; }
+
+# Format the file
+MK_DATA () {
+sed 's/$/\
+/g; s/[^]]*\ \ /%h&\
+%d/; s/_[IVX][IVX]* /\
+ &/g; s/ [1-9]\. /\
+  &/g; s/[1-9][0-9]*>/\
+      &/g; s/[�������������������������������]>/(&>/g; s/>>/)/g; s/\ \_[AISE][a-z]*:/\
+  &/g; s/>/:/g'\
+|sed ' s/%d$/%z/; s/%d/%d\
+   / ; s/%z/%d/; s/%h/%h / '  \
+| gfmt -s -w 74;}
+
+########################################################################
+
+# Argument screening.  -version and the help flags exit 0 before the
+# three-argument rule applies.  Tests are joined with || because -o
+# inside [ ] is obsolescent and ambiguous for operator-like operands.
+if [ "$1" = "-version" ]; then
+    INFO; exit 0
+fi
+
+if [ "$#" -eq 0 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ] ||
+   [ "$1" = "-help" ]; then
+    USAGE; exit 0
+fi
+
+if [ "$#" -ne 3 ]; then
+    USAGE; exit 1
+fi
+
+# Will not go further if there are no dictfmt and dictzip binaries:
+if [ -z "$DICTFMT" ] || [ -z "$DICTZIP" ]; then
+    REQUIREMENTS; exit 1
+fi
+
+if [ ! -f "$2" ]; then
+    echo "No input file: $2"; USAGE; exit 1
+fi
+
+case $1 in
+    "--no-trans")
+	echo  "Removing transcription ($2 -> $3)..";
+	cat $2 | NO_TRANS > $3 || exit 1
+	echo "."; exit 0
+	;;
+    "--src-data")
+	echo "Writing the header of $3.."
+	echo -e "%h 00-database-info\n%d" > $3
+	cat $2 | sed -n '1p' | sed 's/^/  /' | gfmt -s -w 74 >> $3;
+	cat $2 | sed -n '/^_/,/_��.  Japan ��������/p' | sed 's/^/  /' | gfmt -s -w 74 >> $3;
+	echo "" >> $3
+	echo "Formatting data ($2 -> $3).."
+	cat $2 | sed -n '/^_[aA]/,$p' | MK_DATA >> $3 || exit 1
+	echo "."; exit 0
+	;;
+    "--data-dict")
+	TITLE="Mueller English-Russian Dictionary"
+	echo "dictfmt: $2 -> $3.dict and $3.index.."
+	dictfmt -p -u "http://www.chat.ru/~mueller_dic" \
+	    -s "$TITLE" $3 < $2 || exit 1
+	echo "Compressing $3.dict.."; dictzip $3.dict || exit 1
+#	echo -n "Restarting daemons"; killall dictd; dictd
+	echo "."; exit 0
+	;;
+    "--expand-index")
+# So that the line
+# ``whisky, whiskey   a sort of spirit I like''
+# could be found not only by /usr/bin/dict "whisky, whiskey", but also by 
+# /usr/bin/dict "whisky" and /usr/bin/dict "whiskey"
+	cat $2 | sed 's/^[^,]*, [^,]*/%TAG1&\
+%REM2&\
+%TAG3&/; s/^%TAG1[^,]*, /&%REM1/; s/, %REM1[^'$'\t'']*//; s/%REM2[^,]*, //; s/%TAG[13]//g' > $3 || exit 1
+	exit 0
+	;;
+    "--dict-data")
+	# Pick the reader from what file(1) reports.  The old test compared
+	# grep's *output* with "0", which never matches, so zcat always ran.
+	if file "$2" | grep gzip > /dev/null; then CAT=zcat; else CAT=cat; fi
+	# Unindented lines become "%h <line>" followed by "%d"; the leading
+	# blanks of every other line are normalised to three spaces.
+	$CAT "$2" | sed 's/^[^\ ].*/%h &\
+%d/; s/^[\ ][\ ]*/   /' > "$3" || exit 1
+	echo "."; exit 0
+	;;
+    *) INFO; USAGE; exit 1
+esac
+
+echo "You are not supposed to be here."
+exit 1
author	cheusov <cheusov@pkgsrc.org>	2011-05-08 18:19:54 +0000
committer	cheusov <cheusov@pkgsrc.org>	2011-05-08 18:19:54 +0000
commit	af4d9d2d3f55bb9a55d30735b7463af0276fa81f (patch)
tree	11d28bf160d7f46f27ad3bf9f34197988e333dde /textproc/dict-mueller7
parent	43531543526aac558de9b206b42642f9ba698327 (diff)
download	pkgsrc-af4d9d2d3f55bb9a55d30735b7463af0276fa81f.tar.gz