From af4d9d2d3f55bb9a55d30735b7463af0276fa81f Mon Sep 17 00:00:00 2001 From: cheusov Date: Sun, 8 May 2011 18:19:54 +0000 Subject: This is the GPL'd 7th edition of the very well known English-Russian dictionary by V.K.Mueller compiled for dictd (dictionary protocol server). --- textproc/dict-mueller7/DESCR | 2 + textproc/dict-mueller7/Makefile | 46 +++++++ textproc/dict-mueller7/PLIST | 3 + textproc/dict-mueller7/distinfo | 5 + textproc/dict-mueller7/files/mueller2utf8 | 125 +++++++++++++++++++ textproc/dict-mueller7/files/to-dict | 194 ++++++++++++++++++++++++++++++ 6 files changed, 375 insertions(+) create mode 100644 textproc/dict-mueller7/DESCR create mode 100644 textproc/dict-mueller7/Makefile create mode 100644 textproc/dict-mueller7/PLIST create mode 100644 textproc/dict-mueller7/distinfo create mode 100755 textproc/dict-mueller7/files/mueller2utf8 create mode 100755 textproc/dict-mueller7/files/to-dict (limited to 'textproc') diff --git a/textproc/dict-mueller7/DESCR b/textproc/dict-mueller7/DESCR new file mode 100644 index 00000000000..11b17ff0c02 --- /dev/null +++ b/textproc/dict-mueller7/DESCR @@ -0,0 +1,2 @@ +This is the GPL'd 7th edition of the very well known English-Russian +dictionary by V.K.Mueller compiled for dictd (dictionary protocol server). 
diff --git a/textproc/dict-mueller7/Makefile b/textproc/dict-mueller7/Makefile new file mode 100644 index 00000000000..180a83d9828 --- /dev/null +++ b/textproc/dict-mueller7/Makefile @@ -0,0 +1,46 @@ +# $NetBSD: Makefile,v 1.1.1.1 2011/05/08 18:19:54 cheusov Exp $ + +DISTNAME= Mueller7GPL +PKGNAME= dict-mueller7-1.2 +CATEGORIES= textproc +MASTER_SITES= http://dict.osdn.org.ua/ \ + http://mova.org/~cheusov/pub/dict-mueller/ +EXTRACT_SUFX= .tgz + +MAINTAINER= cheusov@tut.by +HOMEPAGE= http://mueller-dic.chat.ru/ +COMMENT= English-Russian dictionary by Mueller for dictd +LICENSE= gnu-gpl-v2 + +DEPENDS+= dict-server>=1.9.14:../../textproc/dict-server + +BUILD_DEPENDS+= dict-server>=1.10.2:../../textproc/dict-server +BUILD_DEPENDS+= p5-Unicode-Map8-[0-9]*:../../converters/p5-Unicode-Map8 +BUILD_DEPENDS+= coreutils-[0-9]*:../../sysutils/coreutils # we need GNU fmt + +WRKSRC= ${WRKDIR}/usr/local/share/dict + +PKG_DESTDIR_SUPPORT= user-destdir + +USE_TOOLS+= gsed perl tar gzip + +INSTALLATION_DIRS+= share/dictd + +pre-configure: + cp files/mueller2utf8 files/to-dict ${WRKSRC} + +do-build: + cd ${WRKSRC} && \ + sh to-dict --src-data Mueller7GPL.koi mueller7.data > /dev/null && \ + perl mueller2utf8 < mueller7.data > tmp_1 && \ + ${PREFIX}/bin/dictfmt --utf8 -p --columns 0 \ + -s 'Mueller English-Russian Dictionary' \ + -u 'http://www.chat.ru/~mueller_dic' \ + --headword-separator ', ' mueller7 < tmp_1 && \ + ${PREFIX}/bin/dictzip *.dict + +do-install: + ${INSTALL_DATA} ${WRKSRC}/*.dict.dz ${WRKSRC}/*.index \ + ${DESTDIR}${PREFIX}/share/dictd + +.include "../../mk/bsd.pkg.mk" diff --git a/textproc/dict-mueller7/PLIST b/textproc/dict-mueller7/PLIST new file mode 100644 index 00000000000..fe12897cb0d --- /dev/null +++ b/textproc/dict-mueller7/PLIST @@ -0,0 +1,3 @@ +@comment $NetBSD: PLIST,v 1.1.1.1 2011/05/08 18:19:54 cheusov Exp $ +share/dictd/mueller7.dict.dz +share/dictd/mueller7.index diff --git a/textproc/dict-mueller7/distinfo b/textproc/dict-mueller7/distinfo new file 
mode 100644
index 00000000000..a36c61806a0
--- /dev/null
+++ b/textproc/dict-mueller7/distinfo
@@ -0,0 +1,5 @@
+$NetBSD: distinfo,v 1.1.1.1 2011/05/08 18:19:54 cheusov Exp $
+
+SHA1 (Mueller7GPL.tgz) = 5d2566e4a312d21c2cf4231771f4e405d54affc1
+RMD160 (Mueller7GPL.tgz) = 1d1a674313d871482f326a898e3d1189e461baf2
+Size (Mueller7GPL.tgz) = 2386052 bytes
diff --git a/textproc/dict-mueller7/files/mueller2utf8 b/textproc/dict-mueller7/files/mueller2utf8
new file mode 100755
index 00000000000..eb27dc9d7c4
--- /dev/null
+++ b/textproc/dict-mueller7/files/mueller2utf8
@@ -0,0 +1,125 @@
+#!/usr/bin/perl
+# Written by Alexey Dyachenko (c) 2002
+# Licensed under GNU GPL
+#
+# Filter: reads lines from the files named on the command line (or STDIN)
+# and prints them UTF-8 encoded.  Ordinary text is decoded with the "koi8-r"
+# table; [bracketed] groups found by the main loop are decoded with the
+# custom 8-bit table $ipa_map built below.
+
+require Unicode::Map8;
+my $koi_map = Unicode::Map8->new("koi8-r") || die;
+
+my $ipa_map = Unicode::Map8->new();
+
+# Bytes without an explicit pair below get 0x20 (a space) as 16-bit value.
+$ipa_map->default_to16( 0x20 );
+
+# Lowercase ASCII letters a-z map to themselves.
+$ipa_map->addpair( $_, $_ ) for 0x61 .. 0x7a;
+
+# Both '-' (0x2d) and '.' (0x2e) map to '.'.
+$ipa_map->addpair( 0x2d, 0x2e );
+$ipa_map->addpair( 0x2e, 0x2e );
+
+# Bytes that map to non-ASCII code points (Unicode name on the right).
+$ipa_map->addpair( 0x54, 0x03b8 );    # 'T' -> GREEK SMALL LETTER THETA
+$ipa_map->addpair( 0x53, 0x0283 );    # 'S' -> LATIN SMALL LETTER ESH
+$ipa_map->addpair( 0x4e, 0x014b );    # 'N' -> LATIN SMALL LETTER ENG
+$ipa_map->addpair( 0xd7, 0x02a7 );    # LATIN SMALL LETTER TESH DIGRAPH
+$ipa_map->addpair( 0x44, 0x00F0 );    # 'D' -> LATIN SMALL LETTER ETH
+
+$ipa_map->addpair( 0x5a, 0x0292 );    # 'Z' -> LATIN SMALL LETTER EZH
+
+$ipa_map->addpair( 0x51, 0x00e6 );    # 'Q' -> LATIN SMALL LETTER AE
+$ipa_map->addpair( 0x49, 0x0131 );    # 'I' -> LATIN SMALL LETTER DOTLESS I
+$ipa_map->addpair( 0xc3, 0x028c );    # LATIN SMALL LETTER TURNED V
+$ipa_map->addpair( 0x55, 0x028a );    # 'U' -> LATIN SMALL LETTER UPSILON
+$ipa_map->addpair( 0x81, 0x0252 );    # LATIN SMALL LETTER TURNED ALPHA
+$ipa_map->addpair( 0x45, 0x025b );    # 'E' -> LATIN SMALL LETTER OPEN E
+$ipa_map->addpair( 0xab, 0x0259 );    # LATIN SMALL LETTER SCHWA
+
+$ipa_map->addpair( 0x41, 0x0251 );    # 'A' -> LATIN SMALL LETTER ALPHA
+$ipa_map->addpair( 0xce, 0x025c );    # LATIN SMALL LETTER REVERSED OPEN E
+$ipa_map->addpair( 0x8d, 0x0254 );    # LATIN SMALL LETTER OPEN O
+
+$ipa_map->addpair( 0xc7, 0x02cc );    # MODIFIER LETTER LOW VERTICAL LINE
+$ipa_map->addpair( 0xc8, 0x02c8 );    # MODIFIER LETTER VERTICAL LINE
+
+$ipa_map->addpair( 0xf9, 0x02d0 );    # MODIFIER LETTER TRIANGULAR COLON
+$ipa_map->addpair( 0x3e, 0x02d1 );    # '>' -> MODIFIER LETTER HALF TRIANGULAR COLON
+
+# dump_map( $ipa_map );
+#
+# exit;
+
+# Decode a "koi8-r" byte string and return the text UTF-8 encoded.
+sub to_utf {
+    return $koi_map->tou($_[0])->utf8;
+}
+
+# Decode a transcription byte string via $ipa_map; return it UTF-8 encoded.
+sub ipa_code {
+    return $ipa_map->tou($_[0])->utf8;
+}
+
+# Lines without a [transcription] are re-encoded whole.  Any other line is
+# split at its last space: the left part is ordinary text, the right part is
+# a sequence of ordinary text and [transcription] groups.
+#
+# NOTE(review): runs of whitespace look collapsed in this copy of the patch;
+# confirm the split pattern and the separator printed after the left part
+# against the upstream script.
+while( <> ) {
+    if( /\[\S+\]/ ) {
+        if( /(.*) (.*)/ ){
+            my $leftpart = $1;
+            my $rightpart = $2;
+            print to_utf($leftpart)." ";
+
+            # Text in front of the first "[" is ordinary text.  The test must
+            # be "ne": "!=" compared both operands as numbers, which dropped
+            # that text unless it began with a nonzero digit.
+            if( substr( $rightpart, 0, 1 ) ne "[" ) {
+                if( $rightpart =~ /([^[]+)/ ){
+                    print to_utf($1);
+                }
+            };
+
+            # [group] content goes through $ipa_map, the text after it is koi8-r.
+            while( $rightpart =~ /\[([^]]+)\]([^[]*)/g ){
+                print "[".ipa_code($1)."]".to_utf($2);
+            };
+
+            print "\n";
+        }else{
+            print to_utf($_);
+        };
+    }else{
+        print to_utf($_);
+    }
+};
+
+# Debugging aid, only referenced from the commented-out call above.  Prints
+# the 8-bit -> 16-bit pairs of map $m, then the 16-bit -> 8-bit pairs one
+# 256-entry block at a time.
+# NOTE(review): NOCHAR is never imported ("require", not "use") - confirm.
+sub dump_map
+{
+    my $m = shift;
+    for (my $i = 0; $i < 256; $i++) {
+        my $u = $m->to_char16($i);
+        next if $u == NOCHAR;
+        printf "0x%02X 0x%04X\n", $i, $u;
+    }
+    for (my $block = 0; $block < 256; $block++) {
+        next if $m->_empty_block($block);
+        print "# BLOCK $block\n";
+        for (my $i = 0; $i < 256; $i++) {
+            my $u = $block*256 + $i;
+            my $c = $m->to_char8($u);
+            next if $c == NOCHAR;
+            printf "0x%04X 0x%02X\n", $u, $c;
+        }
+    }
+}
diff --git a/textproc/dict-mueller7/files/to-dict b/textproc/dict-mueller7/files/to-dict
new file mode 100755
index 00000000000..b77d3dc7640
--- /dev/null
+++ b/textproc/dict-mueller7/files/to-dict
@@ -0,0 +1,194 @@
+#!/bin/sh
+# A shell script for conversion of MOVA MuellerXX.koi dictionaries
+# into DICT format.
+# Written by Andrew Comech +# GNU GPL (2000) +# The latest version is available from +# http://www.math.sunysb.edu/~comech/tools/to-dict + +version="0.1" +versiondate="November 11, 2000" + +# We need the following binaries: +DICTFMT=`which dictfmt` +DICTZIP=`which dictzip` + +INFO () { + echo " +to-dict, version $version ($versiondate). +Conversion of MOVA MuellerXX.koi dictionaries into DICT format. +Written by Andrew Comech . GNU GPL (2000) + +The latest version is available from +http://www.math.sunysb.edu/~comech/tools/to-dict +" +} + +REQUIREMENTS () { + echo " +REQUIREMENTS: you need the binaries \`dictfmt' and \`dictzip'. + +dictzip.c can be found in dictd-1.5.0.tar.gz (or later version) at +ftp://ftp.cs.unc.edu/pub/users/faith/dict/ + +dictfmt.c can be found in Debian/GNU Linux package dict-elements at +ftp://ftp.debian.org/debian/dists/potato/main/source/text/ + +Compiled binaries (dictfmt and dictzip) could be downloaded from +http://www.wh9.tu-dresden.de/~heinrich/dict/dict_leo_ftp/static-binaries/ +or +http://iris.ltas.ulg.ac.be/download/apps/dict/ +" +} + +USAGE () { + echo " +USAGE: + -version: show version + -h, --help, or no parameters: show this help + +(*) To make DICT database from Mueller7GPL.koi available from +http://www.chat.ru/~mueller_dic/Mueller7GPL.tgz + +# Remove transcription: +./to-dict --no-trans Mueller7GPL.koi mueller7.notr +# Convert into (a file with %h, %d-headers): +./to-dict --src-data mueller7.notr mueller7.data && rm -i mueller7.notr +# Convert into DICT-format (files .dict.dz and .index): +./to-dict --data-dict mueller7.data mueller7 && rm -i mueller7.data +# Expand index file (to be able to access lines like \"A, a\" by \"A\" and \"a\"): +./to-dict --expand-index mueller7.index mueller7.index.exp +# Install a new dictionary with expanded index (RUN AS ROOT). +# The location of files may depend on your distribution!!! 
+cp mueller7.dict.dz /usr/share/dictd/mueller7.dict.dz +cp mueller7.index.exp /usr/share/dictd/mueller7.index +dictdconfig -w && (killall dictd; dictd) + +(*) To make DICT database from Mueller24.koi available from +http://www.chat.ru/~mueller_dic/Mueller24.tgz (this one is preferred) + +# Convert into (a file with %h, %d): +./to-dict --src-data Mueller24.koi mueller24.data +# Convert into DICT-format (files .dict.dz and .index): +./to-dict --data-dict mueller24.data mueller24 && rm -i mueller24.data +# Install a new dictionary with expanded index (RUN AS ROOT). +# The location of files may depend on your distribution!!! +cp mueller24.dict.dz /usr/share/dictd/mueller24.dict.dz +cp mueller24.index /usr/share/dictd/mueller24.index +dictdconfig -w && (killall dictd; dictd) + +(*) To re-convert into (a file with %h, %d-headers): + +./to-dict --dict-data + + ************************************************************* + !!WARNING!! !!WARNING!! !!WARNING!! !!WARNING!! + + Temporary files created by this script occupy a lot of drive space! + 15 MB for Mueller7GPL.koi (have to strip off transcription first) + 12 MB for Mueller24.koi + ************************************************************* +" +} + +# To remove the transcription except for [r] and [ju:] which found in the text. +# This procedure should not change Mueller24.koi if applied to it. +NO_TRANS () { +sed 's/ËÁË\ \[juù\]/ËÁË\ "ju:"/; s/\[l\],/"l",/g; s/\[r\]/"r"/g; s/\], \[/A/g; s/\]\; _ÁÍ\. \[/A/g; s/\]\; _pl\. \[/A/g; s/\[[^]]*\]\ (ÐÏÌÎ.. ÆÏÒÍ.). \[[^]]*\] (ÒÅÄÕÃÉÒÏ×ÁÎÎ[^)]*)\ //g; s/\[[^]]*\]\ //g; s/\[[^]]*\],\ //; s/\ \[[^]]*\],/,/g; s/\ \[[^]]*\])/)/g; s/\ \[[^]]*\]:/:/g; s/\ \[[^]]*\];/;/g; s/\ \[[^]]*\]$//g; s/ËÁË\ "ju:"/ËÁË\ \[juù\]/g; s/"l"/\[l\]/g; s/"r"/\[r\]/g ' +} + +# Strip the copyright/info +STRIP () { +sed -n '/^_[aA]/,$p' +} + +# Format the file +MK_DATA () { +sed 's/$/\ +/g; s/[^]]*\ \ /%h&\ +%d/; s/_[IVX][IVX]* /\ + &/g; s/ [1-9]\. 
/\ + &/g; s/[1-9][0-9]*>/\ + &/g; s/[ÁÂ×ÇÄÅÖÚÉËÌÍÎÏÐÒÓÔÕÆÞÃÞÛÈÝßØÜÀÑ]>/(&>/g; s/>>/)/g; s/\ \_[AISE][a-z]*:/\ + &/g; s/>/:/g'\ +|sed ' s/%d$/%z/; s/%d/%d\ + / ; s/%z/%d/; s/%h/%h / ' \ +| gfmt -s -w 74;} + +######################################################################## + +if [ "$1" = "-version" ]; then + INFO + exit 0 +fi + +if [ "$#" = 0 -o "$1" = "-h" -o "$1" = "--help" -o "$1" = "-help" ]; then + USAGE + exit 0 +fi + +if [ "$#" != 3 ]; then + USAGE; exit 1; +fi + +## Will not go further if there are no dictfmt and dictzip binaries: +if [ "$DICTFMT" = "" -o "$DICTZIP" = "" ]; then + REQUIREMENTS + exit 1 +fi +## + +if [ ! -f "$2" ]; then + echo "No input file: $2"; USAGE; exit 1 +fi + +case $1 in + "--no-trans") + echo "Removing transcription ($2 -> $3).."; + cat $2 | NO_TRANS > $3 || exit 1 + echo "."; exit 0 + ;; + "--src-data") + echo "Writing the header of $3.." + echo -e "%h 00-database-info\n%d" > $3 + cat $2 | sed -n '1p' | sed 's/^/ /' | gfmt -s -w 74 >> $3; + cat $2 | sed -n '/^_/,/_ÑÐ. Japan ÑÐÏÎÓËÉÊ/p' | sed 's/^/ /' | gfmt -s -w 74 >> $3; + echo "" >> $3 + echo "Formatting data ($2 -> $3).." + cat $2 | sed -n '/^_[aA]/,$p' | MK_DATA >> $3 || exit 1 + echo "."; exit 0 + ;; + "--data-dict") + TITLE="Mueller English-Russian Dictionary" + echo "dictfmt: $2 -> $3.dict and $3.index.." 
+ dictfmt -p -u "http://www.chat.ru/~mueller_dic" \ + -s "$TITLE" $3 < $2 || exit 1 + echo "Compressing $3.dict.."; dictzip $3.dict || exit 1 +# echo -n "Restarting daemons"; killall dictd; dictd + echo "."; exit 0 + ;; + "--expand-index") +# So that the line +# ``whisky, whiskey a sort of spirit I like'' +# could be found not only by /usr/bin/dict "whisky, whiskey", but also by +# /usr/bin/dict "whisky" and /usr/bin/dict "whiskey" + cat $2 | sed 's/^[^,]*, [^,]*/%TAG1&\ +%REM2&\ +%TAG3&/; s/^%TAG1[^,]*, /&%REM1/; s/, %REM1[^'$'\t'']*//; s/%REM2[^,]*, //; s/%TAG[13]//g' > $3 || exit 1 + exit 0 + ;; + "--dict-data") + if [ "` file $2 | grep gzip`" != "0" ]; then + CAT=zcat; + else + CAT=cat; + fi + $CAT $2 | sed 's/^[^\ ].*/%h &\ +%d/; s/^[\ ][\ ]*/ /' >$3 || exit 1 + echo "."; exit 0 + ;; + *) INFO; USAGE; exit 1 +esac + +echo "You are not supposed to be here." +exit 1 -- cgit v1.2.3