diff options
author | Alyssa Ross <hi@alyssa.is> | 2019-01-07 02:18:36 +0000 |
---|---|---|
committer | Alyssa Ross <hi@alyssa.is> | 2019-01-07 02:18:47 +0000 |
commit | 36f56d99fa0a0765c9f1de4a5f17a9b05830c3f2 (patch) | |
tree | b3faaf573407b32aa645237a4d16b82778a39a92 /nixpkgs/pkgs/servers/dict | |
parent | 4e31070265257dc67d120c27e0f75c2344fdfa9a (diff) | |
parent | abf060725d7614bd3b9f96764262dfbc2f9c2199 (diff) | |
download | nixlib-36f56d99fa0a0765c9f1de4a5f17a9b05830c3f2.tar nixlib-36f56d99fa0a0765c9f1de4a5f17a9b05830c3f2.tar.gz nixlib-36f56d99fa0a0765c9f1de4a5f17a9b05830c3f2.tar.bz2 nixlib-36f56d99fa0a0765c9f1de4a5f17a9b05830c3f2.tar.lz nixlib-36f56d99fa0a0765c9f1de4a5f17a9b05830c3f2.tar.xz nixlib-36f56d99fa0a0765c9f1de4a5f17a9b05830c3f2.tar.zst nixlib-36f56d99fa0a0765c9f1de4a5f17a9b05830c3f2.zip |
Add 'nixpkgs/' from commit 'abf060725d7614bd3b9f96764262dfbc2f9c2199'
git-subtree-dir: nixpkgs git-subtree-mainline: 4e31070265257dc67d120c27e0f75c2344fdfa9a git-subtree-split: abf060725d7614bd3b9f96764262dfbc2f9c2199
Diffstat (limited to 'nixpkgs/pkgs/servers/dict')
-rw-r--r-- | nixpkgs/pkgs/servers/dict/buildfix.diff | 11 | ||||
-rw-r--r-- | nixpkgs/pkgs/servers/dict/default.nix | 35 | ||||
-rw-r--r-- | nixpkgs/pkgs/servers/dict/dictd-db-collector.nix | 80 | ||||
-rw-r--r-- | nixpkgs/pkgs/servers/dict/dictd-db.nix | 95 | ||||
-rw-r--r-- | nixpkgs/pkgs/servers/dict/dictd-wiktionary.nix | 31 | ||||
-rw-r--r-- | nixpkgs/pkgs/servers/dict/dictd-wordnet.nix | 36 | ||||
-rw-r--r-- | nixpkgs/pkgs/servers/dict/libmaa.nix | 20 | ||||
-rw-r--r-- | nixpkgs/pkgs/servers/dict/wiktionary2dict.py | 778 | ||||
-rw-r--r-- | nixpkgs/pkgs/servers/dict/wordnet_structures.py | 319 |
9 files changed, 1405 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/servers/dict/buildfix.diff b/nixpkgs/pkgs/servers/dict/buildfix.diff new file mode 100644 index 000000000000..e30fcb2b0380 --- /dev/null +++ b/nixpkgs/pkgs/servers/dict/buildfix.diff @@ -0,0 +1,11 @@ +--- Makefile.in~ 2011-03-06 18:52:54.000000000 +0100 ++++ Makefile.in 2014-01-29 19:04:51.384844897 +0100 +@@ -123,7 +123,7 @@ + + %: %.o + $(LIBTOOL) --tag=CC --mode=link $(CC) -o $@ -static \ +- $^ $(OBJS) $(LDFLAGS) -lz ${LIBS} ++ $(^:.o=.lo) $(OBJS) $(LDFLAGS) -lz ${LIBS} + + include $(srcdir)/deps + diff --git a/nixpkgs/pkgs/servers/dict/default.nix b/nixpkgs/pkgs/servers/dict/default.nix new file mode 100644 index 000000000000..bf9fd77df7c9 --- /dev/null +++ b/nixpkgs/pkgs/servers/dict/default.nix @@ -0,0 +1,35 @@ +{ stdenv, fetchurl, which, bison, flex, libmaa, zlib, libtool }: + +stdenv.mkDerivation rec { + name = "dictd-${version}"; + version = "1.12.1"; + + src = fetchurl { + url = "mirror://sourceforge/dict/dictd-${version}.tar.gz"; + sha256 = "0min6v60b6z5mrymyjfwzx8nv6rdm8pd8phlwl6v2jl5vkngcdx2"; + }; + + buildInputs = [ libmaa zlib ]; + + nativeBuildInputs = [ bison flex libtool which ]; + + # Makefile(.in) contains "clientparse.c clientparse.h: clientparse.y" which + # causes bison to run twice, and break the build when this happens in + # parallel. Test with "make -j clientparse.c clientparse.h". The error + # message may be "mv: cannot move 'y.tab.c' to 'clientparse.c'". 
+ enableParallelBuilding = false; + + patchPhase = "patch -p0 < ${./buildfix.diff}"; + configureFlags = [ + "--enable-dictorg" + "--datadir=/run/current-systems/sw/share/dictd" + ]; + + meta = with stdenv.lib; { + description = "Dict protocol server and client"; + homepage = http://www.dict.org; + license = licenses.gpl2; + maintainers = with maintainers; [ ]; + platforms = platforms.linux; + }; +} diff --git a/nixpkgs/pkgs/servers/dict/dictd-db-collector.nix b/nixpkgs/pkgs/servers/dict/dictd-db-collector.nix new file mode 100644 index 000000000000..f635c98602d9 --- /dev/null +++ b/nixpkgs/pkgs/servers/dict/dictd-db-collector.nix @@ -0,0 +1,80 @@ +{stdenv, lib, dict}: +({dictlist, allowList ? ["127.0.0.1"], denyList ? []}: +/* + dictlist is a list of form + [ { filename = /path/to/files/basename; + name = "name"; } ] + basename.dict.dz and basename.index should be + dict files. Or look below for other options. + allowList is a list of IP/domain *-wildcarded strings + denyList is the same.. 
+*/ + +let + link_arguments = map + (x: '' "${x.filename}" '') + dictlist; + databases = lib.concatStrings (map (x : + "${x.name} ${x.filename}\n") dictlist); + allow = lib.concatStrings (map (x: "allow ${x}\n") allowList); + deny = lib.concatStrings (map (x: "deny ${x}\n") denyList); + accessSection = " + access { + ${allow} + ${deny} + } + "; + installPhase = '' + mkdir -p $out/share/dictd + cd $out/share/dictd + echo "${databases}" >databases.names + echo "${accessSection}" > dictd.conf + for j in ${toString link_arguments}; do + name="$(egrep ' '"$j"\$ databases.names)" + name=''${name% $j} + if test -d "$j"; then + if test -d "$j"/share/dictd ; then + echo "Got store path $j" + j="$j"/share/dictd + fi + echo "Directory reference: $j" + i=$(ls "$j""/"*.index) + i="''${i%.index}"; + else + i="$j"; + fi + echo "Basename is $i" + locale=$(cat "$(dirname "$i")"/locale) + base="$(basename "$i")" + echo "Locale is $locale" + export LC_ALL=$locale + export LANG=$locale + if test -e "$i".dict.dz; then + ln -s "$i".dict.dz + else + cp "$i".dict . + dictzip "$base".dict + fi + ln -s "$i".index . 
+ dictfmt_index2word --locale $locale < "$base".index > "$base".word || true + dictfmt_index2suffix --locale $locale < "$base".index > "$base".suffix || true + + echo "database $name {" >> dictd.conf + echo " data $out/share/dictd/$base.dict.dz" >> dictd.conf + echo " index $out/share/dictd/$base.index" >> dictd.conf + echo " index_word $out/share/dictd/$base.word" >> dictd.conf + echo " index_suffix $out/share/dictd/$base.suffix" >> dictd.conf + echo "}" >> dictd.conf + done + ''; + +in + +stdenv.mkDerivation { + name = "dictd-dbs"; + + phases = ["installPhase"]; + buildInputs = [dict]; + + inherit installPhase; +}) diff --git a/nixpkgs/pkgs/servers/dict/dictd-db.nix b/nixpkgs/pkgs/servers/dict/dictd-db.nix new file mode 100644 index 000000000000..065218a5b95d --- /dev/null +++ b/nixpkgs/pkgs/servers/dict/dictd-db.nix @@ -0,0 +1,95 @@ +{ stdenv, fetchurl, callPackage }: + +let + # Probably a bug in some FreeDict release files, but easier to trivially + # work around than report. Not that it can cause any other problems.. 
+ makeDictdDBFreedict = src: name: locale: + makeDictdDB src name "{.,bin}" locale; + + makeDictdDB = src: _name: _subdir: _locale: + stdenv.mkDerivation rec { + name = "dictd-db-${_name}"; + inherit src; + locale = _locale; + dbName = _name; + buildPhase = ":"; + unpackPhase = '' + tar xf ${src} + ''; + installPhase = '' + mkdir -p $out/share/dictd + cp $(ls ./${_subdir}/*.{dict*,index} || true) $out/share/dictd + echo "${_locale}" >$out/share/dictd/locale + ''; + + meta = { + description = "dictd-db dictionary for dictd"; + platforms = stdenv.lib.platforms.linux; + }; + }; +in rec { + deu2eng = makeDictdDBFreedict (fetchurl { + url = mirror://sourceforge/freedict/deu-eng.tar.gz; + sha256 = "0dqrhv04g4f5s84nbgisgcfwk5x0rpincif0yfhfh4sc1bsvzsrb"; + }) "deu-eng" "de_DE"; + eng2deu = makeDictdDBFreedict (fetchurl { + url = mirror://sourceforge/freedict/eng-deu.tar.gz; + sha256 = "01x12p72sa3071iff3jhzga8588440f07zr56r3x98bspvdlz73r"; + }) "eng-deu" "en_EN"; + nld2eng = makeDictdDBFreedict (fetchurl { + url = mirror://sourceforge/freedict/nld-eng.tar.gz; + sha256 = "1vhw81pphb64fzsjvpzsnnyr34ka2fxizfwilnxyjcmpn9360h07"; + }) "nld-eng" "nl_NL"; + eng2nld = makeDictdDBFreedict (fetchurl { + url = mirror://sourceforge/freedict/eng-nld.tar.gz; + sha256 = "0rcg28ldykv0w2mpxc6g4rqmfs33q7pbvf68ssy1q9gpf6mz7vcl"; + }) "eng-nld" "en_UK"; + eng2rus = makeDictdDBFreedict (fetchurl { + url = mirror://sourceforge/freedict/eng-rus.tar.gz; + sha256 = "15409ivhww1wsfjr05083pv6mg10bak8v5pg1wkiqybk7ck61rry"; + }) "eng-rus" "en_UK"; + fra2eng = makeDictdDBFreedict (fetchurl { + url = mirror://sourceforge/freedict/fra-eng.tar.gz; + sha256 = "0sdd88s2zs5whiwdf3hd0s4pzzv75sdsccsrm1wxc87l3hjm85z3"; + }) "fra-eng" "fr_FR"; + eng2fra = makeDictdDBFreedict (fetchurl { + url = mirror://sourceforge/freedict/eng-fra.tar.gz; + sha256 = "0fi6rrnbqnhc6lq8d0nmn30zdqkibrah0mxfg27hsn9z7alwbj3m"; + }) "eng-fra" "en_UK"; + mueller_eng2rus_pkg = makeDictdDB (fetchurl { + url = 
mirror://sourceforge/mueller-dict/mueller-dict-3.1.tar.gz; + sha256 = "04r5xxznvmcb8hkxqbjgfh2gxvbdd87jnhqn5gmgvxxw53zpwfmq"; + }) "mueller-eng-rus" "mueller-dict-*/dict" "en_UK"; + mueller_enru_abbr = { + outPath = "${mueller_eng2rus_pkg}/share/dictd/mueller-abbrev"; + name = "mueller-abbr"; + dbName = "mueller-abbr"; + locale = "en_UK"; + }; + mueller_enru_base = { + outPath = "${mueller_eng2rus_pkg}/share/dictd/mueller-base"; + name = "mueller-base"; + dbName = "mueller-base"; + locale = "en_UK"; + }; + mueller_enru_dict = { + outPath = "${mueller_eng2rus_pkg}/share/dictd/mueller-dict"; + name = "mueller-dict"; + dbName = "mueller-dict"; + locale = "en_UK"; + }; + mueller_enru_geo = { + outPath = "${mueller_eng2rus_pkg}/share/dictd/mueller-geo"; + name = "mueller-geo"; + dbName = "mueller-geo"; + locale = "en_UK"; + }; + mueller_enru_names = { + outPath = "${mueller_eng2rus_pkg}/share/dictd/mueller-names"; + name = "mueller-names"; + dbName = "mueller-names"; + locale = "en_UK"; + }; + wordnet = callPackage ./dictd-wordnet.nix {}; + wiktionary = callPackage ./dictd-wiktionary.nix {}; +} diff --git a/nixpkgs/pkgs/servers/dict/dictd-wiktionary.nix b/nixpkgs/pkgs/servers/dict/dictd-wiktionary.nix new file mode 100644 index 000000000000..13e4757fe89e --- /dev/null +++ b/nixpkgs/pkgs/servers/dict/dictd-wiktionary.nix @@ -0,0 +1,31 @@ +{stdenv, fetchurl, python, dict, glibcLocales, writeScript}: + +stdenv.mkDerivation rec { + version = "20161001"; + name = "dict-db-wiktionary-${version}"; + data = fetchurl { + url = "http://dumps.wikimedia.org/enwiktionary/${version}/enwiktionary-${version}-pages-articles.xml.bz2"; + sha256 = "0g3k7kxp2nzg0v56i4cz253af3aqvhn1lwkys2fnam51cn3yqm7m"; + }; + + convert = ./wiktionary2dict.py; + buildInputs = [python dict glibcLocales]; + + builder = writeScript "wiktionary-builder.sh" '' + source $stdenv/setup + + mkdir -p $out/share/dictd/ + cd $out/share/dictd + + python -O ${convert} ${data} + dictzip wiktionary-en.dict + echo 
en_US.UTF-8 > locale + ''; + + meta = { + description = "DICT version of English Wiktionary"; + homepage = http://en.wiktionary.org/; + maintainers = [ ]; + platforms = stdenv.lib.platforms.all; + }; +} diff --git a/nixpkgs/pkgs/servers/dict/dictd-wordnet.nix b/nixpkgs/pkgs/servers/dict/dictd-wordnet.nix new file mode 100644 index 000000000000..8a1bb6313ad2 --- /dev/null +++ b/nixpkgs/pkgs/servers/dict/dictd-wordnet.nix @@ -0,0 +1,36 @@ +{stdenv, python, wordnet, writeScript}: + +stdenv.mkDerivation rec { + version = "542"; + name = "dict-db-wordnet-${version}"; + + buildInputs = [python wordnet]; + convert = ./wordnet_structures.py; + + builder = writeScript "builder.sh" '' + . ${stdenv}/setup + mkdir -p $out/share/dictd/ + cd $out/share/dictd + + for i in ${wordnet}/dict/data.*; do + DATA="$DATA `echo $i | sed -e s,data,index,` $i"; + done + + python ${convert} $DATA + echo en_US.UTF-8 > locale + ''; + + meta = { + description = "dictd-compatible version of WordNet"; + + longDescription = + '' WordNet® is a large lexical database of English. This package makes + the wordnet data available to dictd and by extension for lookup with + the dict command. 
''; + + homepage = https://wordnet.princeton.edu/; + + maintainers = [ ]; + platforms = stdenv.lib.platforms.all; + }; +} diff --git a/nixpkgs/pkgs/servers/dict/libmaa.nix b/nixpkgs/pkgs/servers/dict/libmaa.nix new file mode 100644 index 000000000000..3c04a678def5 --- /dev/null +++ b/nixpkgs/pkgs/servers/dict/libmaa.nix @@ -0,0 +1,20 @@ +{ stdenv, fetchurl, libtool }: + +stdenv.mkDerivation rec { + version = "1.3.2"; + name = "libmaa-${version}"; + + src = fetchurl { + url = "mirror://sourceforge/dict/libmaa-${version}.tar.gz"; + sha256 = "1idi4c30pi79g5qfl7rr9s17krbjbg93bi8f2qrbsdlh78ga19ar"; + }; + + buildInputs = [ libtool ]; + # configureFlags = [ "--datadir=/var/run/current-system/share/dictd" ]; + + meta = with stdenv.lib; { + description = "Dict protocol server and client"; + maintainers = [ ]; + platforms = platforms.linux; + }; +} diff --git a/nixpkgs/pkgs/servers/dict/wiktionary2dict.py b/nixpkgs/pkgs/servers/dict/wiktionary2dict.py new file mode 100644 index 000000000000..8a0210e7254f --- /dev/null +++ b/nixpkgs/pkgs/servers/dict/wiktionary2dict.py @@ -0,0 +1,778 @@ +# Adapted to produce DICT-compatible files by Petr Rockai in 2012 +# Based on code from wiktiondict by Greg Hewgill +import re +import sys +import codecs +import os +import textwrap +import time +import xml.sax + +class Text: + def __init__(self, s): + self.s = s + def process(self): + return s + +class TemplateCall: + def __init__(self): + pass + def process(self): + pass + +class Template: + def __init__(self): + self.parts = [] + def append(self, part): + self.parts.append(part) + def process(self): + return ''.join(x.process() for x in self.parts) + +class Whitespace: + def __init__(self, s): + self.s = s + +class OpenDouble: pass +class OpenTriple: pass +class CloseDouble: pass +class CloseTriple: pass + +class Equals: + def __str__(self): + return "=" + +class Delimiter: + def __init__(self, c): + self.c = c + def __str__(self): + return self.c + +def Tokenise(s): + s = unicode(s) + 
stack = [] + last = 0 + i = 0 + while i < len(s): + if s[i] == '{' and i+1 < len(s) and s[i+1] == '{': + if i > last: + yield s[last:i] + if i+2 < len(s) and s[i+2] == '{': + yield OpenTriple() + stack.append(3) + i += 3 + else: + yield OpenDouble() + stack.append(2) + i += 2 + last = i + elif s[i] == '}' and i+1 < len(s) and s[i+1] == '}': + if i > last: + yield s[last:i] + if len(stack) == 0: + yield "}}" + i += 2 + elif stack[-1] == 2: + yield CloseDouble() + i += 2 + stack.pop() + elif i+2 < len(s) and s[i+2] == '}': + yield CloseTriple() + i += 3 + stack.pop() + else: + raise SyntaxError() + last = i + elif s[i] == ':' or s[i] == '|': + if i > last: + yield s[last:i] + yield Delimiter(s[i]) + i += 1 + last = i + elif s[i] == '=': + if i > last: + yield s[last:i] + yield Equals() + i += 1 + last = i + #elif s[i] == ' ' or s[i] == '\t' or s[i] == '\n': + # if i > last: + # yield s[last:i] + # last = i + # m = re.match(r"\s+", s[i:]) + # assert m + # yield Whitespace(m.group(0)) + # i += len(m.group(0)) + # last = i + else: + i += 1 + if i > last: + yield s[last:i] + +def processSub(templates, tokens, args): + t = tokens.next() + if not isinstance(t, unicode): + raise SyntaxError + name = t + t = tokens.next() + default = None + if isinstance(t, Delimiter) and t.c == '|': + default = "" + while True: + t = tokens.next() + if isinstance(t, unicode): + default += t + elif isinstance(t, OpenDouble): + default += processTemplateCall(templates, tokens, args) + elif isinstance(t, OpenTriple): + default += processSub(templates, tokens, args) + elif isinstance(t, CloseTriple): + break + else: + print "Unexpected:", t + raise SyntaxError() + if name in args: + return args[name] + if default is not None: + return default + if name == "lang": + return "en" + return "{{{%s}}}" % name + +def processTemplateCall(templates, tokens, args): + template = tokens.next().strip().lower() + args = {} + a = 1 + t = tokens.next() + while True: + if isinstance(t, Delimiter): + name = 
unicode(a) + arg = "" + while True: + t = tokens.next() + if isinstance(t, unicode): + arg += t + elif isinstance(t, OpenDouble): + arg += processTemplateCall(templates, tokens, args) + elif isinstance(t, OpenTriple): + arg += processSub(templates, tokens, args) + elif isinstance(t, Delimiter) and t.c != '|': + arg += str(t) + else: + break + if isinstance(t, Equals): + name = arg.strip() + arg = "" + while True: + t = tokens.next() + if isinstance(t, (unicode, Equals)): + arg += unicode(t) + elif isinstance(t, OpenDouble): + arg += processTemplateCall(templates, tokens, args) + elif isinstance(t, OpenTriple): + arg += processSub(templates, tokens, args) + elif isinstance(t, Delimiter) and t.c != '|': + arg += str(t) + else: + break + arg = arg.strip() + else: + a += 1 + args[name] = arg + elif isinstance(t, CloseDouble): + break + else: + print "Unexpected:", t + raise SyntaxError + #print template, args + if template[0] == '#': + if template == "#if": + if args['1'].strip(): + return args['2'] + elif '3' in args: + return args['3'] + else: + return "" + elif template == "#ifeq": + if args['1'].strip() == args['2'].strip(): + return args['3'] + elif '4' in args: + return args['4'] + else: + return "" + elif template == "#ifexist": + return "" + elif template == "#switch": + sw = args['1'].strip() + if sw in args: + return args[sw] + else: + return "" + else: + print "Unknown ParserFunction:", template + sys.exit(1) + if template not in templates: + return "{{%s}}" % template + return process(templates, templates[template], args) + +def process(templates, s, args = {}): + s = re.compile(r"<!--.*?-->", re.DOTALL).sub("", s) + s = re.compile(r"<noinclude>.*?</noinclude>", re.DOTALL).sub("", s) + assert "<onlyinclude>" not in s + #s = re.sub(r"(.*?)<onlyinclude>(.*?)</onlyinclude>(.*)", r"\1", s) + s = re.compile(r"<includeonly>(.*?)</includeonly>", re.DOTALL).sub(r"\1", s) + r = "" + #print list(Tokenise(s)) + tokens = Tokenise(s) + try: + while True: + t = 
tokens.next() + if isinstance(t, OpenDouble): + r += processTemplateCall(templates, tokens, args) + elif isinstance(t, OpenTriple): + r += processSub(templates, tokens, args) + else: + r += unicode(t) + except StopIteration: + pass + return r + +def test(): + templates = { + 'lb': "{{", + 'name-example': "I am a template example, my first name is '''{{{firstName}}}''' and my last name is '''{{{lastName}}}'''. You can reference my page at [[{{{lastName}}}, {{{firstName}}}]].", + 't': "start-{{{1|pqr}}}-end", + 't0': "start-{{{1}}}-end", + 't1': "start{{{1}}}end<noinclude>moo</noinclude>", + 't2a1': "{{t2demo|a|{{{1}}}}}", + 't2a2': "{{t2demo|a|2={{{1}}}}}", + 't2demo': "start-{{{1}}}-middle-{{{2}}}-end", + 't5': "{{t2demo|{{{a}}}=b}}", + 't6': "t2demo|a", + } + def t(text, expected): + print "text:", text + s = process(templates, text) + if s != expected: + print "got:", s + print "expected:", expected + sys.exit(1) + t("{{Name-example}}", "I am a template example, my first name is '''{{{firstName}}}''' and my last name is '''{{{lastName}}}'''. You can reference my page at [[{{{lastName}}}, {{{firstName}}}]].") + t("{{Name-example | firstName=John | lastName=Smith }}", "I am a template example, my first name is '''John''' and my last name is '''Smith'''. 
You can reference my page at [[Smith, John]].") + t("{{t0|a}}", "start-a-end") + t("{{t0| }}", "start- -end") + t("{{t0|}}", "start--end") + t("{{t0}}", "start-{{{1}}}-end") + t("{{t0| }}", "start- -end") + t("{{t0|\n}}", "start-\n-end") + t("{{t0|1= }}", "start--end") + t("{{t0|1=\n}}", "start--end") + t("{{T}}", "start-pqr-end") + t("{{T|}}", "start--end") + t("{{T|abc}}", "start-abc-end") + t("{{T|abc|def}}", "start-abc-end") + t("{{T|1=abc|1=def}}", "start-def-end") + t("{{T|abc|1=def}}", "start-def-end") + t("{{T|1=abc|def}}", "start-def-end") + t("{{T|{{T}}}}", "start-start-pqr-end-end") + t("{{T|{{T|{{T}}}}}}", "start-start-start-pqr-end-end-end") + t("{{T|{{T|{{T|{{T}}}}}}}}", "start-start-start-start-pqr-end-end-end-end") + t("{{T|a{{t|b}}}}", "start-astart-b-end-end") + t("{{T|{{T|a=b}}}}", "start-start-pqr-end-end") + t("{{T|a=b}}", "start-pqr-end") + t("{{T|1=a=b}}", "start-a=b-end") + #t("{{t1|{{lb}}tc}}}}", "start{{tcend}}") + #t("{{t2a1|1=x=y}}", "start-a-middle-{{{2}}}-end") + #t("{{t2a2|1=x=y}}", "start-a-middle-x=y-end") + #t("{{t5|a=2=d}}", "start-{{{1}}}-middle-d=b-end") + #t("{{ {{t6}} }}", "{{ t2demo|a }}") + t("{{t|[[a|b]]}}", "start-b-end") + t("{{t|[[a|b]] }}", "start-b -end") + +Parts = { + # Standard POS headers + 'noun': "n.", + 'Noun': "n.", + 'Noun 1': "n.", + 'Noun 2': "n.", + 'Verb': "v.", + 'Adjective': "adj.", + 'Adverb': "adv.", + 'Pronoun': "pron.", + 'Conjunction': "conj.", + 'Interjection': "interj.", + 'Preposition': "prep.", + 'Proper noun': "n.p.", + 'Proper Noun': "n.p.", + 'Article': "art.", + + # Standard non-POS level 3 headers + '{{acronym}}': "acr.", + 'Acronym': "acr.", + '{{abbreviation}}': "abbr.", + '[[Abbreviation]]': "abbr.", + 'Abbreviation': "abbr.", + '[[initialism]]': "init.", + '{{initialism}}': "init.", + 'Initialism': "init.", + 'Contraction': "cont.", + 'Prefix': "prefix", + 'Suffix': "suffix", + 'Symbol': "sym.", + 'Letter': "letter", + 'Idiom': "idiom", + 'Idioms': "idiom", + 'Phrase': "phrase", + + # 
Debated POS level 3 headers + 'Number': "num.", + 'Numeral': "num.", + 'Cardinal number': "num.", + 'Ordinal number': "num.", + 'Cardinal numeral': "num.", + 'Ordinal numeral': "num.", + + # Other headers in use + 'Personal pronoun': "pers.pron.", + 'Adjective/Adverb': "adj./adv.", + 'Proper adjective': "prop.adj.", + 'Determiner': "det.", + 'Demonstrative determiner': "dem.det.", + 'Clitic': "clitic", + 'Infix': "infix", + 'Counter': "counter", + 'Kanji': None, + 'Kanji reading': None, + 'Hiragana letter': None, + 'Katakana letter': None, + 'Pinyin': None, + 'Han character': None, + 'Hanzi': None, + 'Hanja': None, + 'Proverb': "prov.", + 'Expression': None, + 'Adjectival noun': None, + 'Quasi-adjective': None, + 'Particle': "part.", + 'Infinitive particle': "part.", + 'Possessive adjective': "poss.adj.", + 'Verbal prefix': "v.p.", + 'Postposition': "post.", + 'Prepositional article': "prep.art.", + 'Phrasal verb': "phr.v.", + 'Participle': "participle", + 'Interrogative auxiliary verb': "int.aux.v.", + 'Pronominal adverb': "pron.adv.", + 'Adnominal': "adn.", + 'Abstract pronoun': "abs.pron.", + 'Conjunction particle': None, + 'Root': "root", + + # Non-standard, deprecated headers + 'Noun form': "n.", + 'Verb form': "v.", + 'Adjective form': "adj.form.", + 'Nominal phrase': "nom.phr.", + 'Noun phrase': "n. phrase", + 'Verb phrase': "v. 
phrase", + 'Transitive verb': "v.t.", + 'Intransitive verb': "v.i.", + 'Reflexive verb': "v.r.", + 'Cmavo': None, + 'Romaji': "rom.", + 'Hiragana': None, + 'Furigana': None, + 'Compounds': None, + + # Other headers seen + 'Alternative forms': None, + 'Alternative spellings': None, + 'Anagrams': None, + 'Antonym': None, + 'Antonyms': None, + 'Conjugation': None, + 'Declension': None, + 'Declension and pronunciations': None, + 'Definite Article': "def.art.", + 'Definite article': "def.art.", + 'Demonstrative pronoun': "dem.pron.", + 'Derivation': None, + 'Derived expression': None, + 'Derived expressions': None, + 'Derived forms': None, + 'Derived phrases': None, + 'Derived terms': None, + 'Derived, Related terms': None, + 'Descendants': None, + #'Etymology': None, + #'Etymology 1': None, + #'Etymology 2': None, + #'Etymology 3': None, + #'Etymology 4': None, + #'Etymology 5': None, + 'Examples': None, + 'External links': None, + '[[Gismu]]': None, + 'Gismu': None, + 'Homonyms': None, + 'Homophones': None, + 'Hyphenation': None, + 'Indefinite article': "art.", + 'Indefinite pronoun': "ind.pron.", + 'Indefinite Pronoun': "ind.pron.", + 'Indetermined pronoun': "ind.pron.", + 'Interrogative conjunction': "int.conj.", + 'Interrogative determiner': "int.det.", + 'Interrogative particle': "int.part.", + 'Interrogative pronoun': "int.pron.", + 'Legal expression': "legal", + 'Mass noun': "n.", + 'Miscellaneous': None, + 'Mutations': None, + 'Noun and verb': "n/v.", + 'Other language': None, + 'Pinyin syllable': None, + 'Possessive determiner': "poss.det.", + 'Possessive pronoun': "poss.pron.", + 'Prepositional phrase': "prep.phr.", + 'Prepositional Pronoun': "prep.pron.", + 'Pronunciation': None, + 'Pronunciation 1': None, + 'Pronunciation 2': None, + 'Quotations': None, + 'References': None, + 'Reflexive pronoun': "refl.pron.", + 'Related expressions': None, + 'Related terms': None, + 'Related words': None, + 'Relative pronoun': "rel.pron.", + 'Saying': "saying", + 'See 
also': None, + 'Shorthand': None, + '[http://en.wikipedia.org/wiki/Shorthand Shorthand]': None, + 'Sister projects': None, + 'Spelling note': None, + 'Synonyms': None, + 'Translation': None, + 'Translations': None, + 'Translations to be checked': None, + 'Transliteration': None, + 'Trivia': None, + 'Usage': None, + 'Usage in English': None, + 'Usage notes': None, + 'Verbal noun': "v.n.", +} +PartsUsed = {} +for p in Parts.keys(): + PartsUsed[p] = 0 + +def encode(s): + r = e(s) + assert r[1] == len(s) + return r[0] + +def dowikilink(m): + a = m.group(1).split("|") + if len(a) > 1: + link = a[1] + else: + link = a[0] + if ':' in link: + link = "" + return link + +seentemplates = {} +def dotemplate(m): + aa = m.group(1).split("|") + args = {} + n = 0 + for a in aa: + am = re.match(r"(.*?)(=(.*))?", a) + if am: + args[am.group(1)] = am.group(3) + else: + n += 1 + args[n] = am.group(1) + + #if aa[0] in seentemplates: + # seentemplates[aa[0]] += 1 + #else: + # seentemplates[aa[0]] = 1 + # print len(seentemplates), aa[0] + #print aa[0] + + #if aa[0] not in Templates: + # return "(unknown template %s)" % aa[0] + #body = Templates[aa[0]] + #body = re.sub(r"<noinclude>.*?</noinclude>", "", body) + #assert "<onlyinclude>" not in body + ##body = re.sub(r"(.*?)<onlyinclude>(.*?)</onlyinclude>(.*)", r"\1", body) + #body = re.sub(r"<includeonly>(.*?)</includeonly>", r"\1", body) + #def dotemplatearg(m): + # ta = m.group(1).split("|") + # if ta[0] in args: + # return args[ta[0]] + # elif len(ta) > 1: + # return ta[1] + # else: + # return "{{{%s}}}" % ta[0] + #body = re.sub(r"{{{(.*?)}}}", dotemplatearg, body) + #return dewiki(body) + +def doparserfunction(m): + a = m.group(2).split("|") + if m.group(1) == "ifeq": + if a[0] == a[1]: + return a[2] + elif len(a) >= 4: + return a[3] + return "" + +def dewiki(body, indent = 0): + # process in this order: + # {{{ }}} + # <> <> + # [[ ]] + # {{ }} + # ''' ''' + # '' '' + #body = wikimediatemplate.process(Templates, body) + body = 
re.sub(r"\[\[(.*?)\]\]", dowikilink, body) + #body = re.sub(r"{{(.*?)}}", dotemplate, body) + #body = re.sub(r"{{#(.*?):(.*?)}}", doparserfunction, body) + body = re.sub(r"'''(.*?)'''", r"\1", body) + body = re.sub(r"''(.*?)''", r"\1", body) + lines = body.split("\n") + n = 0 + i = 0 + while i < len(lines): + if len(lines[i]) > 0 and lines[i][0] == "#": + if len(lines[i]) > 1 and lines[i][1] == '*': + wlines = textwrap.wrap(lines[i][2:].strip(), + initial_indent = " * ", + subsequent_indent = " ") + elif len(lines[i]) > 1 and lines[i][1] == ':': + wlines = textwrap.wrap(lines[i][2:].strip(), + initial_indent = " ", + subsequent_indent = " ") + else: + n += 1 + wlines = textwrap.wrap(str(n) + ". " + lines[i][1:].strip(), + subsequent_indent = " ") + elif len(lines[i]) > 0 and lines[i][0] == "*": + n = 0 + wlines = textwrap.wrap(lines[i][1:].strip(), + initial_indent = "* ", + subsequent_indent = " ") + else: + n = 0 + wlines = textwrap.wrap(lines[i].strip()) + if len(wlines) == 0: + wlines = [''] + lines[i:i+1] = wlines + i += len(wlines) + return ''.join(" "*(indent-1)+x+"\n" for x in lines) + +class WikiSection: + def __init__(self, heading, body): + self.heading = heading + self.body = body + #self.lines = re.split("\n+", body.strip()) + #if len(self.lines) == 1 and len(self.lines[0]) == 0: + # self.lines = [] + self.children = [] + def __str__(self): + return "<%s:%i:%s>" % (self.heading, len(self.body or ""), ','.join([str(x) for x in self.children])) + def add(self, section): + self.children.append(section) + +def parse(word, text): + headings = list(re.finditer("^(=+)\s*(.*?)\s*=+\n", text, re.MULTILINE)) + #print [x.group(1) for x in headings] + doc = WikiSection(word, "") + stack = [doc] + for i, m in enumerate(headings): + depth = len(m.group(1)) + if depth < len(stack): + stack = stack[:depth] + else: + while depth > len(stack): + s = WikiSection(None, "") + stack[-1].add(s) + stack.append(s) + if i+1 < len(headings): + s = WikiSection(m.group(2), 
text[m.end(0):headings[i+1].start(0)].strip()) + else: + s = WikiSection(m.group(2), text[m.end(0):].strip()) + assert len(stack) == depth + stack[-1].add(s) + stack.append(s) + #while doc.heading is None and len(doc.lines) == 0 and len(doc.children) == 1: + # doc = doc.children[0] + return doc + +def formatFull(word, doc): + def f(depth, section): + if section.heading: + r = " "*(depth-1) + section.heading + "\n\n" + else: + r = "" + if section.body: + r += dewiki(section.body, depth+1)+"\n" + #r += "".join(" "*depth + x + "\n" for x in dewiki(section.body)) + #if len(section.lines) > 0: + # r += "\n" + for c in section.children: + r += f(depth+1, c) + return r + s = f(0, doc) + s += "Ref: http://en.wiktionary.org/wiki/%s\n" % word + return s + +def formatNormal(word, doc): + def f(depth, posdepth, section): + r = "" + if depth == posdepth: + if not section.heading or section.heading.startswith("Etymology"): + posdepth += 1 + elif section.heading in Parts: + #p = Parts[section.heading] + #if p: + # r += " "*(depth-1) + word + " (" + p + ")\n\n" + r += " "*(depth-1) + section.heading + "\n\n" + else: + print >>errors, "Unknown part: (%s) %s" % (word, section.heading) + return "" + elif depth > posdepth: + return "" + elif section.heading: + r += " "*(depth-1) + section.heading + "\n\n" + if section.body: + r += dewiki(section.body, depth+1)+"\n" + #r += "".join(" "*depth + x + "\n" for x in dewiki(section.lines)) + #if len(section.lines) > 0: + # r += "\n" + for c in section.children: + r += f(depth+1, posdepth, c) + return r + s = f(0, 3, doc) + s += "Ref: http://en.wiktionary.org/wiki/%s\n" % word + return s + +def formatBrief(word, doc): + def f(depth, posdepth, section): + if depth == posdepth: + h = section.heading + if not section.heading or section.heading.startswith("Etymology"): + posdepth += 1 + elif section.heading in Parts: + #h = Parts[section.heading] + #if h: + # h = "%s (%s)" % (word, h) + pass + stack.append([h, False]) + elif depth > 0: + 
stack.append([section.heading, False]) + else: + stack.append(["%h " + section.heading, False]) + r = "" + #if section.heading: + # r += " "*(depth-1) + section.heading + "\n" + body = ''.join(x+"\n" for x in section.body.split("\n") if len(x) > 0 and x[0] == '#') + if len(body) > 0: + for i in range(len(stack)): + if not stack[i][1]: + if stack[i][0]: + r += " "*(i-1) + stack[i][0] + "\n" + stack[i][1] = True + r += dewiki(body, depth+1) + for c in section.children: + r += f(depth+1, posdepth, c) + stack.pop() + return r + stack = [] + s = f(0, 3, doc) + s += "Ref: http://en.wiktionary.org/wiki/%s\n" % word + return s + +class WikiHandler(xml.sax.ContentHandler): + def __init__(self): + self.element = None + self.page = None + self.text = "" + self.long = {} + def startElement(self, name, attrs): + #print "start", name, attrs + self.element = name + def endElement(self, name): + #print "end", name + if self.element == "text": + if self.page: + if self.page in self.long: + print self.page, len(self.text) + print + self.doPage(self.page, self.text) + self.page = None + self.text = "" + self.element = None + def characters(self, content): + #print "characters", content + if self.element == "title": + if self.checkPage(content): + self.page = content + elif self.element == "text": + if self.page: + self.text += content + if len(self.text) > 100000 and self.page not in self.long: + self.long[self.page] = 1 + def checkPage(self, page): + return False + def doPage(self, page, text): + pass + +class TemplateHandler(WikiHandler): + def checkPage(self, page): + return page.startswith("Template:") + def doPage(self, page, text): + Templates[page[page.find(':')+1:].lower()] = text + +class WordHandler(WikiHandler): + def checkPage(self, page): + return ':' not in page + def doPage(self, page, text): + m = re.match(r"#redirect\s*\[\[(.*?)\]\]", text, re.IGNORECASE) + if m: + out.write(" See <%s>" % page) + return + doc = parse(page, text) + out.write(formatBrief(page, doc)) + 
#print formatBrief(page, doc) + +fn = sys.argv[1] +info = """ This file was converted from the original database on: + %s + + The original data is available from: + http://en.wiktionary.org + The version from which this file was generated was: + %s + + Wiktionary is available under the GNU Free Documentation License. +""" % (time.ctime(), os.path.basename(fn)) + +errors = codecs.open("mkdict.err", "w", "utf_8") +e = codecs.getencoder("utf_8") + +Templates = {} +f = os.popen("bunzip2 -c %s" % fn, "r") +xml.sax.parse(f, TemplateHandler()) +f.close() + +f = os.popen("bunzip2 -c %s" % fn, "r") +out = codecs.getwriter("utf_8")( + os.popen("dictfmt -p wiktionary-en --locale en_US.UTF-8 --columns 0 -u http://en.wiktionary.org", "w")) + +out.write(("%%h English Wiktionary\n%s" % info).encode('utf-8')) +xml.sax.parse(f, WordHandler()) +f.close() +out.close() diff --git a/nixpkgs/pkgs/servers/dict/wordnet_structures.py b/nixpkgs/pkgs/servers/dict/wordnet_structures.py new file mode 100644 index 000000000000..6a88427c5da1 --- /dev/null +++ b/nixpkgs/pkgs/servers/dict/wordnet_structures.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +#Copyright 2007 Sebastian Hagen +# This file is part of wordnet_tools. + +# wordnet_tools is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 +# as published by the Free Software Foundation + +# wordnet_tools is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with wordnet_tools; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +# This program requires python >= 2.4. 

# This program converts wordnet index/data file pairs into dict index/data
# files usable by dictd.
# This is basically a reimplementation of the wnfilter program by Rik Faith,
# which unfortunately doesn't work correctly for wordnet files in the newer
# formats. This version of wordnet_structures whould parse wordnet 2.1 files
# correctly, and create output very similar to what wnfilter would have
# written.

import datetime
from textwrap import TextWrapper

# WordNet syntactic-category codes used throughout this module.
CAT_ADJECTIVE = 0
CAT_ADVERB = 1
CAT_NOUN = 2
CAT_VERB = 3

# Maps the one-letter pos/ss_type field of the wordnet files to a category;
# 's' (adjective satellite) is folded into plain adjectives.
category_map = {
    'n': CAT_NOUN,
    'v': CAT_VERB,
    'a': CAT_ADJECTIVE,
    's': CAT_ADJECTIVE,
    'r': CAT_ADVERB
}


class WordIndex:
    """One entry of a wordnet index.<pos> file: a lemma plus its synsets."""
    def __init__(self, lemma, category, ptrs, synsets, tagsense_count):
        self.lemma = lemma
        self.category = category
        self.ptrs = ptrs
        self.synsets = synsets
        self.tagsense_count = tagsense_count

    @classmethod
    def build_from_line(cls, line_data, synset_map):
        """Parse one index line; synset offsets are resolved via synset_map.

        Index line layout (wndb(5)): lemma pos synset_cnt p_cnt
        [ptr_symbol...] sense_cnt tagsense_cnt synset_offset...
        """
        line_split = line_data.split()
        lemma = line_split[0]
        category = category_map[line_split[1]]
        synset_count = int(line_split[2],10)
        ptr_count = int(line_split[3],10)
        # NOTE(review): range(3, 3+ptr_count) looks off by one against the
        # documented layout -- it includes the p_cnt field and drops the last
        # ptr_symbol; range(4, 4+ptr_count) would match.  ptrs is never used
        # when generating dict output, so this is harmless here.  TODO confirm.
        ptrs = [line_split[i] for i in range(3, 3+ptr_count)]
        tagsense_count = int(line_split[5 + ptr_count],10)
        synsets = [synset_map[int(line_split[i],10)] for i in range(6 + ptr_count, 6 + ptr_count + synset_count)]
        return cls(lemma, category, ptrs, synsets, tagsense_count)

    @classmethod
    def build_from_file(cls, f, synset_map, rv_base=None):
        """Parse a whole index file into {lowercased lemma: [WordIndex, ...]}.

        Pass rv_base to accumulate several files into one mapping.
        """
        if (rv_base is None):
            rv = {}
        else:
            rv = rv_base

        for line in f:
            # Lines starting with spaces are the license header, not data.
            if (line.startswith(' ')):
                continue
            wi = cls.build_from_line(line, synset_map)
            word = wi.lemma.lower()
            if not (word in rv):
                rv[word] = []
            rv[word].append(wi)
        return rv

    def __repr__(self):
        return '%s%s' % (self.__class__.__name__, (self.lemma, self.category, self.ptrs, self.synsets, self.tagsense_count))


class WordIndexDictFormatter(WordIndex):
    """WordIndex subclass that renders its synsets as a dictd entry body."""
    # Reverse of category_map, to human-readable part-of-speech tags.
    category_map_rev = {
        CAT_NOUN: 'n',
        CAT_VERB: 'v',
        CAT_ADJECTIVE:
            'adj',
        CAT_ADVERB: 'adv'
    }
    linesep = '\n'
    # Maximum width of wrapped output lines.
    LINE_WIDTH_MAX = 68
    # NOTE(review): the exact run lengths of spaces in the four prefix strings
    # were lost in the paste; widths below are reconstructed to align with the
    # formatted prefixes ('%5s 1: ' is 9 chars, '%5d: ' is 7) -- verify.
    prefix_fmtf_line_first = '%5s 1: '
    prefix_fmtn_line_first = '         '
    prefix_fmtf_line_nonfirst = '%5d: '
    prefix_fmtn_line_nonfirst = '       '

    def dict_str(self):
        """Format this word's synsets as a numbered, line-wrapped entry body."""
        tw = TextWrapper(width=self.LINE_WIDTH_MAX,
            initial_indent=(self.prefix_fmtf_line_first % self.category_map_rev[self.category]),
            subsequent_indent=self.prefix_fmtn_line_first)

        lines = (tw.wrap(self.synsets[0].dict_str()))
        i = 2
        # Remaining senses get a plain running number as prefix.
        for synset in self.synsets[1:]:
            tw = TextWrapper(width=self.LINE_WIDTH_MAX,
                initial_indent=(self.prefix_fmtf_line_nonfirst % i),
                subsequent_indent=self.prefix_fmtn_line_nonfirst)
            lines.extend(tw.wrap(synset.dict_str()))
            i += 1
        return self.linesep.join(lines)


class Synset:
    """One entry of a wordnet data.<pos> file: words, pointers, gloss, frames."""
    def __init__(self, offset, ss_type, words, ptrs, gloss, frames=()):
        self.offset = offset
        self.type = ss_type
        self.words = words
        self.ptrs = ptrs
        self.gloss = gloss
        self.frames = frames
        self.comments = []

    @classmethod
    def build_from_line(cls, line_data):
        """Parse one data-file line (wndb(5) layout: w_cnt is hex, counts decimal)."""
        line_split = line_data.split()
        synset_offset = int(line_split[0],10)
        ss_type = category_map[line_split[2]]
        word_count = int(line_split[3],16)
        # Words alternate with their lex_id fields, hence the stride of 2.
        words = [line_split[i] for i in range(4, 4 + word_count*2,2)]
        ptr_count = int(line_split[4 + word_count*2],10)
        # Each pointer is a 4-tuple: symbol, offset, pos, source/target.
        ptrs = [(line_split[i], line_split[i+1], line_split[i+2], line_split[i+3]) for i in range(5 + word_count*2,4 + word_count*2 + ptr_count*4,4)]

        tok = line_split[5 + word_count*2 + ptr_count*4]
        base = 6 + word_count*2 + ptr_count*4
        if (tok != '|'):
            # Verb frames precede the gloss: f_cnt, then (+ f_num w_num) triples.
            frame_count = int(tok, 10)
            frames = [(int(line_split[i+1],10), int(line_split[i+2],16)) for i in range(base, base + frame_count*3, 3)]
            base += frame_count*3 + 1
        else:
            frames = []

        # Re-split with a field limit so the gloss (after '|') stays one piece.
        line_split2 = line_data.split(None, base)
        if (len(line_split2) < base):
            gloss = None
        else:
            gloss = line_split2[-1]

        return cls(synset_offset, ss_type, words, ptrs, gloss, frames)

    @classmethod
    def build_from_file(cls, f):
        # Returns ({offset: Synset}, [license comment lines]).
        rv \
            = {}
        comments = []

        for line in f:
            if (line.startswith(' ')):
                # Header region: collect numbered license lines as comments.
                line_s = line.lstrip().rstrip('\n')
                line_elements = line_s.split(None,1)
                try:
                    int(line_elements[0])
                except ValueError:
                    continue
                if (len(line_elements) == 1):
                    line_elements.append('')
                comments.append(line_elements[1])
                continue
            synset = cls.build_from_line(line.rstrip())
            rv[synset.offset] = synset

        return (rv, comments)

    def dict_str(self):
        """Return the gloss, plus a '[syn: {...}]' list when >1 word in synset."""
        rv = self.gloss
        if (len(self.words) > 1):
            rv += ' [syn: %s]' % (', '.join([('{%s}' % word) for word in self.words]))
        return rv

    def __repr__(self):
        return '%s%s' % (self.__class__.__name__, (self.offset, self.type, self.words, self.ptrs, self.gloss, self.frames))


class WordnetDict:
    """Accumulates parsed wordnet data and writes a dictd index/data pair."""
    # NOTE(review): interior whitespace of this template was lost in the
    # paste; spacing is reconstructed -- verify against upstream.
    db_info_fmt = '''This file was converted from the original database on:
          %(conversion_datetime)s

The original data is available from:
          %(wn_url)s

The original data was distributed with the notice shown below. No
additional restrictions are claimed. Please redistribute this changed
version under the same conditions and restriction that apply to the
original version.\n\n
%(wn_license)s'''

    datetime_fmt = '%Y-%m-%dT%H:%M:%S'
    # dictd's base64 digit alphabet for index offsets/lengths.
    base64_map = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'

    def __init__(self, wn_url, desc_short, desc_long):
        self.word_data = {}      # {lowercased lemma: [WordIndexDictFormatter]}
        self.wn_url = wn_url
        self.desc_short = desc_short
        self.desc_long = desc_long
        self.wn_license = None   # set from data-file header on first parse

    def wn_dict_add(self, file_index, file_data):
        """Parse one wordnet index/data file pair into self.word_data."""
        file_data.seek(0)
        file_index.seek(0)
        (synsets, license_lines) = Synset.build_from_file(file_data)
        WordIndexDictFormatter.build_from_file(file_index, synsets, self.word_data)
        if (license_lines):
            self.wn_license = '\n'.join(license_lines) + '\n'

    @classmethod
    def base64_encode(cls, i):
        """Encode a non-negative integer into a dictd compatible base64 string"""
        # NOTE(review): Python 2 only -- the '/' below must be floor division.
        if (i < 0):
            raise ValueError('Value %r for i is negative' % (i,))
        # Find the smallest digit count e such that 64**e - 1 >= i.
        r = 63
        e = 1
        while (r < i):
            e += 1
            r = \
                64**e - 1

        # Emit digits from the most significant downwards.
        rv = ''
        while (e > 0):
            e -= 1
            d = (i / 64**e)
            rv += cls.base64_map[d]
            i = i % (64**e)
        return rv

    @classmethod
    def dict_entry_write(cls, file_index, file_data, key, entry, linesep='\n'):
        """Write a single dict entry for <key> to index and data files"""
        # Index line: headword TAB base64(offset) TAB base64(length).
        entry_start = file_data.tell()
        file_data.write(entry)
        entry_len = len(entry)
        file_index.write('%s\t%s\t%s%s' % (key, cls.base64_encode(entry_start),
            cls.base64_encode(entry_len), linesep))

    def dict_generate(self, file_index, file_data):
        """Write the complete dict database from the accumulated word_data."""
        file_index.seek(0)
        file_data.seek(0)
        # The dictd file format is fairly iffy on the subject of special
        # headwords: either dictd is buggy, or the manpage doesn't tell the whole
        # story about the format.
        # The upshot is that order of these entries in the index *matters*.
        # Putting them at the beginning and in alphabetic order is afaict ok.
        # Some other orders completely and quietly break the ability to look
        # those headwords up.
        # -- problem encountered with 1.10.2, at 2007-08-05.
        file_data.write('\n')
        # These locals feed db_info_fmt via vars() below.
        wn_url = self.wn_url
        conversion_datetime = datetime.datetime.now().strftime(self.datetime_fmt)
        wn_license = self.wn_license
        self.dict_entry_write(file_index, file_data, '00-database-info', '00-database-info\n%s\n' % (self.db_info_fmt % vars()))
        self.dict_entry_write(file_index, file_data, '00-database-long', '00-database-long\n%s\n' % self.desc_long)
        self.dict_entry_write(file_index, file_data, '00-database-short', '00-database-short\n%s\n' % self.desc_short)
        self.dict_entry_write(file_index, file_data, '00-database-url', '00-database-url\n%s\n' % self.wn_url)


        # NOTE(review): Python 2 only -- keys() must return a sortable list.
        words = self.word_data.keys()
        words.sort()
        for word in words:
            for wi in self.word_data[word]:
                word_cs = word
                # Use case-sensitivity information of first entry of first synset that
                # matches this word case-insensitively
                # (for/else + break pattern: stop at the first match anywhere).
                for synset in wi.synsets:
                    for ss_word in synset.words:
                        if (ss_word.lower() == word_cs.lower()):
                            word_cs = ss_word
                            break
                    else:
                        continue
                    break
                else:
                    continue
                break

            outstr = ''
            for wi in self.word_data[word]:
                outstr += wi.dict_str() + '\n'

            # Entry body: case-corrected headword, then all formatted senses.
            outstr = '%s%s%s' % (word_cs, wi.linesep, outstr)
            self.dict_entry_write(file_index, file_data, word_cs, outstr, wi.linesep)

        file_index.truncate()
        file_data.truncate()


if (__name__ == '__main__'):
    import optparse
    op = optparse.OptionParser(usage='usage: %prog [options] (<wn_index_file> <wn_data_file>)+')
    op.add_option('-i', '--outindex', dest='oi', default='wn.index', help='filename of index file to write to')
    op.add_option('-d', '--outdata', dest='od', default='wn.dict', help='filename of data file to write to')
    op.add_option('--wn_url', dest='wn_url', default='ftp://ftp.cogsci.princeton.edu/pub/wordnet/2.0', help='URL for wordnet sources')
    # NOTE(review): leading-space runs in the two description defaults were
    # lost in the paste; reconstructed -- verify against upstream.
    op.add_option('--db_desc_short', dest='desc_short', default='     WordNet (r) 2.1 (2005)', help='short dict DB description')
    op.add_option('--db_desc_long', dest='desc_long', default='    WordNet (r): A Lexical'
        ' Database for English from the\n     Cognitive Science Laboratory at Princeton University', help='long dict DB description')

    (options, args) = op.parse_args()

    wnd = WordnetDict(wn_url=options.wn_url, desc_short=options.desc_short, desc_long=options.desc_long)

    # Positional arguments come in (index_file, data_file) pairs.
    for i in range(0,len(args),2):
        print 'Opening index file %r...' % args[i]
        file_index = file(args[i])
        print 'Opening data file %r...' % args[i+1]
        file_data = file(args[i+1])
        print 'Parsing index file and data file...'
        wnd.wn_dict_add(file_index, file_data)

    print 'All input files parsed. Writing output to index file %r and data file %r.' % (options.oi, options.od)

    wnd.dict_generate(file(options.oi, 'w'),file(options.od, 'w'))
    print 'All done.'