-
Notifications
You must be signed in to change notification settings - Fork 2
/
dictionary.py
executable file
·78 lines (65 loc) · 2.5 KB
/
dictionary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import csv
import os
import sys
import urllib
import urllib.parse

import rdflib
import rdflib.plugins.sparql as sparql
# Directory, relative to this script's location, that main() opens as the
# persistent RDF store.
file_in = "data/"
# Namespace used by main() to build word URIs and bound to the "etymwn" prefix
# in its SPARQL query.
data_ns = rdflib.Namespace("http://example.org/etymwn#")
def _load_iso_languages(work_dir):
    """Read the tab-separated ``iso-639-3.tab`` table into a dict.

    The table is looked up in the current directory first (the historical
    behaviour) and then next to this script, so the tool also works when it
    is started from another directory.

    Args:
        work_dir: Directory that contains this script.

    Returns:
        dict: First column of every row (the key used for language lookups)
        mapped to the row's second column.

    Raises:
        OSError: If the table cannot be opened in either location.
    """
    table_path = "iso-639-3.tab"
    if not os.path.exists(table_path):
        table_path = os.path.join(work_dir, table_path)

    iso_lang = {}
    # Explicit UTF-8 so the result does not depend on the user's locale.
    with open(table_path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
        for row in reader:
            # Blank or truncated lines would raise IndexError; skip them.
            if len(row) >= 2:
                iso_lang[row[0]] = row[1]
    return iso_lang


def main(argv):
    """Look up a word in the etymology graph and print its ancestors.

    Args:
        argv: Command-line arguments without the program name. Positional
            words are joined with single spaces and lower-cased to form the
            search term; ``--lang`` selects the language code (default
            ``eng``).

    Raises:
        SystemExit: With status 2 when no word is given, when argument
            parsing fails, or when the word has no ancestors; with status 0
            when the word is not present in the database.
    """
    # --- init -----------------------------------------------------------------
    work_dir = os.path.dirname(os.path.realpath(__file__))

    # --- read arg -------------------------------------------------------------
    parser = argparse.ArgumentParser(description="Etimology dictionary (based on Wiktionary)")
    parser.add_argument("text", type=str, nargs="*",
                        help="a word to look for in the dictionary")
    parser.add_argument("--lang", type=str, default="eng",
                        help="language code from ISO 639-3 (default: %(default)s)")
    # Parse the arguments handed to main() rather than implicitly re-reading
    # sys.argv, so the function honours its own parameter.
    args = parser.parse_args(argv)
    in_word = " ".join(args.text).lower()
    in_lang = args.lang
    if not in_word:
        parser.print_help()
        sys.exit(2)

    # --- language codes (ISO 639-3) -------------------------------------------
    iso_lang = _load_iso_languages(work_dir)

    # --- query RDF graph for the word's etymology -----------------------------
    g = rdflib.Graph(store='Sleepycat', identifier='etymwn')
    g.open(os.path.join(work_dir, file_in), create=False)
    try:
        # Word nodes are named "<lang>_<word>", URL-quoted, inside data_ns.
        w_start = data_ns[urllib.parse.quote_plus(in_lang + "_" + in_word)]
        if (w_start, None, None) not in g:
            print(f"Cannot find '{in_lang}:{in_word}' in the database!")
            sys.exit(0)

        q = sparql.prepareQuery(
            """SELECT DISTINCT ?w_lang ?w_label
WHERE {
?w etymwn:origin_of+ ?w_start ;
rdfs:label ?w_label ;
etymwn:lang ?w_lang .
}
LIMIT 10
""",
            initNs={'etymwn': data_ns, 'rdfs': rdflib.RDFS}
        )
        qres = g.query(q, initBindings={'w_start': w_start})
        if len(qres) == 0:
            print("No ancestors.")
            sys.exit(2)

        # --- print result -----------------------------------------------------
        # U+202A (LEFT-TO-RIGHT EMBEDDING) is emitted at the start of each line.
        print(f"\u202A{in_word} — {iso_lang.get(in_lang, in_lang)}")
        indent = ""
        for row in qres:
            # Each result row is printed one step deeper than the previous one.
            indent += " "
            lang_code = str(row[0])
            lang = iso_lang.get(lang_code, lang_code)
            word = row[1]
            print(f"\u202A{indent}< {word} — {lang}")
    finally:
        # Runs on the sys.exit() paths above as well, so the store is always
        # released.
        g.close()
# Script entry point: hand everything after the program name to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)