-
Notifications
You must be signed in to change notification settings - Fork 5
/
Makefile
118 lines (100 loc) · 5.61 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Script run by the %.idx rule to turn a thesaurus .dat file into its index.
TH_GEN_IDX := /usr/share/mythes/th_gen_idx.pl
# Targets that name actions rather than files. Declaring them phony keeps a
# stray file of the same name from silently disabling the rule. check-morph
# is listed too, since it is a prerequisite of `check` and produces no file.
.PHONY: all clean check check-rules check-thes check-morph packages
# Default goal: the Hunspell dictionary/affix pair plus the thesaurus data
# file and its index.
all: \
    dicts/is.dic \
    dicts/is.aff \
    dicts/th_is.dat \
    dicts/th_is.idx
# Remove generated dictionaries, packages, test scratch files, the unpacked
# Wiktionary dumps and the temporary packaging directories.
# The compressed ??wiktionary-latest-pages-articles.xml.bz2 download is not
# removed here (the original recipe had that removal commented out).
clean:
	rm -f dicts/is.aff dicts/is.dic dicts/th_is.dat dicts/th_is.idx \
		dicts/is.oxt dicts/is.xpi
	rm -f wiktionary.dic wiktionary.aff wordlist.diff
	rm -f huntest.aff huntest.dic
	rm -f ??wiktionary-latest-pages-articles.xml \
		??wiktionary-latest-pages-articles.xml.texts
	rm -rf libreoffice-tmp/ mozilla-tmp/
	rm -rf dicts/
# Umbrella test target: runs the rule tests, the thesaurus checks and the
# morphology comparison.
check: \
    check-rules \
    check-thes \
    check-morph
# Test the affix rules with hunspell.
#
# Old rules: for every directory under langs/is/rules/, build a scratch
# dictionary (huntest.aff / huntest.dic) from the common affix files, the
# directory's optional `aff` file (prefixed with an "SFX X N <count>" header
# whose count is the number of non-empty lines) and its `dic` file. Every
# word in `good` must then be accepted (`hunspell -l` prints nothing) and
# every word in `bad` rejected (`hunspell -G` prints nothing).
#
# New rules: the same two checks against the `wiktionary` dictionary using
# langs/is/test.good and langs/is/test.bad.
#
# hunspell's output is captured once per check and reused in the failure
# message instead of running hunspell a second time. `read -r` with an empty
# IFS keeps directory names intact. `exit 1` inside the piped loop ends the
# loop's subshell, whose status becomes the status of the recipe line.
check-rules:
	echo "Testing old rules..."
	find langs/is/rules/* -type d | while IFS= read -r i; \
	do \
		cat langs/is/common-aff.d/*.aff > huntest.aff; \
		if [ -f "$$i/aff" ]; then \
			LINECOUNT="$$(grep -ce '^.' "$$i/aff")"; \
			echo "SFX X N $$LINECOUNT" >> huntest.aff; \
			cat "$$i/aff" >> huntest.aff; \
		fi; \
		TESTNAME="$$(basename "$$i")"; \
		echo "Testing rule $$TESTNAME"; \
		cp "$$i/dic" huntest.dic; \
		REJECTED="$$(hunspell -l -d huntest < "$$i/good")"; \
		test -z "$$REJECTED" || { echo "Good word test for $$TESTNAME failed: $$REJECTED"; exit 1; }; \
		ACCEPTED="$$(hunspell -G -d huntest < "$$i/bad")"; \
		test -z "$$ACCEPTED" || { echo "Bad word test for $$TESTNAME failed: $$ACCEPTED"; exit 1; }; \
	done
	echo "Testing new rules..."
	REJECTED="$$(hunspell -l -d wiktionary < "langs/is/test.good")"; \
	test -z "$$REJECTED" || { echo "Good word test failed: $$REJECTED"; exit 1; }
	ACCEPTED="$$(hunspell -G -d wiktionary < "langs/is/test.bad")"; \
	test -z "$$ACCEPTED" || { echo "Bad word test failed: $$ACCEPTED"; exit 1; }
	echo "All passed."
# Sanity-check the generated thesaurus data file. Each line is a negated
# grep, so the recipe fails as soon as any forbidden pattern is found (grep
# prints the offending lines).
# The last check looks for entity-encoded tags (&lt;...&gt;); previously it
# repeated the literal "<.*>" pattern, so encoded tags were never detected.
check-thes: dicts/th_is.dat
	! grep ")," $< # pipe, not comma, should separate meanings
	! grep "|[^\(]*)" $< # don't replace comma with pipe inside parentheses
	! grep -P "\xe2" $<
	! grep "([^)]\+(" $<
	! grep "<.*>" $< # no html-like tags
	! grep "&lt;.*&gt;" $< # no html-like tags (encoded)
	@echo "Thesaurus tests passed."
# Compare hunspell's morphological analysis (-m) of langs/is/test.good with
# the expected output stored in langs/is/test.morph. Any difference fails
# the target and prints the full diff.
check-morph: dicts/is.dic dicts/is.aff
	@echo "Testing morphology..."
	@if [ -n "$$(hunspell -m -d dicts/is < langs/is/test.good | diff -q langs/is/test.morph -)" ]; then \
		echo "Morphology test failed: $$(hunspell -m -d dicts/is < langs/is/test.good | diff langs/is/test.morph -)"; \
		exit 1; \
	fi
	@echo "Morphology tests passed."
# Build all distributable artefacts: the LibreOffice extension, the Mozilla
# extension and the LibreOffice sentence-exception list.
packages: \
    dicts/is.oxt \
    dicts/is.xpi \
    dicts/SentenceExceptList.xml
# LibreOffice extension.
# Copies the packages/libreoffice skeleton to a scratch directory, adds the
# copyright file as license.txt, replaces TODAYPLACEHOLDER in description.xml
# with today's date (YYYY.MM.DD), zips the scratch directory and finally adds
# the dictionary and thesaurus files to the archive.
dicts/is.oxt: %.oxt: \
    %.aff \
    %.dic \
    dicts/th_is.dat \
    dicts/th_is.idx \
    packages/libreoffice/META-INF/manifest.xml \
    packages/libreoffice/description.xml \
    packages/libreoffice/dictionaries.xcu \
    packages/copyright
	rm -rf $@ libreoffice-tmp
	cp -rf packages/libreoffice libreoffice-tmp
	cp packages/copyright libreoffice-tmp/license.txt
	cd libreoffice-tmp \
		&& sed -i "s/TODAYPLACEHOLDER/$$(date +%Y.%m.%d)/g" description.xml \
		&& zip -r ../$@ *
	zip $@ dicts/is.dic dicts/is.aff dicts/th_is.dat dicts/th_is.idx
# LibreOffice autocorrect blocklist - not the end of a sentence.
# Writes an XML block-list whose entries come from the Wiktionary dump: lines
# near a "{{-is-}}" marker and a "{{-is-skammstöfun-}}" marker that contain a
# bold ('''...''') token ending in a period. Each such token becomes one
# <block-list:block> element, and the elements are sorted.
# The output directory is created first so the target can be built on its own
# (or in parallel) before anything else has created dicts/.
dicts/SentenceExceptList.xml: iswiktionary-latest-pages-articles.xml
	mkdir -p $(@D)
	echo "<?xml version=\"1.0\" encoding=\"utf-8\"?>" > $@
	echo "<block-list:block-list xmlns:block-list=\"http://openoffice.org/2001/block-list\">" >> $@
	grep -C 3 "{{-is-}}" $< | grep -C 2 "{{-is-skammstöfun-}}" | grep "'''[^ ]\+\.'''" | grep -o "[^']\+" | xargs printf " <block-list:block block-list:abbreviated-name=\"%s\"/>\n" | sort >> $@
	echo "</block-list:block-list>" >> $@
# Mozilla extension.
# Copies the packages/mozilla skeleton to a scratch directory, replaces
# TODAYPLACEHOLDER in install.js and install.rdf with today's date
# (YYYY.MM.DD), places the dictionary pair in dictionaries/ and zips the
# result.
dicts/is.xpi: %.xpi: \
    %.aff \
    %.dic \
    packages/mozilla/install.js \
    packages/mozilla/install.rdf
	rm -rf $@ mozilla-tmp
	cp -rf packages/mozilla mozilla-tmp
	cd mozilla-tmp \
		&& sed -i "s/TODAYPLACEHOLDER/$$(date +%Y.%m.%d)/g" install.js \
		&& sed -i "s/TODAYPLACEHOLDER/$$(date +%Y.%m.%d)/g" install.rdf \
		&& mkdir dictionaries \
		&& cp ../dicts/is.dic ../dicts/is.aff dictionaries/ \
		&& zip -r ../$@ *
# Build the affix file by running makedict.sh (the first prerequisite) with
# the language code "is".
# The wildcard patterns must not be quoted: make passes quote characters
# through literally, so a quoted pattern matches no files and edits under
# langs/is/rules/ would never trigger a rebuild.
dicts/is.aff: makedict.sh makedict.py iswiktionary-latest-pages-articles.xml.texts iswiktionary-latest-pages-articles.xml \
    $(wildcard langs/is/common-aff.d/*) $(wildcard langs/is/rules/*/*)
	./$< is
# Build the dictionary file by running makedict.sh (the first prerequisite)
# with the language code "is".
# The wildcard patterns must not be quoted: make passes quote characters
# through literally, so a quoted pattern matches no files and edits under
# langs/is/rules/ would never trigger a rebuild.
dicts/is.dic: makedict.sh makedict.py iswiktionary-latest-pages-articles.xml.texts iswiktionary-latest-pages-articles.xml \
    $(wildcard langs/is/common-aff.d/*) $(wildcard langs/is/rules/*/*)
	./$< is
# Generate the thesaurus data file for language code % from that language's
# Wiktionary dump: makethes.awk (first prerequisite) extracts the entries and
# sortthes.py post-processes them.
# The dump is taken from the prerequisite list (second word of $^) so the
# input always matches the stem of the pattern rather than a fixed "is" file.
# NOTE(review): the locale is still fixed to is_IS.utf8 - confirm before
# using this rule for another language.
dicts/th_%.dat: makethes.awk %wiktionary-latest-pages-articles.xml sortthes.py
	LC_ALL=is_IS.utf8 gawk -F " " -f $< < $(word 2,$^) | LC_ALL=is_IS.utf8 ./sortthes.py > $@
# Build a thesaurus index from its .dat file: the index generator reads the
# data on stdin and writes the index to the file given with -o.
%.idx: %.dat
	LC_ALL=is_IS.utf8 $(TH_GEN_IDX) -o $@ < $<
# Download the latest compressed Icelandic Wiktionary dump.
# The download goes to a temporary .part file and is renamed only on success.
# Writing straight to $@ would leave an empty or truncated file behind when
# wget fails, and make would then treat it as up to date.
iswiktionary-latest-pages-articles.xml.bz2:
	wget http://dumps.wikimedia.org/iswiktionary/latest/$@ -O $@.part || { rm -f $@.part; exit 1; }
	mv -f $@.part $@
	touch $@
# Unpack the dump, keeping the compressed original and overwriting any
# existing output, then refresh the timestamp so the result is newer than
# the archive.
iswiktionary-latest-pages-articles.xml: iswiktionary-latest-pages-articles.xml.bz2
	bunzip2 --keep --force $<
	touch $@
# Extract the "{{name|argument..." fragments from the dump: strip all line
# breaks, pull out every match of the template pattern, cut everything from
# "mynd=" or "lo.nf.et.ó=" onwards, and store the sorted unique results.
iswiktionary-latest-pages-articles.xml.texts: iswiktionary-latest-pages-articles.xml
	tr -d "\r\n" < $< \
		| grep -o "{{[^.|{}]*|[^-.}][^ }]*[}|][^}]*" \
		| sed "s/mynd=.*//g" \
		| sed "s/lo.nf.et.ó=.*//g" \
		| sort \
		| uniq > $@
# Performance test target: perf.txt
# randwordlist holds 100 lines of 12 random alphabetic characters taken from
# /dev/urandom; the perf.txt rule uses it as an input word list.
randwordlist:
	< /dev/urandom tr -cd '[:alpha:]' | fold -w12 | head -n 100 > $@
# Command prefix that appends one timing line (elapsed, user, sys, max memory
# and the command itself) to perf.txt for each measured run.
time := /usr/bin/time -o perf.txt -f "%E real\t%U user\t%S sys\t%M mem\t%C" --append

# Performance report: starts perf.txt with the `hunspell -vv` output, then
# times hunspell in -a, -m and -s modes against both langs/is/wordlist and
# the random word list, and finally prints the collected report.
perf.txt: dicts/is.dic dicts/is.aff randwordlist
	hunspell -vv > $@
	$(time) hunspell -d dicts/is -a langs/is/wordlist > /dev/null
	$(time) hunspell -d dicts/is -a randwordlist > /dev/null
	$(time) hunspell -d dicts/is -m langs/is/wordlist > /dev/null
	$(time) hunspell -d dicts/is -m randwordlist > /dev/null
	$(time) hunspell -d dicts/is -s langs/is/wordlist > /dev/null
	$(time) hunspell -d dicts/is -s randwordlist > /dev/null
	@cat $@