From 29aef7daba3606a2d33ca5a0756795bc1acd2e02 Mon Sep 17 00:00:00 2001 From: jacksonllee Date: Fri, 7 May 2021 23:57:20 -0500 Subject: [PATCH] FIX raise NotImplementedError for ipsyn --- CHANGELOG.md | 2 ++ docs/_modules/pycantonese/corpus.html | 7 +++++++ docs/_sources/changelog.rst.txt | 2 ++ docs/api.html | 22 ++++------------------ docs/changelog.html | 2 ++ docs/generated/pycantonese.CHATReader.html | 8 ++++---- docs/searchindex.js | 2 +- docs/source/changelog.rst | 2 ++ pycantonese/corpus.py | 7 +++++++ 9 files changed, 31 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a453cf..af00dd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/). - Fixed the previously inoperational methods `append`, `append_left`, `extend`, and `extend_left` of the class `CHATReader` through the upstream PyLangAcq package. - Retrained the part-of-speech tagger, after the minor character fix from v3.2.3. +- Raised `NotImplementedError` for the method `ipsyn` of `CHATReader`, + since the upstream method works only for English. ## [3.2.3] - 2021-04-12 diff --git a/docs/_modules/pycantonese/corpus.html b/docs/_modules/pycantonese/corpus.html index 007690a..e710218 100644 --- a/docs/_modules/pycantonese/corpus.html +++ b/docs/_modules/pycantonese/corpus.html @@ -365,6 +365,13 @@

Source code for pycantonese.corpus

         which may or may not be applicable to your use case.
     """
 
+
[docs] def ipsyn(self): + """(Not implemented - the upstream ``ipsyn`` method works for English only.)""" + raise NotImplementedError( + "The upstream `ipsyn` method works for English only. " + "There isn't yet a Cantonese version of IPSyn." + )
+ @staticmethod def _preprocess_token(t) -> Token: # Examples from the CHILDES LeeWongLeung corpus, child mhz diff --git a/docs/_sources/changelog.rst.txt b/docs/_sources/changelog.rst.txt index 73eb612..f26a590 100644 --- a/docs/_sources/changelog.rst.txt +++ b/docs/_sources/changelog.rst.txt @@ -34,6 +34,8 @@ Fixed * Fixed the previously inoperational methods ``append``\ , ``append_left``\ , ``extend``\ , and ``extend_left`` of the class ``CHATReader`` through the upstream PyLangAcq package. * Retrained the part-of-speech tagger, after the minor character fix from v3.2.3. +* Raised ``NotImplementedError`` for the method ``ipsyn`` of ``CHATReader``\ , + since the upstream method works only for English. [3.2.3] - 2021-04-12 -------------------- diff --git a/docs/api.html b/docs/api.html index ab5a489..7af2cc4 100644 --- a/docs/api.html +++ b/docs/api.html @@ -449,8 +449,8 @@

headers()

Return the headers.

-

ipsyn([participant])

-

Return the indexes of productive syntax (IPSyn).

+

ipsyn()

+

(Not implemented - the upstream ipsyn method works for English only.)

jyutping([participants, exclude, …])

Return the data in Jyutping romanization.

@@ -867,22 +867,8 @@

-ipsyn(participant='CHI') → List[int]
-

Return the indexes of productive syntax (IPSyn).

-
-
Parameters
-
-
participantstr, optional

Participant of interest, which defaults to the typical use case of "CHI" -for the target child.

-
-
-
-
Returns
-
-
List[float]
-
-
-
+ipsyn()[source] +

(Not implemented - the upstream ipsyn method works for English only.)

diff --git a/docs/changelog.html b/docs/changelog.html index ee816c0..69c6753 100644 --- a/docs/changelog.html +++ b/docs/changelog.html @@ -345,6 +345,8 @@

Fixed
  • Fixed the previously inoperational methods append, append_left, extend, and extend_left of the class CHATReader through the upstream PyLangAcq package.

  • Retrained the part-of-speech tagger, after the minor character fix from v3.2.3.

  • +
  • Raised NotImplementedError for the method ipsyn of CHATReader, +since the upstream method works only for English.

  • diff --git a/docs/generated/pycantonese.CHATReader.html b/docs/generated/pycantonese.CHATReader.html index 9d3001f..6fa0bd5 100644 --- a/docs/generated/pycantonese.CHATReader.html +++ b/docs/generated/pycantonese.CHATReader.html @@ -377,8 +377,8 @@

    pycantonese.CHATReader

    headers()

    Return the headers.

    -

    ipsyn([participant])

    -

    Return the indexes of productive syntax (IPSyn).

    +

    ipsyn()

    +

    (Not implemented - the upstream ipsyn method works for English only.)

    jyutping([participants, exclude, …])

    Return the data in Jyutping romanization.

    @@ -517,8 +517,8 @@

    pycantonese.CHATReader

    headers()

    Return the headers.

    -

    ipsyn([participant])

    -

    Return the indexes of productive syntax (IPSyn).

    +

    ipsyn()

    +

    (Not implemented - the upstream ipsyn method works for English only.)

    jyutping([participants, exclude, …])

    Return the data in Jyutping romanization.

    diff --git a/docs/searchindex.js b/docs/searchindex.js index 58b2c2a..2d24bf1 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["api","archives","changelog","data","generated/pycantonese.CHATReader","generated/pycantonese.CHATReader.search","generated/pycantonese.characters_to_jyutping","generated/pycantonese.hkcancor","generated/pycantonese.jyutping_to_tipa","generated/pycantonese.jyutping_to_yale","generated/pycantonese.parse_jyutping","generated/pycantonese.pos_tag","generated/pycantonese.pos_tagging.hkcancor_to_ud","generated/pycantonese.read_chat","generated/pycantonese.segment","generated/pycantonese.stop_words","generated/pycantonese.word_segmentation.Segmenter","index","jyutping","pos_tagging","quickstart","reader","searches","stop_words","word_segmentation"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":1,"sphinx.ext.intersphinx":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["api.rst","archives.rst","changelog.rst","data.rst","generated/pycantonese.CHATReader.rst","generated/pycantonese.CHATReader.search.rst","generated/pycantonese.characters_to_jyutping.rst","generated/pycantonese.hkcancor.rst","generated/pycantonese.jyutping_to_tipa.rst","generated/pycantonese.jyutping_to_yale.rst","generated/pycantonese.parse_jyutping.rst","generated/pycantonese.pos_tag.rst","generated/pycantonese.pos_tagging.hkcancor_to_ud.rst","generated/pycantonese.read_chat.rst","generated/pycantonese.segment.rst","generated/pycantonese.stop_words.rst","generated/pycantonese.word_segmentation.Segmenter.rst","index.rst","jyutping.rst","pos_tagging.rst","quickstart.rst","reader.rst","searches.rst","stop_words.rst","word_segmentation.rst"],objects:{"pycantonese.CHATReader":{__init__:[4,1,1,""],ages:[0,1,1,""],append:[0,1,1,""],append_left:[0,1,1,""],characters:[0,1,1,""],clear:[0,1,1,""],dates_of_recording:[0,1,1,""],extend:[0,1,1,""],extend_left:[0,1,1,""],file_paths:[0,1,1,""],from_dir:[0,1,1,""],from_files:[0,1,1,""],from_strs:[0,1,1,""],from_zip:[0,1,1,""],headers:[0,1,1,""],ipsyn:[0,1,1,""],jyutping:[0,1,1,""],languages:[0,1,1,""],mlu:[0,1,1,""],mlum:[0,1,1,""],mluw:[0,1,1,""],n_files:[0,1,1,""],participants:[0,1,1,""],pop:[0,1,1,""],pop_left:[0,1,1,""],search:[5,1,1,""],sents:[0,1,1,""],tagged_sents:[0,1,1,""],tagged_words:[0,1,1,""],tokens:[0,1,1,""],ttr:[0,1,1,""],utterances:[0,1,1,""],word_frequencies:[0,1,1,""],word_ngrams:[0,1,1,""],words:[0,1,1,""]},"pycantonese.corpus":{Token:[0,0,1,""]},"pycantonese.jyutping":{Jyutping:[0,0,1,""]},"pycantonese.jyutping.Jyutping":{"final":[0,1,1,""],__str__:[0,1,1,""]},"pycantonese.pos_tagging":{hkcancor_to_ud:[12,2,1,""]},"pycantonese.word_segmentation":{Segmenter:[16,0,1,""]},"pycantonese.word_segmentation.Segmenter":{__init__:[16,1,1,""]},pycantonese:{CHATReader:[4,0,1,""],characters_to_jyutping:[6,2,1,""],hkcancor:[7,2,1,""],jyutping_to_tipa:[8,2,1,""],jyutping_to_yale:[9,2,1,""],parse_jyutping:[10,2,1,""],pos_tag:[11,2,1,""],read_chat:[13,2,1,""],segment:[14,2,1,""],stop_words:[15,2,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","function","Python function"]},objtypes:{"0":"py:class","1":"py:method","2":"py:function"},terms:{"000":19,"001":21,"001_v2":21,"100":[11,12,19,23],"104":[2,15,23],"105":[15,23],"107":23,"1177307":3,"127":[],"12715":22,"13251":21,"134":21,"140":21,"150":19,"153654":3,"160":3,"161":3,"167":21,"16730":3,"1681":22,"178270":3,"186":21,"190":3,"1949480":3,"195":22,"197":18,"1997":21,"1st":22,"2015":[1,17],"2016":1,"202":21,"2020":[17,18,24],"2021":[1,3],"209":21,"21167":22,"219":21,"22328":22,"223415":3,"2259":21,"2570":21,"2734":21,"2741":21,"2755":21,"29012":[],"2911":21,"29726":[20,22],"29954":22,"30th":21,"3rd":1,"4110":21,"501":3,"5019":21,"520":21,"527":21,"533877":3,"70438":3,"705":22,"9282":21,"\u4e00\u5572":[15,23],"\u4e00\u5b9a":[15,23],"\u4e03":22,"\u4e0d\u5982":[15,23],"\u4e0d\u904e":[15,22,23],"\u4e5d\u9f8d":23,"\u4f4f":22,"\u4f60":[21,22],"\u4f62":21,"\u4fc2":[20,21,22],"\u505c\u7528\u8a5e":17,"\u505c\u7528\u8bcd":17,"\u516b\u6708":22,"\u5187\u5f97":[20,22],"\u5206\u8a5e":17,"\u5206\u8bcd":17,"\u53bb":[20,21,22],"\u53ef\u4ee5":22,"\u5416":21,"\u5462":21,"\u54aa":22,"\u5514":[14,21,24],"\u5514\u4fc2":22,"\u5514\u8a72":18,"\u554a":[21,22],"\u5572":21,"\u5582":21,"\u5587":[21,22],"\u558e":21,"\u55ce":21,"\u55f0":[11,19],"\u55f0\u500b":22,"\u55f0\u908a":22,"\u55f1":22,"\u5649":21,"\u565a\u65e5":[11,19],"\u56d6":[21,22],"\u597d":[18,20,22],"\u5b78":[14,20,24],"\u5bb9":[14,24],"\u5bb9\u5514\u5bb9\u6613":[14,24],"\u5bb9\u6613":[14,24],"\u5c0d":[11,19],"\u5c31":21,"\u5e7e":22,"\u5e7f\u4e1c\u8bdd":17,"\u5ee3\u6771":24,"\u5ee3\u6771\u8a71":[6,8,9,10,14,17,18,20,21,24],"\u5ee3\u6771\u8a71\u597d\u96e3\u5b78":20,"\u5ee3\u6771\u8a71\u5bb9\u5514\u5bb9\u6613\u5b78":[14,24],"\u6211":[11,19,21],"\u6211\u565a\u65e5\u8cb7\u55f0\u5c0d\u978b":19,"\u62b5":22,"\u65b0\u754c":23,"\u65c5":21,"\u65c5\u884c":[20,21,22],"\u65e5":22,"\u6709\u5187":[20,22],"\u6709\u5f97":[20,22],"\u6a5f":22,"\u6a5f\u7968":22,"\u6c23\u5019":[9,18],"\u6de1\u5b63":22,"\u73a9":22,"\u76f4\u7a0b":22,"\u771f\u4fc2":22,"\u7793\u89ba":22,"\u789f":22,"\u789f\u5f62":22,"\u7ca4\u62fc":17,"\u7ca4\u8bed":17,"\u7cb5":17,"\u7cb5\u62fc":17,"\u7cb5\u8a9e":17,"\u7da0":22,"\u8072\u6bcd":22,"\u807d":22,"\u81ea\u7136\u8a9e\u8a00\u8655\u7406":17,"\u81ea\u7136\u8bed\u8a00\u5904\u7406":17,"\u86cb":18,"\u86cb\u7cd5":18,"\u884c":21,"\u8981":[20,22],"\u8a5e\u6027\u6a19\u6ce8":17,"\u8a71":[22,24],"\u8a92":21,"\u8a9e\u8a00\u5b78":17,"\u8b1b":[6,18,20,22],"\u8b8a\u97f3":18,"\u8bcd\u6027\u6807\u6ce8":17,"\u8bed\u8a00\u5b66":17,"\u8cb7":[11,19],"\u8cca":22,"\u8ddf":22,"\u8fea\u58eb\u5c3c":22,"\u904e":22,"\u9072":21,"\u90fd":22,"\u96c0":22,"\u96e3":20,"\u978b":[11,19],"\u97fb\u6bcd":[18,22],"\u98db\u6a5f":22,"\u9999\u6e2f":[15,23],"\u9999\u6e2f\u4eba":[6,18,20],"\u9999\u6e2f\u4eba\u8b1b\u5ee3\u6771\u8a71":[6,18,20],"\u9999\u6e2f\u5cf6":23,"\ud842\udfa9\ud843\ude4c":2,"\ud842\udfa9\ud843\ude4c":[],"\ud843\udd15":2,"\ud843\udd15":[],"\ud843\ude9d":2,"\ud843\ude9d":[],"\ud843\udea2":[2,21],"\ud843\udea2":[],"\ud843\uded7":2,"\ud843\uded7":[],"\ud844\udc14":2,"\ud844\udc14":[],"\ud844\udc5c":2,"\ud844\udc5c":[],"\ud844\udcc9":[21,22],"\ud844\udcc9":[],"\ud844\udcd3":2,"\ud844\udcd3":[],"\ud854\udd2b":2,"\ud854\udd2b":[],"\ud854\ude99":2,"\ud854\ude99":[],"\ud85d\udd74":2,"\ud85d\udd74":[],"case":[0,4,18,22,23],"char":6,"class":[0,2,4,14,16,18,21,24],"computational linguist":17,"d\u016bng":[9,18],"default":[0,2,5,9,11,13,14,19,22,23,24],"final":[0,2,5,18,22],"float":0,"function":[2,3,6,8,9,12,18,19,20,21,22,23,24],"g\u014di":18,"gw\u00f3ng":[9,18],"gw\u00f3ngd\u016bngw\u00e1":[9,18],"import":[3,14,18,19,20,21,22,23,24],"int":[0,5,16,22],"long":17,"natural language process":17,"new":[0,2,6,8,9,11,12,14,15,16,23],"null":2,"part-of-speech tag":17,"return":[0,2,5,6,7,8,9,10,11,12,13,14,15,18,21,22,23,24],"stop word":17,"super":[8,18],"switch":[2,9],"true":[0,2,5,9,15,21,22,23],"while":[18,21,22],"word segment":17,Added:14,Eve:[0,13],For:[0,3,5,12,13,21,22],Its:19,One:[0,5],POS:11,Such:0,The:[0,1,2,3,5,6,11,13,14,17,18,19,21,22,23,24],There:11,Used:2,With:22,__init__:[4,16],__str__:0,__version__:[],aa3:[21,22],abil:18,abl:[3,18],about:[19,23],abov:[2,22],accept:19,access:[2,17,18,21,22],accommod:21,acquisit:[0,3,4],adam:[0,13],add:[15,18,23],addit:21,adjust:22,adopt:[2,3],adult:21,adv:[11,19],after:[0,2,5,20,21],age:[0,1,17,21],ages:[0,21],albino:17,algorithm:24,all:[0,2,3,5,20,21,22,23],all_verb:[20,22],allow:[2,11,14,16,22,24],allow_remot:0,alon:22,alphabet:[3,17],alreadi:[0,19,22],also:[18,21,22,23],alwai:22,ambigu:[9,18],american:[0,13],among:18,analysi:22,ani:[0,6,12,18,22],annot:[2,3,12,19,20,22],anonym:21,anoth:[0,18],anyth:3,api:17,append:[0,2],append_left:[0,2],appli:[0,2,11,13],applic:[0,4,12],approach:[14,19],appropri:[0,21],apr:21,april:[1,21],arbitrari:0,archiv:17,argument:[2,11,14,18,19,21,23,24],aris:18,around:[21,22],artist:17,as_list:[2,9,18],ask:22,assist:17,associ:[3,21],attempt:0,attribut:[0,18,21],audio:21,augment:21,author:17,automat:[18,22],avail:[2,3,17,21],averag:[11,19],baat3jyut6:22,back:2,ban:24,base:[6,18,22],basic:[1,2,19],bat1gwo3:22,bear:22,becaus:[22,24],becom:0,been:[0,2,3,17,19,21],befor:[0,2,5],begin:[0,13,20,21,22],behavior:[14,18],being:9,below:[2,21],benefit:12,better:2,between:[9,18,21],beyond:3,big:[1,17],bile:[3,13],bilingu:[1,3,17],bool:[0,5,9],both:[0,2,3,21],bought:[11,19],boundari:[9,24],brown:[0,13],bug:[2,17],bui:17,build:2,built:[2,21,22],bump:2,by_fil:[0,5,21],by_token:[0,2,5,22],by_utter:[0,2,5,21,22],caak2:22,cake:18,call:[18,21,22],can:[0,3,12,13,18,21,22,23,24],cannot:0,cantones:[0,1,2,3,4,5,6,7,8,9,10,11,13,14,15,18,19,20,21,22,23,24],cap:2,capabl:[21,22],capit:0,cat1:22,cathug:17,centr:[1,17],certain:24,cha:[0,3,13,21],chaak:17,chang:[6,9,14,18,22],changelog:17,chao:18,charact:[0,2,5,6,8,9,10,11,14,17,20,24],character_s:[0,2,4],characters2jyutp:[2,6],characters_to_jyutp:[2,18,20],charl:17,chat:[0,2,4,13,21],chatread:[2,3,7,13,21],chcc:3,check:[3,17],chen:[1,17],chi:[0,21],child:[0,13,21],childhood:[1,17],children:[0,13],chim:17,chines:[0,1,3,5,17,18],ci4:21,circleci:2,classmethod:[0,3],clear:0,climat:[9,18],cls:[14,24],coda:[0,2,5,10,18,20,22],codas_ptk:22,code:[2,3,10,17,21,22],coffe:17,collaps:0,collect:[0,21],colloc:22,com:17,combin:[0,2,19,22],come:[0,2,3,17,19],common:[18,22],commonli:18,compl:[11,12],complet:22,compon:18,comput:1,concurr:24,confus:18,conson:[9,18],constrain:24,constraint:22,consult:3,contact:[],contain:[0,3,5,13,14,18,22,24],contextu:18,contrast:21,contribut:17,control:[3,21,24],conveni:[3,21],convent:[0,2,21,24],convers:[2,3,6,17,20,22],convert:[2,3,6,8,9,11,18],corpora:3,corpu:[2,4,6,7,13,14,17,18,19,20,24],correct:[2,18],correspond:[0,21],count:21,counter:[0,21],counterpart:[2,3],cover:2,creat:[7,21],criteria:[0,5],criterion:[2,3],cross:[12,19],current:[0,3,17,18,19,21,24],custom:[2,14,21,22],customiz:[2,16],daam6gwai3:22,daan2:18,daan6gou1:18,dai2:22,dai:0,data:[1,2,5,6,11,12,13,14,17,18,19,21,22,23,24],dataset:[0,2,3,13,17,21],date:[0,21],dates_of_record:0,datetim:[0,21],deal:21,dedic:21,defin:[2,21],demograph:21,depend:[2,11,12,18,19,23,24],deprec:[0,5,6,8,9],describ:[0,11,12,19,22],design:[3,17,22],detail:[2,17,21],detect:18,determin:[0,23],develop:1,di1:21,dict:[0,12,21],dictioari:12,dictionari:[12,18],differ:[21,22],difficult:20,dik6si6nei4:22,dip2:22,dip2jing4:22,direct:[0,2],directli:18,directori:[0,3,13],disabl:[0,2,5],disallow:[2,14,16,24],disambigu:[9,18],discours:22,dist:18,distinct:0,distribut:22,doc:2,docstr:2,document:[2,3,20,21],doe:[0,13,14],domain:2,domin:0,done:0,dou1:22,download:[0,3,20],drive:3,drop:[0,2],due:[18,19],duplic:0,dut2:22,each:[0,2,6,11,22],easi:[14,18,24],edu:[11,12],educ:21,egg:18,either:[0,13,23],element:[0,2,8,9,10,21],email:17,empti:4,encod:[0,2,13],end:[18,24],eng:[0,21],english:[0,13,21,23,24],enough:18,entir:[12,24],entri:2,equival:[0,2,5,6,8,9,18,22],error:[2,18,20],especi:19,etc:[17,21,22],european:0,even:0,exampl:[0,5,6,8,9,10,11,12,13,14,15,18,20,21,22,23],exclud:[0,2,5,13,21],exist:[0,24],expect:19,explicitli:24,expos:[2,18,19],express:[0,13,20,22],extend:[0,2],extend_left:[0,2],extens:0,facebook:17,fact:19,fals:[0,2,5,9,15,18,22],fan3gaau3:22,fat:21,father:21,favor:2,featur:17,feedback:17,fei1gei1:22,femal:21,file:[0,2,3,4,5,13,18,21],file_path:0,filenam:21,filter:[0,13,21,23],find:[20,22],fine:3,first:[0,21,22],five:[14,24],flavor:21,flexibl:22,folder:2,follow:[0,2,3,13,22,24],forc:[0,2],form:[0,2,5,24],format:[2,17,21],found:[17,21,22,24],frequenc:0,from:[0,2,3,4,9,11,12,13,14,17,18,19,20,21,22,23,24],from_dir:[0,3],from_fil:[0,3],from_str:[0,3],from_zip:[0,3],full:22,further:[18,19,21],futur:18,gaa3:22,gan1:22,gei1:22,gei1piu3:22,gei2:22,gender:21,gener:[0,2,3,5,22],get:[18,19,20,21],github:[2,17],give:22,given:[0,2,3,5,12,21,22],go2bin1:22,go2go3:22,gong2:[6,18,20,22],good:18,gra:[0,20,21,22],grab:22,grain:3,grammat:0,granular:12,great:20,group:21,guthri:3,gwo3:22,gwong2dung1waa2:[6,8,9,10,18,20],hai6:[20,22],han:17,handl:[0,3,13,17,18,21],handout:1,has:[2,3,9,17,18,19,21,23,24],hauh:[9,18],have:[0,2,3,17,18,19,20,21,22],header:0,hei3hau6:[9,18],hei6au6:[9,18],hei:[9,18],heihauh:[9,18],helper:19,heoi3:[20,21,22],here:[0,3,13,21,22,24],heritag:3,high:[18,22],hill:17,him:[1,17],hkcancor:[2,3,6,11,12,14,17,18,19,20,21,22,24],hkcancor_to_ud:[2,11,19],hku:3,ho2ji5:22,hoeng1gong2jan4:[6,18,20],hong:[1,2,3,7,17,21,23],hongkong:[6,18,20],hood:3,hou2:[18,22],hou7:18,how:[21,22],howev:24,hss:[11,12],html:[0,5,11,12],http:[0,2,3,5,11,12,13,17],hyperlink:21,ident:21,identifi:[0,21],ids:0,ignor:[0,5,22],illeg:[8,9,10],illustr:21,implement:[2,17,21,24],improv:2,includ:[0,2,5,14,17,18,21,22,23,24],inconveni:18,incorpor:[3,17],independ:[18,21],index:[0,11,12],indic:0,individu:[0,2,5,6,17,18],inform:[0,21,22],ingest:18,inherit:[0,4,21],initi:[0,2,4,5,16,22],innov:1,inoper:2,input:[2,11,12,14,19],instagram:17,instal:20,instanc:[0,21,22,24],instanti:0,instead:[0,2,5,18],integ:24,intellig:18,intention:0,interest:[0,18,22],intern:[2,11],internet:0,interpret:20,introduc:[1,17,21],intuit:[],inv:0,invalid:18,investig:19,involv:[18,22],ipsyn:0,island:23,issu:[2,17],issubset:23,iter:[0,5,15,16,22,23,24],its:[2,3,9,11,17,18,21,22],jackson:[1,17],jacksonlle:17,jat6:22,jau5dak1:[20,22],jau5mou5:[20,22],jenni:17,jiu3:[20,22],jp_str:[8,9,10],json:24,just:[0,5,21,22],jyut6:18,jyutp:[2,3,4,5,6,8,9,10,17,20],jyutping2tipa:[2,8],jyutping2yal:[2,9],jyutping_s:[0,2,4],jyutping_to_tipa:[2,18],jyutping_to_x:[2,9],jyutping_to_yal:[2,18],jyutpingi1:[],keep:22,keep_cas:0,kept:0,keyword:[2,14,18,19,24],kind:22,known:[],kong:[1,2,3,7,17,21,23],kowloon:23,koy55:18,kwarg:24,laa1:22,laa4:22,lai:17,lam:17,languag:[3,4,12,17,18,19,21,23,24],last:[0,17,18],latex:[8,18],lau:17,learn:[14,20,24],leav:0,lee:[1,3,17],leewongleung:3,left:[0,5,20,22],len:[0,3,15,20,22,23],length:[0,2,14,16,24],leo:3,leoi5hang4:[20,21,22],less:12,let:21,letter:[0,18,21,22],leung:3,level:[0,18,21,22],lib:18,librari:[3,14,17,18,24],licens:[2,3,18,19,24],like:[0,3,17,19,22,23,24],likewis:22,limit:19,line:[2,9,18,21],linguist:[1,3,12,19,22],link:2,list:[0,2,5,6,8,9,10,11,14,18,21,22],litong:[1,17],lo1:22,load:24,local:[0,3,13,18],longer:14,longest:[14,24],look:[0,20],loop:22,low:[9,18],lowercas:0,luk2:22,m4goi1:18,m4hai6:22,machin:[3,22],mai6:22,mai:[0,4,18,22,23],maintain:17,maintein:[],major:[19,21],make:[0,18],mani:[0,3,21,23,24],manual:3,map:[2,12,19],march:[1,3],mark:6,marker:[9,18,21],match:[0,5,13,14,22,24],materi:21,matter:6,matthew:3,max_word_length:[16,24],maxim:14,maximum:[2,14,16,24],mean:[0,3,17,19,22],meaning:21,media:17,meet:3,memori:0,menu:20,metadata:21,method:[0,2,4,16,17,22],might:[18,19],ming:17,minor:2,minut:1,mit:17,mix:22,mlu:0,mlum:0,mluw:0,model:[2,3,6,11,14,16,18,19,24],modifi:17,modul:18,month:0,moon:[],mor:[0,20,21,22],more:[0,2,3,5,17,19,20,21,22,24],morphem:0,morpholog:[0,21],most:[0,17,18],most_common:21,mot:[0,21],mother:21,mou5dak1:[20,22],multipl:[8,9,10,18],n_file:[0,3],name:[2,17,21],nasal:[2,18],natur:[3,12,17,19,23,24],naturalist:3,necessari:[18,23,24],necessit:18,need:[3,18,22],nei5:[21,22],neighbor:22,neither:22,ngram:0,nltk:2,none:[0,5,6,12,13,14,15,16,18,20,21,22],nongra:[],nor:22,note:[1,2,17],noun:[11,19],now:[2,6,20],ntu:[11,12],nuclei:18,nucleu:[0,2,5,10,18,20,22],number:[0,3,18,20,21,22],number_of_charact:2,number_of_word:2,numer:18,numpydoc:2,object:[0,2,7,14,16,18,21,22,24],obtain:3,occurr:19,odd:19,off:19,offer:[17,18],often:[23,24],ohio:1,oken:[],on25:[8,18],one:[0,2,3,5,8,9,10,11,13],ones:23,onli:[0,2,5,13,19,22],onset:[0,2,5,9,10,18,20,22],onward:2,open:2,option:[0,5,9,11,12,13,14,15,16,21,23],orb:2,order:[0,3,17,21],org:[0,2,3,5,11,12,13],organ:[0,5],origin:[2,3,11,12,17,19,20,21],orthograph:0,other:[0,2,3,9,18,22],otherwis:[0,5,18,22],out:[3,17],output:[0,5,6,9,18,21],over:[11,12,19],overal:2,own:[3,21],packag:[0,2,18,20,21],page:[17,21],paidocantones:3,paidologo:3,pair:[11,19],paramet:[0,2,5,6,8,9,10,11,12,13,14,15,16,22],parent:[0,4],pars:[0,2,3,10,13,17,20,21,22],parse_jyutp:[2,18,20],parser:2,part:[0,2,3,5,9,11,12,17,18,20,21],particip:[0,2,5,21],particl:22,particular:22,particularli:[3,18,21],pass:[0,13,24],path:[0,2,3,5,13],perceptron:[11,19],perform:22,perhap:19,permiss:[17,19],phonbank:3,phonolog:[0,5,18],phrase:[2,11,19],pick:2,piggyback:2,pin:2,pinjam:[18,22],pip:17,placehold:21,pleas:[0,3,5,17,18,19,20,21],plu:[0,2,5,21,22],point:[2,3,13,21],pop:0,pop_left:0,pos:[0,5,11,12,20,21,22],pos_tag:[2,19],possibl:[2,3,22],potenti:[18,19,24],power:22,pprint:[],preced:22,predict:11,preprocess:0,preserv:21,preval:[0,5],previou:[9,18],previous:[2,6],primer:3,print:[20,21,22],proce:20,process:[12,17,18,19,21,23,24],product:0,profession:17,pron:[11,19],pronoun:23,pronunci:18,properti:0,prove:18,provid:[0,3,12,13,14,18,19,22,23,24],ptk:22,ptk_tone2:22,publicli:3,punct:[11,19],punctuat:6,purpos:[2,12,22],pycantones:[0,1,2,3,18,19,20,21,22,23,24],pylangacq:[0,2,3,21],pypi:2,python3:18,python:[1,2,3,18,20,22],qualiti:[2,19],queri:[3,17],quickstart:17,quot:[9,18],rachel:17,rais:[0,2,5,8,9,10,11,18],random:0,rang:[0,2,5],rather:2,ratio:0,read:[0,2,3,13,18],read_chat:3,readabl:3,reader:[0,2,4,17,18],readi:[17,20],readm:[3,17],readthedoc:2,reason:3,recent:[3,17,18],record:[0,21],recurs:[0,13],refer:17,regex:[0,5,22],regular:[0,13,20,22],rel:19,relat:[0,3],releas:[2,17,18,24],relev:18,remot:0,remov:[0,15,23],rendit:21,repetit:2,replac:[2,6,8,9],report:17,repr:18,repres:[0,2,6,18,22],represent:[0,18,21],request:[],requir:2,research:[3,17,22],resourc:[17,18],restructur:2,result:[0,2,18,19,20,24],retrain:2,retriev:18,revis:2,rich:3,richielo:17,right:[0,3,5,22],rime:[2,6,14,17,18,24],rime_cantones:17,rise:[18,22],robin:17,role:21,roman:[2,3,5,6,8,9,10,17,22],rst:2,ryan:17,rylanchiu:17,sai:21,same:[3,21,22,24],sarah:[0,13],satisfi:0,scheme:18,search:[0,2,3,17,20],second:22,see:[0,3,5,11,17,18,20,21,22],seem:18,segment:[0,2,3,5,6,11,17,18,19,20,21],semant:22,sens:0,sent:[0,2,5],sent_rang:[0,2,5],sentenc:[0,2,11,14,19],separ:18,septemb:[1,17],servic:17,ses:21,session:21,set:[0,5,14,15,18,22,23],sever:[3,21],sex:21,shoe:[11,19],show:22,similarli:23,simpl:24,simpli:[0,21],sinc:[0,3,13,19,21,22],singl:[3,9,13,22],situat:24,size:19,slide:[1,17,22],small:19,snippet:2,snowman:17,social:17,some:[0,4],some_token:21,sophist:19,sort:0,sourc:[0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18],space:24,span:[0,5,22],speak:[6,18,20],speaker:21,specif:[2,19,21,22],specifi:[0,13,22,24],speech:[0,2,3,5,11,12,17,20,21],spot:21,stabl:17,standalon:18,standard:19,state:1,statist:19,statu:21,stdin:18,step:0,stephan:17,still:[18,19,24],stiller:17,stop:[2,15,17],stop_word:23,stop_words_1:[15,23],stop_words_2:[15,23],store:21,str:[0,5,6,8,9,10,11,12,13,14,15,16,18],straightforward:18,string:[0,2,5,6,9,11,13,14,21,22,23,24],strip:2,structur:[0,13,21],studi:[0,21,22],style:2,substanti:17,suffix:18,suit:2,suppli:2,support:[0,2,5,11],syllab:[2,18],syllabl:[9,18],syntax:[0,18,22],system:18,tag:[0,2,3,5,11,12,17,20,21],tagged_s:0,tagged_word:0,tagger:[2,11,19],tagset:[2,11,12,19,22],take:[2,3,18,19,22,23,24],talk:[1,17],talkbank:[0,13,21],tape:21,target:[0,21,22],task:[12,18,22,23,24],teach:18,team:17,technic:17,teng1:22,term:[0,5,17],territori:23,test:2,text:[0,2,5,13,21,22,24],than:[0,2,3,5,14,19,22,24],thank:[3,18],thei:[0,2,3],theme:2,thi:[0,2,3,6,8,9,11,12,13,14,16,17,18,20,21,22,24],think:[19,21,24],those:[0,3,18],though:24,three:21,through:[2,21,22],thrown:19,tier:21,time:[21,22],time_mark:21,tipa:[2,8],todai:18,togeth:0,token:[5,18,20,21,22],tone2:22,tone:[0,5,9,10,18,20,22],tool:[1,17,18,21],top:21,touch:19,traceback:18,track:22,tracker:17,tradit:[0,5],train:[11,14,17,19,24],transcrib:3,transcript:0,translat:21,transpar:2,travel:17,treat:[0,18,23],trigram:21,trivial:18,tsui:[1,17],tsz:[1,17],ttr:0,tun55:[8,18],tupl:[0,2,5,6,11,22],turn:[2,18],twitter:17,two:[18,21,22],txt:17,type:0,typeerror:11,typic:[0,21],ubiquit:21,unclear:9,under:[0,3,13],underli:[2,18,21,24],unicod:[0,13],union:0,univers:[1,2,11,12,17,19],universaldepend:[11,12],unrecogn:[8,9,10,12],unseen:[6,18],unseg:[2,11,14,19,24],unspecifi:[0,5],unzip:[0,3,13],updat:2,upgrad:17,upstream:2,url:[0,3,13],usag:21,use:[0,2,3,4,5,18,20,21,22,23],used:[0,3,5,11,14,18,21,22,24],useful:[3,17,22],usefulness:18,user:[2,18],uses:[3,11,12,19,21],using:[17,18,22],usr:18,usual:21,utf8:21,utf:[0,2,13],utter:[0,3,5,21],utterance_rang:[0,2,5,22],uuid:0,valid:18,valu:[0,2,6,9,13,18,22],valueerror:[0,5,8,9,10,11,18],varieti:22,variou:[2,18,21,22],verb:[11,12,19,20,22],version:[0,2,6,8,9,11,12,14,15,16,17,18,24],via:3,visual:21,wa25:[8,18],waa6:22,waan2:22,wai3:21,wai:[3,17,21,24],well:[2,3,21],whatev:22,when:[0,2,3,18,22],whenev:21,where:[0,2,6,11,21,22,24],whether:[18,24],which:[0,4,9,11,12,18,19,21,22,23],whitespac:2,who:17,whose:[0,20],wide:[3,22],window:[2,22],within:[0,5,22],without:3,wonder:17,wong:3,word:[0,2,3,5,6,11,14,15,16,17,18,19,20],word_freq:21,word_frequ:[0,21],word_ngram:[0,21],word_rang:[0,5,22],word_segment:[14,24],wordlist:2,wordseg:2,work:[0,3,13,19,21,22,24],workshop:1,worth:19,would:[0,3,9,12,17,18,19,24],wouldn:19,wrap:0,write:22,written:24,x2y:2,x_to_i:2,xml:3,xxa:21,xxb:21,xxx:[0,13],yale:[2,9],year:[0,3],yesterdai:[11,19],yip:3,yipmatthew:[3,13],you:[0,3,13,17,18,19,20,21,22,23,24],your:[0,3,4,17,19,20,22,23],yue:21,yuen:17,yut:18,zan1hai6:22,zero:22,zik6cing4:22,zip:[0,3,13],zoek2:22,zyu6:22},titles:["API Reference","Archives","Changelog","Corpus Data","pycantonese.CHATReader","pycantonese.CHATReader.search","pycantonese.characters_to_jyutping","pycantonese.hkcancor","pycantonese.jyutping_to_tipa","pycantonese.jyutping_to_yale","pycantonese.parse_jyutping","pycantonese.pos_tag","pycantonese.pos_tagging.hkcancor_to_ud","pycantonese.read_chat","pycantonese.segment","pycantonese.stop_words","pycantonese.word_segmentation.Segmenter","PyCantonese: Cantonese Linguistics and NLP in Python","Jyutping Romanization","Part-of-Speech Tagging","Quickstart","Corpus Reader Methods","Corpus Search Queries","Stop Words","Word Segmentation"],titleterms:{"0dev":2,"2014":2,"2015":2,"2016":2,"2018":2,"2020":2,"2021":2,"break":2,Added:2,acknowledg:17,annot:21,api:[0,2],archiv:1,built:3,cantones:17,chang:2,changelog:2,charact:[18,21,22],characters_to_jyutp:6,chat:3,chatread:[0,4,5],child:3,chines:[21,22],cite:17,consult:17,content:17,convers:18,corpu:[0,3,21,22],criteria:22,custom:[3,24],data:[0,3],deprec:2,download:17,element:22,exampl:[],fix:2,format:[3,22],frequenc:21,header:21,hkcancor:7,hkcancor_to_ud:12,how:17,instal:17,jyutp:[0,18,21,22],jyutping_to_tipa:8,jyutping_to_yal:9,languag:0,licens:17,linguist:17,link:17,logo:17,method:21,multipl:22,natur:0,ngram:21,nlp:17,non:2,output:[1,22],pars:18,parse_jyutp:10,part:[19,22],pos_tag:[11,12],process:0,pycantones:[4,5,6,7,8,9,10,11,12,13,14,15,16,17],python:17,queri:22,quick:[],quickstart:20,rang:22,read_chat:13,reader:21,refer:0,remov:2,research:1,result:22,roman:[0,18,21],search:[5,22],secur:2,segment:[14,16,24],speech:[19,22],stop:23,stop_word:15,string:18,support:17,tabl:17,tag:[19,22],talkbank:3,tipa:18,token:0,transcript:21,tutori:1,unreleas:2,utter:22,word:[21,22,23,24],word_segment:16,yale:18}}) \ No newline at end of file +Search.setIndex({docnames:["api","archives","changelog","data","generated/pycantonese.CHATReader","generated/pycantonese.CHATReader.search","generated/pycantonese.characters_to_jyutping","generated/pycantonese.hkcancor","generated/pycantonese.jyutping_to_tipa","generated/pycantonese.jyutping_to_yale","generated/pycantonese.parse_jyutping","generated/pycantonese.pos_tag","generated/pycantonese.pos_tagging.hkcancor_to_ud","generated/pycantonese.read_chat","generated/pycantonese.segment","generated/pycantonese.stop_words","generated/pycantonese.word_segmentation.Segmenter","index","jyutping","pos_tagging","quickstart","reader","searches","stop_words","word_segmentation"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":1,"sphinx.ext.intersphinx":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["api.rst","archives.rst","changelog.rst","data.rst","generated/pycantonese.CHATReader.rst","generated/pycantonese.CHATReader.search.rst","generated/pycantonese.characters_to_jyutping.rst","generated/pycantonese.hkcancor.rst","generated/pycantonese.jyutping_to_tipa.rst","generated/pycantonese.jyutping_to_yale.rst","generated/pycantonese.parse_jyutping.rst","generated/pycantonese.pos_tag.rst","generated/pycantonese.pos_tagging.hkcancor_to_ud.rst","generated/pycantonese.read_chat.rst","generated/pycantonese.segment.rst","generated/pycantonese.stop_words.rst","generated/pycantonese.word_segmentation.Segmenter.rst","index.rst","jyutping.rst","pos_tagging.rst","quickstart.rst","reader.rst","searches.rst","stop_words.rst","word_segmentation.rst"],objects:{"pycantonese.CHATReader":{__init__:[4,1,1,""],ages:[0,1,1,""],append:[0,1,1,""],append_left:[0,1,1,""],characters:[0,1,1,""],clear:[0,1,1,""],dates_of_recording:[0,1,1,""],extend:[0,1,1,""],extend_left:[0,1,1,""],file_paths:[0,1,1,""],from_dir:[0,1,1,""],from_files:[0,1,1,""],from_strs:[0,1,1,""],from_zip:[0,1,1,""],headers:[0,1,1,""],ipsyn:[0,1,1,""],jyutping:[0,1,1,""],languages:[0,1,1,""],mlu:[0,1,1,""],mlum:[0,1,1,""],mluw:[0,1,1,""],n_files:[0,1,1,""],participants:[0,1,1,""],pop:[0,1,1,""],pop_left:[0,1,1,""],search:[5,1,1,""],sents:[0,1,1,""],tagged_sents:[0,1,1,""],tagged_words:[0,1,1,""],tokens:[0,1,1,""],ttr:[0,1,1,""],utterances:[0,1,1,""],word_frequencies:[0,1,1,""],word_ngrams:[0,1,1,""],words:[0,1,1,""]},"pycantonese.corpus":{Token:[0,0,1,""]},"pycantonese.jyutping":{Jyutping:[0,0,1,""]},"pycantonese.jyutping.Jyutping":{"final":[0,1,1,""],__str__:[0,1,1,""]},"pycantonese.pos_tagging":{hkcancor_to_ud:[12,2,1,""]},"pycantonese.word_segmentation":{Segmenter:[16,0,1,""]},"pycantonese.word_segmentation.Segmenter":{__init__:[16,1,1,""]},pycantonese:{CHATReader:[4,0,1,""],characters_to_jyutping:[6,2,1,""],hkcancor:[7,2,1,""],jyutping_to_tipa:[8,2,1,""],jyutping_to_yale:[9,2,1,""],parse_jyutping:[10,2,1,""],pos_tag:[11,2,1,""],read_chat:[13,2,1,""],segment:[14,2,1,""],stop_words:[15,2,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","function","Python function"]},objtypes:{"0":"py:class","1":"py:method","2":"py:function"},terms:{"000":19,"001":21,"001_v2":21,"100":[11,12,19,23],"104":[2,15,23],"105":[15,23],"107":23,"1177307":3,"127":[],"12715":22,"13251":21,"134":21,"140":21,"150":19,"153654":3,"160":3,"161":3,"167":21,"16730":3,"1681":22,"178270":3,"186":21,"190":3,"1949480":3,"195":22,"197":18,"1997":21,"1st":22,"2015":[1,17],"2016":1,"202":21,"2020":[17,18,24],"2021":[1,3],"209":21,"21167":22,"219":21,"22328":22,"223415":3,"2259":21,"2570":21,"2734":21,"2741":21,"2755":21,"29012":[],"2911":21,"29726":[20,22],"29954":22,"30th":21,"3rd":1,"4110":21,"501":3,"5019":21,"520":21,"527":21,"533877":3,"70438":3,"705":22,"9282":21,"\u4e00\u5572":[15,23],"\u4e00\u5b9a":[15,23],"\u4e03":22,"\u4e0d\u5982":[15,23],"\u4e0d\u904e":[15,22,23],"\u4e5d\u9f8d":23,"\u4f4f":22,"\u4f60":[21,22],"\u4f62":21,"\u4fc2":[20,21,22],"\u505c\u7528\u8a5e":17,"\u505c\u7528\u8bcd":17,"\u516b\u6708":22,"\u5187\u5f97":[20,22],"\u5206\u8a5e":17,"\u5206\u8bcd":17,"\u53bb":[20,21,22],"\u53ef\u4ee5":22,"\u5416":21,"\u5462":21,"\u54aa":22,"\u5514":[14,21,24],"\u5514\u4fc2":22,"\u5514\u8a72":18,"\u554a":[21,22],"\u5572":21,"\u5582":21,"\u5587":[21,22],"\u558e":21,"\u55ce":21,"\u55f0":[11,19],"\u55f0\u500b":22,"\u55f0\u908a":22,"\u55f1":22,"\u5649":21,"\u565a\u65e5":[11,19],"\u56d6":[21,22],"\u597d":[18,20,22],"\u5b78":[14,20,24],"\u5bb9":[14,24],"\u5bb9\u5514\u5bb9\u6613":[14,24],"\u5bb9\u6613":[14,24],"\u5c0d":[11,19],"\u5c31":21,"\u5e7e":22,"\u5e7f\u4e1c\u8bdd":17,"\u5ee3\u6771":24,"\u5ee3\u6771\u8a71":[6,8,9,10,14,17,18,20,21,24],"\u5ee3\u6771\u8a71\u597d\u96e3\u5b78":20,"\u5ee3\u6771\u8a71\u5bb9\u5514\u5bb9\u6613\u5b78":[14,24],"\u6211":[11,19,21],"\u6211\u565a\u65e5\u8cb7\u55f0\u5c0d\u978b":19,"\u62b5":22,"\u65b0\u754c":23,"\u65c5":21,"\u65c5\u884c":[20,21,22],"\u65e5":22,"\u6709\u5187":[20,22],"\u6709\u5f97":[20,22],"\u6a5f":22,"\u6a5f\u7968":22,"\u6c23\u5019":[9,18],"\u6de1\u5b63":22,"\u73a9":22,"\u76f4\u7a0b":22,"\u771f\u4fc2":22,"\u7793\u89ba":22,"\u789f":22,"\u789f\u5f62":22,"\u7ca4\u62fc":17,"\u7ca4\u8bed":17,"\u7cb5":17,"\u7cb5\u62fc":17,"\u7cb5\u8a9e":17,"\u7da0":22,"\u8072\u6bcd":22,"\u807d":22,"\u81ea\u7136\u8a9e\u8a00\u8655\u7406":17,"\u81ea\u7136\u8bed\u8a00\u5904\u7406":17,"\u86cb":18,"\u86cb\u7cd5":18,"\u884c":21,"\u8981":[20,22],"\u8a5e\u6027\u6a19\u6ce8":17,"\u8a71":[22,24],"\u8a92":21,"\u8a9e\u8a00\u5b78":17,"\u8b1b":[6,18,20,22],"\u8b8a\u97f3":18,"\u8bcd\u6027\u6807\u6ce8":17,"\u8bed\u8a00\u5b66":17,"\u8cb7":[11,19],"\u8cca":22,"\u8ddf":22,"\u8fea\u58eb\u5c3c":22,"\u904e":22,"\u9072":21,"\u90fd":22,"\u96c0":22,"\u96e3":20,"\u978b":[11,19],"\u97fb\u6bcd":[18,22],"\u98db\u6a5f":22,"\u9999\u6e2f":[15,23],"\u9999\u6e2f\u4eba":[6,18,20],"\u9999\u6e2f\u4eba\u8b1b\u5ee3\u6771\u8a71":[6,18,20],"\u9999\u6e2f\u5cf6":23,"\ud842\udfa9\ud843\ude4c":2,"\ud842\udfa9\ud843\ude4c":[],"\ud843\udd15":2,"\ud843\udd15":[],"\ud843\ude9d":2,"\ud843\ude9d":[],"\ud843\udea2":[2,21],"\ud843\udea2":[],"\ud843\uded7":2,"\ud843\uded7":[],"\ud844\udc14":2,"\ud844\udc14":[],"\ud844\udc5c":2,"\ud844\udc5c":[],"\ud844\udcc9":[21,22],"\ud844\udcc9":[],"\ud844\udcd3":2,"\ud844\udcd3":[],"\ud854\udd2b":2,"\ud854\udd2b":[],"\ud854\ude99":2,"\ud854\ude99":[],"\ud85d\udd74":2,"\ud85d\udd74":[],"case":[0,4,18,22,23],"char":6,"class":[0,2,4,14,16,18,21,24],"computational linguist":17,"d\u016bng":[9,18],"default":[0,2,5,9,11,13,14,19,22,23,24],"final":[0,2,5,18,22],"float":0,"function":[2,3,6,8,9,12,18,19,20,21,22,23,24],"g\u014di":18,"gw\u00f3ng":[9,18],"gw\u00f3ngd\u016bngw\u00e1":[9,18],"import":[3,14,18,19,20,21,22,23,24],"int":[0,5,16,22],"long":17,"natural language process":17,"new":[0,2,6,8,9,11,12,14,15,16,23],"null":2,"part-of-speech tag":17,"return":[0,2,5,6,7,8,9,10,11,12,13,14,15,18,21,22,23,24],"stop word":17,"super":[8,18],"switch":[2,9],"true":[0,2,5,9,15,21,22,23],"while":[18,21,22],"word segment":17,Added:14,Eve:[0,13],For:[0,3,5,12,13,21,22],Its:19,Not:0,One:[0,5],POS:11,Such:0,The:[0,1,2,3,5,6,11,13,14,17,18,19,21,22,23,24],There:11,Used:2,With:22,__init__:[4,16],__str__:0,__version__:[],aa3:[21,22],abil:18,abl:[3,18],about:[19,23],abov:[2,22],accept:19,access:[2,17,18,21,22],accommod:21,acquisit:[0,3,4],adam:[0,13],add:[15,18,23],addit:21,adjust:22,adopt:[2,3],adult:21,adv:[11,19],after:[0,2,5,20,21],age:[0,1,17,21],ages:[0,21],albino:17,algorithm:24,all:[0,2,3,5,20,21,22,23],all_verb:[20,22],allow:[2,11,14,16,22,24],allow_remot:0,alon:22,alphabet:[3,17],alreadi:[0,19,22],also:[18,21,22,23],alwai:22,ambigu:[9,18],american:[0,13],among:18,analysi:22,ani:[0,6,12,18,22],annot:[2,3,12,19,20,22],anonym:21,anoth:[0,18],anyth:3,api:17,append:[0,2],append_left:[0,2],appli:[0,2,11,13],applic:[0,4,12],approach:[14,19],appropri:[0,21],apr:21,april:[1,21],arbitrari:0,archiv:17,argument:[2,11,14,18,19,21,23,24],aris:18,around:[21,22],artist:17,as_list:[2,9,18],ask:22,assist:17,associ:[3,21],attempt:0,attribut:[0,18,21],audio:21,augment:21,author:17,automat:[18,22],avail:[2,3,17,21],averag:[11,19],baat3jyut6:22,back:2,ban:24,base:[6,18,22],basic:[1,2,19],bat1gwo3:22,bear:22,becaus:[22,24],becom:0,been:[0,2,3,17,19,21],befor:[0,2,5],begin:[0,13,20,21,22],behavior:[14,18],being:9,below:[2,21],benefit:12,better:2,between:[9,18,21],beyond:3,big:[1,17],bile:[3,13],bilingu:[1,3,17],bool:[0,5,9],both:[0,2,3,21],bought:[11,19],boundari:[9,24],brown:[0,13],bug:[2,17],bui:17,build:2,built:[2,21,22],bump:2,by_fil:[0,5,21],by_token:[0,2,5,22],by_utter:[0,2,5,21,22],caak2:22,cake:18,call:[18,21,22],can:[0,3,12,13,18,21,22,23,24],cannot:0,cantones:[0,1,2,3,4,5,6,7,8,9,10,11,13,14,15,18,19,20,21,22,23,24],cap:2,capabl:[21,22],capit:0,cat1:22,cathug:17,centr:[1,17],certain:24,cha:[0,3,13,21],chaak:17,chang:[6,9,14,18,22],changelog:17,chao:18,charact:[0,2,5,6,8,9,10,11,14,17,20,24],character_s:[0,2,4],characters2jyutp:[2,6],characters_to_jyutp:[2,18,20],charl:17,chat:[0,2,4,13,21],chatread:[2,3,7,13,21],chcc:3,check:[3,17],chen:[1,17],chi:[0,21],child:[0,13,21],childhood:[1,17],children:[0,13],chim:17,chines:[0,1,3,5,17,18],ci4:21,circleci:2,classmethod:[0,3],clear:0,climat:[9,18],cls:[14,24],coda:[0,2,5,10,18,20,22],codas_ptk:22,code:[2,3,10,17,21,22],coffe:17,collaps:0,collect:[0,21],colloc:22,com:17,combin:[0,2,19,22],come:[0,2,3,17,19],common:[18,22],commonli:18,compl:[11,12],complet:22,compon:18,comput:1,concurr:24,confus:18,conson:[9,18],constrain:24,constraint:22,consult:3,contact:[],contain:[0,3,5,13,14,18,22,24],contextu:18,contrast:21,contribut:17,control:[3,21,24],conveni:[3,21],convent:[0,2,21,24],convers:[2,3,6,17,20,22],convert:[2,3,6,8,9,11,18],corpora:3,corpu:[2,4,6,7,13,14,17,18,19,20,24],correct:[2,18],correspond:[0,21],count:21,counter:[0,21],counterpart:[2,3],cover:2,creat:[7,21],criteria:[0,5],criterion:[2,3],cross:[12,19],current:[0,3,17,18,19,21,24],custom:[2,14,21,22],customiz:[2,16],daam6gwai3:22,daan2:18,daan6gou1:18,dai2:22,dai:0,data:[1,2,5,6,11,12,13,14,17,18,19,21,22,23,24],dataset:[0,2,3,13,17,21],date:[0,21],dates_of_record:0,datetim:[0,21],deal:21,dedic:21,defin:[2,21],demograph:21,depend:[2,11,12,18,19,23,24],deprec:[0,5,6,8,9],describ:[0,11,12,19,22],design:[3,17,22],detail:[2,17,21],detect:18,determin:[0,23],develop:1,di1:21,dict:[0,12,21],dictioari:12,dictionari:[12,18],differ:[21,22],difficult:20,dik6si6nei4:22,dip2:22,dip2jing4:22,direct:[0,2],directli:18,directori:[0,3,13],disabl:[0,2,5],disallow:[2,14,16,24],disambigu:[9,18],discours:22,dist:18,distinct:0,distribut:22,doc:2,docstr:2,document:[2,3,20,21],doe:[0,13,14],domain:2,domin:0,done:0,dou1:22,download:[0,3,20],drive:3,drop:[0,2],due:[18,19],duplic:0,dut2:22,each:[0,2,6,11,22],easi:[14,18,24],edu:[11,12],educ:21,egg:18,either:[0,13,23],element:[0,2,8,9,10,21],email:17,empti:4,encod:[0,2,13],end:[18,24],eng:[0,21],english:[0,2,13,21,23,24],enough:18,entir:[12,24],entri:2,equival:[0,2,5,6,8,9,18,22],error:[2,18,20],especi:19,etc:[17,21,22],european:0,even:0,exampl:[0,5,6,8,9,10,11,12,13,14,15,18,20,21,22,23],exclud:[0,2,5,13,21],exist:[0,24],expect:19,explicitli:24,expos:[2,18,19],express:[0,13,20,22],extend:[0,2],extend_left:[0,2],extens:0,facebook:17,fact:19,fals:[0,2,5,9,15,18,22],fan3gaau3:22,fat:21,father:21,favor:2,featur:17,feedback:17,fei1gei1:22,femal:21,file:[0,2,3,4,5,13,18,21],file_path:0,filenam:21,filter:[0,13,21,23],find:[20,22],fine:3,first:[0,21,22],five:[14,24],flavor:21,flexibl:22,folder:2,follow:[0,2,3,13,22,24],forc:[0,2],form:[0,2,5,24],format:[2,17,21],found:[17,21,22,24],frequenc:0,from:[0,2,3,4,9,11,12,13,14,17,18,19,20,21,22,23,24],from_dir:[0,3],from_fil:[0,3],from_str:[0,3],from_zip:[0,3],full:22,further:[18,19,21],futur:18,gaa3:22,gan1:22,gei1:22,gei1piu3:22,gei2:22,gender:21,gener:[0,2,3,5,22],get:[18,19,20,21],github:[2,17],give:22,given:[0,2,3,5,12,21,22],go2bin1:22,go2go3:22,gong2:[6,18,20,22],good:18,gra:[0,20,21,22],grab:22,grain:3,grammat:0,granular:12,great:20,group:21,guthri:3,gwo3:22,gwong2dung1waa2:[6,8,9,10,18,20],hai6:[20,22],han:17,handl:[0,3,13,17,18,21],handout:1,has:[2,3,9,17,18,19,21,23,24],hauh:[9,18],have:[0,2,3,17,18,19,20,21,22],header:0,hei3hau6:[9,18],hei6au6:[9,18],hei:[9,18],heihauh:[9,18],helper:19,heoi3:[20,21,22],here:[0,3,13,21,22,24],heritag:3,high:[18,22],hill:17,him:[1,17],hkcancor:[2,3,6,11,12,14,17,18,19,20,21,22,24],hkcancor_to_ud:[2,11,19],hku:3,ho2ji5:22,hoeng1gong2jan4:[6,18,20],hong:[1,2,3,7,17,21,23],hongkong:[6,18,20],hood:3,hou2:[18,22],hou7:18,how:[21,22],howev:24,hss:[11,12],html:[0,5,11,12],http:[0,2,3,5,11,12,13,17],hyperlink:21,ident:21,identifi:[0,21],ids:0,ignor:[0,5,22],illeg:[8,9,10],illustr:21,implement:[0,2,17,21,24],improv:2,includ:[0,2,5,14,17,18,21,22,23,24],inconveni:18,incorpor:[3,17],independ:[18,21],index:[11,12],indic:0,individu:[0,2,5,6,17,18],inform:[0,21,22],ingest:18,inherit:[0,4,21],initi:[0,2,4,5,16,22],innov:1,inoper:2,input:[2,11,12,14,19],instagram:17,instal:20,instanc:[0,21,22,24],instanti:0,instead:[0,2,5,18],integ:24,intellig:18,intention:0,interest:[0,18,22],intern:[2,11],internet:0,interpret:20,introduc:[1,17,21],intuit:[],inv:0,invalid:18,investig:19,involv:[18,22],ipsyn:[0,2],island:23,issu:[2,17],issubset:23,iter:[0,5,15,16,22,23,24],its:[2,3,9,11,17,18,21,22],jackson:[1,17],jacksonlle:17,jat6:22,jau5dak1:[20,22],jau5mou5:[20,22],jenni:17,jiu3:[20,22],jp_str:[8,9,10],json:24,just:[0,5,21,22],jyut6:18,jyutp:[2,3,4,5,6,8,9,10,17,20],jyutping2tipa:[2,8],jyutping2yal:[2,9],jyutping_s:[0,2,4],jyutping_to_tipa:[2,18],jyutping_to_x:[2,9],jyutping_to_yal:[2,18],jyutpingi1:[],keep:22,keep_cas:0,kept:0,keyword:[2,14,18,19,24],kind:22,known:[],kong:[1,2,3,7,17,21,23],kowloon:23,koy55:18,kwarg:24,laa1:22,laa4:22,lai:17,lam:17,languag:[3,4,12,17,18,19,21,23,24],last:[0,17,18],latex:[8,18],lau:17,learn:[14,20,24],leav:0,lee:[1,3,17],leewongleung:3,left:[0,5,20,22],len:[0,3,15,20,22,23],length:[0,2,14,16,24],leo:3,leoi5hang4:[20,21,22],less:12,let:21,letter:[0,18,21,22],leung:3,level:[0,18,21,22],lib:18,librari:[3,14,17,18,24],licens:[2,3,18,19,24],like:[0,3,17,19,22,23,24],likewis:22,limit:19,line:[2,9,18,21],linguist:[1,3,12,19,22],link:2,list:[0,2,5,6,8,9,10,11,14,18,21,22],litong:[1,17],lo1:22,load:24,local:[0,3,13,18],longer:14,longest:[14,24],look:[0,20],loop:22,low:[9,18],lowercas:0,luk2:22,m4goi1:18,m4hai6:22,machin:[3,22],mai6:22,mai:[0,4,18,22,23],maintain:17,maintein:[],major:[19,21],make:[0,18],mani:[0,3,21,23,24],manual:3,map:[2,12,19],march:[1,3],mark:6,marker:[9,18,21],match:[0,5,13,14,22,24],materi:21,matter:6,matthew:3,max_word_length:[16,24],maxim:14,maximum:[2,14,16,24],mean:[0,3,17,19,22],meaning:21,media:17,meet:3,memori:0,menu:20,metadata:21,method:[0,2,4,16,17,22],might:[18,19],ming:17,minor:2,minut:1,mit:17,mix:22,mlu:0,mlum:0,mluw:0,model:[2,3,6,11,14,16,18,19,24],modifi:17,modul:18,month:0,moon:[],mor:[0,20,21,22],more:[0,2,3,5,17,19,20,21,22,24],morphem:0,morpholog:[0,21],most:[0,17,18],most_common:21,mot:[0,21],mother:21,mou5dak1:[20,22],multipl:[8,9,10,18],n_file:[0,3],name:[2,17,21],nasal:[2,18],natur:[3,12,17,19,23,24],naturalist:3,necessari:[18,23,24],necessit:18,need:[3,18,22],nei5:[21,22],neighbor:22,neither:22,ngram:0,nltk:2,none:[0,5,6,12,13,14,15,16,18,20,21,22],nongra:[],nor:22,note:[1,2,17],notimplementederror:2,noun:[11,19],now:[2,6,20],ntu:[11,12],nuclei:18,nucleu:[0,2,5,10,18,20,22],number:[0,3,18,20,21,22],number_of_charact:2,number_of_word:2,numer:18,numpydoc:2,object:[0,2,7,14,16,18,21,22,24],obtain:3,occurr:19,odd:19,off:19,offer:[17,18],often:[23,24],ohio:1,oken:[],on25:[8,18],one:[0,2,3,5,8,9,10,11,13],ones:23,onli:[0,2,5,13,19,22],onset:[0,2,5,9,10,18,20,22],onward:2,open:2,option:[0,5,9,11,12,13,14,15,16,21,23],orb:2,order:[0,3,17,21],org:[0,2,3,5,11,12,13],organ:[0,5],origin:[2,3,11,12,17,19,20,21],orthograph:0,other:[0,2,3,9,18,22],otherwis:[0,5,18,22],out:[3,17],output:[0,5,6,9,18,21],over:[11,12,19],overal:2,own:[3,21],packag:[0,2,18,20,21],page:[17,21],paidocantones:3,paidologo:3,pair:[11,19],paramet:[0,2,5,6,8,9,10,11,12,13,14,15,16,22],parent:[0,4],pars:[0,2,3,10,13,17,20,21,22],parse_jyutp:[2,18,20],parser:2,part:[0,2,3,5,9,11,12,17,18,20,21],particip:[0,2,5,21],particl:22,particular:22,particularli:[3,18,21],pass:[0,13,24],path:[0,2,3,5,13],perceptron:[11,19],perform:22,perhap:19,permiss:[17,19],phonbank:3,phonolog:[0,5,18],phrase:[2,11,19],pick:2,piggyback:2,pin:2,pinjam:[18,22],pip:17,placehold:21,pleas:[0,3,5,17,18,19,20,21],plu:[0,2,5,21,22],point:[2,3,13,21],pop:0,pop_left:0,pos:[0,5,11,12,20,21,22],pos_tag:[2,19],possibl:[2,3,22],potenti:[18,19,24],power:22,pprint:[],preced:22,predict:11,preprocess:0,preserv:21,preval:[0,5],previou:[9,18],previous:[2,6],primer:3,print:[20,21,22],proce:20,process:[12,17,18,19,21,23,24],product:[],profession:17,pron:[11,19],pronoun:23,pronunci:18,properti:0,prove:18,provid:[0,3,12,13,14,18,19,22,23,24],ptk:22,ptk_tone2:22,publicli:3,punct:[11,19],punctuat:6,purpos:[2,12,22],pycantones:[0,1,2,3,18,19,20,21,22,23,24],pylangacq:[0,2,3,21],pypi:2,python3:18,python:[1,2,3,18,20,22],qualiti:[2,19],queri:[3,17],quickstart:17,quot:[9,18],rachel:17,rais:[0,2,5,8,9,10,11,18],random:0,rang:[0,2,5],rather:2,ratio:0,read:[0,2,3,13,18],read_chat:3,readabl:3,reader:[0,2,4,17,18],readi:[17,20],readm:[3,17],readthedoc:2,reason:3,recent:[3,17,18],record:[0,21],recurs:[0,13],refer:17,regex:[0,5,22],regular:[0,13,20,22],rel:19,relat:[0,3],releas:[2,17,18,24],relev:18,remot:0,remov:[0,15,23],rendit:21,repetit:2,replac:[2,6,8,9],report:17,repr:18,repres:[0,2,6,18,22],represent:[0,18,21],request:[],requir:2,research:[3,17,22],resourc:[17,18],restructur:2,result:[0,2,18,19,20,24],retrain:2,retriev:18,revis:2,rich:3,richielo:17,right:[0,3,5,22],rime:[2,6,14,17,18,24],rime_cantones:17,rise:[18,22],robin:17,role:21,roman:[2,3,5,6,8,9,10,17,22],rst:2,ryan:17,rylanchiu:17,sai:21,same:[3,21,22,24],sarah:[0,13],satisfi:0,scheme:18,search:[0,2,3,17,20],second:22,see:[0,3,5,11,17,18,20,21,22],seem:18,segment:[0,2,3,5,6,11,17,18,19,20,21],semant:22,sens:0,sent:[0,2,5],sent_rang:[0,2,5],sentenc:[0,2,11,14,19],separ:18,septemb:[1,17],servic:17,ses:21,session:21,set:[0,5,14,15,18,22,23],sever:[3,21],sex:21,shoe:[11,19],show:22,similarli:23,simpl:24,simpli:[0,21],sinc:[0,2,3,13,19,21,22],singl:[3,9,13,22],situat:24,size:19,slide:[1,17,22],small:19,snippet:2,snowman:17,social:17,some:[0,4],some_token:21,sophist:19,sort:0,sourc:[0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18],space:24,span:[0,5,22],speak:[6,18,20],speaker:21,specif:[2,19,21,22],specifi:[0,13,22,24],speech:[0,2,3,5,11,12,17,20,21],spot:21,stabl:17,standalon:18,standard:19,state:1,statist:19,statu:21,stdin:18,step:0,stephan:17,still:[18,19,24],stiller:17,stop:[2,15,17],stop_word:23,stop_words_1:[15,23],stop_words_2:[15,23],store:21,str:[0,5,6,8,9,10,11,12,13,14,15,16,18],straightforward:18,string:[0,2,5,6,9,11,13,14,21,22,23,24],strip:2,structur:[0,13,21],studi:[0,21,22],style:2,substanti:17,suffix:18,suit:2,suppli:2,support:[0,2,5,11],syllab:[2,18],syllabl:[9,18],syntax:[18,22],system:18,tag:[0,2,3,5,11,12,17,20,21],tagged_s:0,tagged_word:0,tagger:[2,11,19],tagset:[2,11,12,19,22],take:[2,3,18,19,22,23,24],talk:[1,17],talkbank:[0,13,21],tape:21,target:[0,21,22],task:[12,18,22,23,24],teach:18,team:17,technic:17,teng1:22,term:[0,5,17],territori:23,test:2,text:[0,2,5,13,21,22,24],than:[0,2,3,5,14,19,22,24],thank:[3,18],thei:[0,2,3],theme:2,thi:[0,2,3,6,8,9,11,12,13,14,16,17,18,20,21,22,24],think:[19,21,24],those:[0,3,18],though:24,three:21,through:[2,21,22],thrown:19,tier:21,time:[21,22],time_mark:21,tipa:[2,8],todai:18,togeth:0,token:[5,18,20,21,22],tone2:22,tone:[0,5,9,10,18,20,22],tool:[1,17,18,21],top:21,touch:19,traceback:18,track:22,tracker:17,tradit:[0,5],train:[11,14,17,19,24],transcrib:3,transcript:0,translat:21,transpar:2,travel:17,treat:[0,18,23],trigram:21,trivial:18,tsui:[1,17],tsz:[1,17],ttr:0,tun55:[8,18],tupl:[0,2,5,6,11,22],turn:[2,18],twitter:17,two:[18,21,22],txt:17,type:0,typeerror:11,typic:[0,21],ubiquit:21,unclear:9,under:[0,3,13],underli:[2,18,21,24],unicod:[0,13],union:0,univers:[1,2,11,12,17,19],universaldepend:[11,12],unrecogn:[8,9,10,12],unseen:[6,18],unseg:[2,11,14,19,24],unspecifi:[0,5],unzip:[0,3,13],updat:2,upgrad:17,upstream:[0,2],url:[0,3,13],usag:21,use:[0,2,3,4,5,18,20,21,22,23],used:[0,3,5,11,14,18,21,22,24],useful:[3,17,22],usefulness:18,user:[2,18],uses:[3,11,12,19,21],using:[17,18,22],usr:18,usual:21,utf8:21,utf:[0,2,13],utter:[0,3,5,21],utterance_rang:[0,2,5,22],uuid:0,valid:18,valu:[0,2,6,9,13,18,22],valueerror:[0,5,8,9,10,11,18],varieti:22,variou:[2,18,21,22],verb:[11,12,19,20,22],version:[0,2,6,8,9,11,12,14,15,16,17,18,24],via:3,visual:21,wa25:[8,18],waa6:22,waan2:22,wai3:21,wai:[3,17,21,24],well:[2,3,21],whatev:22,when:[0,2,3,18,22],whenev:21,where:[0,2,6,11,21,22,24],whether:[18,24],which:[0,4,9,11,12,18,19,21,22,23],whitespac:2,who:17,whose:[0,20],wide:[3,22],window:[2,22],within:[0,5,22],without:3,wonder:17,wong:3,word:[0,2,3,5,6,11,14,15,16,17,18,19,20],word_freq:21,word_frequ:[0,21],word_ngram:[0,21],word_rang:[0,5,22],word_segment:[14,24],wordlist:2,wordseg:2,work:[0,2,3,13,19,21,22,24],workshop:1,worth:19,would:[0,3,9,12,17,18,19,24],wouldn:19,wrap:0,write:22,written:24,x2y:2,x_to_i:2,xml:3,xxa:21,xxb:21,xxx:[0,13],yale:[2,9],year:[0,3],yesterdai:[11,19],yip:3,yipmatthew:[3,13],you:[0,3,13,17,18,19,20,21,22,23,24],your:[0,3,4,17,19,20,22,23],yue:21,yuen:17,yut:18,zan1hai6:22,zero:22,zik6cing4:22,zip:[0,3,13],zoek2:22,zyu6:22},titles:["API Reference","Archives","Changelog","Corpus Data","pycantonese.CHATReader","pycantonese.CHATReader.search","pycantonese.characters_to_jyutping","pycantonese.hkcancor","pycantonese.jyutping_to_tipa","pycantonese.jyutping_to_yale","pycantonese.parse_jyutping","pycantonese.pos_tag","pycantonese.pos_tagging.hkcancor_to_ud","pycantonese.read_chat","pycantonese.segment","pycantonese.stop_words","pycantonese.word_segmentation.Segmenter","PyCantonese: Cantonese Linguistics and NLP in Python","Jyutping Romanization","Part-of-Speech Tagging","Quickstart","Corpus Reader Methods","Corpus Search Queries","Stop Words","Word Segmentation"],titleterms:{"0dev":2,"2014":2,"2015":2,"2016":2,"2018":2,"2020":2,"2021":2,"break":2,Added:2,acknowledg:17,annot:21,api:[0,2],archiv:1,built:3,cantones:17,chang:2,changelog:2,charact:[18,21,22],characters_to_jyutp:6,chat:3,chatread:[0,4,5],child:3,chines:[21,22],cite:17,consult:17,content:17,convers:18,corpu:[0,3,21,22],criteria:22,custom:[3,24],data:[0,3],deprec:2,download:17,element:22,exampl:[],fix:2,format:[3,22],frequenc:21,header:21,hkcancor:7,hkcancor_to_ud:12,how:17,instal:17,jyutp:[0,18,21,22],jyutping_to_tipa:8,jyutping_to_yal:9,languag:0,licens:17,linguist:17,link:17,logo:17,method:21,multipl:22,natur:0,ngram:21,nlp:17,non:2,output:[1,22],pars:18,parse_jyutp:10,part:[19,22],pos_tag:[11,12],process:0,pycantones:[4,5,6,7,8,9,10,11,12,13,14,15,16,17],python:17,queri:22,quick:[],quickstart:20,rang:22,read_chat:13,reader:21,refer:0,remov:2,research:1,result:22,roman:[0,18,21],search:[5,22],secur:2,segment:[14,16,24],speech:[19,22],stop:23,stop_word:15,string:18,support:17,tabl:17,tag:[19,22],talkbank:3,tipa:18,token:0,transcript:21,tutori:1,unreleas:2,utter:22,word:[21,22,23,24],word_segment:16,yale:18}}) \ No newline at end of file diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 73eb612..f26a590 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -34,6 +34,8 @@ Fixed * Fixed the previously inoperational methods ``append``\ , ``append_left``\ , ``extend``\ , and ``extend_left`` of the class ``CHATReader`` through the upstream PyLangAcq package. * Retrained the part-of-speech tagger, after the minor character fix from v3.2.3. +* Raised ``NotImplementedError`` for the method ``ipsyn`` of ``CHATReader``\ , + since the upstream method works only for English. [3.2.3] - 2021-04-12 -------------------- diff --git a/pycantonese/corpus.py b/pycantonese/corpus.py index 28a9e3e..2a069e3 100644 --- a/pycantonese/corpus.py +++ b/pycantonese/corpus.py @@ -50,6 +50,13 @@ class CHATReader(Reader): which may or may not be applicable to your use case. """ + def ipsyn(self): + """(Not implemented - the upstream ``ipsyn`` method works for English only.)""" + raise NotImplementedError( + "The upstream `ipsyn` method works for English only. " + "There isn't yet a Cantonese version of IPSyn." + ) + @staticmethod def _preprocess_token(t) -> Token: # Examples from the CHILDES LeeWongLeung corpus, child mhz