From d6cec8e38eb307525c86b1ca24034edc3da068db Mon Sep 17 00:00:00 2001 From: jacksonllee Date: Thu, 17 Mar 2022 00:51:19 -0400 Subject: [PATCH] DOC refresh docs --- README.rst | 7 - docs/.buildinfo | 2 +- docs/_modules/index.html | 74 ++-- docs/_modules/pylangacq/chat.html | 74 ++-- docs/_modules/pylangacq/objects.html | 74 ++-- docs/_static/basic.css | 3 +- docs/_static/doctools.js | 5 +- docs/_static/documentation_options.js | 2 +- docs/_static/language_data.js | 2 +- docs/_static/pygments.css | 2 + docs/_static/scripts/furo.js | 2 +- docs/_static/scripts/furo.js.map | 2 +- docs/_static/searchtools.js | 2 +- docs/_static/styles/furo-extensions.css | 2 +- docs/_static/styles/furo-extensions.css.map | 2 +- docs/_static/styles/furo.css | 2 +- docs/_static/styles/furo.css.map | 2 +- docs/api.html | 176 +++++----- docs/changelog.html | 362 +++++++++++--------- docs/frequencies.html | 76 ++-- docs/genindex.html | 72 ++-- docs/headers.html | 86 +++-- docs/index.html | 138 ++++---- docs/measures.html | 76 ++-- docs/objects.inv | Bin 843 -> 843 bytes docs/quickstart.html | 88 +++-- docs/read.html | 94 ++--- docs/search.html | 72 ++-- docs/searchindex.js | 2 +- docs/source/build_docs.py | 4 +- docs/source/changelog.rst | 22 ++ docs/transcriptions.html | 92 ++--- docs/write.html | 76 ++-- 33 files changed, 974 insertions(+), 721 deletions(-) diff --git a/README.rst b/README.rst index 32ed637..c47bb9a 100644 --- a/README.rst +++ b/README.rst @@ -42,13 +42,6 @@ To download and install the most recent version:: Ready for more? Check out the `Quickstart `_ page. -Consulting ----------- - -If your team would like professional assistance in using PyLangAcq, -freelance consulting and training services are available for both academic and commercial groups. -Please email `Jackson L. Lee `_. 
- Support ------- diff --git a/docs/.buildinfo b/docs/.buildinfo index 3d21c7b..cfd84dc 100644 --- a/docs/.buildinfo +++ b/docs/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 55e57f073151487b4452d5da95cf5614 +config: b8a061c5250ba40d6bab5633c3c42eec tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/_modules/index.html b/docs/_modules/index.html index a76c91c..29290c9 100644 --- a/docs/_modules/index.html +++ b/docs/_modules/index.html @@ -4,11 +4,11 @@ - - Overview: module code - PyLangAcq 0.16.0 documentation + + Overview: module code - PyLangAcq 0.16.1 documentation - - + + @@ -20,16 +20,18 @@ --color-code-foreground: black; } - body[data-theme="dark"] { - --color-code-background: #202020; + @media not print { + body[data-theme="dark"] { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; - } - @media (prefers-color-scheme: dark) { - body:not([data-theme="light"]) { - --color-code-background: #202020; + } + @media (prefers-color-scheme: dark) { + body:not([data-theme="light"]) { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; + } } } @@ -41,17 +43,13 @@ Contents - - - - - + + Menu - @@ -60,14 +58,14 @@ Expand - Light mode - @@ -82,7 +80,7 @@ Dark mode - @@ -90,7 +88,7 @@ Auto light/dark mode - @@ -123,7 +121,7 @@ @@ -175,8 +174,13 @@
-
-
+ + + + + Back to top + +
- -
@@ -175,8 +174,13 @@
-
-
+ + + + + Back to top + +
@@ -175,8 +174,13 @@
-
-
+ + + + + Back to top + +
@@ -176,8 +175,13 @@
-
-
+ + + + + Back to top + +
-

API Reference

+

API Reference#

-

read_chat()

+

read_chat()#

-pylangacq.read_chat(path: str, match: str = None, exclude: str = None, encoding: str = 'utf-8', cls: type = <class 'pylangacq.chat.Reader'>) pylangacq.chat.Reader[source]
+pylangacq.read_chat(path: str, match: str = None, exclude: str = None, encoding: str = 'utf-8', cls: type = <class 'pylangacq.chat.Reader'>) pylangacq.chat.Reader[source]#

Create a reader of CHAT data.

If path is a remote ZIP file and you expect to call this function with the same path multiple times, consider downloading the data to the local @@ -250,10 +254,10 @@

-

Reader

+

Reader#

-class pylangacq.Reader[source]
+class pylangacq.Reader[source]#

A reader that handles CHAT data.

Methods

@@ -377,7 +381,7 @@

-ages(participant='CHI', months=False) Union[List[Tuple[int, int, int]], List[float]][source]
+ages(participant='CHI', months=False) Union[List[Tuple[int, int, int]], List[float]][source]#

Return the ages of the given participant in the data.

Parameters
@@ -401,7 +405,7 @@

-append(reader: pylangacq.chat.Reader) None[source]
+append(reader: pylangacq.chat.Reader) None[source]#

Append data from another reader.

New data is appended as-is with no filtering of any sort, even for files whose file paths duplicate those already in the current reader.

@@ -416,7 +420,7 @@

-append_left(reader: pylangacq.chat.Reader) None[source]
+append_left(reader: pylangacq.chat.Reader) None[source]#

Left-append data from another reader.

New data is appended as-is with no filtering of any sort, even for files whose file paths duplicate those already in the current reader.

@@ -431,12 +435,12 @@

-clear() None[source]
+clear() None[source]#

Remove all data from this reader.

-dates_of_recording(by_files=False) Union[Set[datetime.date], List[Set[datetime.date]]][source]
+dates_of_recording(by_files=False) Union[Set[datetime.date], List[Set[datetime.date]]][source]#

Return the dates of recording.

Parameters
@@ -460,7 +464,7 @@

-extend(readers: Iterable[pylangacq.chat.Reader]) None[source]
+extend(readers: Iterable[pylangacq.chat.Reader]) None[source]#

Extend data from other readers.

New data is appended as-is with no filtering of any sort, even for files whose file paths duplicate those already in the current reader.

@@ -475,7 +479,7 @@

-extend_left(readers: Iterable[pylangacq.chat.Reader]) None[source]
+extend_left(readers: Iterable[pylangacq.chat.Reader]) None[source]#

Left-extend data from other readers.

New data is appended as-is with no filtering of any sort, even for files whose file paths duplicate those already in the current reader.

@@ -490,7 +494,7 @@

-file_paths() List[str][source]
+file_paths() List[str][source]#

Return the file paths.

If the data comes from in-memory strings, then the “file paths” are arbitrary UUID random strings.

@@ -504,7 +508,7 @@

-filter(match: str = None, exclude: str = None) pylangacq.chat.Reader[source]
+filter(match: str = None, exclude: str = None) pylangacq.chat.Reader[source]#

Return a new reader filtered by file paths.

Parameters
@@ -539,7 +543,7 @@

-classmethod from_dir(path: str, match: str = None, exclude: str = None, extension: str = '.cha', encoding: str = 'utf-8', parallel: bool = True) pylangacq.chat.Reader[source]
+classmethod from_dir(path: str, match: str = None, exclude: str = None, extension: str = '.cha', encoding: str = 'utf-8', parallel: bool = True) pylangacq.chat.Reader[source]#

Instantiate a reader from a local directory with CHAT data files.

Parameters
@@ -584,7 +588,7 @@

-classmethod from_files(paths: List[str], match: str = None, exclude: str = None, encoding: str = 'utf-8', parallel: bool = True) pylangacq.chat.Reader[source]
+classmethod from_files(paths: List[str], match: str = None, exclude: str = None, encoding: str = 'utf-8', parallel: bool = True) pylangacq.chat.Reader[source]#

Instantiate a reader from local CHAT data files.

Parameters
@@ -626,7 +630,7 @@

-classmethod from_strs(strs: List[str], ids: List[str] = None, parallel: bool = True) pylangacq.chat.Reader[source]
+classmethod from_strs(strs: List[str], ids: List[str] = None, parallel: bool = True) pylangacq.chat.Reader[source]#

Instantiate a reader from in-memory CHAT data strings.

Parameters
@@ -656,7 +660,7 @@

-classmethod from_zip(path: str, match: str = None, exclude: str = None, extension: str = '.cha', encoding: str = 'utf-8', parallel: bool = True, use_cached: bool = True, session: requests.sessions.Session = None) pylangacq.chat.Reader[source]
+classmethod from_zip(path: str, match: str = None, exclude: str = None, extension: str = '.cha', encoding: str = 'utf-8', parallel: bool = True, use_cached: bool = True, session: requests.sessions.Session = None) pylangacq.chat.Reader[source]#

Instantiate a reader from a local or remote ZIP file.

If the input data is a remote ZIP file and you expect to call this method with the same path multiple times, consider downloading the data to the local @@ -725,7 +729,7 @@

-head(n: int = 5, participants=None, exclude=None)[source]
+head(n: int = 5, participants=None, exclude=None)[source]#

Return the first several utterances.

Parameters
@@ -757,7 +761,7 @@

-headers() List[Dict][source]
+headers() List[Dict][source]#

Return the headers.

Returns
@@ -769,7 +773,7 @@

-info(verbose=False) None[source]
+info(verbose=False) None[source]#

Print a summary of this Reader’s data.

Parameters
@@ -782,7 +786,7 @@

-ipsyn(participant='CHI') List[int][source]
+ipsyn(participant='CHI') List[int][source]#

Return the indexes of productive syntax (IPSyn).

Parameters
@@ -801,7 +805,7 @@

-languages(by_files=False) Union[Set[str], List[List[str]]][source]
+languages(by_files=False) Union[Set[str], List[List[str]]][source]#

Return the languages in the data.

Parameters
@@ -828,7 +832,7 @@

-mlu(participant='CHI') List[float][source]
+mlu(participant='CHI') List[float][source]#

Return the mean lengths of utterance (MLU).

This method is equivalent to mlum().

@@ -848,7 +852,7 @@

-mlum(participant='CHI') List[float][source]
+mlum(participant='CHI') List[float][source]#

Return the mean lengths of utterance by morphemes.

Parameters
@@ -867,7 +871,7 @@

-mluw(participant='CHI') List[float][source]
+mluw(participant='CHI') List[float][source]#

Return the mean lengths of utterance by words.

Parameters
@@ -886,12 +890,12 @@

-n_files() int[source]
+n_files() int[source]#

Return the number of files.

-participants(by_files=False) Union[Set[str], List[Set[str]]][source]
+participants(by_files=False) Union[Set[str], List[Set[str]]][source]#

Return the participants (e.g., CHI, MOT).

Parameters
@@ -914,7 +918,7 @@

-pop() pylangacq.chat.Reader[source]
+pop() pylangacq.chat.Reader[source]#

Drop the last data file from the reader and return it as a reader.

Returns
@@ -926,7 +930,7 @@

-pop_left() pylangacq.chat.Reader[source]
+pop_left() pylangacq.chat.Reader[source]#

Drop the first data file from the reader and return it as a reader.

Returns
@@ -938,7 +942,7 @@

-sents(participants=None, exclude=None, by_files=False) Union[List[List[str]], List[List[List[str]]]][source]
+sents(participants=None, exclude=None, by_files=False) Union[List[List[str]], List[List[List[str]]]][source]#

Return the sents.

Deprecated since version 0.13.0: Please use words() with by_utterances=True @@ -979,7 +983,7 @@

-tagged_sents(participants=None, exclude=None, by_files=False) Union[List[List[pylangacq.objects.Token]], List[List[List[pylangacq.objects.Token]]]][source]
+tagged_sents(participants=None, exclude=None, by_files=False) Union[List[List[pylangacq.objects.Token]], List[List[List[pylangacq.objects.Token]]]][source]#

Return the tagged sents.

Deprecated since version 0.13.0: Please use tokens() with by_utterances=True @@ -1021,7 +1025,7 @@

-tagged_words(participants=None, exclude=None, by_files=False) Union[List[pylangacq.objects.Token], List[List[pylangacq.objects.Token]]][source]
+tagged_words(participants=None, exclude=None, by_files=False) Union[List[pylangacq.objects.Token], List[List[pylangacq.objects.Token]]][source]#

Return the tagged words.

Deprecated since version 0.13.0: Please use tokens() with by_utterances=False @@ -1062,7 +1066,7 @@

-tail(n: int = 5, participants=None, exclude=None)[source]
+tail(n: int = 5, participants=None, exclude=None)[source]#

Return the last several utterances.

Parameters
@@ -1094,7 +1098,7 @@

-to_chat(path: str, is_dir: bool = False, filenames: Optional[Iterable[str]] = None, tabular: bool = True, encoding: str = 'utf-8') None[source]
+to_chat(path: str, is_dir: bool = False, filenames: Optional[Iterable[str]] = None, tabular: bool = True, encoding: str = 'utf-8') None[source]#

Export to CHAT data files.

Parameters
@@ -1138,7 +1142,7 @@

-to_strs(tabular: bool = True) Generator[str, None, None][source]
+to_strs(tabular: bool = True) Generator[str, None, None][source]#

Yield CHAT data strings.

Note

@@ -1171,7 +1175,7 @@

-tokens(participants=None, exclude=None, by_utterances=False, by_files=False) Union[List[pylangacq.objects.Token], List[List[pylangacq.objects.Token]], List[List[List[pylangacq.objects.Token]]]][source]
+tokens(participants=None, exclude=None, by_utterances=False, by_files=False) Union[List[pylangacq.objects.Token], List[List[pylangacq.objects.Token]], List[List[List[pylangacq.objects.Token]]]][source]#

Return the tokens.

Parameters
@@ -1216,7 +1220,7 @@

-ttr(keep_case=True, participant='CHI') List[float][source]
+ttr(keep_case=True, participant='CHI') List[float][source]#

Return the type-token ratios (TTR).

Parameters
@@ -1244,7 +1248,7 @@

-utterances(participants=None, exclude=None, by_files=False) Union[List[pylangacq.objects.Utterance], List[List[pylangacq.objects.Utterance]]][source]
+utterances(participants=None, exclude=None, by_files=False) Union[List[pylangacq.objects.Utterance], List[List[pylangacq.objects.Utterance]]][source]#

Return the utterances.

Parameters
@@ -1281,7 +1285,7 @@

-word_frequencies(keep_case=True, participants=None, exclude=None, by_files=False) Union[collections.Counter, List[collections.Counter]][source]
+word_frequencies(keep_case=True, participants=None, exclude=None, by_files=False) Union[collections.Counter, List[collections.Counter]][source]#

Return word frequencies.

Parameters
@@ -1328,7 +1332,7 @@

-word_ngrams(n, keep_case=True, participants=None, exclude=None, by_files=False) Union[collections.Counter, List[collections.Counter]][source]
+word_ngrams(n, keep_case=True, participants=None, exclude=None, by_files=False) Union[collections.Counter, List[collections.Counter]][source]#

Return word ngrams.

Parameters
@@ -1375,7 +1379,7 @@

-words(participants=None, exclude=None, by_utterances=False, by_files=False) Union[List[str], List[List[str]], List[List[List[str]]]][source]
+words(participants=None, exclude=None, by_utterances=False, by_files=False) Union[List[str], List[List[str]], List[List[List[str]]]][source]#

Return the words.

Parameters
@@ -1421,10 +1425,10 @@

-

Token

+

Token#

-class pylangacq.objects.Token(word: str, pos: Optional[str], mor: Optional[str], gra: Optional[pylangacq.objects.Gra])[source]
+class pylangacq.objects.Token(word: str, pos: Optional[str], mor: Optional[str], gra: Optional[pylangacq.objects.Gra])[source]#

Token with attributes as parsed from a CHAT utterance.

Attributes
@@ -1458,10 +1462,10 @@

-

Gra

+

Gra#

-class pylangacq.objects.Gra(dep: int, head: int, rel: str)[source]
+class pylangacq.objects.Gra(dep: int, head: int, rel: str)[source]#

Grammatical relation of a word in an utterance.

Attributes
@@ -1478,10 +1482,10 @@

-

Utterance

+

Utterance#

-class pylangacq.objects.Utterance(participant: str, tokens: List[pylangacq.objects.Token], time_marks: Optional[Tuple[int, int]], tiers: Dict[str, str])[source]
+class pylangacq.objects.Utterance(participant: str, tokens: List[pylangacq.objects.Token], time_marks: Optional[Tuple[int, int]], tiers: Dict[str, str])[source]#

Utterance in a CHAT transcript data.

Attributes
@@ -1507,10 +1511,10 @@

-

Helper Functions

+

Helper Functions#

-pylangacq.chat.cached_data_info() Set[str][source]
+pylangacq.chat.cached_data_info() Set[str][source]#

Return the information of the cached datasets.

Returns
@@ -1523,7 +1527,7 @@

Helper Functions
-pylangacq.chat.remove_cached_data(url: Optional[str] = None) None[source]
+pylangacq.chat.remove_cached_data(url: Optional[str] = None) None[source]#

Remove data cached on disk.

Parameters
@@ -1564,11 +1568,21 @@

Helper Functions - Copyright © 2015-2021, Jackson L. Lee | PyLangAcq 0.16.0 | Documentation last updated on December 28, 2021 | - Created using Sphinx and @pradyunsg's - Furo theme. +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+
+ +
+
diff --git a/docs/changelog.html b/docs/changelog.html index 9dc2b8d..bff6d71 100644 --- a/docs/changelog.html +++ b/docs/changelog.html @@ -5,11 +5,11 @@ - - Changelog - PyLangAcq 0.16.0 documentation + + Changelog - PyLangAcq 0.16.1 documentation - - + + @@ -21,16 +21,18 @@ --color-code-foreground: black; } - body[data-theme="dark"] { - --color-code-background: #202020; + @media not print { + body[data-theme="dark"] { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; - } - @media (prefers-color-scheme: dark) { - body:not([data-theme="light"]) { - --color-code-background: #202020; + } + @media (prefers-color-scheme: dark) { + body:not([data-theme="light"]) { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; + } } } @@ -42,17 +44,13 @@ Contents - - - - - + + Menu - @@ -61,14 +59,14 @@ Expand - Light mode - @@ -83,7 +81,7 @@ Dark mode - @@ -91,7 +89,7 @@ Auto light/dark mode - @@ -124,7 +122,7 @@

@@ -176,8 +175,13 @@
-
-
+ + + + + Back to top + +
-

Changelog

+

Changelog#

-

[Unreleased]

+

[Unreleased]#

-

Added

+

Added#

-

Changed

+

Changed#

-

Deprecated

+

Deprecated#

-

Removed

+

Removed#

-

Fixed

+

Fixed#

-

Security

+

Security#

-

[0.16.0] - 2021-12-27

+

[0.16.1] - 2022-03-17#

-

Added

+

Changed#

+
    +
  • Restructured the repository to use top-level src/ and tests/ directories.

  • +
+
+
+

Removed#

+
    +
  • Removed setup.py.

  • +
+
+
+

Fixed#

+
    +
  • Moved BaseTestCHATReader back under the pylangacq package namespace +so that download packages can import BaseTestCHATReader for testing.

  • +
+
+
+
+

[0.16.0] - 2021-12-27#

+
+

Added#

  • Reader objects can now be concatenated by the addition operator +.

  • Implemented the head, tail, and info methods at Reader.

  • @@ -227,8 +253,8 @@

    Added¶ build metadata and options.

-
-

Changed

+
+

Changed#

-
-

Removed

+
+

Removed#

  • Dropped support for Python 3.6.

-
-

Security

+
+

Security#

  • Turned on safety and bandit checks at CircleCI builds.

-
-

[0.15.0] - 2021-06-06

-
-

Added

+
+

[0.15.0] - 2021-06-06#

+
+

Added#

-
-

Changed

+
+

Changed#

  • CHAT parsing for the header information is now more robust for varying whitespace characters between the head and its associated value.

-
-

Removed

+
+

Removed#

  • Dropped kwarg allow_remote in Reader.from_zip. This kwarg wouldn’t make any sense anymore, or at least would be confusing with the introduction of use_cached.

-
-

[0.14.1] - 2021-05-16

-
-

Fixed

+
+

[0.14.1] - 2021-05-16#

+
+

Fixed#

  • The header/metadata has a more reasonable representation for emptiness when input data is empty.

-
-

[0.14.0] - 2021-05-12

-
-

Added

+
+

[0.14.0] - 2021-05-12#

+
+

Added#

  • Added the parallel optional argument to the Reader methods {from_zip, from_dir, from_files, from_strs} @@ -311,44 +337,44 @@

    Added

-
-

[0.13.3] - 2021-05-07

-
-

Fixed

+
+

[0.13.3] - 2021-05-07#

+
+

Fixed#

  • The methods append, append_left, extend, and extend_left now work with a subclass of Reader, not just Reader itself.

-
-

[0.13.2] - 2021-05-02

-
-

Fixed

+
+

[0.13.2] - 2021-05-02#

+
+

Fixed#

  • Fixed utterance cleaning so that it is now compatible with all CHILDES datasets.

-
-

[0.13.1] - 2021-03-23

-
-

Fixed

+
+

[0.13.1] - 2021-03-23#

+
+

Fixed#

  • Fixed a CHAT parsing issue when correction and repetition are combined.

-
-

[0.13.0] - 2021-03-15

+
+

[0.13.0] - 2021-03-15#

    API-breaking changes: The Reader class has been completely rewritten. A couple of methods have been removed, while others have been renamed. For methods that remain (renamed or not), their behavior for output data structure and arguments allowed has been changed. The details are as follows.
    

-
-

Added

+
+

Added#

-
-

Changed

+
+

Changed#

-
-

Deprecated

+
+

Deprecated#

@@ -176,8 +175,13 @@
-
-
+ + + + + Back to top + +
-

Word Frequencies and Ngrams

+

Word Frequencies and Ngrams#

Because word frequencies and combinatorics are useful for many purposes, the following Reader methods are defined:

@@ -287,11 +291,21 @@ - - @@ -174,8 +173,13 @@
-
-
+ + + + + Back to top + +
- -
@@ -176,8 +175,13 @@
-
-
+ + + + + Back to top + +
-

Accessing Headers

+

Accessing Headers#

CHAT data files record metadata such as the participants’ demographic information in a header section, which has lines starting with the @ character and is typically found at the top of a data file. @@ -241,7 +245,7 @@

-

Ages

+

Ages#

ages() returns the age information of the participant "CHI" (the target child) by default, since CHAT is by far most commonly used in language acquisition and development research, and that typically only the age of the @@ -299,7 +303,7 @@

Ages

-

Dates of Recording

+

Dates of Recording#

    dates_of_recording() returns the dates of recording as a set of date objects for all the data files.
    

@@ -334,7 +338,7 @@

Dates of Recording -

Languages

+

Languages#

languages() returns the language information. Eve’s data is naturally in English. In datasets with more than one language (bi-/multilingualism), @@ -346,7 +350,7 @@

Languages -

Participants

+

Participants#

participants() returns the participants (e.g., "CHI", "MOT") in the reader. by_files=True is also available if you need the information by individual files.

@@ -359,7 +363,7 @@

Participants -

Other Header Information

+

Other Header Information#

For any header information not given by one of the implemented methods above, headers() gives a list of headers, where each header is a generic Python dictionary for each data file, @@ -444,11 +448,21 @@

Other Header Information - Copyright © 2015-2021, Jackson L. Lee | PyLangAcq 0.16.0 | Documentation last updated on December 28, 2021 | - Created using Sphinx and @pradyunsg's - Furo theme. +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+
+ +
+
diff --git a/docs/index.html b/docs/index.html index 2b3b51e..380eb63 100644 --- a/docs/index.html +++ b/docs/index.html @@ -7,11 +7,11 @@ - - PyLangAcq 0.16.0 documentation + + PyLangAcq 0.16.1 documentation - - + + @@ -23,16 +23,18 @@ --color-code-foreground: black; } - body[data-theme="dark"] { - --color-code-background: #202020; + @media not print { + body[data-theme="dark"] { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; - } - @media (prefers-color-scheme: dark) { - body:not([data-theme="light"]) { - --color-code-background: #202020; + } + @media (prefers-color-scheme: dark) { + body:not([data-theme="light"]) { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; + } } } @@ -44,17 +46,13 @@ Contents - - - - - + + Menu - @@ -63,14 +61,14 @@ Expand - Light mode - @@ -85,7 +83,7 @@ Dark mode - @@ -93,7 +91,7 @@ Auto light/dark mode - @@ -126,7 +124,7 @@

@@ -178,8 +177,13 @@
-
-
+ + + + + Back to top + +
-

PyLangAcq: Language Acquisition Research in Python

+

PyLangAcq: Language Acquisition Research in Python#

PyLangAcq is a Python library for language acquisition research.

  • Easy access to CHILDES and other TalkBank datasets

  • @@ -205,7 +209,7 @@ more generally

-

Download and Install

+

Download and Install#

To download and install the most recent version:

$ pip install --upgrade pylangacq
 
@@ -213,19 +217,13 @@

Ready for more? Check out the Quickstart page.

-
-

Consulting

-

If your team would like professional assistance in using PyLangAcq, -freelance consulting and training services are available for both academic and commercial groups. -Please email Jackson L. Lee.

-
-

Support

+

Support#

If you have found PyLangAcq useful and would like to offer support, buying me a coffee would go a long way!

-

How to Cite

+

How to Cite#

PyLangAcq is authored and maintained by Jackson L. Lee.

Lee, Jackson L., Ross Burkholder, Gallagher B. Flinn, and Emily R. Coppess. 2016. Working with CHAT transcripts in Python. @@ -250,15 +248,15 @@

How to Cite -

License

+

License#

MIT License. Please see LICENSE.txt in the GitHub source code for details.

The test data files included come from CHILDES, and have a CC BY-NC-SA 3.0 license instead; please also see -pylangacq/tests/test_data/README.md in the GitHub source code for details.

+src/pylangacq/tests/README.md in the GitHub source code for details.

-

Table of Contents

+

Table of Contents#

@@ -176,8 +175,13 @@
-
-
+ + + + + Back to top + +
-

Developmental Measures

+

Developmental Measures#

Several developmental measures are defined as Reader methods:

@@ -272,11 +276,21 @@ - - @@ -176,8 +175,13 @@
-
-
+ + + + + Back to top + +
-

Quickstart

+

Quickstart#

After you have downloaded and installed PyLangAcq (see Download and Install), import the package pylangacq in your Python interpreter:

>>> import pylangacq
@@ -200,7 +204,7 @@
 

No errors? Great! Now you’re ready to proceed.

-

Reading CHAT data

+

Reading CHAT data#

First off, we need some CHAT data to work with. The function read_chat() asks for a data source and returns a CHAT data reader. @@ -231,7 +235,7 @@

Reading CHAT dataReading CHAT Data.

-

Header Information

+

Header Information#

CHAT transcript files store metadata in the header with lines beginning with @. Among other things, eve has the age information of Eve when the recordings were made, which is from 1 year and 6 months old to 2 years and 3 months old:

@@ -261,7 +265,7 @@

Header InformationAccessing Headers.

-

Transcriptions and Annotations

+

Transcriptions and Annotations#

words() is the basic method to access the transcriptions:

>>> words = eve.words()  # list of strings, for all the words across all 20 files
 >>> len(words)  # total word count
@@ -351,7 +355,7 @@ 

Transcriptions and AnnotationsTranscriptions and Annotations.

-

Word Frequencies and Ngrams

+

Word Frequencies and Ngrams#

For word combinatorics, check out word_frequencies() and word_ngrams():

@@ -375,7 +379,7 @@

Word Frequencies and NgramsWord Frequencies and Ngrams.

-

Developmental Measures

+

Developmental Measures#

To get the mean length of utterance (MLU), use mlu():

>>> eve.mlu()
 [2.309041835357625,
@@ -409,7 +413,7 @@ 

Developmental MeasuresMore on Developmental Measures.

-

Questions?

+

Questions?#

    If you have any questions, comments, bug reports, etc., please open issues at the GitHub repository, or contact Jackson L. Lee.
    

@@ -442,11 +446,21 @@

Questions? - Copyright © 2015-2021, Jackson L. Lee | PyLangAcq 0.16.0 | Documentation last updated on December 28, 2021 | - Created using Sphinx and @pradyunsg's - Furo theme. +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+
+ +
+
diff --git a/docs/read.html b/docs/read.html index 3fd195b..57125e9 100644 --- a/docs/read.html +++ b/docs/read.html @@ -5,11 +5,11 @@ - - Reading CHAT Data - PyLangAcq 0.16.0 documentation + + Reading CHAT Data - PyLangAcq 0.16.1 documentation - - + + @@ -21,16 +21,18 @@ --color-code-foreground: black; } - body[data-theme="dark"] { - --color-code-background: #202020; + @media not print { + body[data-theme="dark"] { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; - } - @media (prefers-color-scheme: dark) { - body:not([data-theme="light"]) { - --color-code-background: #202020; + } + @media (prefers-color-scheme: dark) { + body:not([data-theme="light"]) { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; + } } } @@ -42,17 +44,13 @@ Contents - - - - - + + Menu - @@ -61,14 +59,14 @@ Expand - Light mode - @@ -83,7 +81,7 @@ Dark mode - @@ -91,7 +89,7 @@ Auto light/dark mode - @@ -124,7 +122,7 @@

@@ -176,8 +175,13 @@
-
-
+ + + + + Back to top + +
-

Reading CHAT Data

+

Reading CHAT Data#

PyLangAcq is designed to handle conversational data represented in the CHAT format as used in the CHILDES database for language acquisition research; CHAT is documented in its official manual. @@ -206,9 +210,9 @@ for parallelization from the official Python documentation.

-

Initializing a Reader

+

Initializing a Reader#

-

read_chat()

+

read_chat()#

Reading CHAT data in PyLangAcq is all about creating a Reader object. The most convenient way to do it is to use the read_chat() function, which asks for a data source and several optional arguments. @@ -329,7 +333,7 @@

-

From a ZIP File or Local Directory

+

From a ZIP File or Local Directory#

    Perhaps you don’t want read_chat() to do the guesswork of determining what type your data source is, or you want more fine-grained control of what counts as CHAT data files or not in your data source. @@ -351,7 +355,7 @@
    

From a ZIP File or Local Directorypylangacq.chat.remove_cached_data() let you check and remove cached data, respectively.

-

From Specific CHAT Data Files

+

From Specific CHAT Data Files#

If you’d like to target specific files, the Reader classmethod from_files() takes a list of file paths:

>>> path1 = "path/to/one/data/file.cha"
@@ -361,7 +365,7 @@ 

From Specific CHAT Data Files -

From In-Memory Strings

+

From In-Memory Strings#

If your CHAT data comes from in-memory strings, the Reader classmethod from_strs() takes a list of strings, @@ -406,7 +410,7 @@

From In-Memory Strings

-

Parallel Processing

+

Parallel Processing#

Because a CHILDES / TalkBank dataset usually comes with multiple CHAT data files, it is reasonable to parallelize the process of reading and parsing CHAT data for speed-up. By default, such parallelization is applied. @@ -421,7 +425,7 @@

Parallel ProcessingFalse .

-

Creating an Empty Reader

+

Creating an Empty Reader#

Calling Reader itself with no arguments initializes an empty reader:

>>> reader = pylangacq.Reader()
 >>> reader.n_files()
@@ -434,7 +438,7 @@ 

Creating an Empty Reader -

Adding and Removing Data

+

Adding and Removing Data#

A Reader keeps the linear ordering of CHAT data by the ordering of the source data files. CHAT data typically comes as data files that each represent a recording session. @@ -545,7 +549,7 @@

Adding and Removing Data -

Custom Behavior

+

Custom Behavior#

If custom behavior in CHAT handling is needed, consider defining a child class that inherits from Reader. This approach is suitable if, for instance, new class methods are needed, @@ -587,11 +591,21 @@

Custom Behavior - Copyright © 2015-2021, Jackson L. Lee | PyLangAcq 0.16.0 | Documentation last updated on December 28, 2021 | - Created using Sphinx and @pradyunsg's - Furo theme. +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+
+ +
+
diff --git a/docs/search.html b/docs/search.html index 724c7d9..f2aa695 100644 --- a/docs/search.html +++ b/docs/search.html @@ -4,9 +4,9 @@ - Search - PyLangAcq 0.16.0 documentation - - + Search - PyLangAcq 0.16.1 documentation + + @@ -18,16 +18,18 @@ --color-code-foreground: black; } - body[data-theme="dark"] { - --color-code-background: #202020; + @media not print { + body[data-theme="dark"] { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; - } - @media (prefers-color-scheme: dark) { - body:not([data-theme="light"]) { - --color-code-background: #202020; + } + @media (prefers-color-scheme: dark) { + body:not([data-theme="light"]) { + --color-code-background: #202020; --color-code-foreground: #d0d0d0; + } } } @@ -39,17 +41,13 @@ Contents - - - - - + + Menu - @@ -58,14 +56,14 @@ Expand - Light mode - @@ -80,7 +78,7 @@ Dark mode - @@ -88,7 +86,7 @@ Auto light/dark mode - @@ -121,7 +119,7 @@

@@ -173,8 +172,13 @@
-
-
+ + + + + Back to top + +
- -
@@ -176,8 +175,13 @@
-
-
+ + + + + Back to top + +
-

Transcriptions and Annotations

+

Transcriptions and Annotations#

Conversational data formatted in CHAT provides transcriptions with rich annotations for both linguistic and extra-linguistic information. PyLangAcq is designed to extract data and annotations in CHAT and expose them in Python data structures for flexible modeling work. This page explains how PyLangAcq represents CHAT data and annotations.

-

CHAT Format

+

CHAT Format#

To see how the CHAT format translates to PyLangAcq, let’s look at the very first two utterances in Eve’s data in the American English Brown @@ -287,7 +291,7 @@

CHAT Format -

Words

+

Words#

The Reader method words() returns the transcriptions as segmented words. Calling words() with no arguments gives a @@ -297,7 +301,7 @@

Words

-

Output by Utterances or Files

+

Output by Utterances or Files#

To preserve the utterance-level structure, pass in by_utterances=True so that an inner list is created around the words from each utterance:

>>> reader.words(by_utterances=True)
@@ -320,7 +324,7 @@ 

Output by Utterances or Files -

Filter by Participants

+

Filter by Participants#

Besides controlling the output for its structure, you can also specify which participants’ data to return. The optional arguments participants and exclude are available for this purpose. @@ -342,7 +346,7 @@

Filter by Participants

-

Tokens

+

Tokens#

Beyond the transcriptions from words(), tokens() gives you the word-based annotations from the CHAT data.

@@ -388,7 +392,7 @@

Tokensrel (relation).

-

Utterances

+

Utterances#

The utterances() method gives you information beyond tokens():

>>> reader.utterances(participants="CHI")
@@ -415,7 +419,7 @@ 

Utterances -

Time Marks

+

Time Marks#

Many of the more recent CHILDES datasets (especially starting from the 1990s) come with digitized audio and video data associated with the text-based CHAT data files. In these datasets, an utterance in the CHAT file has time marks to indicate @@ -425,7 +429,7 @@

Time Marks(0, 1073), for ·0_1073· found at the end of the CHAT transcription line.

-

Tiers

+

Tiers#

You may sometimes need the original, unparsed transcription lines, because they contain information, e.g., annotations for pauses, that is dropped when Token objects are constructed @@ -465,11 +469,21 @@

Tiers

- -
@@ -176,8 +175,13 @@
-
-
+ + + + + Back to top + +
-

Writing CHAT Data

+

Writing CHAT Data#

To output CHAT data, a Reader object can either export data to local files or write its data to strings.

@@ -262,11 +266,21 @@ - -