Merge pull request #5 from HealthNLPorg/v0.1.0

V0.1.0
HealthNLPorg · Jan 31, 2024 · 8b05497 · 8b05497
2 parents 8935776 + fda17a2
commit 8b05497
Show file tree

Hide file tree

Showing 6 changed files with 611 additions and 606 deletions.
diff --git a/README.md b/README.md
@@ -177,21 +177,24 @@ load PbjStarter
 
 add PythonRunner Command="-m pip install resources/org/apache/ctakes/timelines/timelines_py" Wait=yes
 
-set TimelinesSecondStep=timelines.timelines_pipeline
+set TimelinesSecondStep=timelines.timelines_python_pipeline
 
 add PythonRunner Command="-m $TimelinesSecondStep -rq JavaToPy -o $OutputDirectory"
 
 set minimumSpan=2
 set exclusionTags=""
 
 // Just the components we need from DefaultFastPipeline
+
+// Write nice big banners when ctakes starts and finishes.
 set WriteBanner=yes
 
 // Load a simple token processing pipeline from another pipeline file
 load DefaultTokenizerPipeline
 
 // Add non-core annotators
 add ContextDependentTokenizerAnnotator
+// The dictionary module requires tokens, so it must be loaded after the tokenization stack
 load DictionarySubPipe
 
 add BackwardsTimeAnnotator classifierJarPath=/org/apache/ctakes/temporal/models/timeannotator/model.jar
@@ -214,7 +217,7 @@ add PythonRunner Command="-m pip install resources/org/apache/ctakes/timelines/t
 ```
 This sets up the necessary environment variables and installs the relevant Python code, along with its dependencies, into the Python environment.
 ```
-set TimelinesSecondStep=timelines.timelines_pipeline
+set TimelinesSecondStep=timelines.timelines_python_pipeline
 
 add PythonRunner Command="-m $TimelinesSecondStep -rq JavaToPy -o $OutputDirectory"
 ```
@@ -251,7 +254,7 @@ Sends the CASes which have been processed by the Java annotators to the Python a
 
 The core Python logic is in the file:
 ```
-timelines/instance-generator/src/user/resources/org/apache/ctakes/timelines/timelines_py/src/timelines/timelines_delegator.py
+timelines/instance-generator/src/user/resources/org/apache/ctakes/timelines/timelines_py/src/timelines/timelines_annotator.py
 ```
 Like the Java annotators, the Python annotator implements a `process` method, which is the core driver of the annotator for processing each note's contents. The raw output for the whole cancer type cohort is collected and written to a TSV file on disk in the `collection_process_complete` method.
 

diff --git a/...nstance-generator/src/user/resources/org/apache/ctakes/timelines/pipeline/Timelines.piper b/...nstance-generator/src/user/resources/org/apache/ctakes/timelines/pipeline/Timelines.piper
@@ -7,7 +7,7 @@ load PbjStarter
 
 add PythonRunner Command="-m pip install resources/org/apache/ctakes/timelines/timelines_py" Wait=yes
 
-set TimelinesSecondStep=timelines.timelines_pipeline
+set TimelinesSecondStep=timelines.timelines_python_pipeline
 
 add PythonRunner Command="-m $TimelinesSecondStep -rq JavaToPy -o $OutputDirectory"
 
@@ -29,10 +29,6 @@ load DictionarySubPipe
 
 add BackwardsTimeAnnotator classifierJarPath=/org/apache/ctakes/temporal/models/timeannotator/model.jar
 add DCTAnnotator
-// loading this after the dictionaries so
-// we no longer waste time normalizing timexes
-// for files with no chemos
-// add TimeMentionNormalizer tuis=T061 timeout=25
 add TimeMentionNormalizer timeout=10
 
 add PbjJmsSender SendQueue=JavaToPy SendStop=yes