Update CLOs

acrostics · Aug 4, 2024 · c53c787 · c53c787
1 parent a88752c
commit c53c787
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 13 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -26,7 +26,7 @@ jobs:
 
       - name: Run Release
         run: |
-          java -jar AcrosticSleuth.jar -input data/demo -language EN -mode LINE -charset utf-8 -outputSize 4000 --concise > result.tsv
+          java -jar AcrosticSleuth.jar -input data/demo -language EN -charset utf-8 -outputSize 4000 --concise > result.tsv
 
       - name: Test Release on Mac or Ubuntu
         if: matrix.os != 'windows-latest'
@@ -46,7 +46,7 @@ jobs:
         run: |
           javac -encoding UTF-8 -d out -cp src src/acrosticsleuth/*.java 
           jar cfe AcrosticSleuth.jar acrosticsleuth.Main -C out . -C models .
-          java -jar AcrosticSleuth.jar -input data/demo -language EN -mode LINE -charset utf-8 -outputSize 4000 --concise > result.tsv
+          java -jar AcrosticSleuth.jar -input data/demo -language EN -charset utf-8 -outputSize 4000 --concise > result.tsv
 
       - name: Test Build on Mac or Ubuntu
         if: matrix.os != 'windows-latest'

diff --git a/AcrosticSleuth.jar b/AcrosticSleuth.jar
diff --git a/README.md b/README.md
@@ -27,13 +27,12 @@ This repository includes a demo dataset comprising a subset of pages with acrost
 You can test AcrosticSleuth on this small dataset using:
 
 ```bash
-java -jar AcrosticSleuth.jar -input data/demo -language EN -mode LINE -charset utf-8 -outputSize 4000 --concise
+java -jar AcrosticSleuth.jar -input data/demo -language EN -charset utf-8 -outputSize 4000 --concise
 ```
 
 Here is the meaning behind each of the options used:
 - `-input data/demo`: analyze all texts in the `data/demo` directory
 - `-language EN`: use the default English language model
-- `-mode LINE`: search for line acrostics (where an acrostic is formed by the initial letters of each line)
 - `-charset utf-8`: use the utf-8 encoding when opening the files
 - `-outputSize 4000`: return the top 4000 instances (AcrosticSleuth clusters collocated instances, so the actual number of results it returns is much smaller: 46 for the demo dataset)
 - `--concise`: only report key information (file, acrostic, rank).

diff --git a/src/acrosticsleuth/CLO.java b/src/acrosticsleuth/CLO.java
@@ -12,40 +12,43 @@
 import acrosticsleuth.CommandLine.Option;
 import acrosticsleuth.CommandLine.Command;
 
-@Command(name = "CLO", mixinStandardHelpOptions = true, description = "Scout a corpus for acrostics")
+@Command(name = "CLO", mixinStandardHelpOptions = true, description = "Search a corpus for acrostics")
 public class CLO implements Callable<Integer> {
 
-    @Option(names = {"-input", "--input"}, required = true, description = "File or directory with all texts of interest")
+    @Option(names = {"-input", "--input"}, required = true, description = "Directory with input texts")
     private String input; // list of all texts to be processed
     public File[] texts;
 
-    @Option(names = {"-outputSize", "--outputSize"}, description = "Max number of potential acrostics to print")
+    @Option(names = {"-outputSize", "--outputSize"}, description = "Max number of results to print")
     public int outputSize = OUTPUT_SIZE_DEFAULT;
 
     public LanguageModel languageModel;
     public CharModel charModel;
 
-    @Option(names = {"-maxLength", "--maxLength"}, description = "Maximum length of an acrostic (in characters).")
+    @Option(names = {"-maxLength", "--maxLength"}, description = "Maximum allowed length of an acrostic", hidden = true)
     public int maxLength = MAX_LENGTH_DEFAULT;
 
     @Option(names = {"-workers", "--workers"}, description = "Number of threads to use")
     public int workers = WORKERS_DEFAULT;
 
-    @Option(names = {"-mode", "--mode"}, description = "Look for acrostics formed by the first letter of each LINE or WORD")
+    @Option(names = {"-mode", "--mode"}, description = "Look for acrostics formed by the first letters of each LINE or WORD", hidden = true)
     public Mode mode = MODE_DEFAULT;
 
-    @Option(names = {"-charset", "--charset"}, description = "Name of the character encoding to use. Supports utf-8 and windows-1251")
+    @Option(names = {"-charset", "--charset"}, description = "utf-8 or windows-1251")
     public Charset charset = CHARSET_DEFAULT;
 
-    @Option(names = {"-language", "--language"}, required = true, description = "Determines the language of the text: EN, LA, RU, FR")
+    @Option(names = {"-language", "--language"}, required = true, description = "EN, LA, RU, or FR")
     public Language language;
 
-    @Option(names = {"-concise", "--concise"}, description = "Report minimal information -- only the acrostic, the page it comes from, and the rank")
+    @Option(names = {"-concise", "--concise"}, description = "Report results concisely")
     public boolean concise;
 
-    @Option(names = {"-wikisource", "--wikisource"}, description = "Use if the input is a parsed WikiSource database, where there might be several texts per file.")
+    @Option(names = {"-wikisource", "--wikisource"}, description = "Use if running on WikiSource")
     public boolean wikisource;
 
+    @Option(names = {"-help", "--help"}, description = "Show this help message", usageHelp = true)
+    public boolean help;
+
     public static final int MAX_LENGTH_DEFAULT = 50;
     public static final int OUTPUT_SIZE_DEFAULT = 10000;
     public static final int WORKERS_DEFAULT = 1;