Skip to content

Commit

Permalink
Update CLOs
Browse files Browse the repository at this point in the history
  • Loading branch information
Dargones committed Aug 4, 2024
1 parent a88752c commit c53c787
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 13 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:

- name: Run Release
run: |
java -jar AcrosticSleuth.jar -input data/demo -language EN -mode LINE -charset utf-8 -outputSize 4000 --concise > result.tsv
java -jar AcrosticSleuth.jar -input data/demo -language EN -charset utf-8 -outputSize 4000 --concise > result.tsv
- name: Test Release on Mac or Ubuntu
if: matrix.os != 'windows-latest'
Expand All @@ -46,7 +46,7 @@ jobs:
run: |
javac -encoding UTF-8 -d out -cp src src/acrosticsleuth/*.java
jar cfe AcrosticSleuth.jar acrosticsleuth.Main -C out . -C models .
java -jar AcrosticSleuth.jar -input data/demo -language EN -mode LINE -charset utf-8 -outputSize 4000 --concise > result.tsv
java -jar AcrosticSleuth.jar -input data/demo -language EN -charset utf-8 -outputSize 4000 --concise > result.tsv
- name: Test Build on Mac or Ubuntu
if: matrix.os != 'windows-latest'
Expand Down
Binary file modified AcrosticSleuth.jar
Binary file not shown.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,12 @@ This repository includes a demo dataset comprising a subset of pages with acrost
You can test AcrosticSleuth on this small dataset using:

```bash
java -jar AcrosticSleuth.jar -input data/demo -language EN -mode LINE -charset utf-8 -outputSize 4000 --concise
java -jar AcrosticSleuth.jar -input data/demo -language EN -charset utf-8 -outputSize 4000 --concise
```

Here is the meaning behind each of the options used:
- `-input data/demo`: analyze all texts in the `data/demo` directory
- `-language EN`: use the default English language model
- `-mode LINE`: search for line acrostics (where an acrostic is formed by the initial letters of each line)
- `-charset utf-8`: use the utf-8 encoding when opening the files
- `-outputSize 4000`: return the top 4000 instances (AcrosticSleuth clusters collocated instances, so the actual number of results it returns is much smaller: 46 for this demo)
- `--concise`: only report key information (file, acrostic, rank).
Expand Down
21 changes: 12 additions & 9 deletions src/acrosticsleuth/CLO.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,43 @@
import acrosticsleuth.CommandLine.Option;
import acrosticsleuth.CommandLine.Command;

@Command(name = "CLO", mixinStandardHelpOptions = true, description = "Scout a corpus for acrostics")
@Command(name = "CLO", mixinStandardHelpOptions = true, description = "Search a corpus for acrostics")
public class CLO implements Callable<Integer> {

@Option(names = {"-input", "--input"}, required = true, description = "File or directory with all texts of interest")
@Option(names = {"-input", "--input"}, required = true, description = "Directory with input texts")
private String input; // list of all texts to be processed
public File[] texts;

@Option(names = {"-outputSize", "--outputSize"}, description = "Max number of potential acrostics to print")
@Option(names = {"-outputSize", "--outputSize"}, description = "Max number of results to print")
public int outputSize = OUTPUT_SIZE_DEFAULT;

public LanguageModel languageModel;
public CharModel charModel;

@Option(names = {"-maxLength", "--maxLength"}, description = "Maximum length of an acrostic (in characters).")
@Option(names = {"-maxLength", "--maxLength"}, description = "Maximum allowed length of an acrostic", hidden = true)
public int maxLength = MAX_LENGTH_DEFAULT;

@Option(names = {"-workers", "--workers"}, description = "Number of threads to use")
public int workers = WORKERS_DEFAULT;

@Option(names = {"-mode", "--mode"}, description = "Look for acrostics formed by the first letter of each LINE or WORD")
@Option(names = {"-mode", "--mode"}, description = "Look for acrostics formed by the first letters of each LINE or WORD", hidden = true)
public Mode mode = MODE_DEFAULT;

@Option(names = {"-charset", "--charset"}, description = "Name of the character encoding to use. Supports utf-8 and windows-1251")
@Option(names = {"-charset", "--charset"}, description = "utf-8 or windows-1251")
public Charset charset = CHARSET_DEFAULT;

@Option(names = {"-language", "--language"}, required = true, description = "Determines the language of the text: EN, LA, RU, FR")
@Option(names = {"-language", "--language"}, required = true, description = "EN, LA, RU, or FR")
public Language language;

@Option(names = {"-concise", "--concise"}, description = "Report minimal information -- only the acrostic, the page it comes from, and the rank")
@Option(names = {"-concise", "--concise"}, description = "Report results concisely")
public boolean concise;

@Option(names = {"-wikisource", "--wikisource"}, description = "Use if the input is a parsed WikiSource database, where there might be several texts per file.")
@Option(names = {"-wikisource", "--wikisource"}, description = "Use if running on WikiSource")
public boolean wikisource;

@Option(names = {"-help", "--help"}, description = "Show this help message", usageHelp = true)
public boolean help;

public static final int MAX_LENGTH_DEFAULT = 50;
public static final int OUTPUT_SIZE_DEFAULT = 10000;
public static final int WORKERS_DEFAULT = 1;
Expand Down

0 comments on commit c53c787

Please sign in to comment.