Skip to content

Commit

Permalink
v1.2 released: use models' absolute paths; avoid using System.getProperty("user.dir")
Browse files Browse the repository at this point in the history
  • Loading branch information
datquocnguyen committed Feb 11, 2023
1 parent 2879cd1 commit 62bbc58
Show file tree
Hide file tree
Showing 11 changed files with 25 additions and 16 deletions.
12 changes: 6 additions & 6 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ If you are looking for light-weight versions, VnCoreNLP's word segmentation and
## Installation <a name="install"></a>

- `Java 1.8+` (Prerequisite)
- File `VnCoreNLP-1.1.1.jar` (27MB) and folder `models` (115MB) are placed in the same working folder.
- File `VnCoreNLP-1.2.jar` (27MB) and folder `models` (115MB) are placed in the same working folder.
- `Python 3.6+` if using [a Python wrapper of VnCoreNLP](https://github.com/thelinhbkhn2014/VnCoreNLP_Wrapper). To install this wrapper, users have to run the following command:

`$ pip3 install py_vncorenlp`
Expand All @@ -38,7 +38,7 @@ import py_vncorenlp
# and save them in some local working folder
py_vncorenlp.download_model(save_dir='/absolute/path/to/vncorenlp')

# Load VnCoreNLP from the local working folder that contains both `VnCoreNLP-1.1.1.jar` and `models`
# Load VnCoreNLP from the local working folder that contains both `VnCoreNLP-1.2.jar` and `models`
model = py_vncorenlp.VnCoreNLP(save_dir='/absolute/path/to/vncorenlp')
# Equivalent to: model = py_vncorenlp.VnCoreNLP(annotators=["wseg", "pos", "ner", "parse"], save_dir='/absolute/path/to/vncorenlp')

Expand Down Expand Up @@ -80,13 +80,13 @@ print(output)
You can run VnCoreNLP to annotate an input raw text corpus (e.g. a collection of news content) by using the following commands:

// To perform word segmentation, POS tagging, NER and then dependency parsing
$ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt
$ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt
// To perform word segmentation, POS tagging and then NER
$ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg,pos,ner
$ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos,ner
// To perform word segmentation and then POS tagging
$ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg,pos
$ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos
// To perform word segmentation
$ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg
$ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg


### Using VnCoreNLP from the API
Expand Down
Binary file added VnCoreNLP-1.2.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>VnCoreNLP</groupId>
<artifactId>VnCoreNLP</artifactId>
<version>1.1.1</version>
<version>1.2</version>
<build>
<plugins>
<plugin>
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/vn/corenlp/ner/NerRecognizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import vn.corenlp.wordsegmenter.Vocabulary;
import vn.pipeline.LexicalInitializer;
import vn.pipeline.Word;
import vn.pipeline.Utils;

import java.io.File;
import java.io.IOException;
Expand All @@ -34,7 +35,7 @@ public NerRecognizer() throws IOException{
nlpDecoder = new NLPDecoder();
List<NLPComponent<NLPNode>> components = new ArrayList();

String modelPath = System.getProperty("user.dir") + "/models/ner/vi-ner.xz";
String modelPath = Utils.jarDir + "/models/ner/vi-ner.xz";
if (!new File(modelPath).exists()) throw new IOException("NerRecognizer: " + modelPath + " is not found!");
GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica();
if(lexica != null) {
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/vn/corenlp/parser/DependencyParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.apache.log4j.Logger;
import vn.pipeline.LexicalInitializer;
import vn.pipeline.Word;
import vn.pipeline.Utils;

import java.io.File;
import java.io.IOException;
Expand All @@ -32,7 +33,7 @@ public DependencyParser() throws IOException {
nlpDecoder = new NLPDecoder();
List<NLPComponent<NLPNode>> components = new ArrayList();

String modelPath = System.getProperty("user.dir") + "/models/dep/vi-dep.xz";
String modelPath = Utils.jarDir + "/models/dep/vi-dep.xz";
if (!new File(modelPath).exists()) throw new IOException("DependencyParser: " + modelPath + " is not found!");
GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica();
if(lexica != null) {
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/vn/corenlp/postagger/PosTagger.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
import java.util.LinkedList;
import java.util.List;

import vn.pipeline.Utils;

public class PosTagger {
private static PosTagger posTagger = null;
private MorphTagger tagger;
public final static Logger LOGGER = Logger.getLogger(PosTagger.class);
public PosTagger() throws IOException {
LOGGER.info("Loading POS Tagging model");
String modelPath = System.getProperty("user.dir") + "/models/postagger/vi-tagger";
String modelPath = Utils.jarDir + "/models/postagger/vi-tagger";
if (!new File(modelPath).exists()) throw new IOException("PosTagger: " + modelPath + " is not found!");
tagger = FileUtils.loadFromFile(modelPath);

Expand Down
4 changes: 3 additions & 1 deletion src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
import java.util.HashSet;
import java.util.Set;

import vn.pipeline.Utils;

@SuppressWarnings("unchecked")
public class Vocabulary {
public static Set<String> VN_DICT;
static {
VN_DICT = new HashSet<String>();
try {
String vocabPath = System.getProperty("user.dir") + "/models/wordsegmenter/vi-vocab";
String vocabPath = Utils.jarDir + "/models/wordsegmenter/vi-vocab";
if (!new File(vocabPath).exists())
throw new IOException("Vocabulary: " + vocabPath + " is not found!");
//Vocabulary.class.getClassLoader().getResource("wordsegmenter/vi-vocab").getPath()
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class WordSegmenter {
public WordSegmenter()
throws IOException {
LOGGER.info("Loading Word Segmentation model");
String modelPath = System.getProperty("user.dir") + "/models/wordsegmenter/wordsegmenter.rdr";
String modelPath = vn.pipeline.Utils.jarDir + "/models/wordsegmenter/wordsegmenter.rdr";
if (!new File(modelPath).exists())
throw new IOException("WordSegmenter: " + modelPath + " is not found!");

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/vn/pipeline/LexicalInitializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ public LexicalInitializer(boolean initLexica) throws IOException {
this.initLexica = initLexica;
this.lexicalMap = new HashMap<>();

String lexicalPath = System.getProperty("user.dir") + "/models/ner/vi-500brownclusters.xz";
String lexicalPath = Utils.jarDir + "/models/ner/vi-500brownclusters.xz";
if (!new File(lexicalPath).exists())
throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!");
lexicalMap.put("word_clusters", lexicalPath);

lexicalPath = System.getProperty("user.dir") + "/models/ner/vi-pretrainedembeddings.xz";
lexicalPath = Utils.jarDir + "/models/ner/vi-pretrainedembeddings.xz";
if (!new File(lexicalPath).exists())
throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!");
lexicalMap.put("word_embeddings", lexicalPath);
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/vn/pipeline/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfileReader;

import java.io.File;
import java.io.IOException;
import java.util.List;

public class Utils {
private static File jarFile = new File(VnCoreNLP.class.getProtectionDomain().getCodeSource().getLocation().getPath());
public static String jarDir = jarFile.getParentFile().getPath();

private static LanguageDetector languageDetector = null;
public static String detectLanguage(String text) throws IOException{
if(languageDetector == null) {
Expand Down
3 changes: 1 addition & 2 deletions src/main/java/vn/pipeline/VnCoreNLP.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@

public class VnCoreNLP {

private PosTagger posTagger;

public final static Logger LOGGER = Logger.getLogger(Annotation.class);

private WordSegmenter wordSegmenter;
private PosTagger posTagger;
private NerRecognizer nerRecognizer;
private DependencyParser dependencyParser;

Expand Down

0 comments on commit 62bbc58

Please sign in to comment.