v1.2 released: use models' absolute paths; avoid using System.getProperty("user.dir")
vncorenlp · Feb 11, 2023 · 62bbc58 · 62bbc58
1 parent 2879cd1
commit 62bbc58
Show file tree

Hide file tree

Showing 11 changed files with 25 additions and 16 deletions.
diff --git a/Readme.md b/Readme.md
@@ -21,7 +21,7 @@ If you are looking for light-weight versions, VnCoreNLP's word segmentation and
 ## Installation <a name="install"></a>
 
 - `Java 1.8+` (Prerequisite)
-- File  `VnCoreNLP-1.1.1.jar` (27MB) and folder `models` (115MB) are placed in the same working folder.
+- File  `VnCoreNLP-1.2.jar` (27MB) and folder `models` (115MB) are placed in the same working folder.
 - `Python 3.6+` if using [a Python wrapper of VnCoreNLP](https://github.com/thelinhbkhn2014/VnCoreNLP_Wrapper). To install this wrapper, users have to run the following command:
 
     `$ pip3 install py_vncorenlp` 
@@ -38,7 +38,7 @@ import py_vncorenlp
 # and save them in some local working folder
 py_vncorenlp.download_model(save_dir='/absolute/path/to/vncorenlp')
 
-# Load VnCoreNLP from the local working folder that contains both `VnCoreNLP-1.1.1.jar` and `models` 
+# Load VnCoreNLP from the local working folder that contains both `VnCoreNLP-1.2.jar` and `models` 
 model = py_vncorenlp.VnCoreNLP(save_dir='/absolute/path/to/vncorenlp')
 # Equivalent to: model = py_vncorenlp.VnCoreNLP(annotators=["wseg", "pos", "ner", "parse"], save_dir='/absolute/path/to/vncorenlp')
 
@@ -80,13 +80,13 @@ print(output)
 You can run VnCoreNLP to annotate an input raw text corpus (e.g. a collection of news content) by using following commands:
 
     // To perform word segmentation, POS tagging, NER and then dependency parsing
-    $ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt
+    $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt
     // To perform word segmentation, POS tagging and then NER
-    $ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg,pos,ner
+    $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos,ner
     // To perform word segmentation and then POS tagging
-    $ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg,pos
+    $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos
     // To perform word segmentation
-    $ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg    
+    $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg    
 
 
 ### Using VnCoreNLP from the API

diff --git a/VnCoreNLP-1.2.jar b/VnCoreNLP-1.2.jar
diff --git a/pom.xml b/pom.xml
@@ -6,7 +6,7 @@
 
     <groupId>VnCoreNLP</groupId>
     <artifactId>VnCoreNLP</artifactId>
-    <version>1.1.1</version>
+    <version>1.2</version>
     <build>
         <plugins>
             <plugin>

diff --git a/src/main/java/vn/corenlp/ner/NerRecognizer.java b/src/main/java/vn/corenlp/ner/NerRecognizer.java
@@ -11,6 +11,7 @@
 import vn.corenlp.wordsegmenter.Vocabulary;
 import vn.pipeline.LexicalInitializer;
 import vn.pipeline.Word;
+import vn.pipeline.Utils;
 
 import java.io.File;
 import java.io.IOException;
@@ -34,7 +35,7 @@ public NerRecognizer() throws IOException{
         nlpDecoder = new NLPDecoder();
         List<NLPComponent<NLPNode>> components = new ArrayList();
 
-        String modelPath = System.getProperty("user.dir") + "/models/ner/vi-ner.xz";
+        String modelPath = Utils.jarDir + "/models/ner/vi-ner.xz";
         if (!new File(modelPath).exists()) throw new IOException("NerRecognizer: " + modelPath + " is not found!");
         GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica();
         if(lexica != null) {

diff --git a/src/main/java/vn/corenlp/parser/DependencyParser.java b/src/main/java/vn/corenlp/parser/DependencyParser.java
@@ -10,6 +10,7 @@
 import org.apache.log4j.Logger;
 import vn.pipeline.LexicalInitializer;
 import vn.pipeline.Word;
+import vn.pipeline.Utils;
 
 import java.io.File;
 import java.io.IOException;
@@ -32,7 +33,7 @@ public DependencyParser() throws IOException {
         nlpDecoder = new NLPDecoder();
         List<NLPComponent<NLPNode>> components = new ArrayList();
 
-        String modelPath = System.getProperty("user.dir") + "/models/dep/vi-dep.xz";
+        String modelPath = Utils.jarDir + "/models/dep/vi-dep.xz";
         if (!new File(modelPath).exists()) throw new IOException("DependencyParser: " + modelPath + " is not found!");
         GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica();
         if(lexica != null) {

diff --git a/src/main/java/vn/corenlp/postagger/PosTagger.java b/src/main/java/vn/corenlp/postagger/PosTagger.java
@@ -13,13 +13,15 @@
 import java.util.LinkedList;
 import java.util.List;
 
+import vn.pipeline.Utils;
+
 public class PosTagger {
     private static PosTagger posTagger = null;
     private MorphTagger tagger;
     public final static Logger LOGGER = Logger.getLogger(PosTagger.class);
     public PosTagger() throws IOException {
         LOGGER.info("Loading POS Tagging model");
-        String modelPath = System.getProperty("user.dir") + "/models/postagger/vi-tagger";
+        String modelPath = Utils.jarDir + "/models/postagger/vi-tagger";
         if (!new File(modelPath).exists()) throw new IOException("PosTagger: " + modelPath + " is not found!");
         tagger = FileUtils.loadFromFile(modelPath);
 

diff --git a/src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java b/src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java
@@ -7,13 +7,15 @@
 import java.util.HashSet;
 import java.util.Set;
 
+import vn.pipeline.Utils;
+
 @SuppressWarnings("unchecked")
 public class Vocabulary {
     public static Set<String> VN_DICT;
     static {
         VN_DICT = new HashSet<String>();
         try {
-            String vocabPath = System.getProperty("user.dir") + "/models/wordsegmenter/vi-vocab";
+            String vocabPath = Utils.jarDir + "/models/wordsegmenter/vi-vocab";
             if (!new File(vocabPath).exists())
                 throw new IOException("Vocabulary: " + vocabPath + " is not found!");
             //Vocabulary.class.getClassLoader().getResource("wordsegmenter/vi-vocab").getPath()

diff --git a/src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java b/src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java
@@ -22,7 +22,7 @@ public class WordSegmenter {
     public WordSegmenter()
             throws IOException {
         LOGGER.info("Loading Word Segmentation model");
-        String modelPath = System.getProperty("user.dir") + "/models/wordsegmenter/wordsegmenter.rdr";
+        String modelPath = vn.pipeline.Utils.jarDir + "/models/wordsegmenter/wordsegmenter.rdr";
         if (!new File(modelPath).exists())
             throw new IOException("WordSegmenter: " + modelPath + " is not found!");
 

diff --git a/src/main/java/vn/pipeline/LexicalInitializer.java b/src/main/java/vn/pipeline/LexicalInitializer.java
@@ -27,12 +27,12 @@ public LexicalInitializer(boolean initLexica) throws IOException {
         this.initLexica = initLexica;
         this.lexicalMap = new HashMap<>();
 
-        String lexicalPath = System.getProperty("user.dir") + "/models/ner/vi-500brownclusters.xz";
+        String lexicalPath = Utils.jarDir + "/models/ner/vi-500brownclusters.xz";
         if (!new File(lexicalPath).exists())
             throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!");
         lexicalMap.put("word_clusters", lexicalPath);
 
-        lexicalPath = System.getProperty("user.dir") + "/models/ner/vi-pretrainedembeddings.xz";
+        lexicalPath = Utils.jarDir + "/models/ner/vi-pretrainedembeddings.xz";
         if (!new File(lexicalPath).exists())
             throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!");
         lexicalMap.put("word_embeddings", lexicalPath);

diff --git a/src/main/java/vn/pipeline/Utils.java b/src/main/java/vn/pipeline/Utils.java
@@ -6,10 +6,14 @@
 import com.optimaize.langdetect.ngram.NgramExtractors;
 import com.optimaize.langdetect.profiles.LanguageProfileReader;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.List;
 
 public class Utils {
+    private static File jarFile = new File(VnCoreNLP.class.getProtectionDomain().getCodeSource().getLocation().getPath());
+    public static String jarDir = jarFile.getParentFile().getPath();
+
     private static LanguageDetector languageDetector = null;
     public static String detectLanguage(String text) throws IOException{
         if(languageDetector == null) {

diff --git a/src/main/java/vn/pipeline/VnCoreNLP.java b/src/main/java/vn/pipeline/VnCoreNLP.java
@@ -14,11 +14,10 @@
 
 public class VnCoreNLP {
 
-    private PosTagger posTagger;
-
     public final static Logger LOGGER = Logger.getLogger(Annotation.class);
 
     private WordSegmenter wordSegmenter;
+    private PosTagger posTagger;
     private NerRecognizer nerRecognizer;
     private DependencyParser dependencyParser;