Skip to content

Commit

Permalink
refactor(lib): Use TypeScript (#4)
Browse files Browse the repository at this point in the history
* refactor(lib): Use TypeScript

BREAKING CHANGE: remove default function

kuromojin v1.1.0 exports `tokenize` as the default function.
kuromojin v2.0.0 removes the default function.

```js
import kuromojin from "kuromojin";
// kuromojin === tokenize
```

In v2.0, use `import {tokenize} from "kuromojin"` instead:

```js
import {tokenize} from "kuromojin";
```

* style: apply prettier

* chore: remove babel

* feat(kuromojin): support process.env.KUROMOJIN_DIC_PATH

* rebase

* Update
  • Loading branch information
azu authored Jul 7, 2019
1 parent ab99c7f commit 5836f44
Show file tree
Hide file tree
Showing 13 changed files with 1,903 additions and 114 deletions.
12 changes: 0 additions & 12 deletions .babelrc

This file was deleted.

15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,28 @@ getTokenizer({dicPath: "https://example.com/kuromoji/dict"})

### Note: backward compatibility for <= 1.1.0

kuromojin v1.1.0 export `tokenize` as default function
kuromojin v1.1.0 exports `tokenize` as the default function.

kuromojin v2.0.0 removes the default function.

```js
import kuromojin from "kuromojin";
// kuromojin === tokenize
```

Recommended use `import {tokenize} from "kuromojin"` instead of it
Recommended: use `import {tokenize} from "kuromojin"` instead.

```js
import {tokenize} from "kuromojin";
```

### Note: kuromoji version is pinned

kuromojin pins kuromoji's version.

It aims to dedupe kuromoji's dictionary.
The dictionary is large, so pinning avoids installing duplicate copies of it.

## Tests

npm test
Expand All @@ -92,4 +101,4 @@ import {tokenize} from "kuromojin";

## License

MIT
MIT
63 changes: 45 additions & 18 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
{
"name": "kuromojin",
"version": "1.4.0",
"description": "Provide a high level wrapper for kuromoji.js",
"keywords": [
"kuromoji",
"kuromoji.js",
"promise"
],
"homepage": "https://github.com/azu/kuromojin",
"bugs": {
"url": "https://github.com/azu/kuromojin/issues"
},
"repository": {
"type": "git",
"url": "git+https://github.com/azu/kuromojin.git"
},
"author": "azu",
"email": "azuciao@gmail.com",
"homepage": "https://github.com/azu/kuromojin",
"license": "MIT",
"bugs": {
"url": "https://github.com/azu/kuromojin/issues"
Expand All @@ -18,29 +26,48 @@
"lib/",
"src/"
],
"main": "lib/index.js",
"types": "lib/index.d.ts",
"directories": {
"test": "test"
},
"scripts": {
"build": "babel src --out-dir lib --source-maps",
"watch": "babel src --out-dir lib --watch --source-maps",
"build": "cross-env NODE_ENV=production tsc -p .",
"prepublish": "npm run --if-present build",
"test": "mocha"
"test": "mocha \"test/**/*.{js,ts}\"",
"watch": "tsc -p . --watch",
"prettier": "prettier --write \"**/*.{js,jsx,ts,tsx,css}\""
},
"keywords": [
"kuromoji",
"kuromoji.js",
"promise"
],
"dependencies": {
"kuromoji": "0.1.1"
},
"devDependencies": {
"babel-cli": "^6.6.5",
"babel-plugin-espower": "^2.1.2",
"babel-preset-es2015": "^6.6.0",
"babel-register": "^6.7.2",
"mocha": "^3.0.2",
"power-assert": "^1.1.0"
"@types/mocha": "^5.2.7",
"@types/node": "^12.0.12",
"cross-env": "^5.2.0",
"husky": "^3.0.0",
"lint-staged": "^9.1.0",
"mocha": "^6.1.4",
"prettier": "^1.18.2",
"ts-node": "^8.3.0",
"ts-node-test-register": "^8.0.1",
"typescript": "^3.5.2"
},
"email": "azuciao@gmail.com",
"prettier": {
"singleQuote": false,
"printWidth": 120,
"tabWidth": 4
},
"husky": {
"hooks": {
"precommit": "lint-staged"
}
},
"lint-staged": {
"*.{js,jsx,ts,tsx,css}": [
"prettier --write",
"git add"
]
}
}
}
7 changes: 5 additions & 2 deletions src/Deferred.js → src/Deferred.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
// LICENSE : MIT
"use strict";
export default class Deferred {
/**
 * A promise bundled with the functions that settle it, so code outside the
 * Promise executor can resolve or reject it later.
 */
export default class Deferred<T> {
    /** The promise settled through `resolve` / `reject`. */
    promise: Promise<T>;
    /** Fulfils `promise`; assigned inside the executor while the constructor runs. */
    resolve!: (value?: T) => void;
    /** Rejects `promise`; assigned inside the executor while the constructor runs. */
    reject!: (reason?: any) => void;

    constructor() {
        // The executor runs synchronously, so both settle functions are
        // captured before the constructor returns.
        this.promise = new Promise((fulfil, fail) => {
            this.resolve = fulfil;
            this.reject = fail;
        });
    }
}
}
6 changes: 0 additions & 6 deletions src/index.js

This file was deleted.

2 changes: 2 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import { getTokenizer, getTokenizerOption, KuromojiToken, tokenize, Tokenizer } from "./kuromojin";
export { getTokenizer, getTokenizerOption, KuromojiToken, tokenize, Tokenizer };
54 changes: 0 additions & 54 deletions src/kuromojin.js

This file was deleted.

99 changes: 99 additions & 0 deletions src/kuromojin.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// LICENSE : MIT
"use strict";
import path from "path";

const kuromoji = require("kuromoji");
import Deferred from "./Deferred";

/**
 * Shape of the tokenizer object produced by the kuromoji builder, limited to
 * the methods declared here.
 */
export type Tokenizer = {
// Tokenizes `text` and returns the resulting tokens.
tokenize: (text: string) => KuromojiToken[];
// Tokenizes `text` and returns the resulting tokens; this is the method the exported `tokenize()` function calls.
tokenizeForSentence: (text: string) => KuromojiToken[];
};
/**
 * A single token as returned by the tokenizer.
 */
export type KuromojiToken = {
// Word ID within the dictionary
word_id: number;
// Word type: KNOWN if the word is registered in the dictionary, UNKNOWN if it is an unknown word
word_type: "KNOWN" | "UNKNOWN";
// Surface form
surface_form: string;
// Part of speech
pos: string;
// Part-of-speech subcategory 1
pos_detail_1: string;
// Part-of-speech subcategory 2
pos_detail_2: string;
// Part-of-speech subcategory 3
pos_detail_3: string;
// Conjugation type
conjugated_type: string;
// Conjugation form
conjugated_form: string;
// Base form
basic_form: string;
// Reading
reading: string;
// Pronunciation
pronunciation: string;
// Start position of the word
word_position: number;
};
// Browser `window` extended with an optional `kuromojin` settings object.
// `getNodeModuleDirPath` reads `window.kuromojin.dicPath` as the default dictionary path.
type KuromojiWindow = Window & {
kuromojin?: {
dicPath?: string;
};
};
// Single module-wide deferred: getTokenizer() returns its promise and settles it from the kuromoji build callback.
const deferred = new Deferred<Tokenizer>();
/**
 * Resolve the default kuromoji dictionary location.
 *
 * Lookup order:
 * 1. `process.env.KUROMOJIN_DIC_PATH`, when set to a non-empty value (Node).
 * 2. `window.kuromojin.dicPath`, when it is a string (browser).
 * 3. The `dict` directory one level above the directory of the resolved
 *    `kuromoji` entry file.
 *
 * @returns the path (or URL) to use as `dicPath`
 */
const getNodeModuleDirPath = () => {
    // Node
    if (typeof process !== "undefined" && typeof process.env === "object" && process.env.KUROMOJIN_DIC_PATH) {
        return process.env.KUROMOJIN_DIC_PATH;
    }
    // Browser
    // if window.kuromojin.dicPath is defined, use it as default dict path.
    const maybeKuromojiWindow: KuromojiWindow | undefined = typeof window !== "undefined" ? window : undefined;
    const browserSettings = maybeKuromojiWindow !== undefined ? maybeKuromojiWindow.kuromojin : undefined;
    // `typeof null === "object"`, so null is excluded explicitly; otherwise
    // `window.kuromojin = null` would throw a TypeError on `.dicPath`.
    if (
        typeof browserSettings === "object" &&
        browserSettings !== null &&
        typeof browserSettings.dicPath === "string"
    ) {
        return browserSettings.dicPath;
    }
    // Fallback: dictionary located relative to the resolved kuromoji module.
    const kuromojiDir = path.dirname(require.resolve("kuromoji"));
    return path.join(kuromojiDir, "..", "dict");
};

// Cache for the tokenizer; stays null until a kuromoji build has succeeded.
let _tokenizer: null | Tokenizer = null;
// Lock flag: set to true when the first build starts so that later calls do not start another build.
// It is not reset anywhere in the visible code.
let isLoading = false;

// Options accepted by getTokenizer() and tokenize(); the object is passed as-is to `kuromoji.builder()`.
export type getTokenizerOption = {
// Location of the kuromoji dictionary (a directory path, or a URL as in the README example).
dicPath: string;
};

/**
 * Load the kuromoji tokenizer once and share it between all callers.
 *
 * - After a successful build, the cached tokenizer is returned.
 * - While the first build is in flight, every caller gets the same pending promise.
 * - `options` only matter for the call that starts the build; later calls never rebuild.
 * - A failed build rejects the shared promise; the loading flag stays set, so
 *   later calls receive that same rejected promise.
 *
 * @param options builder options; by default `dicPath` comes from `getNodeModuleDirPath()`
 * @returns a promise for the shared tokenizer
 */
export function getTokenizer(options: getTokenizerOption = {dicPath: getNodeModuleDirPath()}): Promise<Tokenizer> {
    // Already built: serve from the cache.
    if (_tokenizer) {
        return Promise.resolve(_tokenizer);
    }
    // Only the first caller kicks off the dictionary load.
    if (!isLoading) {
        isLoading = true;
        const builder = kuromoji.builder(options);
        builder.build((buildError: undefined | Error, builtTokenizer: Tokenizer) => {
            if (buildError) {
                deferred.reject(buildError);
                return;
            }
            _tokenizer = builtTokenizer;
            deferred.resolve(builtTokenizer);
        });
    }
    return deferred.promise;
}

/**
 * Tokenize `text` with the shared tokenizer.
 *
 * @param text the text to analyze
 * @param options forwarded unchanged to `getTokenizer()`
 * @returns a promise resolving to the result of `tokenizer.tokenizeForSentence(text)`
 */
export function tokenize(text: string, options?: getTokenizerOption) {
    const tokenizerReady = getTokenizer(options);
    return tokenizerReady.then(function (loadedTokenizer) {
        return loadedTokenizer.tokenizeForSentence(text);
    });
}
36 changes: 18 additions & 18 deletions test/kuromojin-test.js → test/kuromojin-test.ts
Original file line number Diff line number Diff line change
@@ -1,55 +1,55 @@
// LICENSE : MIT
"use strict";
import assert from "power-assert";
import assert from "assert";
// it is compatible check for <= 1.1.0
import defaultFunction from "../src";
import {getTokenizer, tokenize} from "../src";
describe("kuromojin", function () {
context("many access at a time", function () {
it("should return a.promise", function () {
var promises = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9].map(num => {
import { getTokenizer, tokenize } from "../src";

describe("kuromojin", function() {
context("many access at a time", function() {
it("should return a.promise", function() {
var promises = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9].map(_num => {
return getTokenizer();
});
return Promise.all(promises).then(tokenizer => {
tokenizer.reduce((prev, current) => {
assert(prev === current);
return current;
})
});
});
});
});
context("tokenize", function () {
it("is alias to default", function () {
context("tokenize", function() {
it("is alias to default", function() {
var data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
var promises = data.map(num => {
return defaultFunction(String(num));
return tokenize(String(num));
});
return Promise.all(promises).then(texts => {
texts.forEach((results, index) => {
let firstNode = results[0];
assert.equal(firstNode.surface_form, String(index));
assert.strictEqual(firstNode.surface_form, String(index));
});
});
});
it("should return a.promise that resolve analyzed text", function () {
it("should return a.promise that resolve analyzed text", function() {
var data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
var promises = data.map(num => {
return tokenize(String(num));
});
return Promise.all(promises).then(texts => {
texts.forEach((results, index) => {
let firstNode = results[0];
assert.equal(firstNode.surface_form, String(index));
assert.strictEqual(firstNode.surface_form, String(index));
});
});
});
it("should tokenize sentence", function () {
it("should tokenize sentence", function() {
return tokenize("これは1文。これは2文。").then(tokens => {
const firstToken = tokens[0];
assert.equal(firstToken.word_position, 1);
assert.strictEqual(firstToken.word_position, 1);
const lastToken = tokens[tokens.length - 1];
assert.equal(lastToken.word_position, 12);
assert.strictEqual(lastToken.word_position, 12);
});
});
});
});
});
Loading

0 comments on commit 5836f44

Please sign in to comment.