-
-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* refactor(lib): Use TypeScript BREAKING CHANGE: remove the default function. kuromojin v1.1.0 exported `tokenize` as the default function. kuromojin v2.0.0 removes the default function. ```js import kuromojin from "kuromojin"; // kuromojin === tokenize ``` v2.0 users should use `import {tokenize} from "kuromojin"` instead: ```js import {tokenize} from "kuromojin"; ``` * style: apply prettier * chore: remove babel * feat(kuromojin): support process.env.KUROMOJIN_DIC_PATH * rebase * Update
- Loading branch information
Showing
13 changed files
with
1,903 additions
and
114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,13 @@ | ||
// LICENSE : MIT | ||
"use strict"; | ||
/**
 * A promise bundled with the `resolve` / `reject` functions that settle it,
 * so that the promise can be settled from outside the executor callback.
 *
 * @typeParam T - type of the value the promise is fulfilled with
 */
export default class Deferred<T> {
    /** The promise settled by `resolve` or `reject`. */
    promise: Promise<T>;
    /** Fulfils `promise` with `value` (or adopts the state of a thenable). */
    resolve!: (value?: T | PromiseLike<T>) => void;
    /** Rejects `promise` with `reason`. */
    reject!: (reason?: any) => void;

    constructor() {
        this.promise = new Promise<T>((resolve, reject) => {
            // Wrapped instead of assigned directly: since TypeScript 4.1 the
            // executor's `resolve` requires its argument, so it is no longer
            // assignable to a property whose parameter is optional. The wrapper
            // keeps `deferred.resolve()` callable exactly as before.
            this.resolve = (value?: T | PromiseLike<T>) => resolve(value as T | PromiseLike<T>);
            this.reject = reject;
        });
    }
}
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
import { getTokenizer, getTokenizerOption, KuromojiToken, tokenize, Tokenizer } from "./kuromojin"; | ||
export { getTokenizer, getTokenizerOption, KuromojiToken, tokenize, Tokenizer }; |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
// LICENSE : MIT | ||
"use strict"; | ||
import path from "path"; | ||
|
||
const kuromoji = require("kuromoji"); | ||
import Deferred from "./Deferred"; | ||
|
||
// Shape of the tokenizer object that the kuromoji builder hands to its
// `build` callback, as far as this module uses it.
export type Tokenizer = {
    // Tokenize `text` and return its tokens.
    tokenize: (text: string) => KuromojiToken[];
    // Tokenize `text` and return its tokens; this is the method the exported
    // `tokenize` function calls.
    tokenizeForSentence: (text: string) => KuromojiToken[];
};
// A single token (morpheme) returned by the tokenizer.
export type KuromojiToken = {
    // Word ID within the dictionary
    word_id: number;
    // Word type (KNOWN if the word is registered in the dictionary; UNKNOWN for an unknown word)
    word_type: "KNOWN" | "UNKNOWN";
    // Surface form
    surface_form: string;
    // Part of speech
    pos: string;
    // Part-of-speech subcategory 1
    pos_detail_1: string;
    // Part-of-speech subcategory 2
    pos_detail_2: string;
    // Part-of-speech subcategory 3
    pos_detail_3: string;
    // Conjugation type
    conjugated_type: string;
    // Conjugation form
    conjugated_form: string;
    // Base form
    basic_form: string;
    // Reading
    reading: string;
    // Pronunciation
    pronunciation: string;
    // Start position of the word
    word_position: number;
};
// The browser `window` object extended with the optional `kuromojin.dicPath`
// setting that getNodeModuleDirPath reads as the default dictionary path.
type KuromojiWindow = Window & {
    kuromojin?: {
        dicPath?: string;
    };
};
const deferred = new Deferred<Tokenizer>(); | ||
/**
 * Resolve the default kuromoji dictionary directory.
 *
 * Lookup order:
 * 1. `process.env.KUROMOJIN_DIC_PATH`, when it is set to a non-empty value;
 * 2. `window.kuromojin.dicPath`, when it is a string;
 * 3. the `dict` directory one level above the directory of the resolved
 *    `kuromoji` entry file.
 *
 * @returns the dictionary path used when the caller supplies no `dicPath`
 * @throws whatever `require.resolve("kuromoji")` throws when neither override
 *         is present and the package cannot be resolved
 */
const getNodeModuleDirPath = (): string => {
    // Node
    if (
        typeof process !== "undefined" &&
        typeof process.env === "object" &&
        process.env !== null &&
        process.env.KUROMOJIN_DIC_PATH
    ) {
        return process.env.KUROMOJIN_DIC_PATH;
    }
    // Browser
    // if window.kuromojin.dicPath is defined, use it as default dict path.
    const maybeKuromojiWindow: KuromojiWindow | undefined = typeof window !== "undefined" ? window : undefined;
    const browserConfig = maybeKuromojiWindow !== undefined ? maybeKuromojiWindow.kuromojin : undefined;
    // `typeof null` is "object", so null must be excluded explicitly; otherwise
    // a page that sets `window.kuromojin = null` would crash on `.dicPath`.
    if (typeof browserConfig === "object" && browserConfig !== null && typeof browserConfig.dicPath === "string") {
        return browserConfig.dicPath;
    }
    // Fallback: the dictionary bundled with the installed kuromoji package.
    const kuromojiDir = path.dirname(require.resolve("kuromoji"));
    return path.join(kuromojiDir, "..", "dict");
};
|
||
// Cache for the tokenizer: stays null until a dictionary build has succeeded,
// after which getTokenizer returns this instance directly.
let _tokenizer: null | Tokenizer = null;
// Lock flag: set to true when getTokenizer starts a build, so that later
// callers share the pending promise instead of starting another build.
let isLoading = false;
|
||
// Options accepted by getTokenizer and tokenize; the object is passed as-is
// to `kuromoji.builder()`.
export type getTokenizerOption = {
    // Path of the dictionary to load.
    dicPath: string;
};
|
||
/**
 * Return the shared tokenizer, building it on first use.
 *
 * The dictionary is loaded at most once: the first call starts the build and
 * every call made while it is in flight receives the same promise. Once the
 * build has succeeded, the cached tokenizer is returned.
 *
 * Note: `options` only takes effect on the call that starts the build. If the
 * build fails, the shared deferred stays rejected, so later calls receive the
 * same rejection.
 *
 * @param options - builder options; when omitted, `dicPath` defaults to the
 *                  result of `getNodeModuleDirPath()`
 * @returns a promise for the tokenizer
 */
export function getTokenizer(options?: getTokenizerOption): Promise<Tokenizer> {
    if (_tokenizer) {
        return Promise.resolve(_tokenizer);
    }
    if (isLoading) {
        return deferred.promise;
    }
    // Resolve the default lazily, only when a build is really started, so that
    // cached / in-flight calls skip the module resolution done by
    // getNodeModuleDirPath(). This runs before the lock is taken: if it throws,
    // no half-started state is left behind.
    const builderOptions: getTokenizerOption =
        options !== undefined ? options : { dicPath: getNodeModuleDirPath() };
    isLoading = true;
    // load dict
    kuromoji.builder(builderOptions).build((err: Error | null | undefined, tokenizer: Tokenizer) => {
        if (err) {
            deferred.reject(err);
            return;
        }
        _tokenizer = tokenizer;
        deferred.resolve(tokenizer);
    });
    return deferred.promise;
}
|
||
/**
 * Tokenize `text` with the shared tokenizer.
 *
 * Obtains the tokenizer through `getTokenizer(options)` and calls its
 * `tokenizeForSentence` method on `text`.
 *
 * @param text - the text to analyse
 * @param options - forwarded to `getTokenizer`
 * @returns a promise for the tokens of `text`; it rejects when the tokenizer
 *          cannot be built
 */
export function tokenize(text: string, options?: getTokenizerOption): Promise<KuromojiToken[]> {
    return getTokenizer(options).then((tokenizer) => tokenizer.tokenizeForSentence(text));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,55 +1,55 @@ | ||
// LICENSE : MIT | ||
"use strict"; | ||
import assert from "power-assert"; | ||
import assert from "assert"; | ||
// it is compatible check for <= 1.1.0 | ||
import defaultFunction from "../src"; | ||
import {getTokenizer, tokenize} from "../src"; | ||
// Test suite for the public API: getTokenizer() and tokenize().
describe("kuromojin", function() {
    context("many access at a time", function() {
        // Concurrent calls must all resolve to the very same tokenizer instance.
        it("should return a.promise", function() {
            const promises = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9].map(_num => {
                return getTokenizer();
            });
            return Promise.all(promises).then(tokenizers => {
                tokenizers.reduce((prev, current) => {
                    assert(prev === current);
                    return current;
                });
            });
        });
    });
    context("tokenize", function() {
        // Formerly exercised the default export; the default export was removed,
        // so this now goes through the named `tokenize` export.
        it("is alias to default", function() {
            const data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
            const promises = data.map(num => {
                return tokenize(String(num));
            });
            return Promise.all(promises).then(texts => {
                texts.forEach((results, index) => {
                    const firstNode = results[0];
                    assert.strictEqual(firstNode.surface_form, String(index));
                });
            });
        });
        it("should return a.promise that resolve analyzed text", function() {
            const data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
            const promises = data.map(num => {
                return tokenize(String(num));
            });
            return Promise.all(promises).then(texts => {
                texts.forEach((results, index) => {
                    const firstNode = results[0];
                    assert.strictEqual(firstNode.surface_form, String(index));
                });
            });
        });
        // word_position is expected to keep counting across sentence boundaries.
        it("should tokenize sentence", function() {
            return tokenize("これは1文。これは2文。").then(tokens => {
                const firstToken = tokens[0];
                assert.strictEqual(firstToken.word_position, 1);
                const lastToken = tokens[tokens.length - 1];
                assert.strictEqual(lastToken.word_position, 12);
            });
        });
    });
});
Oops, something went wrong.