Skip to content

Commit

Permalink
Merge pull request #172 from bab2min/dev_18
Browse files Browse the repository at this point in the history
Prepare v0.18.0
  • Loading branch information
bab2min committed Jul 2, 2024
2 parents 30c9f7e + 0ab6c9d commit 4c87dcd
Show file tree
Hide file tree
Showing 22 changed files with 265 additions and 48 deletions.
42 changes: 21 additions & 21 deletions .github/workflows/macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,16 @@ jobs:
strategy:
matrix:
include:
- name: "macOS 11 + Xcode 11.7"
os: macos-11
- name: "macOS 13 + Xcode 15.0"
os: macos-13
arch: x86_64
compiler: xcode
version: "11.7"
- name: "macOS 11 + Xcode 12.2"
os: macos-11
version: "15.0"
- name: "macOS 14 Arm64 + Xcode 15.0"
os: macos-14
arch: arm64
compiler: xcode
version: "12.4"
- name: "macOS 11 + gcc-10"
os: macos-11
compiler: gcc
version: "10"
- name: "macOS 11 + gcc-11"
os: macos-11
compiler: gcc
version: "11"
version: "15.0"

runs-on: ${{ matrix.os }}
name: ${{ matrix.name }}
Expand All @@ -42,8 +36,8 @@ jobs:
else
ls -ls /Applications/
sudo xcode-select -switch /Applications/Xcode_${{ matrix.version }}.app
echo "CC=clang" >> $GITHUB_ENV
echo "CXX=clang++" >> $GITHUB_ENV
echo "CC=$(brew --prefix llvm@15)/bin/clang" >> $GITHUB_ENV
echo "CXX=$(brew --prefix llvm@15)/bin/clang++" >> $GITHUB_ENV
fi
- name: Configure Build
run: mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DKIWI_JAVA_BINDING=1 ..
Expand Down Expand Up @@ -74,11 +68,17 @@ jobs:
./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=none ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=balanced ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=sse2 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=sse4_1 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=avx2 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=avx2 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out --sbg kowiki1000.txt
KIWI_ARCH_TYPE=avx2 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out --typos 6 kowiki1000.txt
if [ "${{ matrix.arch }}" = "x86_64" ]; then
KIWI_ARCH_TYPE=sse2 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=sse4_1 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=avx2 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=avx2 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out --sbg kowiki1000.txt
KIWI_ARCH_TYPE=avx2 ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out --typos 6 kowiki1000.txt
else
KIWI_ARCH_TYPE=neon ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out kowiki1000.txt
KIWI_ARCH_TYPE=neon ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out --sbg kowiki1000.txt
KIWI_ARCH_TYPE=neon ./build/kiwi-cli-* -m ./ModelGenerator -e -o test.out --typos 6 kowiki1000.txt
fi
- name: Archive binaries
uses: actions/upload-artifact@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ jobs:
build-macos:
strategy:
matrix:
os: [macos-11]
os: [macos-13]
arch: [x86_64, arm64]

runs-on: ${{ matrix.os }}
Expand Down
5 changes: 3 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.12)

project(kiwi VERSION 0.17.1 DESCRIPTION "Kiwi, Korean Intelligent Word Identifier")
project(kiwi VERSION 0.18.0 DESCRIPTION "Kiwi, Korean Intelligent Word Identifier")

set ( CMAKE_CXX_STANDARD 14 )
set ( CMAKE_VERBOSE_MAKEFILE true )
Expand Down Expand Up @@ -48,14 +48,15 @@ set ( CORE_SRCS
src/Form.cpp
src/FeatureTestor.cpp
src/FileUtils.cpp
src/HSDataset.cpp
src/Dataset.cpp
src/Joiner.cpp
src/Kiwi.cpp
src/KiwiBuilder.cpp
src/KTrie.cpp
src/PatternMatcher.cpp
src/search.cpp
src/ScriptType.cpp
src/SubstringExtractor.cpp
src/SwTokenizer.cpp
src/TagUtils.cpp
src/TypoTransformer.cpp
Expand Down
3 changes: 2 additions & 1 deletion bindings/java/JniUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,8 @@ namespace jni
{
return CppType{ env, v };
}
if (!env->IsInstanceOf(v, JIteratorBase::jClass)) throw std::runtime_error{ StringConcat_v<svNotInstanceOf, typeStr, svNullTerm>.data()};
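// NOTE: building the message with StringConcat_v<svNotInstanceOf, typeStr, svNullTerm> crashes the clang compiler (cause unknown). The detailed message is not essential, so the exception is thrown with an empty message instead.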
if (!env->IsInstanceOf(v, JIteratorBase::jClass)) throw std::runtime_error{ ""/*StringConcat_v<svNotInstanceOf, typeStr, svNullTerm>.data()*/};
return CppType{ env, v };
}
};
Expand Down
2 changes: 1 addition & 1 deletion bindings/java/kr/pe/bab2min/Kiwi.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

public class Kiwi implements AutoCloseable {
private long _inst;
final private static String _version = "0.17.1";
final private static String _version = "0.18.0";

public static class Match {
final static public int none = 0,
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions include/kiwi/Form.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* @file Form.h
* @author bab2min (bab2min@gmail.com)
* @brief 형태 및 형태소에 관한 정보를 담는 구조체들이 선언된 헤더
* @version 0.17.0
* @date 2022-09-01
* @version 0.18.0
* @date 2024-07-01
*
*
*/
Expand Down
6 changes: 3 additions & 3 deletions include/kiwi/Kiwi.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* @file Kiwi.h
* @author bab2min (bab2min@gmail.com)
* @brief Kiwi C++ API를 담고 있는 헤더 파일
* @version 0.17.0
* @date 2022-09-01
* @version 0.18.0
* @date 2024-07-01
*
*
*/
Expand Down Expand Up @@ -650,7 +650,7 @@ namespace kiwi
* @param numThreads 모델 및 형태소 분석에 사용할 스레드 개수
* @param options 생성 옵션. `kiwi::BuildOption`을 참조
*/
KiwiBuilder(const std::string& modelPath, size_t numThreads = 0, BuildOption options = BuildOption::integrateAllomorph | BuildOption::loadDefaultDict, bool useSBG = false);
KiwiBuilder(const std::string& modelPath, size_t numThreads = 0, BuildOption options = BuildOption::default_, bool useSBG = false);

/**
* @brief 현재 KiwiBuilder 객체가 유효한 분석 모델을 로딩한 상태인지 알려준다.
Expand Down
4 changes: 2 additions & 2 deletions include/kiwi/Macro.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#define KIWI_STR(x) KIWI_STR_HELPER(x)

#define KIWI_VERSION_MAJOR 0
#define KIWI_VERSION_MINOR 17
#define KIWI_VERSION_PATCH 1
#define KIWI_VERSION_MINOR 18
#define KIWI_VERSION_PATCH 0

#define KIWI_VERSION_STRING KIWI_STR(KIWI_VERSION_MAJOR) "." KIWI_STR(KIWI_VERSION_MINOR) "." KIWI_STR(KIWI_VERSION_PATCH)
16 changes: 16 additions & 0 deletions include/kiwi/SubstringExtractor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

// Include what this header uses directly instead of relying on transitive
// includes: <cstddef> for size_t and <utility> for std::pair.
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

namespace kiwi
{
	/**
	 * @brief Extracts substrings from the UTF-16 character range [first, last).
	 *
	 * NOTE(review): only the declaration is visible here; the parameter and
	 * return descriptions below are inferred from the names and defaults and
	 * should be confirmed against the implementation.
	 *
	 * @param first pointer to the beginning of the UTF-16 input
	 * @param last pointer to the end of the UTF-16 input (presumably one past the last character)
	 * @param minCnt presumably the minimum number of occurrences a substring needs in order to be reported
	 * @param minLength presumably the minimum length of a reported substring, in UTF-16 code units (default 2)
	 * @param maxLength presumably the maximum length of a reported substring, in UTF-16 code units (default 32)
	 * @param longestOnly presumably restricts the result to the longest substrings when true (default true)
	 * @param stopChr presumably a character that extracted substrings must not span (default 0)
	 * @return a list of (substring, size_t) pairs; the second element is presumably the occurrence count
	 */
	std::vector<std::pair<std::u16string, size_t>> extractSubstrings(
		const char16_t* first,
		const char16_t* last,
		size_t minCnt,
		size_t minLength = 2,
		size_t maxLength = 32,
		bool longestOnly = true,
		char16_t stopChr = 0);
}
4 changes: 2 additions & 2 deletions include/kiwi/SwTokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* @file SwTokenizer.h
* @author bab2min (bab2min@gmail.com)
* @brief Subword Tokenizer
* @version 0.16.1
* @date 2022-07-28
* @version 0.18.0
* @date 2024-07-01
*
*
*/
Expand Down
4 changes: 2 additions & 2 deletions include/kiwi/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* @file Types.h
* @author bab2min (bab2min@gmail.com)
* @brief Kiwi C++ API에 쓰이는 주요 타입들을 모아놓은 헤더 파일
* @version 0.17.0
* @date 2022-09-01
* @version 0.18.0
* @date 2024-07-01
*
*
*/
Expand Down
20 changes: 17 additions & 3 deletions include/kiwi/capi.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* @file capi.h
* @author bab2min (bab2min@gmail.com)
* @brief Kiwi C API를 담고 있는 헤더 파일
* @version 0.17.0
* @date 2022-09-01
* @version 0.18.0
* @date 2024-07-01
*
*
*/
Expand Down Expand Up @@ -45,7 +45,11 @@ typedef struct {
uint32_t line_number; /**< 줄 번호*/
uint16_t length; /**< 길이(UTF16 문자 기준) */
uint8_t tag; /**< 품사 태그 */
uint8_t sense_id; /**< 의미 번호 */
union
{
uint8_t sense_id; /**< 의미 번호 */
uint8_t script; /**< 유니코드 영역에 기반한 문자 타입 */
};
float score; /**< 해당 형태소의 언어모델 점수 */
float typo_cost; /**< 오타가 교정된 경우 오타 비용. 그렇지 않은 경우 0 */
uint32_t typo_form_id; /**< 교정 전 오타의 형태에 대한 정보 (typoCost가 0인 경우 의미 없음) */
Expand Down Expand Up @@ -1008,6 +1012,16 @@ DECL_DLL int kiwi_pt_add_token_to_span_w(kiwi_pretokenized_h handle, int span_id
*/
DECL_DLL int kiwi_pt_close(kiwi_pretokenized_h handle);

/**
 * @brief Returns the Unicode name of the character block indicated by the `script` field of `kiwi_token_info_t`.
 *
 * @param script value of the `script` field of `kiwi_token_info_t`
 * @return the name of the Unicode block. Returns "Unknown" if the value is not recognized.
 *
 * @note The returned value is a string literal, so it does not need to be freed.
 */
DECL_DLL const char* kiwi_get_script_name(uint8_t script);

#ifdef __cplusplus
}
#endif
2 changes: 1 addition & 1 deletion src/HSDataset.cpp → src/Dataset.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include <kiwi/HSDataset.h>
#include <kiwi/Dataset.h>
#include "RaggedVector.hpp"

using namespace kiwi;
Expand Down
2 changes: 1 addition & 1 deletion src/KiwiBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include <kiwi/Kiwi.h>
#include <kiwi/Utils.h>
#include <kiwi/HSDataset.h>
#include <kiwi/Dataset.h>
#include "ArchAvailable.h"
#include "KTrie.h"
#include "StrUtils.h"
Expand Down
3 changes: 1 addition & 2 deletions src/PatternMatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ size_t PatternMatcherImpl::testAbbr(const char16_t* first, const char16_t* last)
if (b != last && *b == ' ')
{
if (l > (isUpperAlpha(*first) ? 5 : 3)) return 0; // reject too long patterns for abbreviation
++b;
return b - first;
}
else
{
Expand All @@ -287,7 +287,6 @@ size_t PatternMatcherImpl::testAbbr(const char16_t* first, const char16_t* last)

if (b != last && *b == '.') ++b;
else return b - first;
if (b != last && *b == ' ') ++b;
}
if (b[-1] == ' ') --b;
return b - first;
Expand Down
2 changes: 1 addition & 1 deletion src/ScriptType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ namespace kiwi
if (type == ScriptType::chess_symbols) return "Chess Symbols";
if (type == ScriptType::symbols_for_legacy_computing) return "Symbols for Legacy Computing";
if (type == ScriptType::tags) return "Tags";
return "unknown";
return "Unknown";
}

int isEmoji(char32_t c0, char32_t c1)
Expand Down
Loading

0 comments on commit 4c87dcd

Please sign in to comment.