-
Notifications
You must be signed in to change notification settings - Fork 2
Fix CI: build mecab-ko as CMake ExternalProject and fix tokenizer bug #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
fcf1900
0601876
38cc5be
1fc560a
0ba204b
1d9dc8b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,26 +43,75 @@ endif() | |
|
|
||
| find_package(Groonga REQUIRED) | ||
|
|
||
| find_program(MECAB_KO_CONFIG "mecab-config" REQUIRED) | ||
| execute_process( | ||
| COMMAND ${MECAB_KO_CONFIG} --inc-dir | ||
| COMMAND_ERROR_IS_FATAL ANY | ||
| OUTPUT_VARIABLE MECAB_KO_INCLUDE_DIRS | ||
| OUTPUT_STRIP_TRAILING_WHITESPACE | ||
| ) | ||
| execute_process( | ||
| COMMAND ${MECAB_KO_CONFIG} --libs-only-L | ||
| COMMAND_ERROR_IS_FATAL ANY | ||
| OUTPUT_VARIABLE MECAB_KO_LIBRARY_DIRS | ||
| OUTPUT_STRIP_TRAILING_WHITESPACE | ||
| ) | ||
| find_library( | ||
| MECAB_KO_LIBRARY | ||
| NAMES mecab | ||
| PATHS ${MECAB_KO_LIBRARY_DIRS} | ||
| REQUIRED | ||
| NO_DEFAULT_PATH | ||
| ) | ||
| if(DEFINED MECAB_KO_PRIVATE_PREFIX) | ||
| include(ExternalProject) | ||
| include(ProcessorCount) | ||
| ProcessorCount(NPROC) | ||
| if(NPROC EQUAL 0) | ||
| set(NPROC 1) | ||
| endif() | ||
|
|
||
| set(MECAB_KO_INCLUDE_DIRS "${MECAB_KO_PRIVATE_PREFIX}/include") | ||
| set(MECAB_KO_LIBRARY "${MECAB_KO_PRIVATE_PREFIX}/lib/libmecab.a") | ||
| set(MECAB_KO_RC_PATH "${MECAB_KO_PRIVATE_PREFIX}/etc/mecabrc") | ||
| set(MECAB_KO_DIC_DIR | ||
| "${MECAB_KO_PRIVATE_PREFIX}/lib/mecab/dic/mecab-ko-dic" | ||
| ) | ||
|
|
||
| ExternalProject_Add( | ||
| mecab_ko_external | ||
| URL | ||
| https://bitbucket.org/eunjeon/mecab-ko/downloads/mecab-0.996-ko-0.9.2.tar.gz | ||
| PREFIX ${CMAKE_BINARY_DIR}/mecab-ko | ||
| CONFIGURE_COMMAND | ||
| <SOURCE_DIR>/configure --prefix=${MECAB_KO_PRIVATE_PREFIX} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's use ji-heo111#3 does so. |
||
| --enable-static --disable-shared --with-pic | ||
| BUILD_COMMAND make -j${NPROC} | ||
| INSTALL_COMMAND make install | ||
| BUILD_IN_SOURCE FALSE | ||
| BUILD_BYPRODUCTS ${MECAB_KO_LIBRARY} | ||
| ) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to install license files of mecab-ko when we use bundled mecab-ko. ji-heo111#3 does so. |
||
|
|
||
| ExternalProject_Add( | ||
| mecab_ko_dic_external | ||
| URL | ||
| https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz | ||
| PREFIX ${CMAKE_BINARY_DIR}/mecab-ko-dic | ||
| DEPENDS mecab_ko_external | ||
| CONFIGURE_COMMAND | ||
| sh -c | ||
| "cd <SOURCE_DIR> && ./autogen.sh && ./configure --prefix=${MECAB_KO_PRIVATE_PREFIX} --with-mecab-config=${MECAB_KO_PRIVATE_PREFIX}/bin/mecab-config --with-dicdir=${MECAB_KO_DIC_DIR}" | ||
|
Comment on lines
+81
to
+83
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can use Let's install mecab-ko-dic files to ji-heo111#3 does so. |
||
| BUILD_COMMAND | ||
| ${CMAKE_COMMAND} -E env | ||
| PATH=${MECAB_KO_PRIVATE_PREFIX}/bin:$ENV{PATH} make -j${NPROC} | ||
|
Comment on lines
+85
to
+86
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't need to change `PATH`. ji-heo111#3 removes this. |
||
| INSTALL_COMMAND make install | ||
| COMMAND | ||
| sed -i "s|^dicdir.*|dicdir = ${MECAB_KO_DIC_DIR}|" | ||
| ${MECAB_KO_RC_PATH} | ||
|
Comment on lines
+88
to
+90
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can use ji-heo111#3 does so. |
||
| BUILD_IN_SOURCE TRUE | ||
| ) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to install license files of mecab-ko-dic when we use bundled mecab-ko-dic. ji-heo111#3 does so. |
||
| else() | ||
| find_program(MECAB_KO_CONFIG "mecab-config" REQUIRED) | ||
| execute_process( | ||
| COMMAND ${MECAB_KO_CONFIG} --inc-dir | ||
| COMMAND_ERROR_IS_FATAL ANY | ||
| OUTPUT_VARIABLE MECAB_KO_INCLUDE_DIRS | ||
| OUTPUT_STRIP_TRAILING_WHITESPACE | ||
| ) | ||
| execute_process( | ||
| COMMAND ${MECAB_KO_CONFIG} --libs-only-L | ||
| COMMAND_ERROR_IS_FATAL ANY | ||
| OUTPUT_VARIABLE MECAB_KO_LIBRARY_DIRS | ||
| OUTPUT_STRIP_TRAILING_WHITESPACE | ||
| ) | ||
| find_library( | ||
| MECAB_KO_LIBRARY | ||
| NAMES mecab | ||
| PATHS ${MECAB_KO_LIBRARY_DIRS} | ||
| REQUIRED | ||
| NO_DEFAULT_PATH | ||
| ) | ||
| endif() | ||
|
|
||
| add_subdirectory(tokenizers) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,17 @@ target_include_directories( | |
| PRIVATE ${MECAB_KO_INCLUDE_DIRS} | ||
| ) | ||
| target_link_libraries(groonga_tokenizer_mecab_ko PRIVATE ${MECAB_KO_LIBRARY}) | ||
| if(DEFINED MECAB_KO_PRIVATE_PREFIX) | ||
| add_dependencies(groonga_tokenizer_mecab_ko mecab_ko_dic_external) | ||
| target_compile_definitions( | ||
| groonga_tokenizer_mecab_ko | ||
| PRIVATE | ||
| GRN_WITH_BUNDLED_MECAB_KO | ||
| GRN_BUNDLED_MECAB_KO_RC_PATH="${MECAB_KO_RC_PATH}" | ||
| ) | ||
| find_package(Threads REQUIRED) | ||
| target_link_libraries(groonga_tokenizer_mecab_ko PRIVATE Threads::Threads) | ||
| endif() | ||
|
Comment on lines
+29
to
+39
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ji-heo111#3 added |
||
| install( | ||
| TARGETS groonga_tokenizer_mecab_ko | ||
| DESTINATION "${GRN_PLUGINS_DIR}/tokenizers" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -480,7 +480,7 @@ mecab_ko_model_create(grn_ctx *ctx, grn_mecab_ko_tokenizer_options *options) | |
| { | ||
| mecab_model_t *mecab_model; | ||
| int argc = 0; | ||
| const char *argv[3]; | ||
| const char *argv[5]; | ||
| const char *tag; | ||
|
|
||
| bool need_default_output = | ||
|
|
@@ -497,6 +497,10 @@ mecab_ko_model_create(grn_ctx *ctx, grn_mecab_ko_tokenizer_options *options) | |
| argv[argc++] = "-F%m\n"; | ||
| argv[argc++] = "-E\n"; | ||
| } | ||
| #ifdef GRN_WITH_BUNDLED_MECAB_KO | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about using |
||
| argv[argc++] = "--rcfile"; | ||
| argv[argc++] = GRN_BUNDLED_MECAB_KO_RC_PATH; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ditto. |
||
| #endif | ||
|
|
||
| mecab_model = mecab_model_new(argc, (char **)argv); | ||
| if (!mecab_model) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about falling back to bundled mecab-ko automatically when system mecab-ko isn't found?
We can use
-DMECAB_KO_CONFIG=nonexistent to disable system mecab-ko explicitly. ji-heo111#3 does so.