Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 69 additions & 20 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,26 +43,75 @@ endif()

find_package(Groonga REQUIRED)

# Locate the mecab-config helper; configuration stops if it cannot be found.
find_program(MECAB_KO_CONFIG NAMES mecab-config REQUIRED)

# Store the output of `mecab-config --inc-dir` as the include directories.
# A non-zero exit status aborts configuration (COMMAND_ERROR_IS_FATAL).
execute_process(
  COMMAND ${MECAB_KO_CONFIG} --inc-dir
  OUTPUT_VARIABLE MECAB_KO_INCLUDE_DIRS
  OUTPUT_STRIP_TRAILING_WHITESPACE
  COMMAND_ERROR_IS_FATAL ANY
)

# Store the output of `mecab-config --libs-only-L` as the library search dirs.
execute_process(
  COMMAND ${MECAB_KO_CONFIG} --libs-only-L
  OUTPUT_VARIABLE MECAB_KO_LIBRARY_DIRS
  OUTPUT_STRIP_TRAILING_WHITESPACE
  COMMAND_ERROR_IS_FATAL ANY
)

# Look for the mecab library only in the directories reported above;
# NO_DEFAULT_PATH keeps CMake from consulting its default search locations.
find_library(
  MECAB_KO_LIBRARY
  NAMES mecab
  PATHS ${MECAB_KO_LIBRARY_DIRS}
  NO_DEFAULT_PATH
  REQUIRED
)
if(DEFINED MECAB_KO_PRIVATE_PREFIX)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about falling back to bundled mecab-ko automatically when system mecab-ko isn't found?

We can use -DMECAB_KO_CONFIG=nonexistent to disable system mecab-ko explicitly.

ji-heo111#3 does so.

# Modules needed for the bundled build: ProcessorCount sizes the parallel make
# jobs, ExternalProject drives the external builds.
include(ProcessorCount)
include(ExternalProject)

# ProcessorCount() reports 0 when the core count cannot be determined;
# use a single job in that case.
ProcessorCount(NPROC)
if(NPROC EQUAL 0)
  set(NPROC 1)
endif()

# Locations inside the private prefix that the rest of the build refers to.
set(MECAB_KO_INCLUDE_DIRS "${MECAB_KO_PRIVATE_PREFIX}/include")
set(MECAB_KO_LIBRARY "${MECAB_KO_PRIVATE_PREFIX}/lib/libmecab.a")
set(MECAB_KO_RC_PATH "${MECAB_KO_PRIVATE_PREFIX}/etc/mecabrc")
set(MECAB_KO_DIC_DIR "${MECAB_KO_PRIVATE_PREFIX}/lib/mecab/dic/mecab-ko-dic")

ExternalProject_Add(
mecab_ko_external
URL
https://bitbucket.org/eunjeon/mecab-ko/downloads/mecab-0.996-ko-0.9.2.tar.gz
PREFIX ${CMAKE_BINARY_DIR}/mecab-ko
CONFIGURE_COMMAND
<SOURCE_DIR>/configure --prefix=${MECAB_KO_PRIVATE_PREFIX}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use --prefix=<INSTALL_DIR> instead of ${MECAB_KO_PRIVATE_PREFIX}. If we use static linking, we don't need to install mecab-ko.

ji-heo111#3 does so.

--enable-static --disable-shared --with-pic
BUILD_COMMAND make -j${NPROC}
INSTALL_COMMAND make install
BUILD_IN_SOURCE FALSE
BUILD_BYPRODUCTS ${MECAB_KO_LIBRARY}
)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to install the license files of mecab-ko when we use the bundled mecab-ko.

ji-heo111#3 does so.


ExternalProject_Add(
mecab_ko_dic_external
URL
https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
PREFIX ${CMAKE_BINARY_DIR}/mecab-ko-dic
DEPENDS mecab_ko_external
CONFIGURE_COMMAND
sh -c
"cd <SOURCE_DIR> && ./autogen.sh && ./configure --prefix=${MECAB_KO_PRIVATE_PREFIX} --with-mecab-config=${MECAB_KO_PRIVATE_PREFIX}/bin/mecab-config --with-dicdir=${MECAB_KO_DIC_DIR}"
Comment on lines +81 to +83
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use PATCH_COMMAND for autogen.sh.

Let's install the mecab-ko-dic files under CMAKE_INSTALL_PREFIX/... rather than MECAB_KO_PRIVATE_PREFIX/.... In CMake, users generally control the install location through CMAKE_INSTALL_PREFIX.

ji-heo111#3 does so.

BUILD_COMMAND
${CMAKE_COMMAND} -E env
PATH=${MECAB_KO_PRIVATE_PREFIX}/bin:$ENV{PATH} make -j${NPROC}
Comment on lines +85 to +86
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need to change PATH here: mecab-ko-dic doesn't use the mecab in ${MECAB_KO_PRIVATE_PREFIX}/bin, and mecab-dict-index lives in ${MECAB_KO_PRIVATE_PREFIX}/libexec/mecab/.

ji-heo111#3 removes this.

INSTALL_COMMAND make install
COMMAND
sed -i "s|^dicdir.*|dicdir = ${MECAB_KO_DIC_DIR}|"
${MECAB_KO_RC_PATH}
Comment on lines +88 to +90
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use configure_file() instead of sed.

ji-heo111#3 does so.

BUILD_IN_SOURCE TRUE
)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to install the license files of mecab-ko-dic when we use the bundled mecab-ko-dic.

ji-heo111#3 does so.

else()
# Locate the mecab-config helper; configuration stops if it cannot be found.
find_program(MECAB_KO_CONFIG NAMES mecab-config REQUIRED)

# Store the output of `mecab-config --inc-dir` as the include directories.
# A non-zero exit status aborts configuration (COMMAND_ERROR_IS_FATAL).
execute_process(
  COMMAND ${MECAB_KO_CONFIG} --inc-dir
  OUTPUT_VARIABLE MECAB_KO_INCLUDE_DIRS
  OUTPUT_STRIP_TRAILING_WHITESPACE
  COMMAND_ERROR_IS_FATAL ANY
)

# Store the output of `mecab-config --libs-only-L` as the library search dirs.
execute_process(
  COMMAND ${MECAB_KO_CONFIG} --libs-only-L
  OUTPUT_VARIABLE MECAB_KO_LIBRARY_DIRS
  OUTPUT_STRIP_TRAILING_WHITESPACE
  COMMAND_ERROR_IS_FATAL ANY
)

# Look for the mecab library only in the directories reported above;
# NO_DEFAULT_PATH keeps CMake from consulting its default search locations.
find_library(
  MECAB_KO_LIBRARY
  NAMES mecab
  PATHS ${MECAB_KO_LIBRARY_DIRS}
  NO_DEFAULT_PATH
  REQUIRED
)
endif()

add_subdirectory(tokenizers)

Expand Down
11 changes: 11 additions & 0 deletions tokenizers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ target_include_directories(
PRIVATE ${MECAB_KO_INCLUDE_DIRS}
)
target_link_libraries(groonga_tokenizer_mecab_ko PRIVATE ${MECAB_KO_LIBRARY})
# Extra settings applied only when MECAB_KO_PRIVATE_PREFIX is defined.
if(DEFINED MECAB_KO_PRIVATE_PREFIX)
  # Link the platform thread library alongside the mecab library.
  find_package(Threads REQUIRED)
  target_link_libraries(groonga_tokenizer_mecab_ko PRIVATE Threads::Threads)

  # Expose the bundled-build flag and the mecabrc path to the tokenizer sources.
  target_compile_definitions(
    groonga_tokenizer_mecab_ko
    PRIVATE
      GRN_WITH_BUNDLED_MECAB_KO
      GRN_BUNDLED_MECAB_KO_RC_PATH="${MECAB_KO_RC_PATH}"
  )

  # Make sure the mecab_ko_dic_external target is built before the tokenizer.
  add_dependencies(groonga_tokenizer_mecab_ko mecab_ko_dic_external)
endif()
Comment on lines +29 to +39
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ji-heo111#3 added a libmecab_ko CMake target that carries all the needed options, so we can use libmecab_ko for both the system mecab-ko and the bundled mecab-ko.

install(
TARGETS groonga_tokenizer_mecab_ko
DESTINATION "${GRN_PLUGINS_DIR}/tokenizers"
Expand Down
6 changes: 5 additions & 1 deletion tokenizers/mecab_ko.c
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ mecab_ko_model_create(grn_ctx *ctx, grn_mecab_ko_tokenizer_options *options)
{
mecab_model_t *mecab_model;
int argc = 0;
const char *argv[3];
const char *argv[5];
const char *tag;

bool need_default_output =
Expand All @@ -497,6 +497,10 @@ mecab_ko_model_create(grn_ctx *ctx, grn_mecab_ko_tokenizer_options *options)
argv[argc++] = "-F%m\n";
argv[argc++] = "-E\n";
}
#ifdef GRN_WITH_BUNDLED_MECAB_KO
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about using GRN_TOKENIZER_MECAB_KO prefix instead of GRN prefix?

argv[argc++] = "--rcfile";
argv[argc++] = GRN_BUNDLED_MECAB_KO_RC_PATH;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto: how about using the GRN_TOKENIZER_MECAB_KO prefix instead of the GRN prefix here as well?

#endif

mecab_model = mecab_model_new(argc, (char **)argv);
if (!mecab_model) {
Expand Down