@@ -116,116 +116,11 @@ if(${ANDROID})
116116 list (PREPEND _mtk_llama_executor_runner__srcs
117117 ${CMAKE_CURRENT_LIST_DIR} /executor_runner/mtk_llama_executor_runner.cpp
118118 )
119- # Build ABSL and RE2
120- set (EXTENSIONS_LLM_DIR ${CMAKE_CURRENT_SOURCE_DIR} /../../extension/llm)
121- set (THIRD_PARTY_ABSL_DIR
122- ${EXTENSIONS_LLM_DIR} /tokenizers/third-party/abseil-cpp
123- )
124- set (THIRD_PARTY_RE2_DIR ${EXTENSIONS_LLM_DIR} /tokenizers/third-party/re2)
125- set (THIRD_PARTY_JSON_DIR ${CMAKE_CURRENT_SOURCE_DIR} /../../third-party/json)
126- set (THIRD_PARTY_UNICODE_DIR
127- ${EXTENSIONS_LLM_DIR} /tokenizers/third-party/llama.cpp-unicode
128- )
129- set (THIRD_PARTY_PCRE2_DIR ${EXTENSIONS_LLM_DIR} /tokenizers/third-party/pcre2)
130- set (ABSL_ENABLE_INSTALL ON )
131- set (ABSL_PROPAGATE_CXX_STD ON )
132- set (_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE} )
133- set (CMAKE_POSITION_INDEPENDENT_CODE ON )
134- add_subdirectory (
135- ${THIRD_PARTY_ABSL_DIR}
136- ${CMAKE_CURRENT_BINARY_DIR} /tokenizers/third-party/abseil
137- )
138- add_subdirectory (
139- ${THIRD_PARTY_RE2_DIR}
140- ${CMAKE_CURRENT_BINARY_DIR} /tokenizers/third-party/re2
141- )
142- add_subdirectory (
143- ${THIRD_PARTY_JSON_DIR}
144- ${CMAKE_CURRENT_BINARY_DIR} /tokenizers/third-party/json
145- )
146- add_subdirectory (
147- ${THIRD_PARTY_UNICODE_DIR}
148- ${CMAKE_CURRENT_BINARY_DIR} /tokenizers/third-party/llama.cpp-unicode
149- )
150- add_subdirectory (
151- ${THIRD_PARTY_PCRE2_DIR}
152- ${CMAKE_CURRENT_BINARY_DIR} /tokenizers/third-party/pcre2
153- )
154- set (CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag} )
155-
156- # Build tokenizers
119+ # Build tokenizers library
157120 set (SUPPORT_REGEX_LOOKAHEAD ON )
158- set (LLAMA2_TOKENIZER_DIR ${EXTENSIONS_LLM_DIR} /tokenizers)
159- add_library (tokenizer STATIC )
160- target_include_directories (
161- tokenizer
162- PUBLIC ${_common_include_directories}
163- ${THIRD_PARTY_ABSL_DIR}
164- ${THIRD_PARTY_RE2_DIR}
165- ${LLAMA2_TOKENIZER_DIR} /include
166- ${CMAKE_CURRENT_BINARY_DIR} /tokenizers/third-party/pcre2
167- ${EXECUTORCH_ROOT} /extension/llm/tokenizers/include
168- ${THIRD_PARTY_JSON_DIR}
169- ${THIRD_PARTY_UNICODE_DIR} /include
170- ${THIRD_PARTY_PCRE2_DIR}
171- )
172- target_link_libraries (tokenizer PRIVATE re2::re2 )
173-
174- target_sources (
175- tokenizer
176- PRIVATE
177- ${LLAMA2_TOKENIZER_DIR} /src/tiktoken.cpp
178- ${LLAMA2_TOKENIZER_DIR} /src/llama2c_tokenizer.cpp
179- ${LLAMA2_TOKENIZER_DIR} /src/regex.cpp
180- ${LLAMA2_TOKENIZER_DIR} /src/bpe_tokenizer_base.cpp
181- ${LLAMA2_TOKENIZER_DIR} /src/re2_regex.cpp
182- ${LLAMA2_TOKENIZER_DIR} /src/hf_tokenizer.cpp
183- ${LLAMA2_TOKENIZER_DIR} /src/pre_tokenizer.cpp
184- ${LLAMA2_TOKENIZER_DIR} /src/token_decoder.cpp
185- ${LLAMA2_TOKENIZER_DIR} /src/normalizer.cpp
186- ${LLAMA2_TOKENIZER_DIR} /third-party/llama.cpp-unicode/src/unicode.cpp
187- ${LLAMA2_TOKENIZER_DIR} /third-party/llama.cpp-unicode/src/unicode-data.cpp
188- ${CMAKE_CURRENT_SOURCE_DIR} /../models/llama/tokenizer/llama_tiktoken.cpp
189- )
190-
191- # Add support for regex_lookahead
192- set (PCRE2_STATIC_PIC ON )
193- set (PCRE2_BUILD_PCRE2_8 ON )
194- set (PCRE2_BUILD_PCRE2_16 OFF )
195- set (PCRE2_BUILD_PCRE2_32 OFF )
196- set (PCRE2_BUILD_TESTS OFF )
197- set (PCRE2_BUILD_PCRE2GREP OFF )
198- set (PCRE2_BUILD_PCRE2TEST OFF )
199- set (PCRE2_BUILD_PCRE2GPERF OFF )
200- set (PCRE2_BUILD_DOCS OFF )
201- set (PCRE2_BUILD_LIBPCRE2_PDB OFF )
202-
203- # Set the INTERFACE_INCLUDE_DIRECTORIES property for pcre2-8-static
204- set_target_properties (
205- pcre2-8-static
206- PROPERTIES
207- INTERFACE_INCLUDE_DIRECTORIES
208- $<BUILD_INTERFACE :${CMAKE_CURRENT_BINARY_DIR} /tokenizers /third -party /pcre2 >
209- )
210- add_library (
211- regex_lookahead STATIC
212- ${LLAMA2_TOKENIZER_DIR} /src/pcre2_regex.cpp
213- ${LLAMA2_TOKENIZER_DIR} /src/regex_lookahead.cpp
214- ${LLAMA2_TOKENIZER_DIR} /src/std_regex.cpp
215- )
216- add_library (tokenizer::regex_lookahead ALIAS regex_lookahead )
217- target_link_libraries (regex_lookahead PUBLIC pcre2-8-static )
218- target_include_directories (
219- regex_lookahead
220- PUBLIC $<BUILD_INTERFACE :${CMAKE_CURRENT_SOURCE_DIR} /include >
221- )
222- target_link_libraries (tokenizer PUBLIC regex_lookahead )
223- install (
224- TARGETS regex_lookahead pcre2-8-static
225- EXPORT tokenizers-targets
226- ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
227- LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
228- RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
121+ add_subdirectory (
122+ ${EXECUTORCH_ROOT} /extension/llm/tokenizers
123+ ${CMAKE_CURRENT_BINARY_DIR} /tokenizers
229124 )
230125
231126 # Include directory for neuron headers
@@ -244,10 +139,12 @@ if(${ANDROID})
244139 mtk_llama_executor_runner ${_executor_runner_libs} neuron_backend gflags
245140 mtk_llama_executor_lib
246141 )
247- target_link_libraries (
248- mtk_llama_executor_runner tokenizer
249- $<LINK_LIBRARY :WHOLE_ARCHIVE ,regex_lookahead >
142+ target_sources (
143+ mtk_llama_executor_runner
144+ PRIVATE
145+ ${CMAKE_CURRENT_SOURCE_DIR} /../models/llama/tokenizer/llama_tiktoken.cpp
250146 )
147+ target_link_libraries (mtk_llama_executor_runner tokenizers )
251148 target_compile_options (
252149 mtk_llama_executor_runner PUBLIC ${_common_compile_options}
253150 )
0 commit comments