forked from pytorch-labs/tokenizers
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: X-link: pytorch/executorch#8586 Test Plan: ## OSS Build ``` cmake . -DCMAKE_INSTALL_PREFIX=cmake-out -DTOKENIZERS_BUILD_TEST=ON -Bcmake-out cmake --build cmake-out -j9 --target install ``` Test ``` (executorch) [[email protected] /data/users/lfq/tokenizers/cmake-out (lfq.tokenizer-test)]$ ctest Test project /data/users/lfq/tokenizers/cmake-out Start 1: test_base64 1/5 Test #1: test_base64 ...................... Passed 0.00 sec Start 2: test_llama2c_tokenizer 2/5 Test #2: test_llama2c_tokenizer ........... Passed 0.00 sec Start 3: test_pre_tokenizer 3/5 Test #3: test_pre_tokenizer ............... Passed 0.73 sec Start 4: test_sentencepiece 4/5 Test #4: test_sentencepiece ............... Passed 0.04 sec Start 5: test_tiktoken 5/5 Test #5: test_tiktoken .................... Passed 3.32 sec 100% tests passed, 0 tests failed out of 5 Total Test time (real) = 4.10 sec ``` ## Internal ``` buck2 test fbsource//xplat/pytorch/tokenizers/test: buck2 test fbcode//pytorch/tokenizers/test: ``` Reviewed By: larryliu0820 Differential Revision: D69860352 Pulled By: lucylq
- Loading branch information
1 parent
0763945
commit 9b354ae
Showing
6 changed files
with
102 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Any targets that should be shared between fbcode and xplat must be defined in | ||
# targets.bzl. This file can contain fbcode-only targets. | ||
|
||
# define_common_targets is loaded from the targets.bzl in this same package. | ||
load(":targets.bzl", "define_common_targets") | ||
|
||
# NOTE(review): presumably records the owning oncall for this package -- confirm. | ||
oncall("executorch") | ||
|
||
# Declare the targets that targets.bzl defines for both fbcode and xplat. | ||
define_common_targets() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
load( | ||
"@fbsource//tools/build_defs:default_platform_defs.bzl", | ||
"ANDROID", | ||
"CXX", | ||
) | ||
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") | ||
|
||
def define_common_targets():
    """Declare the tokenizer test targets common to fbcode and xplat.

    The TARGETS and BUCK files located next to this targets.bzl are both
    expected to invoke this function.
    """

    # Passed as RESOURCES_PATH to the tests that depend on the `resources`
    # filegroup declared at the end of this function.
    resources_dir = "$(location :resources)/resources"

    runtime.cxx_test(
        name = "test_base64",
        srcs = ["test_base64.cpp"],
        deps = ["//pytorch/tokenizers:headers"],
    )

    runtime.cxx_test(
        name = "test_llama2c_tokenizer",
        srcs = ["test_llama2c_tokenizer.cpp"],
        deps = ["//pytorch/tokenizers:llama2c_tokenizer"],
        env = {"RESOURCES_PATH": resources_dir},
        # Cannot bundle resources on Apple platform.
        platforms = [CXX, ANDROID],
    )

    runtime.cxx_test(
        name = "test_pre_tokenizer",
        srcs = ["test_pre_tokenizer.cpp"],
        deps = [
            "//pytorch/tokenizers:headers",
            "//pytorch/tokenizers:hf_tokenizer",
        ],
    )

    runtime.cxx_test(
        name = "test_sentencepiece",
        srcs = ["test_sentencepiece.cpp"],
        deps = ["//pytorch/tokenizers:sentencepiece"],
        external_deps = [
            "sentencepiece",
            "abseil-cpp",
        ],
        env = {"RESOURCES_PATH": resources_dir},
        # Cannot bundle resources on Apple platform.
        platforms = [CXX, ANDROID],
    )

    runtime.cxx_test(
        name = "test_tiktoken",
        srcs = ["test_tiktoken.cpp"],
        deps = ["//pytorch/tokenizers:tiktoken"],
        external_deps = ["re2"],
        env = {"RESOURCES_PATH": resources_dir},
        # Cannot bundle resources on Apple platform.
        platforms = [CXX, ANDROID],
    )

    # Everything under resources/, exposed as a single target so the tests
    # above can reference it through $(location :resources).
    runtime.filegroup(
        name = "resources",
        srcs = native.glob(["resources/**"]),
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters