diff --git a/targets.bzl b/targets.bzl index 7504dfc..824c611 100644 --- a/targets.bzl +++ b/targets.bzl @@ -15,6 +15,7 @@ def define_common_targets(): ]), visibility = [ "@EXECUTORCH_CLIENTS", + "//pytorch/tokenizers/...", ], header_namespace = "", ) @@ -29,12 +30,14 @@ def define_common_targets(): ], visibility = [ "@EXECUTORCH_CLIENTS", + "//pytorch/tokenizers/...", ], compiler_flags = [ "-D_USE_INTERNAL_STRING_VIEW", ], external_deps = [ "sentencepiece", + "abseil-cpp", ], ) @@ -49,6 +52,7 @@ def define_common_targets(): ], visibility = [ "@EXECUTORCH_CLIENTS", + "//pytorch/tokenizers/...", ], compiler_flags = [ "-D_USE_INTERNAL_STRING_VIEW", @@ -84,6 +88,7 @@ def define_common_targets(): ], visibility = [ "@EXECUTORCH_CLIENTS", + "//pytorch/tokenizers/...", ], compiler_flags = [ "-D_USE_INTERNAL_STRING_VIEW", @@ -104,5 +109,6 @@ def define_common_targets(): ], visibility = [ "@EXECUTORCH_CLIENTS", + "//pytorch/tokenizers/...", ], ) diff --git a/test/TARGETS b/test/TARGETS new file mode 100644 index 0000000..2341af9 --- /dev/null +++ b/test/TARGETS @@ -0,0 +1,8 @@ +# Any targets that should be shared between fbcode and xplat must be defined in +# targets.bzl. This file can contain fbcode-only targets. + +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets() diff --git a/test/targets.bzl b/test/targets.bzl new file mode 100644 index 0000000..3505ef8 --- /dev/null +++ b/test/targets.bzl @@ -0,0 +1,86 @@ +load( + "@fbsource//tools/build_defs:default_platform_defs.bzl", + "ANDROID", + "CXX", +) +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. + """ + runtime.cxx_test( + name = "test_base64", + srcs = [ + "test_base64.cpp", + ], + deps = [ + "//pytorch/tokenizers:headers", + ], + ) + + runtime.cxx_test( + name = "test_llama2c_tokenizer", + srcs = [ + "test_llama2c_tokenizer.cpp", + ], + deps = [ + "//pytorch/tokenizers:llama2c_tokenizer", + ], + env = { + "RESOURCES_PATH": "$(location :resources)/resources", + }, + platforms = [CXX, ANDROID], # Cannot bundle resources on Apple platform. + ) + + runtime.cxx_test( + name = "test_pre_tokenizer", + srcs = [ + "test_pre_tokenizer.cpp", + ], + deps = [ + "//pytorch/tokenizers:headers", + "//pytorch/tokenizers:hf_tokenizer", + ], + ) + + runtime.cxx_test( + name = "test_sentencepiece", + srcs = [ + "test_sentencepiece.cpp", + ], + deps = ["//pytorch/tokenizers:sentencepiece"], + external_deps = [ + "sentencepiece", + "abseil-cpp", + ], + env = { + "RESOURCES_PATH": "$(location :resources)/resources", + }, + ) + + runtime.cxx_test( + name = "test_tiktoken", + srcs = [ + "test_tiktoken.cpp", + ], + deps = [ + "//pytorch/tokenizers:tiktoken", + ], + env = { + "RESOURCES_PATH": "$(location :resources)/resources", + }, + platforms = [CXX, ANDROID], # Cannot bundle resources on Apple platform. + external_deps = [ + "re2", + ], + ) + + runtime.filegroup( + name = "resources", + srcs = native.glob([ + "resources/**", + ]), + ) diff --git a/test/test_llama2c_tokenizer.cpp b/test/test_llama2c_tokenizer.cpp index 4e158e7..09b7c46 100644 --- a/test/test_llama2c_tokenizer.cpp +++ b/test/test_llama2c_tokenizer.cpp @@ -6,9 +6,6 @@ * LICENSE file in the root directory of this source tree. */ -#ifdef TOKENIZERS_FB_BUCK -#include -#endif #include #include @@ -17,16 +14,9 @@ using namespace ::testing; namespace tokenizers { namespace { -// Test case based on llama2.c tokenizer static inline std::string _get_resource_path(const std::string& name) { -#ifdef TOKENIZERS_FB_BUCK - return facebook::xplat::testing::getPathForTestResource( - "test/resources/" + name); -#else return std::getenv("RESOURCES_PATH") + std::string("/") + name; -#endif } - } // namespace class Llama2cTokenizerTest : public Test { diff --git a/test/test_sentencepiece.cpp b/test/test_sentencepiece.cpp index 8c5e1e9..b8ffd8a 100644 --- a/test/test_sentencepiece.cpp +++ b/test/test_sentencepiece.cpp @@ -7,9 +7,6 @@ */ // @lint-ignore-every LICENSELINT -#ifdef TOKENIZERS_FB_BUCK -#include -#endif #include #include @@ -17,14 +14,8 @@ namespace tokenizers { namespace { static inline std::string _get_resource_path(const std::string& name) { -#ifdef TOKENIZERS_FB_BUCK - return facebook::xplat::testing::getPathForTestResource( - "test/resources/" + name); -#else return std::getenv("RESOURCES_PATH") + std::string("/") + name; -#endif } - } // namespace TEST(SPTokenizerTest, TestEncodeWithoutLoad) { diff --git a/test/test_tiktoken.cpp b/test/test_tiktoken.cpp index 86af4fe..a7c094e 100644 --- a/test/test_tiktoken.cpp +++ b/test/test_tiktoken.cpp @@ -7,9 +7,6 @@ */ // @lint-ignore-every LICENSELINT -#ifdef TOKENIZERS_FB_BUCK -#include -#endif #include #include @@ -45,12 +42,7 @@ static inline std::unique_ptr> _get_special_tokens() { } static inline std::string _get_resource_path(const std::string& name) { -#ifdef TOKENIZERS_FB_BUCK - return facebook::xplat::testing::getPathForTestResource( - "test/resources/" + name); -#else return std::getenv("RESOURCES_PATH") + std::string("/") + name; -#endif } } // namespace