diff --git a/include/base64.h b/include/pytorch/tokenizers/base64.h similarity index 100% rename from include/base64.h rename to include/pytorch/tokenizers/base64.h diff --git a/include/detail/bpe_tokenizer_base.h b/include/pytorch/tokenizers/bpe_tokenizer_base.h similarity index 96% rename from include/detail/bpe_tokenizer_base.h rename to include/pytorch/tokenizers/bpe_tokenizer_base.h index 1752d5e..587e663 100644 --- a/include/detail/bpe_tokenizer_base.h +++ b/include/pytorch/tokenizers/bpe_tokenizer_base.h @@ -21,8 +21,8 @@ #include // Local -#include "result.h" -#include "tokenizer.h" +#include +#include namespace tokenizers { namespace detail { diff --git a/include/error.h b/include/pytorch/tokenizers/error.h similarity index 99% rename from include/error.h rename to include/pytorch/tokenizers/error.h index 11b3439..7823f16 100644 --- a/include/error.h +++ b/include/pytorch/tokenizers/error.h @@ -13,8 +13,8 @@ #pragma once +#include #include -#include "log.h" namespace tokenizers { diff --git a/include/hf_tokenizer.h b/include/pytorch/tokenizers/hf_tokenizer.h similarity index 84% rename from include/hf_tokenizer.h rename to include/pytorch/tokenizers/hf_tokenizer.h index 73ecc87..4f8301a 100644 --- a/include/hf_tokenizer.h +++ b/include/pytorch/tokenizers/hf_tokenizer.h @@ -19,11 +19,11 @@ #include // Local -#include "detail/bpe_tokenizer_base.h" -#include "error.h" -#include "pre_tokenizer.h" -#include "result.h" -#include "token_decoder.h" +#include +#include +#include +#include +#include namespace tokenizers { class HFTokenizer : public detail::BPETokenizerBase { diff --git a/include/llama2c_tokenizer.h b/include/pytorch/tokenizers/llama2c_tokenizer.h similarity index 96% rename from include/llama2c_tokenizer.h rename to include/pytorch/tokenizers/llama2c_tokenizer.h index fc8418d..6163b55 100644 --- a/include/llama2c_tokenizer.h +++ b/include/pytorch/tokenizers/llama2c_tokenizer.h @@ -7,8 +7,8 @@ */ // @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude #pragma once +#include #include -#include "tokenizer.h" namespace tokenizers { diff --git a/include/log.h b/include/pytorch/tokenizers/log.h similarity index 99% rename from include/log.h rename to include/pytorch/tokenizers/log.h index 207a1a6..0282a2c 100644 --- a/include/log.h +++ b/include/pytorch/tokenizers/log.h @@ -15,6 +15,7 @@ #include #include +#include #include #include diff --git a/include/pre_tokenizer.h b/include/pytorch/tokenizers/pre_tokenizer.h similarity index 100% rename from include/pre_tokenizer.h rename to include/pytorch/tokenizers/pre_tokenizer.h diff --git a/include/result.h b/include/pytorch/tokenizers/result.h similarity index 99% rename from include/result.h rename to include/pytorch/tokenizers/result.h index 76a0e53..868c38c 100644 --- a/include/result.h +++ b/include/pytorch/tokenizers/result.h @@ -13,10 +13,10 @@ #pragma once +#include #include #include #include -#include "error.h" namespace tokenizers { diff --git a/include/sentencepiece.h b/include/pytorch/tokenizers/sentencepiece.h similarity index 95% rename from include/sentencepiece.h rename to include/pytorch/tokenizers/sentencepiece.h index cfacc29..be7fff6 100644 --- a/include/sentencepiece.h +++ b/include/pytorch/tokenizers/sentencepiece.h @@ -10,10 +10,10 @@ // A tokenizer that works with sentencepiece. Used by Llama2. #pragma once +#include #include #include #include "sentencepiece_processor.h" -#include "tokenizer.h" namespace tokenizers { struct TokenIndex { diff --git a/third-party/llama.cpp-unicode/include/unicode-data.h b/include/pytorch/tokenizers/third-party/llama.cpp-unicode/unicode-data.h similarity index 100% rename from third-party/llama.cpp-unicode/include/unicode-data.h rename to include/pytorch/tokenizers/third-party/llama.cpp-unicode/unicode-data.h diff --git a/third-party/llama.cpp-unicode/include/unicode.h b/include/pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h similarity index 100% rename from third-party/llama.cpp-unicode/include/unicode.h rename to include/pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h diff --git a/include/tiktoken.h b/include/pytorch/tokenizers/tiktoken.h similarity index 96% rename from include/tiktoken.h rename to include/pytorch/tokenizers/tiktoken.h index 2bc909a..11cc667 100644 --- a/include/tiktoken.h +++ b/include/pytorch/tokenizers/tiktoken.h @@ -17,9 +17,9 @@ #include "re2/re2.h" // Local -#include "detail/bpe_tokenizer_base.h" -#include "result.h" -#include "tokenizer.h" +#include +#include +#include namespace tokenizers { diff --git a/include/token_decoder.h b/include/pytorch/tokenizers/token_decoder.h similarity index 100% rename from include/token_decoder.h rename to include/pytorch/tokenizers/token_decoder.h diff --git a/include/tokenizer.h b/include/pytorch/tokenizers/tokenizer.h similarity index 94% rename from include/tokenizer.h rename to include/pytorch/tokenizers/tokenizer.h index 655e947..23bde19 100644 --- a/include/tokenizer.h +++ b/include/pytorch/tokenizers/tokenizer.h @@ -13,10 +13,10 @@ #pragma once +#include +#include #include #include -#include "error.h" -#include "result.h" namespace tokenizers { diff --git a/src/bpe_tokenizer_base.cpp b/src/bpe_tokenizer_base.cpp index 7dc4e1a..6a50b91 100644 --- a/src/bpe_tokenizer_base.cpp +++ b/src/bpe_tokenizer_base.cpp @@ -7,7 +7,7 @@ */ // @lint-ignore-every LICENSELINT -#include "detail/bpe_tokenizer_base.h" +#include // Standard #include diff --git a/src/hf_tokenizer.cpp b/src/hf_tokenizer.cpp index 58bf195..0eefbcc 100644 --- a/src/hf_tokenizer.cpp +++ b/src/hf_tokenizer.cpp @@ -7,7 +7,7 @@ */ // @lint-ignore-every LICENSELINT -#include "hf_tokenizer.h" +#include // Standard #include diff --git a/src/llama2c_tokenizer.cpp b/src/llama2c_tokenizer.cpp index e73089d..951ee3d 100644 --- a/src/llama2c_tokenizer.cpp +++ b/src/llama2c_tokenizer.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ // @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "llama2c_tokenizer.h" +#include #include namespace tokenizers { diff --git a/src/pre_tokenizer.cpp b/src/pre_tokenizer.cpp index 04de5bb..5e6e662 100644 --- a/src/pre_tokenizer.cpp +++ b/src/pre_tokenizer.cpp @@ -5,7 +5,10 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ -#include "pre_tokenizer.h" + +// Local +#include +#include // Standard #include @@ -15,9 +18,6 @@ // Third Party #include -// Local -#include "unicode.h" - using json = nlohmann::json; namespace tokenizers { diff --git a/src/sentencepiece.cpp b/src/sentencepiece.cpp index 67947fd..7401dd9 100644 --- a/src/sentencepiece.cpp +++ b/src/sentencepiece.cpp @@ -8,7 +8,7 @@ // A tokenizer that works with sentencepiece. -#include "sentencepiece.h" +#include #include #include #include "third_party/absl/strings/str_replace.h" diff --git a/src/tiktoken.cpp b/src/tiktoken.cpp index 0180acb..cdc31f7 100644 --- a/src/tiktoken.cpp +++ b/src/tiktoken.cpp @@ -25,11 +25,11 @@ limitations under the License. *************************************************************************/ -#include "tiktoken.h" +#include +#include #include #include #include -#include "base64.h" #include "re2/re2.h" namespace tokenizers { diff --git a/src/token_decoder.cpp b/src/token_decoder.cpp index 28d3b52..669f6dd 100644 --- a/src/token_decoder.cpp +++ b/src/token_decoder.cpp @@ -7,7 +7,7 @@ */ // @lint-ignore-every LICENSELINT -#include "token_decoder.h" +#include // Standard #include @@ -16,7 +16,7 @@ #include // Local -#include "unicode.h" +#include using json = nlohmann::json; diff --git a/targets.bzl b/targets.bzl index dd26998..7504dfc 100644 --- a/targets.bzl +++ b/targets.bzl @@ -11,13 +11,12 @@ def define_common_targets(): runtime.cxx_library( name = "headers", exported_headers = subdir_glob([ - ("include", "*.h"), - ("include", "**/*.h"), + ("include", "pytorch/tokenizers/*.h"), ]), - header_namespace = "", visibility = [ "@EXECUTORCH_CLIENTS", ], + header_namespace = "", ) runtime.cxx_library( @@ -66,7 +65,7 @@ def define_common_targets(): "third-party/llama.cpp-unicode/src/unicode-data.cpp", ], exported_headers = subdir_glob([ - ("third-party/llama.cpp-unicode/include", "*.h"), + ("include", "pytorch/tokenizers/third-party/llama.cpp-unicode/*.h"), ]), header_namespace = "", ) diff --git a/test/test_base64.cpp b/test/test_base64.cpp index 99c9f79..ffc51b2 100644 --- a/test/test_base64.cpp +++ b/test/test_base64.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "base64.h" +#include #include "gtest/gtest.h" namespace tokenizers { diff --git a/test/test_llama2c_tokenizer.cpp b/test/test_llama2c_tokenizer.cpp index 72abc48..4e158e7 100644 --- a/test/test_llama2c_tokenizer.cpp +++ b/test/test_llama2c_tokenizer.cpp @@ -10,7 +10,7 @@ #include #endif #include -#include "llama2c_tokenizer.h" +#include using namespace ::testing; diff --git a/test/test_pre_tokenizer.cpp b/test/test_pre_tokenizer.cpp index 0ab7da5..f87c892 100644 --- a/test/test_pre_tokenizer.cpp +++ b/test/test_pre_tokenizer.cpp @@ -12,7 +12,7 @@ #include // Local -#include "pre_tokenizer.h" +#include using json = nlohmann::json; using namespace tokenizers; diff --git a/test/test_sentencepiece.cpp b/test/test_sentencepiece.cpp index b55ce73..8c5e1e9 100644 --- a/test/test_sentencepiece.cpp +++ b/test/test_sentencepiece.cpp @@ -11,7 +11,7 @@ #include #endif #include -#include "sentencepiece.h" +#include namespace tokenizers { diff --git a/test/test_tiktoken.cpp b/test/test_tiktoken.cpp index 2177872..86af4fe 100644 --- a/test/test_tiktoken.cpp +++ b/test/test_tiktoken.cpp @@ -11,7 +11,7 @@ #include #endif #include -#include "tiktoken.h" +#include using namespace ::testing; diff --git a/third-party/llama.cpp-unicode/src/unicode-data.cpp b/third-party/llama.cpp-unicode/src/unicode-data.cpp index 0317793..c924f0c 100644 --- a/third-party/llama.cpp-unicode/src/unicode-data.cpp +++ b/third-party/llama.cpp-unicode/src/unicode-data.cpp @@ -27,7 +27,7 @@ SOFTWARE. // generated with scripts/gen-unicode-data.py -#include "unicode-data.h" +#include #include #include diff --git a/third-party/llama.cpp-unicode/src/unicode.cpp b/third-party/llama.cpp-unicode/src/unicode.cpp index 3f9db7f..152fca7 100644 --- a/third-party/llama.cpp-unicode/src/unicode.cpp +++ b/third-party/llama.cpp-unicode/src/unicode.cpp @@ -29,8 +29,8 @@ SOFTWARE. #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING #endif -#include "unicode.h" -#include "unicode-data.h" +#include +#include #include #include