diff --git a/include/base64.h b/include/pytorch/tokenizers/base64.h
similarity index 100%
rename from include/base64.h
rename to include/pytorch/tokenizers/base64.h
diff --git a/include/detail/bpe_tokenizer_base.h b/include/pytorch/tokenizers/bpe_tokenizer_base.h
similarity index 96%
rename from include/detail/bpe_tokenizer_base.h
rename to include/pytorch/tokenizers/bpe_tokenizer_base.h
index 1752d5e..587e663 100644
--- a/include/detail/bpe_tokenizer_base.h
+++ b/include/pytorch/tokenizers/bpe_tokenizer_base.h
@@ -21,8 +21,8 @@
 #include <re2/re2.h>
 
 // Local
-#include "result.h"
-#include "tokenizer.h"
+#include <pytorch/tokenizers/result.h>
+#include <pytorch/tokenizers/tokenizer.h>
 
 namespace tokenizers {
 namespace detail {
diff --git a/include/error.h b/include/pytorch/tokenizers/error.h
similarity index 99%
rename from include/error.h
rename to include/pytorch/tokenizers/error.h
index 11b3439..7823f16 100644
--- a/include/error.h
+++ b/include/pytorch/tokenizers/error.h
@@ -13,8 +13,8 @@
 
 #pragma once
 
+#include <pytorch/tokenizers/log.h>
 #include <stdint.h>
-#include "log.h"
 
 namespace tokenizers {
 
diff --git a/include/hf_tokenizer.h b/include/pytorch/tokenizers/hf_tokenizer.h
similarity index 84%
rename from include/hf_tokenizer.h
rename to include/pytorch/tokenizers/hf_tokenizer.h
index 73ecc87..4f8301a 100644
--- a/include/hf_tokenizer.h
+++ b/include/pytorch/tokenizers/hf_tokenizer.h
@@ -19,11 +19,11 @@
 #include <re2/re2.h>
 
 // Local
-#include "detail/bpe_tokenizer_base.h"
-#include "error.h"
-#include "pre_tokenizer.h"
-#include "result.h"
-#include "token_decoder.h"
+#include <pytorch/tokenizers/bpe_tokenizer_base.h>
+#include <pytorch/tokenizers/error.h>
+#include <pytorch/tokenizers/pre_tokenizer.h>
+#include <pytorch/tokenizers/result.h>
+#include <pytorch/tokenizers/token_decoder.h>
 
 namespace tokenizers {
 class HFTokenizer : public detail::BPETokenizerBase {
diff --git a/include/llama2c_tokenizer.h b/include/pytorch/tokenizers/llama2c_tokenizer.h
similarity index 96%
rename from include/llama2c_tokenizer.h
rename to include/pytorch/tokenizers/llama2c_tokenizer.h
index fc8418d..6163b55 100644
--- a/include/llama2c_tokenizer.h
+++ b/include/pytorch/tokenizers/llama2c_tokenizer.h
@@ -7,8 +7,8 @@
  */
 // @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
 #pragma once
+#include <pytorch/tokenizers/tokenizer.h>
 #include <memory>
-#include "tokenizer.h"
 
 namespace tokenizers {
 
diff --git a/include/log.h b/include/pytorch/tokenizers/log.h
similarity index 99%
rename from include/log.h
rename to include/pytorch/tokenizers/log.h
index 207a1a6..0282a2c 100644
--- a/include/log.h
+++ b/include/pytorch/tokenizers/log.h
@@ -15,6 +15,7 @@
 
 #include <cstdarg>
 #include <cstddef>
+#include <cstdint>
 #include <cstdio>
 #include <cstdlib>
 
diff --git a/include/pre_tokenizer.h b/include/pytorch/tokenizers/pre_tokenizer.h
similarity index 100%
rename from include/pre_tokenizer.h
rename to include/pytorch/tokenizers/pre_tokenizer.h
diff --git a/include/result.h b/include/pytorch/tokenizers/result.h
similarity index 99%
rename from include/result.h
rename to include/pytorch/tokenizers/result.h
index 76a0e53..868c38c 100644
--- a/include/result.h
+++ b/include/pytorch/tokenizers/result.h
@@ -13,10 +13,10 @@
 
 #pragma once
 
+#include <pytorch/tokenizers/error.h>
 #include <cassert>
 #include <new>
 #include <utility>
-#include "error.h"
 
 namespace tokenizers {
 
diff --git a/include/sentencepiece.h b/include/pytorch/tokenizers/sentencepiece.h
similarity index 95%
rename from include/sentencepiece.h
rename to include/pytorch/tokenizers/sentencepiece.h
index cfacc29..be7fff6 100644
--- a/include/sentencepiece.h
+++ b/include/pytorch/tokenizers/sentencepiece.h
@@ -10,10 +10,10 @@
 // A tokenizer that works with sentencepiece. Used by Llama2.
 #pragma once
 
+#include <pytorch/tokenizers/tokenizer.h>
 #include <memory>
 #include <vector>
 #include "sentencepiece_processor.h"
-#include "tokenizer.h"
 namespace tokenizers {
 
 struct TokenIndex {
diff --git a/third-party/llama.cpp-unicode/include/unicode-data.h b/include/pytorch/tokenizers/third-party/llama.cpp-unicode/unicode-data.h
similarity index 100%
rename from third-party/llama.cpp-unicode/include/unicode-data.h
rename to include/pytorch/tokenizers/third-party/llama.cpp-unicode/unicode-data.h
diff --git a/third-party/llama.cpp-unicode/include/unicode.h b/include/pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h
similarity index 100%
rename from third-party/llama.cpp-unicode/include/unicode.h
rename to include/pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h
diff --git a/include/tiktoken.h b/include/pytorch/tokenizers/tiktoken.h
similarity index 96%
rename from include/tiktoken.h
rename to include/pytorch/tokenizers/tiktoken.h
index 2bc909a..11cc667 100644
--- a/include/tiktoken.h
+++ b/include/pytorch/tokenizers/tiktoken.h
@@ -17,9 +17,9 @@
 #include "re2/re2.h"
 
 // Local
-#include "detail/bpe_tokenizer_base.h"
-#include "result.h"
-#include "tokenizer.h"
+#include <pytorch/tokenizers/bpe_tokenizer_base.h>
+#include <pytorch/tokenizers/result.h>
+#include <pytorch/tokenizers/tokenizer.h>
 
 namespace tokenizers {
 
diff --git a/include/token_decoder.h b/include/pytorch/tokenizers/token_decoder.h
similarity index 100%
rename from include/token_decoder.h
rename to include/pytorch/tokenizers/token_decoder.h
diff --git a/include/tokenizer.h b/include/pytorch/tokenizers/tokenizer.h
similarity index 94%
rename from include/tokenizer.h
rename to include/pytorch/tokenizers/tokenizer.h
index 655e947..23bde19 100644
--- a/include/tokenizer.h
+++ b/include/pytorch/tokenizers/tokenizer.h
@@ -13,10 +13,10 @@
 
 #pragma once
 
+#include <pytorch/tokenizers/error.h>
+#include <pytorch/tokenizers/result.h>
 #include <string>
 #include <vector>
-#include "error.h"
-#include "result.h"
 
 namespace tokenizers {
 
diff --git a/src/bpe_tokenizer_base.cpp b/src/bpe_tokenizer_base.cpp
index 7dc4e1a..6a50b91 100644
--- a/src/bpe_tokenizer_base.cpp
+++ b/src/bpe_tokenizer_base.cpp
@@ -7,7 +7,7 @@
  */
 // @lint-ignore-every LICENSELINT
 
-#include "detail/bpe_tokenizer_base.h"
+#include <pytorch/tokenizers/bpe_tokenizer_base.h>
 
 // Standard
 #include <inttypes.h>
diff --git a/src/hf_tokenizer.cpp b/src/hf_tokenizer.cpp
index 58bf195..0eefbcc 100644
--- a/src/hf_tokenizer.cpp
+++ b/src/hf_tokenizer.cpp
@@ -7,7 +7,7 @@
  */
 // @lint-ignore-every LICENSELINT
 
-#include "hf_tokenizer.h"
+#include <pytorch/tokenizers/hf_tokenizer.h>
 
 // Standard
 #include <filesystem>
diff --git a/src/llama2c_tokenizer.cpp b/src/llama2c_tokenizer.cpp
index e73089d..951ee3d 100644
--- a/src/llama2c_tokenizer.cpp
+++ b/src/llama2c_tokenizer.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 // @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
-#include "llama2c_tokenizer.h"
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 #include <cstring>
 
 namespace tokenizers {
diff --git a/src/pre_tokenizer.cpp b/src/pre_tokenizer.cpp
index 04de5bb..5e6e662 100644
--- a/src/pre_tokenizer.cpp
+++ b/src/pre_tokenizer.cpp
@@ -5,7 +5,10 @@
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */
-#include "pre_tokenizer.h"
+
+// Local
+#include <pytorch/tokenizers/pre_tokenizer.h>
+#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h>
 
 // Standard
 #include <algorithm>
@@ -15,9 +18,6 @@
 // Third Party
 #include <nlohmann/json.hpp>
 
-// Local
-#include "unicode.h"
-
 using json = nlohmann::json;
 
 namespace tokenizers {
diff --git a/src/sentencepiece.cpp b/src/sentencepiece.cpp
index 67947fd..7401dd9 100644
--- a/src/sentencepiece.cpp
+++ b/src/sentencepiece.cpp
@@ -8,7 +8,7 @@
 
 // A tokenizer that works with sentencepiece.
 
-#include "sentencepiece.h"
+#include <pytorch/tokenizers/sentencepiece.h>
 #include <cinttypes>
 #include <string>
 #include "third_party/absl/strings/str_replace.h"
diff --git a/src/tiktoken.cpp b/src/tiktoken.cpp
index 0180acb..cdc31f7 100644
--- a/src/tiktoken.cpp
+++ b/src/tiktoken.cpp
@@ -25,11 +25,11 @@
    limitations under the License.
  *************************************************************************/
 
-#include "tiktoken.h"
+#include <pytorch/tokenizers/base64.h>
+#include <pytorch/tokenizers/tiktoken.h>
 #include <cinttypes>
 #include <fstream>
 #include <limits>
-#include "base64.h"
 #include "re2/re2.h"
 
 namespace tokenizers {
diff --git a/src/token_decoder.cpp b/src/token_decoder.cpp
index 28d3b52..669f6dd 100644
--- a/src/token_decoder.cpp
+++ b/src/token_decoder.cpp
@@ -7,7 +7,7 @@
  */
 // @lint-ignore-every LICENSELINT
 
-#include "token_decoder.h"
+#include <pytorch/tokenizers/token_decoder.h>
 
 // Standard
 #include <cstdarg>
@@ -16,7 +16,7 @@
 #include <nlohmann/json.hpp>
 
 // Local
-#include "unicode.h"
+#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h>
 
 using json = nlohmann::json;
 
diff --git a/targets.bzl b/targets.bzl
index dd26998..7504dfc 100644
--- a/targets.bzl
+++ b/targets.bzl
@@ -11,13 +11,12 @@ def define_common_targets():
     runtime.cxx_library(
         name = "headers",
         exported_headers = subdir_glob([
-            ("include", "*.h"),
-            ("include", "**/*.h"),
+            ("include", "pytorch/tokenizers/*.h"),
         ]),
-        header_namespace = "",
         visibility = [
             "@EXECUTORCH_CLIENTS",
         ],
+        header_namespace = "",
     )
 
     runtime.cxx_library(
@@ -66,7 +65,7 @@ def define_common_targets():
             "third-party/llama.cpp-unicode/src/unicode-data.cpp",
         ],
         exported_headers = subdir_glob([
-            ("third-party/llama.cpp-unicode/include", "*.h"),
+            ("include", "pytorch/tokenizers/third-party/llama.cpp-unicode/*.h"),
         ]),
         header_namespace = "",
     )
diff --git a/test/test_base64.cpp b/test/test_base64.cpp
index 99c9f79..ffc51b2 100644
--- a/test/test_base64.cpp
+++ b/test/test_base64.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include "base64.h"
+#include <pytorch/tokenizers/base64.h>
 #include "gtest/gtest.h"
 
 namespace tokenizers {
diff --git a/test/test_llama2c_tokenizer.cpp b/test/test_llama2c_tokenizer.cpp
index 72abc48..4e158e7 100644
--- a/test/test_llama2c_tokenizer.cpp
+++ b/test/test_llama2c_tokenizer.cpp
@@ -10,7 +10,7 @@
 #include <TestResourceUtils/TestResourceUtils.h>
 #endif
 #include <gtest/gtest.h>
-#include "llama2c_tokenizer.h"
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
 
 using namespace ::testing;
 
diff --git a/test/test_pre_tokenizer.cpp b/test/test_pre_tokenizer.cpp
index 0ab7da5..f87c892 100644
--- a/test/test_pre_tokenizer.cpp
+++ b/test/test_pre_tokenizer.cpp
@@ -12,7 +12,7 @@
 #include <re2/re2.h>
 
 // Local
-#include "pre_tokenizer.h"
+#include <pytorch/tokenizers/pre_tokenizer.h>
 
 using json = nlohmann::json;
 using namespace tokenizers;
diff --git a/test/test_sentencepiece.cpp b/test/test_sentencepiece.cpp
index b55ce73..8c5e1e9 100644
--- a/test/test_sentencepiece.cpp
+++ b/test/test_sentencepiece.cpp
@@ -11,7 +11,7 @@
 #include <TestResourceUtils/TestResourceUtils.h>
 #endif
 #include <gtest/gtest.h>
-#include "sentencepiece.h"
+#include <pytorch/tokenizers/sentencepiece.h>
 
 namespace tokenizers {
 
diff --git a/test/test_tiktoken.cpp b/test/test_tiktoken.cpp
index 2177872..86af4fe 100644
--- a/test/test_tiktoken.cpp
+++ b/test/test_tiktoken.cpp
@@ -11,7 +11,7 @@
 #include <TestResourceUtils/TestResourceUtils.h>
 #endif
 #include <gtest/gtest.h>
-#include "tiktoken.h"
+#include <pytorch/tokenizers/tiktoken.h>
 
 using namespace ::testing;
 
diff --git a/third-party/llama.cpp-unicode/src/unicode-data.cpp b/third-party/llama.cpp-unicode/src/unicode-data.cpp
index 0317793..c924f0c 100644
--- a/third-party/llama.cpp-unicode/src/unicode-data.cpp
+++ b/third-party/llama.cpp-unicode/src/unicode-data.cpp
@@ -27,7 +27,7 @@ SOFTWARE.
 
 // generated with scripts/gen-unicode-data.py
 
-#include "unicode-data.h"
+#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode-data.h>
 
 #include <cstdint>
 #include <unordered_map>
diff --git a/third-party/llama.cpp-unicode/src/unicode.cpp b/third-party/llama.cpp-unicode/src/unicode.cpp
index 3f9db7f..152fca7 100644
--- a/third-party/llama.cpp-unicode/src/unicode.cpp
+++ b/third-party/llama.cpp-unicode/src/unicode.cpp
@@ -29,8 +29,8 @@ SOFTWARE.
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 #endif
 
-#include "unicode.h"
-#include "unicode-data.h"
+#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode.h>
+#include <pytorch/tokenizers/third-party/llama.cpp-unicode/unicode-data.h>
 
 #include <algorithm>
 #include <cassert>