Maintenance: Browser compatibility, docs updates #3

Merged (21 commits)
20 changes: 0 additions & 20 deletions .github/workflows/documentation.yml

This file was deleted.

61 changes: 0 additions & 61 deletions .github/workflows/gh-pages.yml

This file was deleted.

28 changes: 14 additions & 14 deletions README.md
@@ -2,7 +2,7 @@

<p align="center">
<br/>
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://huggingface.co/datasets/Xenova/transformers.js-docs/raw/main/transformersjs-dark.svg" width="500" style="max-width: 100%;">
<source media="(prefers-color-scheme: light)" srcset="https://huggingface.co/datasets/Xenova/transformers.js-docs/raw/main/transformersjs-light.svg" width="500" style="max-width: 100%;">
<img alt="transformers.js javascript library logo" src="https://huggingface.co/datasets/Xenova/transformers.js-docs/raw/main/transformersjs-light.svg" width="500" style="max-width: 100%;">
@@ -11,14 +11,14 @@
</p>

<p align="center">
- <a href="https://www.npmjs.com/package/@xenova/transformers">
-   <img alt="NPM" src="https://img.shields.io/npm/v/@xenova/transformers">
+ <a href="https://www.npmjs.com/package/chromadb-default-embed">
+   <img alt="NPM" src="https://img.shields.io/npm/v/chromadb-default-embed">
  </a>
- <a href="https://www.npmjs.com/package/@xenova/transformers">
-   <img alt="NPM Downloads" src="https://img.shields.io/npm/dw/@xenova/transformers">
+ <a href="https://www.npmjs.com/package/chromadb-default-embed">
+   <img alt="NPM Downloads" src="https://img.shields.io/npm/dw/chromadb-default-embed">
  </a>
- <a href="https://www.jsdelivr.com/package/npm/@xenova/transformers">
-   <img alt="jsDelivr Hits" src="https://img.shields.io/jsdelivr/npm/hw/@xenova/transformers">
+ <a href="https://www.jsdelivr.com/package/npm/chromadb-default-embed">
+   <img alt="jsDelivr Hits" src="https://img.shields.io/jsdelivr/npm/hw/chromadb-default-embed">
  </a>
<a href="https://github.com/xenova/transformers.js/blob/main/LICENSE">
<img alt="License" src="https://img.shields.io/github/license/xenova/transformers.js?color=blue">
@@ -38,7 +38,7 @@ Transformers.js is designed to be functionally equivalent to Hugging Face's [tra
- 🗣️ **Audio**: automatic speech recognition and audio classification.
- 🐙 **Multimodal**: zero-shot image classification.

Transformers.js uses [ONNX Runtime](https://onnxruntime.ai/) to run models in the browser. The best part is that you can easily [convert](#convert-your-models-to-onnx) your pretrained PyTorch, TensorFlow, or JAX models to ONNX using [🤗 Optimum](https://github.com/huggingface/optimum#onnx--onnx-runtime).

For more information, check out the full [documentation](https://huggingface.co/docs/transformers.js).

@@ -70,7 +70,7 @@ out = pipe('I love transformers!')
<td>

```javascript
- import { pipeline } from '@xenova/transformers';
+ import { pipeline } from 'chromadb-default-embed';

// Allocate a pipeline for sentiment-analysis
let pipe = await pipeline('sentiment-analysis');
@@ -94,15 +94,15 @@ let pipe = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-u
## Installation


- To install via [NPM](https://www.npmjs.com/package/@xenova/transformers), run:
+ To install via [NPM](https://www.npmjs.com/package/chromadb-default-embed), run:
```bash
- npm i @xenova/transformers
+ npm i chromadb-default-embed
```

Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
```html
<script type="module">
- import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.13.2';
+ import { pipeline } from 'https://cdn.jsdelivr.net/npm/chromadb-default-embed@2.13.2';
</script>
```

@@ -135,13 +135,13 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa



- By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@xenova/transformers@2.13.2/dist/), which should work out-of-the-box. You can customize this as follows:
+ By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/chromadb-default-embed@2.13.2/dist/), which should work out-of-the-box. You can customize this as follows:


### Settings

```javascript
- import { env } from '@xenova/transformers';
+ import { env } from 'chromadb-default-embed';

// Specify a custom location for models (defaults to '/models/').
env.localModelPath = '/path/to/models/';
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "chromadb-default-embed",
"version": "2.13.3",
"version": "2.14.0",
"description": "Chroma's fork of @xenova/transformers serving as our default embedding function",
"main": "./src/transformers.js",
"types": "./types/transformers.d.ts",
91 changes: 53 additions & 38 deletions src/env.js
@@ -1,24 +1,24 @@
/**
* @file Module used to configure Transformers.js.
*
* **Example:** Disable remote models.
* ```javascript
* import { env } from '@xenova/transformers';
* env.allowRemoteModels = false;
* ```
*
* **Example:** Set local model path.
* ```javascript
* import { env } from '@xenova/transformers';
* env.localModelPath = '/path/to/local/models/';
* ```
*
* **Example:** Set cache directory.
* ```javascript
* import { env } from '@xenova/transformers';
* env.cacheDir = '/path/to/cache/directory/';
* ```
*
* @module env
*/

@@ -31,35 +31,46 @@ const { env: onnx_env } = ONNX;

const VERSION = '2.13.2';

/**
* Check if the current environment is a browser.
* @returns {boolean} True if running in a browser, false otherwise.
*/
function isBrowser() {
(Inline review comment, Member Author: This is the core change of this PR 🙂)

return (
typeof window !== "undefined" &&
typeof window.document !== "undefined"
);
}
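This environment check is the heart of the browser-compatibility fix. A minimal standalone sketch of its behavior, assuming it runs under Node.js (where no `window` global exists); in the library the real check lives in `src/env.js` and gates the file-system flags below:

```javascript
// Standalone copy of the isBrowser() check this PR introduces.
function isBrowser() {
  return (
    typeof window !== "undefined" &&
    typeof window.document !== "undefined"
  );
}

// Mirrors how the diff gates file-system availability on the result:
// FS-backed features stay enabled only outside the browser.
// (`fsAllowed` is an illustrative name, not from the diff.)
const fsAllowed = !isBrowser();

console.log(isBrowser(), fsAllowed); // under Node.js: false true
```

Because the check tests both `window` and `window.document`, environments like web workers (which lack `document`) are also treated as non-browser for file-system purposes.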

// Check if various APIs are available (depends on environment)
const WEB_CACHE_AVAILABLE = typeof self !== 'undefined' && 'caches' in self;
- const FS_AVAILABLE = !isEmpty(fs); // check if file system is available
- const PATH_AVAILABLE = !isEmpty(path); // check if path is available
+ const FS_AVAILABLE = !isBrowser() && !isEmpty(fs); // check if file system is available and not in browser
+ const PATH_AVAILABLE = !isBrowser() && !isEmpty(path); // check if path is available and not in browser

const RUNNING_LOCALLY = FS_AVAILABLE && PATH_AVAILABLE;

const __dirname = RUNNING_LOCALLY
    ? path.dirname(path.dirname(url.fileURLToPath(import.meta.url)))
    : './';

// Only used for environments with access to file system
const DEFAULT_CACHE_DIR = RUNNING_LOCALLY
    ? path.join(__dirname, '/.cache/')
    : null;

// Set local model path, based on available APIs
const DEFAULT_LOCAL_MODEL_PATH = '/models/';
const localModelPath = RUNNING_LOCALLY
    ? path.join(__dirname, DEFAULT_LOCAL_MODEL_PATH)
    : DEFAULT_LOCAL_MODEL_PATH;

// Set path to wasm files. This is needed when running in a web worker.
// https://onnxruntime.ai/docs/api/js/interfaces/Env.WebAssemblyFlags.html#wasmPaths
// We use remote wasm files by default to make it easier for newer users.
// In practice, users should probably self-host the necessary .wasm files.
onnx_env.wasm.wasmPaths = RUNNING_LOCALLY
    ? path.join(__dirname, '/dist/')
    : `https://cdn.jsdelivr.net/npm/@xenova/transformers@${VERSION}/dist/`;


/**
@@ -76,6 +87,7 @@ onnx_env.wasm.wasmPaths = RUNNING_LOCALLY
* If set to `false`, it will skip the local file check and try to load the model from the remote host.
* @property {string} localModelPath Path to load local models from. Defaults to `/models/`.
* @property {boolean} useFS Whether to use the file system to load files. By default, it is `true` if available.
* @property {boolean} isBrowser Whether the environment is a browser. Determined by checking for window and document objects.
* @property {boolean} useBrowserCache Whether to use Cache API to cache models. By default, it is `true` if available.
* @property {boolean} useFSCache Whether to use the file system to cache files. By default, it is `true` if available.
* @property {string} cacheDir The directory to use for caching files with the file system. By default, it is `./.cache`.
@@ -84,36 +96,39 @@ onnx_env.wasm.wasmPaths = RUNNING_LOCALLY
* implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache
*/
export const env = {
    /////////////////// Backends settings ///////////////////
    backends: {
        // onnxruntime-web/onnxruntime-node
        onnx: onnx_env,

        // TensorFlow.js
        tfjs: {},
    },

    __dirname,
    version: VERSION,

    /////////////////// Model settings ///////////////////
    allowRemoteModels: true,
    remoteHost: 'https://huggingface.co/',
    remotePathTemplate: '{model}/resolve/{revision}/',

    allowLocalModels: true,
    localModelPath: localModelPath,

    /////////////////// Environment detection ///////////////////
    useFS: FS_AVAILABLE,
    isBrowser: isBrowser(),

    /////////////////// Cache settings ///////////////////
    useBrowserCache: WEB_CACHE_AVAILABLE,

    useFSCache: FS_AVAILABLE,
    cacheDir: DEFAULT_CACHE_DIR,

    useCustomCache: false,
    customCache: null,
    //////////////////////////////////////////////////////
}


@@ -122,6 +137,6 @@ export const env = {
* @private
*/
function isEmpty(obj) {
    return Object.keys(obj).length === 0;
}
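Putting the new flags together, here is a hedged sketch of how downstream code might branch on `env.isBrowser`; the property names come from the diff above, but the cache-selection function and its logic are illustrative assumptions, not the library's actual code:

```javascript
// Illustrative only: a consumer choosing a cache strategy from the env
// flags this PR exposes. pickCacheStrategy is a hypothetical helper.
function pickCacheStrategy(env) {
  if (env.isBrowser && env.useBrowserCache) return "web-cache"; // Web Cache API
  if (env.useFSCache && env.cacheDir) return "fs-cache";        // Node fs cache
  return "no-cache";
}

console.log(pickCacheStrategy({ isBrowser: true, useBrowserCache: true }));
console.log(pickCacheStrategy({ isBrowser: false, useFSCache: true, cacheDir: "./.cache" }));
```

The ordering matters: checking `isBrowser` first means a browser environment never falls through to the file-system branch, which is exactly the failure mode the PR's `FS_AVAILABLE` change guards against.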

11 changes: 9 additions & 2 deletions tests/generate_tests.py
@@ -58,6 +58,9 @@

# TODO: remove when https://github.com/huggingface/transformers/issues/28096 is addressed
'RajuKandasamy/tamillama_tiny_30m',

# TODO: remove when need for trust_remote_code can be addressed in CI
'monologg/kobert',
]

MAX_TESTS = {
@@ -269,15 +272,18 @@ def generate_tokenizer_tests():
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name,
use_fast=False,
trust_remote_code=True,
)
decoder_tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name,
use_fast=True,
trust_remote_code=True,
)

else:
decoder_tokenizer = tokenizer = AutoTokenizer.from_pretrained(
    tokenizer_name,
    trust_remote_code=True)

except (KeyError, EnvironmentError):
# If a KeyError/EnvironmentError is raised from the AutoTokenizer, it
@@ -329,6 +335,7 @@ def generate_tokenizer_tests():

# TODO: Remove once https://github.com/huggingface/transformers/pull/26678 is fixed
use_fast='llama' not in tokenizer_id,
trust_remote_code=True,
)
tokenizer_results = []
for key in TOKENIZERS_WITH_CHAT_TEMPLATES[tokenizer_id]:
@@ -363,7 +370,7 @@ def generate_config_tests():
print(' -', config_name)
try:
# Load config
- config = AutoConfig.from_pretrained(config_name)
+ config = AutoConfig.from_pretrained(config_name, trust_remote_code=True)
except Exception:
# Something went wrong, skip this config
continue