huggingface · YanivDorGalron · Feb 5, 2025 · Feb 5, 2025 · Feb 6, 2025 · Feb 7, 2025
diff --git a/docs/source/en/training/controlnet.md b/docs/source/en/training/controlnet.md
@@ -283,6 +283,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -315,6 +316,7 @@ python3 train_controlnet_flax.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \

diff --git a/docs/source/en/training/t2i_adapters.md b/docs/source/en/training/t2i_adapters.md
@@ -180,6 +180,7 @@ accelerate launch train_t2i_adapter_sdxl.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --mixed_precision="fp16" \
  --resolution=1024 \
  --learning_rate=1e-5 \

diff --git a/docs/source/ko/training/controlnet.md b/docs/source/ko/training/controlnet.md
@@ -96,6 +96,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -118,6 +119,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -140,6 +142,7 @@ accelerate launch --mixed_precision="fp16" --multi_gpu train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -188,6 +191,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -216,6 +220,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -282,6 +287,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
  --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \

diff --git a/examples/controlnet/README.md b/examples/controlnet/README.md
@@ -66,6 +66,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -88,6 +89,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -109,6 +111,7 @@ accelerate launch --mixed_precision="fp16" --multi_gpu train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -156,6 +159,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -182,6 +186,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
@@ -242,6 +247,7 @@ accelerate launch train_controlnet.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
  --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \
@@ -377,6 +383,7 @@ python3 train_controlnet_flax.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \

diff --git a/examples/controlnet/README_flux.md b/examples/controlnet/README_flux.md
@@ -58,7 +58,8 @@ When running `accelerate config`, if we specify torch compile mode to True there
 ## Custom Datasets
 
 We support dataset formats:
-The original dataset is hosted in the [ControlNet repo](https://huggingface.co/lllyasviel/ControlNet/blob/main/training/fill50k.zip). We re-uploaded it to be compatible with `datasets` [here](https://huggingface.co/datasets/fusing/fill50k). Note that `datasets` handles dataloading within the training script. To use our example, add `--dataset_name=fusing/fill50k \` to the script and remove line `--jsonl_for_train` mentioned below.
+The original dataset is hosted in the [ControlNet repo](https://huggingface.co/lllyasviel/ControlNet/blob/main/training/fill50k.zip). We re-uploaded it to be compatible with `datasets` [here](https://huggingface.co/datasets/fusing/fill50k). Note that `datasets` handles dataloading within the training script. To use our example, add the
+`--dataset_name=fusing/fill50k` and `--trust_remote_code` flags to the script and remove the `--jsonl_for_train` line mentioned below.
 
 
 We also support importing data from jsonl(xxx.jsonl),using `--jsonl_for_train` to enable it, here is a brief example of jsonl files:
@@ -85,6 +86,7 @@ we can define the num_layers, num_single_layers, which determines the size of th
 accelerate launch train_controlnet_flux.py \
     --pretrained_model_name_or_path="black-forest-labs/FLUX.1-dev" \
     --dataset_name=fusing/fill50k \
+    --trust_remote_code \
     --conditioning_image_column=conditioning_image \
     --image_column=image \
     --caption_column=text \

diff --git a/examples/controlnet/README_sdxl.md b/examples/controlnet/README_sdxl.md
@@ -68,6 +68,7 @@ accelerate launch train_controlnet_sdxl.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --mixed_precision="fp16" \
  --resolution=1024 \
  --learning_rate=1e-5 \

diff --git a/examples/controlnet/train_controlnet.py b/examples/controlnet/train_controlnet.py
@@ -562,6 +562,11 @@ def parse_args(input_args=None):
             " more information see https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
         ),
     )
+    parser.add_argument(
+        "--trust_remote_code",
+        action="store_true",
+        help="Whether to trust and execute remote code for loading datasets.",
+    )
 
     if input_args is not None:
         args = parser.parse_args(input_args)
@@ -613,12 +618,12 @@ def make_train_dataset(args, tokenizer, accelerator):
             args.dataset_config_name,
             cache_dir=args.cache_dir,
             data_dir=args.train_data_dir,
+            trust_remote_code=args.trust_remote_code,
         )
     else:
         if args.train_data_dir is not None:
             dataset = load_dataset(
-                args.train_data_dir,
-                cache_dir=args.cache_dir,
+                args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
             )
         # See more about loading custom images at
         # https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script

diff --git a/examples/controlnet/train_controlnet_flax.py b/examples/controlnet/train_controlnet_flax.py
@@ -460,6 +460,12 @@ def parse_args():
     )
     parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
 
+    parser.add_argument(
+        "--trust_remote_code",
+        action="store_true",
+        help="Whether to trust and execute remote code for loading datasets.",
+    )
+
     args = parser.parse_args()
     args.output_dir = args.output_dir.replace("{timestamp}", time.strftime("%Y%m%d_%H%M%S"))
 
@@ -515,6 +521,7 @@ def make_train_dataset(args, tokenizer, batch_size=None):
             args.dataset_config_name,
             cache_dir=args.cache_dir,
             streaming=args.streaming,
+            trust_remote_code=args.trust_remote_code,
         )
     else:
         if args.train_data_dir is not None:
@@ -524,8 +531,7 @@ def make_train_dataset(args, tokenizer, batch_size=None):
                 )
             else:
                 dataset = load_dataset(
-                    args.train_data_dir,
-                    cache_dir=args.cache_dir,
+                    args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
                 )
         # See more about loading custom images at
         # https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script

diff --git a/examples/controlnet/train_controlnet_flux.py b/examples/controlnet/train_controlnet_flux.py
@@ -641,6 +641,11 @@ def parse_args(input_args=None):
         action="store_true",
         help="Enable model cpu offload and save memory.",
     )
+    parser.add_argument(
+        "--trust_remote_code",
+        action="store_true",
+        help="Whether to trust and execute remote code for loading datasets.",
+    )
 
     if input_args is not None:
         args = parser.parse_args(input_args)
@@ -690,10 +695,13 @@ def get_train_dataset(args, accelerator):
             args.dataset_name,
             args.dataset_config_name,
             cache_dir=args.cache_dir,
+            trust_remote_code=args.trust_remote_code,
         )
     if args.jsonl_for_train is not None:
         # load from json
-        dataset = load_dataset("json", data_files=args.jsonl_for_train, cache_dir=args.cache_dir)
+        dataset = load_dataset(
+            "json", data_files=args.jsonl_for_train, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
+        )
         dataset = dataset.flatten_indices()
     # Preprocessing the datasets.
     # We need to tokenize inputs and targets.

diff --git a/examples/controlnet/train_controlnet_sd3.py b/examples/controlnet/train_controlnet_sd3.py
@@ -58,6 +58,7 @@
 if is_wandb_available():
     import wandb
 
+
 # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
 check_min_version("0.33.0.dev0")
 
@@ -649,12 +650,12 @@ def make_train_dataset(args, tokenizer_one, tokenizer_two, tokenizer_three, acce
             args.dataset_name,
             args.dataset_config_name,
             cache_dir=args.cache_dir,
+            trust_remote_code=args.trust_remote_code,
         )
     else:
         if args.train_data_dir is not None:
             dataset = load_dataset(
-                args.train_data_dir,
-                cache_dir=args.cache_dir,
+                args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
             )
         # See more about loading custom images at
         # https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script

diff --git a/examples/controlnet/train_controlnet_sdxl.py b/examples/controlnet/train_controlnet_sdxl.py
@@ -589,6 +589,11 @@ def parse_args(input_args=None):
             " more information see https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
         ),
     )
+    parser.add_argument(
+        "--trust_remote_code",
+        action="store_true",
+        help="Whether to trust and execute remote code for loading datasets.",
+    )
 
     if input_args is not None:
         args = parser.parse_args(input_args)
@@ -640,12 +645,12 @@ def get_train_dataset(args, accelerator):
             args.dataset_config_name,
             cache_dir=args.cache_dir,
             data_dir=args.train_data_dir,
+            trust_remote_code=args.trust_remote_code,
         )
     else:
         if args.train_data_dir is not None:
             dataset = load_dataset(
-                args.train_data_dir,
-                cache_dir=args.cache_dir,
+                args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
             )
         # See more about loading custom images at
         # https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script

diff --git a/examples/research_projects/pixart/train_controlnet_hf_diffusers.sh b/examples/research_projects/pixart/train_controlnet_hf_diffusers.sh
@@ -13,6 +13,7 @@ accelerate launch ./train_pixart_controlnet_hf.py --mixed_precision="fp16" \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --resolution=512 \
  --learning_rate=1e-5 \
  --train_batch_size=1 \

diff --git a/examples/research_projects/pixart/train_pixart_controlnet_hf.py b/examples/research_projects/pixart/train_pixart_controlnet_hf.py
@@ -545,6 +545,11 @@ def parse_args():
             " more information see https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
         ),
     )
+    parser.add_argument(
+        "--trust_remote_code",
+        action="store_true",
+        help="Whether to trust and execute remote code for loading datasets.",
+    )
 
     args = parser.parse_args()
 
@@ -766,15 +771,14 @@ def load_model_hook(models, input_dir):
             args.dataset_config_name,
             cache_dir=args.cache_dir,
             data_dir=args.train_data_dir,
+            trust_remote_code=args.trust_remote_code,
         )
     else:
         data_files = {}
         if args.train_data_dir is not None:
             data_files["train"] = os.path.join(args.train_data_dir, "**")
         dataset = load_dataset(
-            "imagefolder",
-            data_files=data_files,
-            cache_dir=args.cache_dir,
+            "imagefolder", data_files=data_files, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
         )
         # See more about loading custom images at
         # https://huggingface.co/docs/datasets/v2.4.0/en/image_load#imagefolder

diff --git a/examples/t2i_adapter/README_sdxl.md b/examples/t2i_adapter/README_sdxl.md
@@ -68,6 +68,7 @@ accelerate launch train_t2i_adapter_sdxl.py \
  --pretrained_model_name_or_path=$MODEL_DIR \
  --output_dir=$OUTPUT_DIR \
  --dataset_name=fusing/fill50k \
+ --trust_remote_code \
  --mixed_precision="fp16" \
  --resolution=1024 \
  --learning_rate=1e-5 \

diff --git a/examples/t2i_adapter/train_t2i_adapter_sdxl.py b/examples/t2i_adapter/train_t2i_adapter_sdxl.py
@@ -579,6 +579,11 @@ def parse_args(input_args=None):
             " more information see https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
         ),
     )
+    parser.add_argument(
+        "--trust_remote_code",
+        action="store_true",
+        help="Whether to trust and execute remote code for loading datasets.",
+    )
 
     if input_args is not None:
         args = parser.parse_args(input_args)
@@ -632,12 +637,12 @@ def get_train_dataset(args, accelerator):
             args.dataset_name,
             args.dataset_config_name,
             cache_dir=args.cache_dir,
+            trust_remote_code=args.trust_remote_code,
         )
     else:
         if args.train_data_dir is not None:
             dataset = load_dataset(
-                args.train_data_dir,
-                cache_dir=args.cache_dir,
+                args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
             )
         # See more about loading custom images at
         # https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script