Skip to content

adding trust_remote_code argument for loading dataset in controlnet training example #10727

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/en/training/controlnet.md
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand Down Expand Up @@ -315,6 +316,7 @@ python3 train_controlnet_flax.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand Down
1 change: 1 addition & 0 deletions docs/source/en/training/t2i_adapters.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ accelerate launch train_t2i_adapter_sdxl.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--mixed_precision="fp16" \
--resolution=1024 \
--learning_rate=1e-5 \
Expand Down
6 changes: 6 additions & 0 deletions docs/source/ko/training/controlnet.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand All @@ -118,6 +119,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand All @@ -140,6 +142,7 @@ accelerate launch --mixed_precision="fp16" --multi_gpu train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand Down Expand Up @@ -188,6 +191,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand Down Expand Up @@ -216,6 +220,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand Down Expand Up @@ -282,6 +287,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
--validation_prompt "red circle with blue background" "cyan circle with brown floral background" \
Expand Down
7 changes: 7 additions & 0 deletions examples/controlnet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand All @@ -88,6 +89,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand All @@ -109,6 +111,7 @@ accelerate launch --mixed_precision="fp16" --multi_gpu train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand Down Expand Up @@ -156,6 +159,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand All @@ -182,6 +186,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand Down Expand Up @@ -242,6 +247,7 @@ accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
--validation_prompt "red circle with blue background" "cyan circle with brown floral background" \
Expand Down Expand Up @@ -377,6 +383,7 @@ python3 train_controlnet_flax.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
Expand Down
4 changes: 3 additions & 1 deletion examples/controlnet/README_flux.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ When running `accelerate config`, if we specify torch compile mode to True there
## Custom Datasets

We support dataset formats:
The original dataset is hosted in the [ControlNet repo](https://huggingface.co/lllyasviel/ControlNet/blob/main/training/fill50k.zip). We re-uploaded it to be compatible with `datasets` [here](https://huggingface.co/datasets/fusing/fill50k). Note that `datasets` handles dataloading within the training script. To use our example, add `--dataset_name=fusing/fill50k \` to the script and remove line `--jsonl_for_train` mentioned below.
The original dataset is hosted in the [ControlNet repo](https://huggingface.co/lllyasviel/ControlNet/blob/main/training/fill50k.zip). We re-uploaded it to be compatible with `datasets` [here](https://huggingface.co/datasets/fusing/fill50k). Note that `datasets` handles dataloading within the training script. To use our example, add `--dataset_name=fusing/fill50k \` and `--trust_remote_code \` to the script and remove the `--jsonl_for_train` line mentioned below.


We also support importing data from JSONL files (`xxx.jsonl`); use `--jsonl_for_train` to enable it. Here is a brief example of a JSONL file:
Expand All @@ -85,6 +86,7 @@ we can define the num_layers, num_single_layers, which determines the size of th
accelerate launch train_controlnet_flux.py \
--pretrained_model_name_or_path="black-forest-labs/FLUX.1-dev" \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--conditioning_image_column=conditioning_image \
--image_column=image \
--caption_column=text \
Expand Down
1 change: 1 addition & 0 deletions examples/controlnet/README_sdxl.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ accelerate launch train_controlnet_sdxl.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--mixed_precision="fp16" \
--resolution=1024 \
--learning_rate=1e-5 \
Expand Down
9 changes: 7 additions & 2 deletions examples/controlnet/train_controlnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,11 @@ def parse_args(input_args=None):
" more information see https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
),
)
parser.add_argument(
"--trust_remote_code",
action="store_true",
help="Whether to trust and execute remote code for loading datasets.",
)

if input_args is not None:
args = parser.parse_args(input_args)
Expand Down Expand Up @@ -613,12 +618,12 @@ def make_train_dataset(args, tokenizer, accelerator):
args.dataset_config_name,
cache_dir=args.cache_dir,
data_dir=args.train_data_dir,
trust_remote_code=args.trust_remote_code,
)
else:
if args.train_data_dir is not None:
dataset = load_dataset(
args.train_data_dir,
cache_dir=args.cache_dir,
args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
)
# See more about loading custom images at
# https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script
Expand Down
10 changes: 8 additions & 2 deletions examples/controlnet/train_controlnet_flax.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,12 @@ def parse_args():
)
parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")

parser.add_argument(
"--trust_remote_code",
action="store_true",
help="Whether to trust and execute remote code for loading datasets.",
)

args = parser.parse_args()
args.output_dir = args.output_dir.replace("{timestamp}", time.strftime("%Y%m%d_%H%M%S"))

Expand Down Expand Up @@ -515,6 +521,7 @@ def make_train_dataset(args, tokenizer, batch_size=None):
args.dataset_config_name,
cache_dir=args.cache_dir,
streaming=args.streaming,
trust_remote_code=args.trust_remote_code,
)
else:
if args.train_data_dir is not None:
Expand All @@ -524,8 +531,7 @@ def make_train_dataset(args, tokenizer, batch_size=None):
)
else:
dataset = load_dataset(
args.train_data_dir,
cache_dir=args.cache_dir,
args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
)
# See more about loading custom images at
# https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script
Expand Down
10 changes: 9 additions & 1 deletion examples/controlnet/train_controlnet_flux.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,11 @@ def parse_args(input_args=None):
action="store_true",
help="Enable model cpu offload and save memory.",
)
parser.add_argument(
"--trust_remote_code",
action="store_true",
help="Whether to trust and execute remote code for loading datasets.",
)

if input_args is not None:
args = parser.parse_args(input_args)
Expand Down Expand Up @@ -690,10 +695,13 @@ def get_train_dataset(args, accelerator):
args.dataset_name,
args.dataset_config_name,
cache_dir=args.cache_dir,
trust_remote_code=args.trust_remote_code,
)
if args.jsonl_for_train is not None:
# load from json
dataset = load_dataset("json", data_files=args.jsonl_for_train, cache_dir=args.cache_dir)
dataset = load_dataset(
"json", data_files=args.jsonl_for_train, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
)
dataset = dataset.flatten_indices()
# Preprocessing the datasets.
# We need to tokenize inputs and targets.
Expand Down
5 changes: 3 additions & 2 deletions examples/controlnet/train_controlnet_sd3.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
if is_wandb_available():
import wandb


# Will error if the minimal version of diffusers is not installed. Remove at your own risk.
check_min_version("0.33.0.dev0")

Expand Down Expand Up @@ -649,12 +650,12 @@ def make_train_dataset(args, tokenizer_one, tokenizer_two, tokenizer_three, acce
args.dataset_name,
args.dataset_config_name,
cache_dir=args.cache_dir,
trust_remote_code=args.trust_remote_code,
)
else:
if args.train_data_dir is not None:
dataset = load_dataset(
args.train_data_dir,
cache_dir=args.cache_dir,
args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
)
# See more about loading custom images at
# https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script
Expand Down
9 changes: 7 additions & 2 deletions examples/controlnet/train_controlnet_sdxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,11 @@ def parse_args(input_args=None):
" more information see https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
),
)
parser.add_argument(
"--trust_remote_code",
action="store_true",
help="Whether to trust and execute remote code for loading datasets.",
)

if input_args is not None:
args = parser.parse_args(input_args)
Expand Down Expand Up @@ -640,12 +645,12 @@ def get_train_dataset(args, accelerator):
args.dataset_config_name,
cache_dir=args.cache_dir,
data_dir=args.train_data_dir,
trust_remote_code=args.trust_remote_code,
)
else:
if args.train_data_dir is not None:
dataset = load_dataset(
args.train_data_dir,
cache_dir=args.cache_dir,
args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
)
# See more about loading custom images at
# https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ accelerate launch ./train_pixart_controlnet_hf.py --mixed_precision="fp16" \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--resolution=512 \
--learning_rate=1e-5 \
--train_batch_size=1 \
Expand Down
10 changes: 7 additions & 3 deletions examples/research_projects/pixart/train_pixart_controlnet_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,11 @@ def parse_args():
" more information see https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
),
)
parser.add_argument(
"--trust_remote_code",
action="store_true",
help="Whether to trust and execute remote code for loading datasets.",
)

args = parser.parse_args()

Expand Down Expand Up @@ -766,15 +771,14 @@ def load_model_hook(models, input_dir):
args.dataset_config_name,
cache_dir=args.cache_dir,
data_dir=args.train_data_dir,
trust_remote_code=args.trust_remote_code,
)
else:
data_files = {}
if args.train_data_dir is not None:
data_files["train"] = os.path.join(args.train_data_dir, "**")
dataset = load_dataset(
"imagefolder",
data_files=data_files,
cache_dir=args.cache_dir,
"imagefolder", data_files=data_files, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
)
# See more about loading custom images at
# https://huggingface.co/docs/datasets/v2.4.0/en/image_load#imagefolder
Expand Down
1 change: 1 addition & 0 deletions examples/t2i_adapter/README_sdxl.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ accelerate launch train_t2i_adapter_sdxl.py \
--pretrained_model_name_or_path=$MODEL_DIR \
--output_dir=$OUTPUT_DIR \
--dataset_name=fusing/fill50k \
--trust_remote_code \
--mixed_precision="fp16" \
--resolution=1024 \
--learning_rate=1e-5 \
Expand Down
9 changes: 7 additions & 2 deletions examples/t2i_adapter/train_t2i_adapter_sdxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,11 @@ def parse_args(input_args=None):
" more information see https://huggingface.co/docs/accelerate/v0.17.0/en/package_reference/accelerator#accelerate.Accelerator"
),
)
parser.add_argument(
"--trust_remote_code",
action="store_true",
help="Whether to trust and execute remote code for loading datasets.",
)

if input_args is not None:
args = parser.parse_args(input_args)
Expand Down Expand Up @@ -632,12 +637,12 @@ def get_train_dataset(args, accelerator):
args.dataset_name,
args.dataset_config_name,
cache_dir=args.cache_dir,
trust_remote_code=args.trust_remote_code,
)
else:
if args.train_data_dir is not None:
dataset = load_dataset(
args.train_data_dir,
cache_dir=args.cache_dir,
args.train_data_dir, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code
)
# See more about loading custom images at
# https://huggingface.co/docs/datasets/v2.0.0/en/dataset_script
Expand Down
Loading