We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b06d795 · commit 9bee2b7 (Copy full SHA for 9bee2b7)
src/petals/cli/convert_model.py
@@ -60,7 +60,7 @@ def main():
60
revision=args.revision,
61
torch_dtype=DTYPE_MAP[args.torch_dtype] if args.torch_dtype != "int8" else "float16",
62
load_in_8bit=args.torch_dtype == "int8",
63
- device_map={"word_embeddings": "cuda", "word_embeddings_layernorm": "cuda", "h": "cuda", "ln_f": "cuda"},
+ device_map="auto" if args.torch_dtype == "int8" else None,
64
)
65
if args.torch_dtype == "int8":
66
# trigger weight quantization
0 commit comments