
Commit 6fad3e7

[DOC] Enhance Documentation on Usage of Prebuilt Executable (#15)
Co-authored-by: tjtanaa <[email protected]>
1 parent fb2abcc commit 6fad3e7

5 files changed: +174 additions, −123 deletions


README.md

Lines changed: 11 additions & 4 deletions

````diff
@@ -119,15 +119,15 @@ Run local LLMs on iGPU, APU and CPU (AMD , Intel, and Qualcomm (Coming Soon)). E
 
 1. `ellm_chatbot --port 7788 --host localhost --server_port <ellm_server_port> --server_host localhost`. **Note:** To find out more of the supported arguments. `ellm_chatbot --help`.
 
-![asset/ellm_chatbot_vid.webp](asset/ellm_chatbot_vid.webp)
+![asset/ellm_chatbot_vid.webp](asset/ellm_chatbot_vid.webp)
 
 ### Launch Model Management UI
 
 It is an interface that allows you to download and deploy OpenAI API compatible server. You can find out the disk space required to download the model in the UI.
 
 1. `ellm_modelui --port 6678`. **Note:** To find out more of the supported arguments. `ellm_modelui --help`.
 
-![Model Management UI](asset/ellm_modelui.png)
+![Model Management UI](asset/ellm_modelui.png)
 
 ## Compile OpenAI-API Compatible Server into Windows Executable
 
@@ -138,13 +138,20 @@ It is an interface that allows you to download and deploy OpenAI API compatible
 5. Use it like `ellm_server`. `.\ellm_api_server.exe --model_path <path/to/model/weight>`.
 
 ## Prebuilt OpenAI API Compatible Windows Executable (Alpha)
+
 You can find the prebuilt OpenAI API Compatible Windows Executable in the Release page.
 
-*Powershell/Terminal Usage (Use it like `ellm_server`)*:
+_Powershell/Terminal Usage (Use it like `ellm_server`)_:
+
 ```powershell
 .\ellm_api_server.exe --model_path <path/to/model/weight>
-```
 
+# DirectML
+.\ellm_api_server.exe --model_path 'EmbeddedLLM_Phi-3-mini-4k-instruct-062024-onnx\onnx\directml\Phi-3-mini-4k-instruct-062024-int4' --port 5555
+
+# IPEX-LLM
+.\ellm_api_server.exe --model_path '.\meta-llama_Meta-Llama-3.1-8B-Instruct\' --backend 'ipex' --device 'xpu' --port 5555 --served_model_name 'meta-llama_Meta/Llama-3.1-8B-Instruct'
+```
 
 ## Acknowledgements
 
````
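The usage examples added above start the server on port 5555. Because the executable exposes an OpenAI-compatible API, any OpenAI client should be able to talk to it once it is running. Below is a minimal, untested sketch using the `openai` Python package; the `/v1` route prefix and the model name are assumptions, so adjust the model name to whatever you passed via `--served_model_name` (or the model path) at launch.

```python
# Minimal sketch: query the local OpenAI-API-compatible server started above.
# Assumptions: server listening on http://localhost:5555, standard /v1 routes,
# and the served model name matching the --served_model_name used at launch.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:5555/v1",  # local ellm_api_server endpoint (assumed prefix)
    api_key="not-needed",                 # local servers typically ignore the key
)

response = client.chat.completions.create(
    model="meta-llama_Meta/Llama-3.1-8B-Instruct",  # replace with your served model name
    messages=[{"role": "user", "content": "Hello! What can you do?"}],
    max_tokens=128,
)
print(response.choices[0].message.content)
```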

setup.py

Lines changed: 60 additions & 38 deletions

```diff
@@ -53,44 +53,67 @@ def _is_ipex() -> bool:
 class ELLMInstallCommand(install):
     def run(self):
         install.run(self)
-        print("is_ipex(): " + str(_is_ipex()))
         if _is_ipex():
-            print("Install Ipex-LLM")
-            result = subprocess.run([
-                'pip', 'install', '--pre', '--upgrade', 'ipex-llm[xpu]',
-                '--extra-index-url', 'https://pytorch-extension.intel.com/release-whl/stable/xpu/us/'
-            ], capture_output=True, text=True)
-
-            result = subprocess.run([
-                'pip', 'install', '--upgrade', 'transformers==4.43.3'
-            ], capture_output=True, text=True)
+            result = subprocess.run(
+                [
+                    "pip",
+                    "install",
+                    "--pre",
+                    "--upgrade",
+                    "ipex-llm[xpu]",
+                    "--extra-index-url",
+                    "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/",
+                ],
+                capture_output=True,
+                text=True,
+            )
+
+            result = subprocess.run(
+                ["pip", "install", "--upgrade", "transformers==4.43.3"],
+                capture_output=True,
+                text=True,
+            )
 
         if _is_directml():
-            result = subprocess.run([
-                'conda', 'install', 'conda-forge::vs2015_runtime' , '-y'
-            ], capture_output=True, text=True)
+            result = subprocess.run(
+                ["conda", "install", "conda-forge::vs2015_runtime", "-y"],
+                capture_output=True,
+                text=True,
+            )
+
 
 class ELLMDevelopCommand(develop):
     def run(self):
         develop.run(self)
-        print("is_ipex(): " + str(_is_ipex()))
         if _is_ipex():
             print("Install Ipex-LLM")
-            result = subprocess.run([
-                'pip', 'install', '--pre', '--upgrade', 'ipex-llm[xpu]',
-                '--extra-index-url', 'https://pytorch-extension.intel.com/release-whl/stable/xpu/us/'
-            ], capture_output=True, text=True)
-
-            result = subprocess.run([
-                'pip', 'install', '--upgrade', 'transformers==4.43.3'
-            ], capture_output=True, text=True)
+            result = subprocess.run(
+                [
+                    "pip",
+                    "install",
+                    "--pre",
+                    "--upgrade",
+                    "ipex-llm[xpu]",
+                    "--extra-index-url",
+                    "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/",
+                ],
+                capture_output=True,
+                text=True,
+            )
+
+            result = subprocess.run(
+                ["pip", "install", "--upgrade", "transformers==4.43.3"],
+                capture_output=True,
+                text=True,
+            )
 
         if _is_directml():
-            result = subprocess.run([
-                'conda', 'install', 'conda-forge::vs2015_runtime' , '-y'
-            ], capture_output=True, text=True)
-
-            print(result)
+            result = subprocess.run(
+                ["conda", "install", "conda-forge::vs2015_runtime", "-y"],
+                capture_output=True,
+                text=True,
+            )
+
 
 def get_path(*filepath) -> str:
     return os.path.join(ROOT_DIR, *filepath)
@@ -158,14 +181,12 @@ def get_ellm_version() -> str:
 dependency_links = []
 extra_install_requires = []
 
-if(_is_directml() or _is_cuda() or _is_cpu()):
-    dependency_links.extend(["https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-genai/pypi/simple/"])
-# elif(_is_ipex()):
-#     dependency_links.extend(["https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"])
-#     extra_install_requires.extend(["torch==2.1.0a0 @ https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"])
-#     extra_install_requires.extend(["ipex-llm[xpu]==2.1.0b20240702 @ https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"])
-
-#     extra_install_requires = ['ipex-llm[xpu]==2.1.0b20240702 @ https://pytorch-extension.intel.com/release-whl/stable/xpu/us/']
+if _is_directml() or _is_cuda() or _is_cpu():
+    dependency_links.extend(
+        [
+            "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-genai/pypi/simple/"
+        ]
+    )
 
 setup(
     name="embeddedllm",
@@ -192,7 +213,8 @@ def get_ellm_version() -> str:
     ],
     install_requires=get_requirements()
     + _read_requirements("requirements-common.txt")
-    + _read_requirements("requirements-build.txt") + extra_install_requires,
+    + _read_requirements("requirements-build.txt")
+    + extra_install_requires,
     # Add other metadata and dependencies as needed
     extras_require={
         "lint": _read_requirements("requirements-lint.txt"),
@@ -209,7 +231,7 @@ def get_ellm_version() -> str:
         ],
     },
     cmdclass={
-        'install': ELLMInstallCommand,
-        'develop': ELLMDevelopCommand,
+        "install": ELLMInstallCommand,
+        "develop": ELLMDevelopCommand,
     },
 )
```
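The custom `install`/`develop` commands above shell out to `pip` via `subprocess.run` but discard the `CompletedProcess` result. As a side note (not part of this commit), the same pattern with the return code actually checked looks roughly like the sketch below; `pip_install` is a hypothetical helper, and `sys.executable -m pip` is used so the packages land in the interpreter that is running `setup.py`.

```python
# Illustrative sketch, not from the repository: subprocess-based pip install
# with the result inspected instead of silently ignored.
import subprocess
import sys


def pip_install(*args: str) -> None:
    """Install packages into the current environment and fail loudly on error."""
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", *args],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Surface pip's own error output instead of continuing with a broken env.
        raise RuntimeError(f"pip install {' '.join(args)} failed:\n{result.stderr}")


# Example call mirroring the IPEX-LLM branch in the diff above:
# pip_install(
#     "--pre", "--upgrade", "ipex-llm[xpu]",
#     "--extra-index-url", "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/",
# )
```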

src/embeddedllm/backend/base_engine.py

Lines changed: 16 additions & 13 deletions

```diff
@@ -135,15 +135,15 @@ def _get_and_verify_max_len(
     for key in possible_keys:
         max_len = getattr(hf_config, key, None)
         if max_len is not None:
-            max_len_key = key if max_len < derived_max_model_len \
-                else max_len_key
+            max_len_key = key if max_len < derived_max_model_len else max_len_key
             derived_max_model_len = min(derived_max_model_len, max_len)
 
     # If sliding window is manually disabled, max_length should be less
     # than the sliding window length in the model config.
     if disable_sliding_window and sliding_window_len is not None:
-        max_len_key = "sliding_window" \
-            if sliding_window_len < derived_max_model_len else max_len_key
+        max_len_key = (
+            "sliding_window" if sliding_window_len < derived_max_model_len else max_len_key
+        )
         derived_max_model_len = min(derived_max_model_len, sliding_window_len)
 
     # If none of the keys were found in the config, use a default and
@@ -157,8 +157,10 @@ def _get_and_verify_max_len(
         logger.warning(
             "The model's config.json does not contain any of the following "
             "keys to determine the original maximum length of the model: "
-            "%s. Assuming the model's maximum length is %d.", possible_keys,
-            default_max_len)
+            "%s. Assuming the model's maximum length is %d.",
+            possible_keys,
+            default_max_len,
+        )
         derived_max_model_len = default_max_len
 
     rope_scaling = getattr(hf_config, "rope_scaling", None)
@@ -168,8 +170,7 @@ def _get_and_verify_max_len(
         elif "rope_type" in rope_scaling:
             rope_type = rope_scaling["rope_type"]
         else:
-            raise ValueError(
-                "rope_scaling must have a 'type' or 'rope_type' key.")
+            raise ValueError("rope_scaling must have a 'type' or 'rope_type' key.")
 
         # The correct one should be "longrope", kept "su" here
         # to be backward compatible
@@ -180,13 +181,13 @@ def _get_and_verify_max_len(
             raise NotImplementedError(
                 "Disabling sliding window is not supported for models "
                 "with rope_scaling. Please raise an issue so we can "
-                "investigate.")
+                "investigate."
+            )
 
         assert "factor" in rope_scaling
         scaling_factor = rope_scaling["factor"]
         if rope_type == "yarn":
-            derived_max_model_len = rope_scaling[
-                "original_max_position_embeddings"]
+            derived_max_model_len = rope_scaling["original_max_position_embeddings"]
         derived_max_model_len *= scaling_factor
 
     # If the user specified a max length, make sure it is smaller than the
@@ -205,7 +206,8 @@ def _get_and_verify_max_len(
             raise NotImplementedError(
                 "Disabling sliding window is not supported for models "
                 "model_max_length in the config. Please raise an issue "
-                "so we can investigate.")
+                "so we can investigate."
+            )
             pass
         else:
             raise ValueError(
@@ -214,5 +216,6 @@ def _get_and_verify_max_len(
             f"({max_len_key}={derived_max_model_len} or model_max_length="
             f"{model_max_length} in model's config.json). This may lead "
             "to incorrect model outputs or CUDA errors. Make sure the "
-            "value is correct and within the model context size.")
+            "value is correct and within the model context size."
+        )
     return int(max_model_len)
```
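The hunks above only reformat `_get_and_verify_max_len`, which scans the HuggingFace config for a context-length key, optionally caps it by the sliding-window length, and rescales it when `rope_scaling` is present (for YaRN the base comes from `original_max_position_embeddings` multiplied by `factor`). The sketch below is an illustrative reduction of that arithmetic, not the repository's code; the key list and config values are assumptions.

```python
# Illustrative only: the core max-length derivation performed by
# _get_and_verify_max_len, with a shortened, assumed list of config keys.

def derive_max_len(config: dict, default_max_len: int = 2048) -> int:
    possible_keys = ["max_position_embeddings", "model_max_length", "seq_length"]  # assumed subset
    derived = float("inf")
    for key in possible_keys:
        value = config.get(key)
        if value is not None:
            derived = min(derived, value)  # keep the most restrictive key
    if derived == float("inf"):
        derived = default_max_len  # none of the keys were present

    rope_scaling = config.get("rope_scaling")
    if rope_scaling is not None and rope_scaling.get("rope_type") == "yarn":
        # YaRN: scale the original training length by the factor.
        derived = rope_scaling["original_max_position_embeddings"] * rope_scaling["factor"]
    return int(derived)


# Hypothetical config: max_position_embeddings=4096 with a YaRN factor of 4
# yields 4096 * 4 = 16384.
print(derive_max_len({
    "max_position_embeddings": 4096,
    "rope_scaling": {"rope_type": "yarn", "factor": 4,
                     "original_max_position_embeddings": 4096},
}))  # -> 16384
```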
