Skip to content

Commit

Permalink
feat: gradio demo integration (#16)
Browse files Browse the repository at this point in the history
Co-authored-by: Bo Liu <[email protected]>
Co-authored-by: Haoyu Lu <[email protected]>
  • Loading branch information
3 people authored Mar 13, 2024
1 parent 86a3096 commit 6014260
Show file tree
Hide file tree
Showing 24 changed files with 1,787 additions and 44 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ COPYRIGHT = "DeepSeek."
PROJECT_PATH = deepseek_vl
SHELL = /bin/bash
SOURCE_FOLDERS = deepseek_vl
PYTHON_FILES = $(shell find $(SOURCE_FOLDERS) -type f -name "*.py" -o -name "*.pyi")
PYTHON_FILES = $(shell find $(SOURCE_FOLDERS) -type f -name "*.py" -o -name "*.pyi") cli_chat.py inference.py
COMMIT_HASH = $(shell git log -1 --format=%h)
PATH := $(HOME)/go/bin:$(PATH)
PYTHON ?= $(shell command -v python3 || command -v python)
Expand Down Expand Up @@ -86,7 +86,7 @@ format: py-format-install ruff-install addlicense-install
$(PYTHON) -m isort --project $(PROJECT_PATH) $(PYTHON_FILES)
$(PYTHON) -m black $(PYTHON_FILES)
$(PYTHON) -m ruff check . --fix --exit-zero
addlicense -c $(COPYRIGHT) -ignore tests/coverage.xml -l mit -y 2023-$(shell date +"%Y") $(SOURCE_FOLDERS)
addlicense -c $(COPYRIGHT) -ignore tests/coverage.xml -l mit -y 2023-$(shell date +"%Y") $(SOURCE_FOLDERS) cli_chat.py inference.py

clean-py:
find . -type f -name '*.py[co]' -delete
Expand Down
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,17 @@ Introducing DeepSeek-VL, an open-source Vision-Language (VL) Model designed for

[DeepSeek-VL: Towards Real-World Vision-Language Understanding](https://arxiv.org/abs/2403.05525)

Haoyu Lu*, Wen Liu*, Bo Zhang**, Bingxuan Wang, Kai Dong, Bo Liu, Jingxiang Sun, Tongzheng Ren, Zhuoshu Li, Yaofeng Sun, Chengqi Deng, Hanwei Xu, Zhenda Xie, Chong Ruan (*Equal Contribution, **Project Lead)
Haoyu Lu*, Wen Liu*, Bo Zhang**, Bingxuan Wang, Kai Dong, Bo Liu, Jingxiang Sun, Tongzheng Ren, Zhuoshu Li, Hao Yang, Yaofeng Sun, Chengqi Deng, Hanwei Xu, Zhenda Xie, Chong Ruan (*Equal Contribution, **Project Lead)

![](https://github.com/deepseek-ai/DeepSeek-VL/blob/main/images/sample.jpg)

## 2. Release

<details>
<summary>✅ <b>2024-03-13</b>: Support DeepSeek-VL Gradio demo.</summary>

</details>

<details>
<summary>✅ <b>2024-03-11</b>: DeepSeek-VL family released, including <code>DeepSeek-VL-7B-base</code>, <code>DeepSeek-VL-7B-chat</code>, <code>DeepSeek-VL-1.3B-base</code>, and <code>DeepSeek-VL-1.3B-chat</code>.</summary>
<br>The release includes a diverse set of models tailored for various applications within the DeepSeek-VL family. The models come in two sizes: 7B and 1.3B parameters, each offering base and chat variants to cater to different needs and integration scenarios.
Expand Down Expand Up @@ -170,6 +175,16 @@ python cli_chat.py --model_path "deepseek-ai/deepseek-vl-7b-chat"
python cli_chat.py --model_path "local model path"
```

### Gradio Demo
```bash
pip install -e .[gradio]

python deepseek_vl/serve/app_deepseek.py
```
![](./images/gradio_demo.png)

Have Fun!

## 5. License

This code repository is licensed under [the MIT License](https://github.com/deepseek-ai/DeepSeek-LLM/blob/HEAD/LICENSE-CODE). The use of DeepSeek-VL Base/Chat models is subject to [DeepSeek Model License](https://github.com/deepseek-ai/DeepSeek-LLM/blob/HEAD/LICENSE-MODEL). DeepSeek-VL series (including Base and Chat) supports commercial use.
Expand All @@ -179,7 +194,7 @@ This code repository is licensed under [the MIT License](https://github.com/deep
```
@misc{lu2024deepseekvl,
title={DeepSeek-VL: Towards Real-World Vision-Language Understanding},
author={Haoyu Lu and Wen Liu and Bo Zhang and Bingxuan Wang and Kai Dong and Bo Liu and Jingxiang Sun and Tongzheng Ren and Zhuoshu Li and Yaofeng Sun and Chengqi Deng and Hanwei Xu and Zhenda Xie and Chong Ruan},
author={Haoyu Lu and Wen Liu and Bo Zhang and Bingxuan Wang and Kai Dong and Bo Liu and Jingxiang Sun and Tongzheng Ren and Zhuoshu Li and Hao Yang and Yaofeng Sun and Chengqi Deng and Hanwei Xu and Zhenda Xie and Chong Ruan},
year={2024},
eprint={2403.05525},
archivePrefix={arXiv},
Expand Down
79 changes: 55 additions & 24 deletions cli_chat.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@
# Copyright (c) 2023-2024 DeepSeek.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# -*- coding: utf-8 -*-

import argparse
import os
import sys
from PIL import Image
from threading import Thread

import torch
from PIL import Image
from transformers import TextIteratorStreamer

from deepseek_vl.utils.io import load_pretrained_model
Expand Down Expand Up @@ -33,22 +53,19 @@ def get_help_message(image_token):


@torch.inference_mode()
def response(args, conv, pil_images, tokenizer, vl_chat_processor, vl_gpt, generation_config):

def response(
args, conv, pil_images, tokenizer, vl_chat_processor, vl_gpt, generation_config
):
prompt = conv.get_prompt()
prepare_inputs = vl_chat_processor.__call__(
prompt=prompt,
images=pil_images,
force_batchify=True
prompt=prompt, images=pil_images, force_batchify=True
).to(vl_gpt.device)

# run image encoder to get the image embeddings
inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)

streamer = TextIteratorStreamer(
tokenizer=tokenizer,
skip_prompt=True,
skip_special_tokens=True
tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True
)
generation_config["inputs_embeds"] = inputs_embeds
generation_config["attention_mask"] = prepare_inputs.attention_mask
Expand Down Expand Up @@ -79,17 +96,17 @@ def chat(args, tokenizer, vl_chat_processor, vl_gpt, generation_config):
help_msg = get_help_message(image_token)

while True:

print(help_msg)

pil_images = []
conv = vl_chat_processor.new_chat_template()
roles = conv.roles

while True:

# get user input
user_input = get_user_input(f"{roles[0]} [{image_token} indicates an image]: ")
user_input = get_user_input(
f"{roles[0]} [{image_token} indicates an image]: "
)

if user_input == "exit":
print("Chat program exited.")
Expand Down Expand Up @@ -135,11 +152,21 @@ def chat(args, tokenizer, vl_chat_processor, vl_gpt, generation_config):
sys.exit(0)

else:
print(f"File error, `{image_file}` does not exist. Please input the correct file path.")
print(
f"File error, `{image_file}` does not exist. Please input the correct file path."
)

# get the answer by the model's prediction
answer = ""
answer_iter = response(args, conv, pil_images, tokenizer, vl_chat_processor, vl_gpt, generation_config)
answer_iter = response(
args,
conv,
pil_images,
tokenizer,
vl_chat_processor,
vl_gpt,
generation_config,
)
sys.stdout.write(f"{conv.roles[1]}: ")
for char in answer_iter:
answer += char
Expand All @@ -153,7 +180,6 @@ def chat(args, tokenizer, vl_chat_processor, vl_gpt, generation_config):


def main(args):

# setup
tokenizer, vl_chat_processor, vl_gpt = load_pretrained_model(args.model_path)
generation_config = dict(
Expand All @@ -164,12 +190,14 @@ def main(args):
use_cache=True,
)
if args.temperature > 0:
generation_config.update({
"do_sample": True,
"top_p": args.top_p,
"temperature": args.temperature,
"repetition_penalty": args.repetition_penalty,
})
generation_config.update(
{
"do_sample": True,
"top_p": args.top_p,
"temperature": args.temperature,
"repetition_penalty": args.repetition_penalty,
}
)
else:
generation_config.update({"do_sample": False})

Expand All @@ -178,12 +206,15 @@ def main(args):

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, default="deepseek-ai/deepseek-vl-7b-chat",
help="the huggingface model name or the local path of the downloaded huggingface model.")
parser.add_argument(
"--model_path",
type=str,
default="deepseek-ai/deepseek-vl-7b-chat",
help="the huggingface model name or the local path of the downloaded huggingface model.",
)
parser.add_argument("--temperature", type=float, default=0.2)
parser.add_argument("--top_p", type=float, default=0.95)
parser.add_argument("--repetition_penalty", type=float, default=1.1)
parser.add_argument("--max_gen_len", type=int, default=512)
args = parser.parse_args()
main(args)

Loading

0 comments on commit 6014260

Please sign in to comment.