20240324 modifications #1

Open: wants to merge 2 commits into main.
8 changes: 8 additions & 0 deletions .idea/.gitignore


6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml


7 changes: 7 additions & 0 deletions .idea/misc.xml


8 changes: 8 additions & 0 deletions .idea/modules.xml


10 changes: 10 additions & 0 deletions .idea/openai-translator.iml


6 changes: 6 additions & 0 deletions .idea/vcs.xml


2 changes: 1 addition & 1 deletion ai_translator/book/content.py
@@ -48,7 +48,7 @@ def set_translation(self, translation, status):

         LOG.debug(translation)
         # Convert the string to a list of lists
-        table_data = [row.strip().split() for row in translation.strip().split('\n')]
+        table_data = [row.strip().split('|') for row in translation.strip().split('\n')]
         LOG.debug(table_data)
         # Create a DataFrame from the table_data
         translated_df = pd.DataFrame(table_data[1:], columns=table_data[0])
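The split('|') change assumes the model returns each table row as a '|'-delimited line (which the revised table prompt in model.py asks for). A minimal sketch, with made-up output, of why whitespace splitting breaks on multi-word cells:

```python
# Made-up model output: one table row per line, cells separated by '|'.
translation = "Name|Age\nAlice Smith|30\nBob Lee|25"

rows_ws = [row.strip().split() for row in translation.strip().split('\n')]
rows_pipe = [row.strip().split('|') for row in translation.strip().split('\n')]

print(rows_ws[1])    # ['Alice', 'Smith|30'] -- whitespace splitting tears a cell apart
print(rows_pipe[1])  # ['Alice Smith', '30'] -- '|' splitting keeps each cell whole
```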
36 changes: 23 additions & 13 deletions ai_translator/main.py
@@ -6,25 +6,35 @@

 from utils import ArgumentParser, ConfigLoader, LOG
 from model import GLMModel, OpenAIModel
 from translator import PDFTranslator
+from argparse import ArgumentParser

 if __name__ == "__main__":
     argument_parser = ArgumentParser()
-    args = argument_parser.parse_arguments()
+    argument_parser.add_argument("--config", help="Path to configuration file", default="/Users/linyan/Documents/worksp/openai-translator/config.yaml")
+    argument_parser.add_argument("--model_type", help="Type of model (e.g., OpenAIModel, GLMModel)", default="OpenAIModel")
+    argument_parser.add_argument("--openai_model", help="Name of OpenAI model")
+    argument_parser.add_argument("--openai_api_key", help="OpenAI API key")
+    argument_parser.add_argument("--timeout", help="Timeout for GLMModel")
+    argument_parser.add_argument("--model_url", help="URL for GLMModel")
+    argument_parser.add_argument("--book", help="Path to book file")  # add the --book argument
+    argument_parser.add_argument("--file_format", help="File format")  # add the --file_format argument
+    args = argument_parser.parse_args()
     config_loader = ConfigLoader(args.config)

+    print("Config path:", args.config)
     config = config_loader.load_config()

-    if args.model_type == "OpenAIModel":
-        model_name = args.openai_model if args.openai_model else config['OpenAIModel']['model']
-        api_key = args.openai_api_key if args.openai_api_key else config['OpenAIModel']['api_key']
-        model = OpenAIModel(model=model_name, api_key=api_key)
-    elif args.model_type == "GLMModel":
-        timeout = args.timeout if args.timeout else config['GLMModel']['timeout']
-        model_url = args.model_url if args.model_url else config['GLMModel']['model_url']
-        model = GLMModel(model_url=model_url, timeout=timeout)
-    else:
-        raise ValueError("Invalid model_type specified. Please choose either 'GLMModel' or 'OpenAIModel'.")
+    if hasattr(args, "model_type"):
+        if args.model_type == "OpenAIModel":
+            model_name = args.openai_model if args.openai_model else config['OpenAIModel']['model']
+            api_key = args.openai_api_key if args.openai_api_key else config['OpenAIModel']['api_key']
+            model = OpenAIModel(model=model_name, api_key=api_key)
+        elif args.model_type == "GLMModel":
+            timeout = args.timeout if args.timeout else config['GLMModel']['timeout']
+            model_url = args.model_url if args.model_url else config['GLMModel']['model_url']
+            model = GLMModel(model_url=model_url, timeout=timeout)
+        else:
+            raise ValueError("Invalid model_type specified. Please choose either 'GLMModel' or 'OpenAIModel'.")
+    else:
+        raise ValueError("Model type not provided. Please provide a model_type argument.")

     pdf_file_path = args.book if args.book else config['common']['book']
     file_format = args.file_format if args.file_format else config['common']['file_format']
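One detail worth knowing about the new hasattr(args, "model_type") guard: argparse sets every declared option as an attribute on the parsed namespace (None when omitted, or the declared default), so the guard is always true once the option is registered; it would only fire if --model_type were never declared. A standalone illustration:

```python
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument("--model_type", default="OpenAIModel")
parser.add_argument("--openai_model")      # no default, so None when omitted

args = parser.parse_args([])               # simulate an empty command line

print(hasattr(args, "model_type"))         # True: declared options always exist
print(args.model_type)                     # 'OpenAIModel' (the default kicks in)
print(args.openai_model)                   # None, so the config fallback is used
```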
4 changes: 2 additions & 2 deletions ai_translator/model/model.py
@@ -2,10 +2,10 @@

 class Model:
     def make_text_prompt(self, text: str, target_language: str) -> str:
-        return f"翻译为{target_language}:{text}"
+        return f"你是一个翻译专家,记住所有能翻译的文字都要翻译为{target_language}:{text}"

     def make_table_prompt(self, table: str, target_language: str) -> str:
-        return f"翻译为{target_language},保持间距(空格,分隔符),以表格形式返回:\n{table}"
+        return f"你是一个翻译专家,记住所有能翻译的文字都要翻译为{target_language},保持间距(空格,分隔符),以表格形式返回:\n{table}"

     def translate_prompt(self, content, target_language: str) -> str:
         if content.content_type == ContentType.TEXT:

(In English, the old prompts read "Translate into {target_language}: {text}"; the new ones prepend "You are a translation expert; remember to translate all translatable text into {target_language}", with the table prompt still asking the model to preserve spacing, spaces and delimiters, and return the result as a table.)
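A small sketch of the prompt the revised make_table_prompt template produces for a sample '|'-delimited table (input invented for illustration); preserving the delimiters in the model's reply is what lets content.py split rows on '|' above:

```python
# Illustrative input only; mirrors the revised f-string template above.
table = "Name|Age\nAlice|30"
target_language = "中文"

prompt = (
    f"你是一个翻译专家,记住所有能翻译的文字都要翻译为{target_language},"
    f"保持间距(空格,分隔符),以表格形式返回:\n{table}"
)
print(prompt)  # the '|'-delimited rows ride along at the end of the prompt
```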
8 changes: 4 additions & 4 deletions ai_translator/model/openai_model.py
@@ -16,15 +16,15 @@ def make_request(self, prompt):

         while attempts < 3:
             try:
                 if self.model == "gpt-3.5-turbo":
-                    response = openai.ChatCompletion.create(
+                    response = openai.chat.completions.create(
                         model=self.model,
                         messages=[
                             {"role": "user", "content": prompt}
                         ]
                     )
-                    translation = response.choices[0].message['content'].strip()
+                    translation = response.choices[0].message.content.strip()
                 else:
-                    response = openai.Completion.create(
+                    response = openai.completions.create(
                         model=self.model,
                         prompt=prompt,
                         max_tokens=150,

@@ -33,7 +33,7 @@ def make_request(self, prompt):

                     translation = response.choices[0].text.strip()

                 return translation, True
-            except openai.error.RateLimitError:
+            except openai.RateLimitError:
                 attempts += 1
                 if attempts < 3:
                     LOG.warning("Rate limit reached. Waiting for 60 seconds before retrying.")
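These edits track the OpenAI Python SDK v1 interface: openai.ChatCompletion.create becomes openai.chat.completions.create, message['content'] becomes the message.content attribute, and exceptions move from openai.error to the top-level openai namespace. A minimal sketch of both call styles, assuming openai>=1.0 and an OPENAI_API_KEY in the environment (model names are illustrative):

```python
import openai  # assumes openai>=1.0; reads OPENAI_API_KEY from the environment

# Chat models: messages in, message.content out.
chat = openai.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hi"}],
)
print(chat.choices[0].message.content.strip())

# Legacy completion models: prompt in, choices[0].text out.
legacy = openai.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Say hi",
    max_tokens=16,
)
print(legacy.choices[0].text.strip())
```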
2 changes: 1 addition & 1 deletion ai_translator/translator/pdf_translator.py
@@ -10,7 +10,7 @@ def __init__(self, model: Model):

         self.pdf_parser = PDFParser()
         self.writer = Writer()

-    def translate_pdf(self, pdf_file_path: str, file_format: str = 'PDF', target_language: str = '中文', output_file_path: str = None, pages: Optional[int] = None):
+    def translate_pdf(self, pdf_file_path: str, file_format: str = 'PDF', target_language: str = '日语', output_file_path: str = None, pages: Optional[int] = None):
         self.book = self.pdf_parser.parse_pdf(pdf_file_path, pages)

         for page_idx, page in enumerate(self.book.pages):

(The default target_language moves from '中文', Chinese, to '日语', Japanese.)
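With the new default, a call that omits target_language now produces Japanese output. A hypothetical invocation inside this repository (API key and paths are placeholders):

```python
from model import OpenAIModel
from translator import PDFTranslator

model = OpenAIModel(model="gpt-3.5-turbo", api_key="sk-placeholder")
translator = PDFTranslator(model)

translator.translate_pdf("test/test.pdf", file_format="markdown")   # defaults to 日语 (Japanese)
translator.translate_pdf("test/test.pdf", target_language="中文")    # Chinese now needs an explicit argument
```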
1 change: 1 addition & 0 deletions ai_translator/utils/config_loader.py
@@ -5,6 +5,7 @@ def __init__(self, config_path):

         self.config_path = config_path

     def load_config(self):
+        print("Config path:", self.config_path)
         with open(self.config_path, "r") as f:
             config = yaml.safe_load(f)
         return config
7 changes: 2 additions & 5 deletions config.yaml
@@ -1,11 +1,8 @@

 OpenAIModel:
     model: "gpt-3.5-turbo"
-    api_key: "your_openai_api_key"
+    api_key: "yourkey"

-GLMModel:
-    model_url: "your_chatglm_model_url"
-    timeout: 300

 common:
-    book: "test/test.pdf"
+    book: "/Users/linyan/Documents/worksp/openai-translator/test/test.pdf"
     file_format: "markdown"
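For reference, a minimal sketch of how load_config consumes this file (values are the placeholders from the diff); note that with the GLMModel block gone, the GLMModel branch in main.py would raise a KeyError if selected:

```python
import yaml

with open("config.yaml", "r") as f:
    config = yaml.safe_load(f)

print(config['OpenAIModel']['model'])    # "gpt-3.5-turbo"
print(config['common']['file_format'])   # "markdown"
# config['GLMModel'] would raise KeyError now that the block is removed.
```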