# jina_sum.py (130 lines, 113 loc, 5.59 KB) -- forked from hanfangyuan4396/jina_sum
# encoding:utf-8
import json
import os
import html
from urllib.parse import urlparse
import requests
import plugins
from bridge.context import ContextType
from bridge.reply import Reply, ReplyType
from common.log import logger
from plugins import *
@plugins.register(
    name="JinaSum",
    desire_priority=10,
    hidden=False,
    desc="Sum url link content with jina reader and llm",
    version="0.0.1",
    author="hanfangyuan",
)
class JinaSum(Plugin):
    """Summarize the page behind a URL message using jina reader and an LLM.

    When a TEXT or SHARING message consists of an http(s) URL, the plugin
    downloads the page text through the jina reader endpoint, sends it
    (truncated to ``max_words`` characters) to an OpenAI-compatible
    ``/chat/completions`` endpoint together with ``prompt``, and replies with
    the model's answer.

    The class attributes below are defaults; each one can be overridden by a
    key of the same name in the plugin config.
    """

    jina_reader_base = "https://r.jina.ai"
    open_ai_api_base = "https://api.openai.com/v1"
    open_ai_model = "gpt-3.5-turbo"
    max_words = 8000
    prompt = "我需要对下面引号内文档进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动\n\n"

    # Number of additional attempts made after the first failure.
    _MAX_RETRIES = 3

    def __init__(self):
        """Load the plugin config and register the context handler.

        Raises:
            RuntimeError: if the configuration cannot be loaded or applied.
                The underlying error is logged and attached as ``__cause__``.
        """
        super().__init__()
        try:
            self.config = super().load_config()
            if not self.config:
                self.config = self._load_config_template()
            if self.config is None:
                # Fail with a readable message instead of an AttributeError
                # on ``None.get`` a few lines further down.
                raise ValueError("no config.json or config.json.template found")
            self.jina_reader_base = self.config.get("jina_reader_base", self.jina_reader_base)
            self.open_ai_api_base = self.config.get("open_ai_api_base", self.open_ai_api_base)
            self.open_ai_api_key = self.config.get("open_ai_api_key", "")
            self.open_ai_model = self.config.get("open_ai_model", self.open_ai_model)
            self.max_words = self.config.get("max_words", self.max_words)
            self.prompt = self.config.get("prompt", self.prompt)
            # Never write the API key to the log.
            loggable_config = dict(self.config)
            if loggable_config.get("open_ai_api_key"):
                loggable_config["open_ai_api_key"] = "***"
            logger.info(f"[JinaSum] inited, config={loggable_config}")
            self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
        except Exception as e:
            logger.error(f"[JinaSum] 初始化异常:{e}")
            # Raising a bare string is itself a TypeError in Python 3; raise a
            # real exception and keep the original error as its cause.
            raise RuntimeError("[JinaSum] init failed, ignore ") from e

    def on_handle_context(self, e_context: EventContext, retry_count: int = 0):
        """Handle an incoming message; summarize it if it is a URL.

        Messages that are not TEXT/SHARING, or whose content is not an
        http(s) URL, are left untouched. Otherwise the summary (or, once all
        attempts have failed, an error message) is stored in
        ``e_context["reply"]`` and the action is set to ``BREAK_PASS``.

        Args:
            e_context: event context carrying ``"context"`` and ``"channel"``.
            retry_count: index of the first attempt. The "please wait" notice
                is only sent on attempt 0, and attempts continue until the
                index reaches ``_MAX_RETRIES``.
        """
        attempt = retry_count
        while True:
            try:
                context = e_context["context"]
                content = context.content
                if context.type not in (ContextType.SHARING, ContextType.TEXT):
                    return
                if not self._check_url(content):
                    logger.debug(f"[JinaSum] {content} not a url, skip")
                    return
                if attempt == 0:
                    logger.debug("[JinaSum] on_handle_context. content: %s" % content)
                    reply = Reply(ReplyType.TEXT, "🎉正在为您生成总结,请稍候...")
                    channel = e_context["channel"]
                    channel.send(reply, context)

                # Works around a link-validation problem with official-account
                # share-card links, see
                # https://github.com/fatwang2/sum4all/commit/b983c49473fc55f13ba2c44e4d8b226db3517c45
                # _check_url tolerates surrounding whitespace, so strip it
                # before the URL is embedded in the jina request.
                target_url = html.unescape(content).strip()
                result = self._summarize(target_url)
                e_context["reply"] = Reply(ReplyType.TEXT, result)
                e_context.action = EventAction.BREAK_PASS
                return
            except Exception as e:
                if attempt < self._MAX_RETRIES:
                    logger.warning(f"[JinaSum] {str(e)}, retry {attempt + 1}")
                    attempt += 1
                    continue
                logger.exception(f"[JinaSum] {str(e)}")
                reply = Reply(ReplyType.ERROR, "我暂时无法总结链接,请稍后再试")
                e_context["reply"] = reply
                e_context.action = EventAction.BREAK_PASS
                return

    def _summarize(self, target_url):
        """Fetch the page text for ``target_url`` and return the LLM summary.

        Raises whatever ``requests`` raises on network/HTTP errors, and
        ``KeyError``/``IndexError`` if the completion response lacks
        ``choices[0].message.content``.
        """
        jina_url = self._get_jina_url(target_url)
        response = requests.get(jina_url, timeout=60)
        response.raise_for_status()
        target_url_content = response.text

        openai_chat_url = self._get_openai_chat_url()
        openai_headers = self._get_openai_headers()
        openai_payload = self._get_openai_payload(target_url_content)
        # Log a copy of the headers with the bearer token masked.
        loggable_headers = {**openai_headers, 'Authorization': "Bearer ***"}
        logger.debug(f"[JinaSum] openai_chat_url: {openai_chat_url}, openai_headers: {loggable_headers}, openai_payload: {openai_payload}")
        response = requests.post(openai_chat_url, headers=openai_headers, json=openai_payload, timeout=60)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']

    def get_help_text(self, verbose, **kwargs):
        """Return the one-line help text shown for this plugin."""
        return '使用jina reader和ChatGPT总结网页链接内容'

    def _load_config_template(self):
        """Load ``config.json.template`` from the plugin directory.

        Returns:
            The parsed JSON content, or ``None`` if the template does not
            exist or cannot be read (the error is logged).
        """
        logger.debug("No JinaSum plugin config.json, use plugins/jina_sum/config.json.template")
        try:
            plugin_config_path = os.path.join(self.path, "config.json.template")
            if os.path.exists(plugin_config_path):
                with open(plugin_config_path, "r", encoding="utf-8") as f:
                    return json.load(f)
        except Exception as e:
            logger.exception(e)
        return None

    def _get_jina_url(self, target_url):
        """Return the jina reader URL for ``target_url``."""
        # Tolerate a trailing slash on the configured base.
        return self.jina_reader_base.rstrip("/") + "/" + target_url

    def _get_openai_chat_url(self):
        """Return the chat-completions endpoint under ``open_ai_api_base``."""
        # Tolerate a trailing slash on the configured base.
        return self.open_ai_api_base.rstrip("/") + "/chat/completions"

    def _get_openai_headers(self):
        """Return the HTTP headers for the chat-completions request."""
        return {
            'Authorization': f"Bearer {self.open_ai_api_key}",
            'Host': urlparse(self.open_ai_api_base).netloc,
        }

    def _get_openai_payload(self, target_url_content):
        """Build the chat-completions payload for the given page text."""
        # Crude truncation by string length (characters, not words or tokens).
        target_url_content = target_url_content[:self.max_words]
        sum_prompt = f"{self.prompt}\n\n'''{target_url_content}'''"
        return {
            'model': self.open_ai_model,
            'messages': [{"role": "user", "content": sum_prompt}],
        }

    def _check_url(self, target_url: str):
        """Return True if ``target_url`` looks like an http(s) URL.

        This is only a simple prefix check; surrounding whitespace is ignored.
        """
        return target_url.strip().startswith(("http://", "https://"))