Skip to content

Commit

Permalink
feat: 增加url阅读能力,通过 jina.ai 实现
Browse files Browse the repository at this point in the history
  • Loading branch information
liuhuapiaoyuan committed Oct 27, 2024
1 parent 00a4e33 commit 6c9f2bc
Show file tree
Hide file tree
Showing 16 changed files with 556 additions and 48 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ node_modules/
/playwright-report/
/blob-report/
/playwright/.cache/
.env
35 changes: 31 additions & 4 deletions backend/api/routes/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,27 @@
import json
from typing import Dict, Optional
from constants import SPEEKERS
from utils import combine_audio, generate_dialogue, generate_podcast_info, generate_podcast_summary, get_pdf_text
from utils import combine_audio, generate_dialogue, generate_podcast_info, generate_podcast_summary, get_link_text, get_pdf_text

router = APIRouter()

@router.post("/generate_transcript")
async def generate_transcript(
    pdfFile: Optional[UploadFile] = File(None),
    textInput: str = Form(...),
    mode: str = Form(...),
    url: Optional[str] = Form(None),
    tone: str = Form(...),
    duration: str = Form(...),
    language: str = Form(...),
):
    """Stream a generated podcast dialogue for an uploaded PDF or a web page.

    Args:
        pdfFile: Uploaded PDF; read only when ``mode`` is ``'pdf'``.
        textInput: Free-form text forwarded to ``generate_dialogue``.
        mode: ``'pdf'`` selects the uploaded file; any other value selects ``url``.
        url: Page address to read when ``mode`` is not ``'pdf'``.
        tone: Forwarded to ``generate_dialogue``.
        duration: Forwarded to ``generate_dialogue``.
        language: Forwarded to ``generate_dialogue``.

    Returns:
        A ``StreamingResponse`` wrapping the ``generate_dialogue`` output.
    """
    import asyncio  # local import keeps this handler self-contained

    source_text = ""
    if mode == 'pdf':
        # Extract the PDF exactly once, and only when it is the chosen source.
        source_text = await get_pdf_text(pdfFile)
    elif url:
        # get_link_text performs a blocking HTTP request; run it on the default
        # executor so the event loop keeps serving other requests meanwhile.
        loop = asyncio.get_running_loop()
        link_data = await loop.run_in_executor(None, get_link_text, url)
        source_text = link_data['text']
    # With no url supplied the source text stays empty instead of asking the
    # reader service to fetch the literal string "None".

    return StreamingResponse(
        generate_dialogue(source_text, textInput, tone, duration, language),
        media_type="application/json",
    )

Expand All @@ -31,16 +38,28 @@ def test():
def speeker():
    # Serve the SPEEKERS constant to the client as a JSON document.
    speakers_payload = SPEEKERS
    return JSONResponse(content=speakers_payload)

# Fetches one fixed sample page through get_link_text and returns whatever
# that helper produced as the JSON body.
@router.get("/jina")
def jina():
    sample_url = "https://ui.shadcn.com/docs/components/select"
    return JSONResponse(content=get_link_text(sample_url))


@router.post("/summarize")
async def get_summary(
textInput: str = Form(...),
tone: str = Form(...),
duration: str = Form(...),
language: str = Form(...),
mode: str = Form(...),
url: Optional[str] = Form(None),
pdfFile: Optional[UploadFile] = File(None)
):
pdfContent = await get_pdf_text(pdfFile)
pdfContent =""
if mode=='pdf':
pdfContent = await get_pdf_text(pdfFile)
else:
linkData = get_link_text(url)
pdfContent = linkData['text']
new_text = pdfContent
return StreamingResponse(
generate_podcast_summary(
Expand All @@ -59,9 +78,17 @@ async def get_pod_info(
tone: str = Form(...),
duration: str = Form(...),
language: str = Form(...),
mode: str = Form(...),
url: Optional[str] = Form(None),
pdfFile: Optional[UploadFile] = File(None)
):
pdfContent = await get_pdf_text(pdfFile)
pdfContent =""
if mode=='pdf':
pdfContent = await get_pdf_text(pdfFile)
else:
linkData = get_link_text(url)
pdfContent = linkData['text']

new_text = pdfContent[:100]

return StreamingResponse(generate_podcast_info(new_text, textInput, tone, duration, language), media_type="application/json")
Expand Down
2 changes: 1 addition & 1 deletion backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# Add the CORS middleware.
# Allowed origins come from the CORS_ALLOW_ORIGINS environment variable
# (comma-separated) and default to the production front-end. Trailing slashes
# are stripped because a browser's Origin header never carries one, so an entry
# such as "https://ai.podcastlm.fun/" would never match. Credentials are turned
# off whenever the wildcard origin is configured, since "any origin" combined
# with credentials lets every site issue authenticated requests.
import os

_cors_origins = [
    origin.strip().rstrip("/")
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "https://ai.podcastlm.fun").split(",")
    if origin.strip()
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
Expand Down
12 changes: 11 additions & 1 deletion backend/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import re
import time
import hashlib

from typing import Any, Dict, Generator
import uuid
from openai import OpenAI
import requests
from fishaudio import fishaudio_tts
from prompts import LANGUAGE_MODIFIER, LENGTH_MODIFIERS, PODCAST_INFO_PROMPT, QUESTION_MODIFIER, SUMMARY_INFO_PROMPT, SYSTEM_PROMPT, TONE_MODIFIER
import json
Expand Down Expand Up @@ -254,6 +254,16 @@ def clear_pdf_cache():
global pdf_cache
pdf_cache.clear()

def get_link_text(url: str, timeout: float = 30.0):
    """Fetch the content of a web page through the jina.ai Reader endpoint.

    The API key is read from the ``JINA_API_KEY`` environment variable instead
    of being embedded in source control. When the variable is unset, the
    request is sent without an ``Authorization`` header.

    Args:
        url: Address of the page to read.
        timeout: Seconds to wait for the Reader service before giving up.

    Returns:
        The ``data`` member of the Reader's JSON reply.

    Raises:
        ValueError: If ``url`` is ``None``, empty, or only whitespace.
        requests.HTTPError: If the Reader answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    import os  # local import so the function does not rely on file-level imports

    if not url or not url.strip():
        raise ValueError("url must be a non-empty string")

    headers = {
        'Accept': 'application/json',
        'X-Return-Format': 'text',
    }
    api_key = os.environ.get("JINA_API_KEY")
    if api_key:
        headers['Authorization'] = f'Bearer {api_key}'

    reader_url = f"https://r.jina.ai/{url.strip()}"
    # A timeout keeps a stalled upstream from hanging the calling request forever.
    response = requests.get(reader_url, headers=headers, timeout=timeout)
    # Surface HTTP errors here rather than as a confusing KeyError on 'data'.
    response.raise_for_status()
    return response.json()['data']

async def get_pdf_text(pdf_file: UploadFile):
text = ""
print(pdf_file)
Expand Down
2 changes: 2 additions & 0 deletions frontend/.env.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
BASE_URL=
HOST_URL=
1 change: 1 addition & 0 deletions frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"@radix-ui/react-select": "^2.1.2",
"@radix-ui/react-slot": "^1.1.0",
"@radix-ui/react-tabs": "^1.1.1",
"@radix-ui/react-toast": "^1.2.2",
"@radix-ui/react-toggle": "^1.1.0",
"@radix-ui/react-toggle-group": "^1.1.0",
"axios": "^1.7.7",
Expand Down
36 changes: 36 additions & 0 deletions frontend/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions frontend/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { useJsonData } from "./hooks/useJsonData";
import { useStreamText } from './hooks/useStreamText';
import { BASE_URL } from "./lib/constant";
import MobileMenu from "./components/mobile-menu";
import { Toaster } from "@/components/ui/toaster"

function App() {
const [isGenerating, setIsGenerating] = useState(false);
Expand Down Expand Up @@ -37,6 +38,7 @@ function App() {
}
return (
<div className="h-screen flex flex-col overflow-hidden">
<Toaster />
<main className="flex-grow flex bg-[rgb(245,245,245)] h-full">
<Menu className="hidden md:flex "
handleGenerate={handleGenerate}
Expand Down
Loading

0 comments on commit 6c9f2bc

Please sign in to comment.