Skip to content

Commit

Permalink
- feature: added youtube transcript tool (#34)
Browse files Browse the repository at this point in the history
* - feature: added youtube transcript tool

* - fix: removed unused dependency

* - cicd: version bump
  • Loading branch information
agallardol authored Sep 4, 2024
1 parent 6ea96d2 commit d32ce05
Show file tree
Hide file tree
Showing 12 changed files with 538 additions and 127 deletions.
6 changes: 6 additions & 0 deletions apps/shinkai-tool-youtube-transcript/jest.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/* eslint-disable */
// Jest settings for the YouTube-transcript tool project: a display name for
// test output, the shared preset two directories up, and the folder where
// coverage reports are written.
const jestConfig = {
  displayName: '@shinkai_protocol/shinkai-tool-youtube-transcript',
  preset: '../../jest.preset.js',
  coverageDirectory: '../../coverage/apps/shinkai-tool-youtube-transcript',
};

export default jestConfig;
4 changes: 4 additions & 0 deletions apps/shinkai-tool-youtube-transcript/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"name": "@shinkai_protocol/shinkai-tool-youtube-transcript",
"type": "commonjs"
}
30 changes: 30 additions & 0 deletions apps/shinkai-tool-youtube-transcript/project.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"name": "@shinkai_protocol/shinkai-tool-youtube-transcript",
"$schema": "../../node_modules/nx/schemas/project-schema.json",
"sourceRoot": "apps/shinkai-tool-youtube-transcript/src",
"projectType": "library",
"tags": ["tool"],
"targets": {
"build": {
"executor": "nx:run-commands",
"defaultConfiguration": "production",
"options": {
"command": "npx ts-node scripts/tool-bundler.ts --entry ./apps/shinkai-tool-youtube-transcript/src/index.ts --outputFolder ./dist/apps/shinkai-tool-youtube-transcript"
},
"configurations": {
"development": {},
"production": {}
}
},
"lint": {
"executor": "@nx/linter:eslint",
"outputs": ["{options.outputFile}"],
"options": {
"lintFilePatterns": [
"apps/shinkai-tool-youtube-transcript/**/*.ts",
"apps/shinkai-tool-youtube-transcript/package.json"
]
}
}
}
}
36 changes: 36 additions & 0 deletions apps/shinkai-tool-youtube-transcript/src/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { Tool } from '../src/index';

// Smoke test: a freshly constructed tool must hand back a definition object.
test('exists definition', async () => {
  const toolDefinition = new Tool({}).getDefinition();
  expect(toolDefinition).toBeInstanceOf(Object);
});

// Runs the tool end to end against a real video URL (no mocks are set up in
// this file) and checks that both a transcript and a summary message come back.
// The 30 s timeout leaves room for the transcript fetch and the model call.
test(
  'transcript video',
  async () => {
    const runParams = {
      // Video about Shinkai Sheets
      url: 'https://youtu.be/RxxuM4wbVQc',
      model: 'llama3.1:8b-instruct-q4_1',
    };
    const youtubeTool = new Tool({});
    const outcome = await youtubeTool.run(runParams);

    expect(outcome.data.transcript).toBeInstanceOf(Array);
    expect(outcome.data.transcript.length).toBeGreaterThan(0);
    expect(outcome.data.message.length).toBeGreaterThan(0);
    console.log(outcome.data.message);
  },
  30000,
);


// test('transcript video using openai', async () => {
// const tool = new Tool({});
// const result = await tool.run({
// // Video about Shinkai Sheets
// url: 'https://youtu.be/RxxuM4wbVQc',
// apiUrl: 'https://api.openai.com/v1',
// apiKey: '',
// model: 'gpt-4o-mini'
// });
// expect(result.data.transcript).toBeInstanceOf(Array);
// expect(result.data.transcript.length).toBeGreaterThan(0);
// expect(result.data.message.length).toBeGreaterThan(0);
// console.log(result.data.message);
// }, 30000);
123 changes: 123 additions & 0 deletions apps/shinkai-tool-youtube-transcript/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import { BaseTool, RunResult } from '@shinkai_protocol/shinkai-tools-builder';
import { ToolDefinition } from 'libs/shinkai-tools-builder/src/tool-definition';
import { TranscriptResponse, YoutubeTranscript } from 'youtube-transcript';
import OpenAI from 'openai';

/** Tool-level configuration; this tool declares no configuration options. */
type Config = {};
/** Arguments accepted by `Tool.run`. */
type Params = {
/** URL of the YouTube video whose transcript is fetched. */
url: string;
/**
 * Base URL of an OpenAI-compatible API. When omitted or empty, `run` falls
 * back to `http://127.0.0.1:11435`; `/v1` is appended if not already present.
 */
apiUrl?: string;
/** API key handed to the OpenAI client; `run` substitutes `''` when it is missing. */
apiKey?: string;
/** Model name forwarded unchanged to the chat-completion request. */
model: string;
};
/**
 * Payload returned by `Tool.run`: the transcript entries as returned by
 * `YoutubeTranscript.fetchTranscript`, plus the text of the first completion
 * choice (`''` when the response carries no content).
 */
type Result = { transcript: TranscriptResponse[]; message: string };

/**
 * Shinkai tool that fetches the transcript of a YouTube video and asks an
 * OpenAI-compatible chat-completions endpoint to write a markdown summary of
 * it, with links back to the relevant offsets in the video.
 */
export class Tool extends BaseTool<Config, Params, Result> {
  /** Tool metadata plus schemas describing its configuration, parameters and result. */
  definition: ToolDefinition<Config, Params, Result> = {
    id: 'shinkai-tool-youtube-transcript',
    name: 'Shinkai: YouTube Transcript',
    description: 'Retrieve the transcript of a YouTube video',
    author: 'Shinkai',
    keywords: ['youtube', 'transcript', 'video', 'captions', 'subtitles'],
    configurations: {
      type: 'object',
      properties: {},
      required: [],
    },
    parameters: {
      type: 'object',
      properties: {
        url: {
          type: 'string',
          description: 'The URL of the YouTube video to transcribe',
        },
        apiUrl: {
          type: 'string',
          description: 'The OpenAI api compatible URL',
          nullable: true,
        },
        apiKey: {
          type: 'string',
          description: 'Api Key to call OpenAI compatible endpoint',
          nullable: true,
        },
        model: {
          type: 'string',
          description: 'The model to use for generating the summary',
        },
      },
      // `model` is non-optional in `Params` and is forwarded as-is to the
      // completion request, so it must be required alongside `url`.
      required: ['url', 'model'],
    },
    result: {
      type: 'object',
      properties: {
        transcript: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              text: { type: 'string' },
              duration: { type: 'number' },
              offset: { type: 'number' },
              lang: { type: 'string', nullable: true },
            },
            required: ['text', 'duration', 'offset'],
          },
        },
        message: { type: 'string' },
      },
      // `run` always returns both fields (`message` falls back to '').
      required: ['transcript', 'message'],
    },
  };

  /**
   * Fetches the transcript for `params.url` and requests a markdown summary
   * from the configured OpenAI-compatible endpoint.
   *
   * @param params - Video URL, model name and optional endpoint settings.
   * @returns The raw transcript plus the generated summary; the summary is an
   *   empty string when the response contains no message content.
   * @throws Whatever the transcript fetch or the completion request throws;
   *   completion errors are logged before being rethrown.
   */
  async run(params: Params): Promise<RunResult<Result>> {
    console.log(`transcripting ${params.url}`);

    // Get transcription
    const transcript = await YoutubeTranscript.fetchTranscript(params.url);

    // Single user message carrying the transcript and the summary instructions.
    const message: OpenAI.ChatCompletionUserMessageParam = {
      role: 'user',
      content: buildSummaryPrompt(transcript, params.url),
    };

    const url = resolveApiBaseUrl(params.apiUrl);
    console.log('url', url);
    const client = new OpenAI({
      baseURL: url,
      // Fall back to an empty key when none is supplied.
      apiKey: params.apiKey || '',
    });
    try {
      const response = await client.chat.completions.create({
        model: params.model,
        messages: [message],
        stream: false,
      });
      return {
        data: {
          transcript,
          message: response.choices[0]?.message?.content || '',
        },
      };
    } catch (error) {
      console.error('Error calling Ollama API:', error);
      throw error;
    }
  }
}

/**
 * Resolves the base URL handed to the OpenAI client: falls back to the local
 * default when `apiUrl` is missing or empty, drops trailing slashes, and makes
 * sure the result ends with exactly one `/v1` segment.
 */
function resolveApiBaseUrl(apiUrl?: string | null): string {
  // `||` (not `??`) on purpose: an empty string also selects the default.
  const base = (apiUrl || 'http://127.0.0.1:11435').replace(/\/+$/, '');
  return base.endsWith('/v1') ? base : `${base}/v1`;
}

/**
 * Builds the user prompt: the transcript as `offset;text` rows (offset floored
 * to an integer), the video URL, and the instructions for the summary.
 */
function buildSummaryPrompt(
  transcript: TranscriptResponse[],
  videoUrl: string,
): string {
  const rows = transcript
    .map((entry) => `${Math.floor(entry.offset)};${entry.text}`)
    .join('\n');
  // Leading and trailing empty entries reproduce the newlines that open and
  // close the prompt.
  return [
    '',
    "According to this transcription of a youtube video (which is in csv separated by ';'):",
    'offset;text',
    rows,
    '---------------',
    `The video URL is ${videoUrl}`,
    '---------------',
    'Write a detailed summary divided in sections along the video.',
    'Format the answer using markdown.',
    "Add markdown links referencing every section using this format https://www.youtube.com/watch?v={video_id}&t={offset} where 'offset' is a number and can be obtained from the transcription in csv format to generate the URL",
    '',
  ].join('\n');
}
4 changes: 4 additions & 0 deletions apps/shinkai-tool-youtube-transcript/tsconfig.app.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"extends": "./tsconfig.json",
"include": ["./src/**/*.ts"]
}
10 changes: 10 additions & 0 deletions apps/shinkai-tool-youtube-transcript/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {

},
"include": [
"./src/**/*.ts",
"webpack.config.ts"
],
}
14 changes: 14 additions & 0 deletions apps/shinkai-tool-youtube-transcript/tsconfig.spec.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"extends": "./tsconfig.json",
"compilerOptions": {
"outDir": "../../dist/out-tsc",
"module": "commonjs",
"types": ["jest", "node"]
},
"include": [
"jest.config.ts",
"src/**/*.test.ts",
"src/**/*.spec.ts",
"src/**/*.d.ts"
]
}
Loading

0 comments on commit d32ce05

Please sign in to comment.