Skip to content

Commit

Permalink
Merge pull request #15 from nalbam/main
Browse files Browse the repository at this point in the history
chore: Update KNOWLEDGE_BASE
  • Loading branch information
nalbam authored Aug 19, 2024
2 parents c8df3a3 + f3d0405 commit cd2fc8e
Show file tree
Hide file tree
Showing 13 changed files with 652 additions and 319 deletions.
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ SLACK_SIGNING_SECRET="xxxx"

DYNAMODB_TABLE_NAME="gurumi-ai-bot-context"

KNOWLEDGE_BASE_ID="None"

MODEL_ID_TEXT="anthropic.claude-3-5-sonnet-20240620-v1:0"
MODEL_ID_IMAGE="stability.stable-diffusion-xl-v1"

Expand Down
48 changes: 48 additions & 0 deletions .github/workflows/4-sync-notion.yml.stop
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Exports a Notion page tree to Markdown with bin/notion_exporter.py and
# mirrors the resulting build/ directory to the S3 location in AWS_DEST_PATH.
name: sync-notion

on:
  # push:
  #   branches:
  #     - main

  schedule:
    # GitHub evaluates cron expressions in UTC: 20:00, Sunday through Thursday.
    - cron: "0 20 * * 0-4"

env:
  NOTION_PAGE_NAME: "notion"
  NOTION_PAGE_ID: "0c7c08203a9b4435a4ca07b6454151d7"

  AWS_DEST_PATH: ${{ vars.AWS_DEST_PATH }}

jobs:
  sync:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout 🛎️
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python 3.9 🐍
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is always read as a string, never a float.
          python-version: "3.9"

      - name: Setup Dependencies
        run: pip install python-notion-exporter

      - name: Run Notion Exporter
        env:
          NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }}
          NOTION_FILE_TOKEN: ${{ secrets.NOTION_FILE_TOKEN }}
        run: |
          python bin/notion_exporter.py

      - name: Sync to AWS S3 Data Source
        # AWS_DEST_PATH comes from the workflow-level env block. It is read as
        # a quoted shell variable instead of being expanded into the script
        # text, so spaces or shell metacharacters in the value cannot alter
        # the command.
        run: |
          aws s3 sync build/ "${AWS_DEST_PATH}" --delete
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: "us-east-1"
38 changes: 38 additions & 0 deletions .github/workflows/6-start-ingestion.yml.stop
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Starts an AWS Bedrock knowledge-base ingestion job for the configured
# knowledge base and data source.
name: start-ingestion-job

on:
  # push:
  #   branches:
  #     - main

  schedule:
    # GitHub evaluates cron expressions in UTC: 21:00, Sunday through Thursday.
    - cron: "0 21 * * 0-4"

env:
  KNOWLEDGE_BASE_ID: ${{ vars.KNOWLEDGE_BASE_ID }}
  DATA_SOURCE_ID: ${{ vars.DATA_SOURCE_ID }}

jobs:
  ingestion:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout 🛎️
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # NOTE(review): no later step in this job invokes Python; this step
      # looks removable — confirm before deleting.
      - name: Setup Python 3.9 🐍
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is always read as a string, never a float.
          python-version: "3.9"

      - name: Sync to AWS Bedrock Knowledge Base
        # Both IDs come from the workflow-level env block. They are read as
        # quoted shell variables instead of being expanded into the script
        # text, so unexpected characters in a value cannot alter the command.
        run: |
          aws bedrock-agent start-ingestion-job \
            --knowledge-base-id "${KNOWLEDGE_BASE_ID}" \
            --data-source-id "${DATA_SOURCE_ID}"
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: "us-east-1"
6 changes: 2 additions & 4 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ env:
ALLOWED_CHANNEL_IDS: ${{ vars.ALLOWED_CHANNEL_IDS }}
BOT_CURSOR: ${{ vars.BOT_CURSOR }}
DYNAMODB_TABLE_NAME: ${{ vars.DYNAMODB_TABLE_NAME }}
ENABLE_IMAGE: ${{ vars.ENABLE_IMAGE }}
KB_ID: ${{ vars.KB_ID }}
KNOWLEDGE_BASE_ID: ${{ vars.KNOWLEDGE_BASE_ID }}
MODEL_ID_IMAGE: ${{ vars.MODEL_ID_IMAGE }}
MODEL_ID_TEXT: ${{ vars.MODEL_ID_TEXT }}
SYSTEM_MESSAGE: ${{ vars.SYSTEM_MESSAGE }}
Expand Down Expand Up @@ -52,8 +51,7 @@ jobs:
echo "ALLOWED_CHANNEL_IDS=${ALLOWED_CHANNEL_IDS}" >> .env
echo "BOT_CURSOR=${BOT_CURSOR}" >> .env
echo "DYNAMODB_TABLE_NAME=${DYNAMODB_TABLE_NAME}" >> .env
echo "ENABLE_IMAGE=${ENABLE_IMAGE}" >> .env
echo "KB_ID=${KB_ID}" >> .env
echo "KNOWLEDGE_BASE_ID=${KNOWLEDGE_BASE_ID}" >> .env
echo "MODEL_ID_IMAGE=${MODEL_ID_IMAGE}" >> .env
echo "MODEL_ID_TEXT=${MODEL_ID_TEXT}" >> .env
echo "SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN}" >> .env
Expand Down
69 changes: 40 additions & 29 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,33 +1,44 @@
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# NodeJS dependencies
node_modules/

# Serverless runtime cache
.serverless_sdk

# Serverless directories
.serverless

# Local configuration
.env
# Windows image file caches
Thumbs.db
ehthumbs.db

# Folder config file
Desktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows shortcuts
*.lnk

# Mac
.DS_Store

# JetBrains
.idea/
*.iml

# Eclipse
.settings/
.metadata/

# Build
target/
build/
dist/

# Temp
*.pid
*.log
*.tmp

# python
venv
*.pyc
staticfiles
.env
db.sqlite3
__pycache__

# node
node_modules
5 changes: 5 additions & 0 deletions bin/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Environment variables read by notion_exporter.py.
# No spaces around "=" so the file can also be sourced by a POSIX shell.
NOTION_TOKEN="v02%3Auser_token_or_cookies"
NOTION_FILE_TOKEN="v02%3Afile_token"

NOTION_PAGE_ID="0c7c08203a9b4435a4ca07b6454151d7"
NOTION_PAGE_NAME="notion"
19 changes: 19 additions & 0 deletions bin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# bin

## Install

```bash
$ brew install python@3.9

$ python -m pip install --upgrade -r requirements.txt
```

## Test

```bash
python notion_exporter.py
```

## References

* <https://github.com/Strvm/python-notion-exporter>
Empty file added bin/__init__.py
Empty file.
26 changes: 26 additions & 0 deletions bin/notion_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os

from python_notion_exporter import NotionExporter, ExportType, ViewExportType


# Notion credentials. Neither has a default, so each is None when its
# environment variable is unset.
NOTION_TOKEN = os.getenv("NOTION_TOKEN")
NOTION_FILE_TOKEN = os.getenv("NOTION_FILE_TOKEN")

# Page to export; the literals below are used when the variables are unset.
NOTION_PAGE_ID = os.getenv("NOTION_PAGE_ID", "0c7c08203a9b4435a4ca07b6454151d7")
NOTION_PAGE_NAME = os.getenv("NOTION_PAGE_NAME", "demo")


if __name__ == "__main__":
    # Fail fast with a readable message when a credential is unset or empty,
    # instead of handing None/"" to the exporter.
    # NOTE(review): assumes the exporter needs both tokens even with
    # include_files=False — confirm against python-notion-exporter.
    missing = [
        name
        for name, value in (
            ("NOTION_TOKEN", NOTION_TOKEN),
            ("NOTION_FILE_TOKEN", NOTION_FILE_TOKEN),
        )
        if not value
    ]
    if missing:
        raise SystemExit(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    # Export the configured page as Markdown into ./build (relative to the
    # current working directory).
    exporter = NotionExporter(
        token_v2=NOTION_TOKEN,
        file_token=NOTION_FILE_TOKEN,
        pages={NOTION_PAGE_NAME: NOTION_PAGE_ID},
        export_directory="build",
        flatten_export_file_tree=True,
        export_type=ExportType.MARKDOWN,
        current_view_export_type=ViewExportType.CURRENT_VIEW,
        include_files=False,
        recursive=True,
        export_name=NOTION_PAGE_NAME,
    )
    exporter.process()
Loading

0 comments on commit cd2fc8e

Please sign in to comment.