Skip to content

Commit 347ce41

Browse files
authored
Llm (#7)
* tutorial for llm usage
1 parent f56434e commit 347ce41

File tree

23 files changed

+757
-1
lines changed

23 files changed

+757
-1
lines changed

notebooks/toc.edn

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,11 @@
6161
:cmd "clj render.clj"
6262
:tags [:nlp :ml]}
6363

64+
{:created "2024-12-12"
65+
:updated "2024-12-12"
66+
:title "Using LLMs from Clojure"
67+
:url "projects/ml/llm/index.html"
68+
:source-path "projects/ml/llm"
69+
:cmd "clj render.clj"
70+
:tags [:nlp :ml]}
6471
]
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
ARG BASE_IMAGE=temurin-21-tools-deps-jammy
2+
FROM clojure:${BASE_IMAGE}
3+
4+
ARG USERNAME=vscode
5+
ARG USER_UID=1000
6+
ARG USER_GID=$USER_UID
7+
8+
# Create the user
9+
RUN groupadd --gid $USER_GID $USERNAME \
10+
&& useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
11+
#
12+
# [Optional] Add sudo support. Omit if you don't need to install software after connecting.
13+
&& apt-get update \
14+
&& apt-get install -y sudo \
15+
&& echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
16+
&& chmod 0440 /etc/sudoers.d/$USERNAME
17+
18+
19+
# [Optional] Set the default user. Omit if you want to keep the default as root.
20+
USER $USERNAME
21+
SHELL ["/bin/bash", "-ec"]
22+
ENTRYPOINT ["bash"]
23+
24+
25+
# Prepare clojure tools
26+
RUN clojure -Ttools list && \
27+
clojure -Ttools install io.github.seancorfield/clj-new '{:git/tag "v1.2.404" :git/sha "d4a6508"}' :as clj-new && \
28+
clojure -Ttools install-latest :lib io.github.seancorfield/deps-new :as new && \
29+
clojure -Ttools list
30+
31+
# Install OS-level dependencies: lsb-release, Maven, and the X11 client libraries
# (libxrender1, libxtst6, libxi6).
RUN sudo apt-get update && \
32+
sudo apt-get install -y lsb-release maven libxrender1 libxtst6 libxi6
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
2+
// README at: https://github.com/scicloj/devcontainer-templates/tree/main/src/basecloj
3+
{
4+
"name": "Base clojure dev env",
5+
"build": {
6+
"dockerfile": "Dockerfile",
7+
"args": {
8+
"BASE_IMAGE": "temurin-21-tools-deps-jammy"
9+
}
10+
},
11+
"features": {
12+
"ghcr.io/devcontainers/features/git:1": {},
13+
"ghcr.io/rocker-org/devcontainer-features/quarto-cli:1": {}
14+
15+
},
16+
"customizations": {
17+
"vscode": {
18+
"extensions": [
19+
"betterthantomorrow.calva",
20+
"vscjava.vscode-java-pack"
21+
]
22+
}
23+
}
24+
}

projects/ml/llm/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
target
2+
secrets.edn
3+
opinrank+review+dataset.zip
4+
open_ai_secret.txt
202 KB
Binary file not shown.

projects/ml/llm/build.clj

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
(ns build
2+
;; Exclude clojure.core/compile: this namespace defines its own `compile` task,
;; and without the exclusion loading the file emits a "being replaced by" warning.
(:refer-clojure :exclude [compile])
(:require [clojure.tools.build.api :as b]))
3+
4+
(def lib 'my/lib1)
5+
(def version (format "1.2.%s" (b/git-count-revs nil)))
6+
(def class-dir "target/classes")
7+
(def jar-file (format "target/%s-%s.jar" (name lib) version))
8+
9+
;; delay to defer side effects (artifact downloads)
10+
(def basis (delay (b/create-basis {:project "deps.edn"})))
11+
12+
(defn clean [_]
13+
(b/delete {:path "target"}))
14+
15+
(defn compile [_]
16+
(b/javac {:src-dirs ["java"]
17+
:class-dir class-dir
18+
:basis @basis
19+
:javac-opts ["--release" "11"]}))
20+
21+
(defn jar [_]
22+
(compile nil)
23+
(b/write-pom {:class-dir class-dir
24+
:lib lib
25+
:version version
26+
:basis @basis
27+
:src-dirs ["src"]})
28+
(b/copy-dir {:src-dirs ["src" "resources"]
29+
:target-dir class-dir})
30+
(b/jar {:class-dir class-dir
31+
:jar-file jar-file}))

projects/ml/llm/clay.edn

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{:remote-repo {:git-url "https://github.com/scicloj/clojure-data-tutorials"
2+
:branch "main"}
3+
:base-target-path "temp"}

projects/ml/llm/config.edn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{:openai {:temperature 0 :model :gpt-4}}

projects/ml/llm/deps.edn

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{:deps {org.clojure/clojure {:mvn/version "1.12.0"}
2+
org.scicloj/noj {:mvn/version "2-beta2"}
3+
;clj-http/clj-http {:mvn/version "3.13.0"}
4+
cheshire/cheshire {:mvn/version "5.13.0"}
5+
io.github.zmedelis/bosquet {:mvn/version "2024.08.08"}
6+
org.apache.opennlp/opennlp-tools {:mvn/version "1.9.4"}
7+
dev.langchain4j/langchain4j-document-parser-apache-pdfbox {:mvn/version "0.36.2"}
8+
dev.langchain4j/langchain4j {:mvn/version "0.36.2"}
9+
dev.langchain4j/langchain4j-open-ai {:mvn/version "0.36.2"}
10+
dev.langchain4j/langchain4j-embeddings-all-minilm-l6-v2 {:mvn/version "0.36.2"}
11+
virgil/virgil {:mvn/version "0.3.1"}
12+
13+
}
14+
:paths ["src" "notebooks" "target/classes"]
15+
:aliases
16+
{:build {:deps {io.github.clojure/tools.build {:mvn/version "0.10.6"}}
17+
18+
:ns-default build}
19+
}
20+
}

projects/ml/llm/notebooks/llms.clj

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
(ns llms
2+
(:require
3+
[org.httpkit.client :as hk-client]
4+
[cheshire.core :as json]))
5+
6+
;; # Using Large Language Models from Clojure
7+
;; LLMs often come as APIs, as they require computing power (GPUs), which most users do not have
8+
;; locally.
9+
;; OpenAI, for example, offers its models behind a (paid) API. In the following we will see three
10+
;; different ways to use the GPT-4 model from OpenAI.
11+
12+
;; get the openai API key either from environemnt or a specific file
13+
(def open-ai-key
14+
(or (System/getenv "OPEN_AI_KEY")
15+
;; Trim the file content: a trailing newline would otherwise end up inside the "Bearer ..." header.
16+
(.trim ^String (slurp "open_ai_secret.txt"))
17+
))
18+
19+
;## Use OpenAI API directly
20+
;; OpenAI offers a rather simple API, text-in text-out for "chatting" with GPT
21+
;;
22+
;; The following shows how to ask a simple question and get the answer using an HTTP library,
23+
;; [http-kit](https://github.com/http-kit/http-kit). The API is based on JSON, so it is easy to use
24+
;; from Clojure
25+
26+
27+
(->
28+
@(hk-client/post "https://api.openai.com/v1/chat/completions"
29+
{:headers
30+
{"content-type" "application/json"
31+
"authorization" (format "Bearer %s" open-ai-key)}
32+
:body
33+
(json/encode
34+
{:model "gpt-4"
35+
:messages [{:role "system",
36+
:content "You are a helpful assistant."},
37+
{:role "user",
38+
:content "What is Clojure ?"}]})})
39+
:body
40+
(json/decode keyword))
41+
42+
; ## Use Bosquet
43+
; [bosquet](https://github.com/zmedelis/bosquet) abstracts some of the concepts of LLMs
44+
; into a higher-level API. It also has notions of "memory" and "tools"
45+
; and offers features found, for example, in Python's "LangChain".
46+
47+
;; Bosquet wants the API key in a config file
48+
(spit "secrets.edn"
49+
(pr-str
50+
{:openai {:api-key open-ai-key}}))
51+
52+
53+
(require '[bosquet.llm.generator :refer [generate llm]])
54+
55+
(generate
56+
[[:user "What is Clojure"]
57+
[:assistant (llm :openai
58+
:llm/model-params {:model :gpt-4
59+
})]])
60+
61+
62+
;## Use langchain4j
63+
;; We can also use LLMs via Java interop and the library
64+
;; [langchain4j](https://github.com/langchain4j/langchain4j), which aims
65+
;; to be a copy of the Python LangChain, and offers support or
66+
;; building blocks for several concepts around LLMs (models, vector stores, document loaders).
67+
;; We will see it used in the following chapters.
68+
69+
(import '[dev.langchain4j.model.openai OpenAiChatModel OpenAiChatModelName])
70+
71+
;; For now just the simplest call to a GPT model, asking it the same question:
72+
(def open-ai-chat-model
73+
(.. (OpenAiChatModel/builder)
74+
(apiKey open-ai-key)
75+
(modelName OpenAiChatModelName/GPT_4)
76+
build))
77+
78+
79+
(.generate open-ai-chat-model "What is Clojure ?")

0 commit comments

Comments
 (0)