
Commit 406b905

sabrenner authored and szegedi committed
chore: add some shared llm-type plugin utilities (#5109)
* wip utilities
* fixes
* add tests
* update comment
* update
* add math.random stubs to test hooks
* fix
1 parent 246b7f7 commit 406b905
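
For orientation before the per-file diffs: this commit extracts the prompt/completion normalization and sampling logic that the LangChain and OpenAI plugins each reimplemented into a single shared factory. A minimal sketch of the new surface — the path and config shape are taken from the diffs and spec below, and the values are illustrative:

const makeUtilities = require('./packages/dd-trace/src/plugins/util/llm')

// One utilities object per integration, keyed by the integration's name
// in the tracer config.
const utils = makeUtilities('langchain', {
  langchain: { spanCharLimit: 100, spanPromptCompletionSampleRate: 0.6 }
})

utils.normalize('a\nb\t' + 'c'.repeat(200)) // escapes \n and \t, truncates to 100 chars + '...'
utils.isPromptCompletionSampled()           // true for roughly 60% of calls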

File tree

7 files changed (+143, -72 lines)

packages/datadog-plugin-langchain/src/handlers/default.js

Lines changed: 6 additions & 30 deletions

@@ -1,16 +1,13 @@
 'use strict'
 
-const Sampler = require('../../../dd-trace/src/sampler')
+const makeUtilities = require('../../../dd-trace/src/plugins/util/llm')
 
-const RE_NEWLINE = /\n/g
-const RE_TAB = /\t/g
-
-// TODO: should probably refactor the OpenAI integration to use a shared LLMTracingPlugin base class
-// This logic isn't particular to LangChain
 class LangChainHandler {
-  constructor (config) {
-    this.config = config
-    this.sampler = new Sampler(config.spanPromptCompletionSampleRate)
+  constructor (tracerConfig) {
+    const utilities = makeUtilities('langchain', tracerConfig)
+
+    this.normalize = utilities.normalize
+    this.isPromptCompletionSampled = utilities.isPromptCompletionSampled
   }
 
   // no-op for default handler
@@ -27,27 +24,6 @@ class LangChainHandler {
 
   // no-op for default handler
   extractModel (instance) {}
-
-  normalize (text) {
-    if (!text) return
-    if (typeof text !== 'string' || !text || (typeof text === 'string' && text.length === 0)) return
-
-    const max = this.config.spanCharLimit
-
-    text = text
-      .replace(RE_NEWLINE, '\\n')
-      .replace(RE_TAB, '\\t')
-
-    if (text.length > max) {
-      return text.substring(0, max) + '...'
-    }
-
-    return text
-  }
-
-  isPromptCompletionSampled () {
-    return this.sampler.isSampled()
-  }
 }
 
 module.exports = LangChainHandler

packages/datadog-plugin-langchain/src/tracing.js

Lines changed: 5 additions & 6 deletions

@@ -26,13 +26,12 @@ class LangChainTracingPlugin extends TracingPlugin {
   constructor () {
     super(...arguments)
 
-    const langchainConfig = this._tracerConfig.langchain || {}
     this.handlers = {
-      chain: new LangChainChainHandler(langchainConfig),
-      chat_model: new LangChainChatModelHandler(langchainConfig),
-      llm: new LangChainLLMHandler(langchainConfig),
-      embedding: new LangChainEmbeddingHandler(langchainConfig),
-      default: new LangChainHandler(langchainConfig)
+      chain: new LangChainChainHandler(this._tracerConfig),
+      chat_model: new LangChainChatModelHandler(this._tracerConfig),
+      llm: new LangChainLLMHandler(this._tracerConfig),
+      embedding: new LangChainEmbeddingHandler(this._tracerConfig),
+      default: new LangChainHandler(this._tracerConfig)
     }
   }
 

packages/datadog-plugin-openai/src/tracing.js

Lines changed: 14 additions & 33 deletions

@@ -9,12 +9,9 @@ const Sampler = require('../../dd-trace/src/sampler')
 const { MEASURED } = require('../../../ext/tags')
 const { estimateTokens } = require('./token-estimator')
 
-// String#replaceAll unavailable on Node.js@v14 (dd-trace@<=v3)
-const RE_NEWLINE = /\n/g
-const RE_TAB = /\t/g
+const makeUtilities = require('../../dd-trace/src/plugins/util/llm')
 
-// TODO: In the future we should refactor config.js to make it requirable
-let MAX_TEXT_LEN = 128
+let normalize
 
 function safeRequire (path) {
   try {
@@ -44,9 +41,11 @@ class OpenAiTracingPlugin extends TracingPlugin {
 
     this.sampler = new Sampler(0.1) // default 10% log sampling
 
-    // hoist the max length env var to avoid making all of these functions a class method
+    // hoist the normalize function to avoid making all of these functions a class method
    if (this._tracerConfig) {
-      MAX_TEXT_LEN = this._tracerConfig.openaiSpanCharLimit
+      const utilities = makeUtilities('openai', this._tracerConfig)
+
+      normalize = utilities.normalize
    }
  }
 
@@ -116,7 +115,7 @@ class OpenAiTracingPlugin extends TracingPlugin {
     // createEdit, createEmbedding, createModeration
     if (payload.input) {
       const normalized = normalizeStringOrTokenArray(payload.input, false)
-      tags['openai.request.input'] = truncateText(normalized)
+      tags['openai.request.input'] = normalize(normalized)
       openaiStore.input = normalized
     }
 
@@ -594,7 +593,7 @@ function commonImageResponseExtraction (tags, body) {
   for (let i = 0; i < body.data.length; i++) {
     const image = body.data[i]
     // exactly one of these two options is provided
-    tags[`openai.response.images.${i}.url`] = truncateText(image.url)
+    tags[`openai.response.images.${i}.url`] = normalize(image.url)
     tags[`openai.response.images.${i}.b64_json`] = image.b64_json && 'returned'
   }
 }
@@ -731,14 +730,14 @@ function commonCreateResponseExtraction (tags, body, openaiStore, methodName) {
 
     tags[`openai.response.choices.${choiceIdx}.finish_reason`] = choice.finish_reason
     tags[`openai.response.choices.${choiceIdx}.logprobs`] = specifiesLogProb ? 'returned' : undefined
-    tags[`openai.response.choices.${choiceIdx}.text`] = truncateText(choice.text)
+    tags[`openai.response.choices.${choiceIdx}.text`] = normalize(choice.text)
 
     // createChatCompletion only
     const message = choice.message || choice.delta // delta for streamed responses
     if (message) {
       tags[`openai.response.choices.${choiceIdx}.message.role`] = message.role
-      tags[`openai.response.choices.${choiceIdx}.message.content`] = truncateText(message.content)
-      tags[`openai.response.choices.${choiceIdx}.message.name`] = truncateText(message.name)
+      tags[`openai.response.choices.${choiceIdx}.message.content`] = normalize(message.content)
+      tags[`openai.response.choices.${choiceIdx}.message.name`] = normalize(message.name)
       if (message.tool_calls) {
         const toolCalls = message.tool_calls
         for (let toolIdx = 0; toolIdx < toolCalls.length; toolIdx++) {
@@ -795,24 +794,6 @@ function truncateApiKey (apiKey) {
   return apiKey && `sk-...${apiKey.substr(apiKey.length - 4)}`
 }
 
-/**
- * for cleaning up prompt and response
- */
-function truncateText (text) {
-  if (!text) return
-  if (typeof text !== 'string' || !text || (typeof text === 'string' && text.length === 0)) return
-
-  text = text
-    .replace(RE_NEWLINE, '\\n')
-    .replace(RE_TAB, '\\t')
-
-  if (text.length > MAX_TEXT_LEN) {
-    return text.substring(0, MAX_TEXT_LEN) + '...'
-  }
-
-  return text
-}
-
 function tagChatCompletionRequestContent (contents, messageIdx, tags) {
   if (typeof contents === 'string') {
     tags[`openai.request.messages.${messageIdx}.content`] = contents
@@ -824,10 +805,10 @@ function tagChatCompletionRequestContent (contents, messageIdx, tags) {
       const type = content.type
       tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.type`] = content.type
       if (type === 'text') {
-        tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.text`] = truncateText(content.text)
+        tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.text`] = normalize(content.text)
       } else if (type === 'image_url') {
         tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.image_url.url`] =
-          truncateText(content.image_url.url)
+          normalize(content.image_url.url)
       }
       // unsupported type otherwise, won't be tagged
     }
@@ -1004,7 +985,7 @@ function normalizeStringOrTokenArray (input, truncate) {
   const normalized = Array.isArray(input)
     ? `[${input.join(', ')}]` // "[1, 2, 999]"
     : input // "foo"
-  return truncate ? truncateText(normalized) : normalized
+  return truncate ? normalize(normalized) : normalized
 }
 
 function defensiveArrayLength (maybeArray) {

packages/dd-trace/src/config.js

Lines changed: 2 additions & 2 deletions

@@ -522,7 +522,7 @@
     this._setValue(defaults, 'inferredProxyServicesEnabled', false)
     this._setValue(defaults, 'memcachedCommandEnabled', false)
     this._setValue(defaults, 'openAiLogsEnabled', false)
-    this._setValue(defaults, 'openaiSpanCharLimit', 128)
+    this._setValue(defaults, 'openai.spanCharLimit', 128)
     this._setValue(defaults, 'peerServiceMapping', {})
     this._setValue(defaults, 'plugins', true)
     this._setValue(defaults, 'port', '8126')
@@ -805,7 +805,7 @@
     // Requires an accompanying DD_APM_OBFUSCATION_MEMCACHED_KEEP_COMMAND=true in the agent
     this._setBoolean(env, 'memcachedCommandEnabled', DD_TRACE_MEMCACHED_COMMAND_ENABLED)
     this._setBoolean(env, 'openAiLogsEnabled', DD_OPENAI_LOGS_ENABLED)
-    this._setValue(env, 'openaiSpanCharLimit', maybeInt(DD_OPENAI_SPAN_CHAR_LIMIT))
+    this._setValue(env, 'openai.spanCharLimit', maybeInt(DD_OPENAI_SPAN_CHAR_LIMIT))
     this._envUnprocessed.openaiSpanCharLimit = DD_OPENAI_SPAN_CHAR_LIMIT
     if (DD_TRACE_PEER_SERVICE_MAPPING) {
       this._setValue(env, 'peerServiceMapping', fromEntries(
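
The rename from the flat openaiSpanCharLimit key to the nested openai.spanCharLimit is what lets the shared factory locate the limit generically: it reads tracerConfig[integrationName]. A small sketch under that assumption (the 16-character limit is illustrative):

const makeUtilities = require('./packages/dd-trace/src/plugins/util/llm')

// The factory looks up tracerConfig['openai'], so the nested key is required.
const { normalize } = makeUtilities('openai', { openai: { spanCharLimit: 16 } })

normalize('abcdefghijklmnopqrstuvwxyz') // => 'abcdefghijklmnop...'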
packages/dd-trace/src/plugins/util/llm.js

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+const Sampler = require('../../sampler')
+
+const RE_NEWLINE = /\n/g
+const RE_TAB = /\t/g
+
+function normalize (text, limit = 128) {
+  if (!text) return
+  if (typeof text !== 'string' || !text || (typeof text === 'string' && text.length === 0)) return
+
+  text = text
+    .replace(RE_NEWLINE, '\\n')
+    .replace(RE_TAB, '\\t')
+
+  if (text.length > limit) {
+    return text.substring(0, limit) + '...'
+  }
+
+  return text
+}
+
+function isPromptCompletionSampled (sampler) {
+  return sampler.isSampled()
+}
+
+module.exports = function (integrationName, tracerConfig) {
+  const integrationConfig = tracerConfig[integrationName] || {}
+  const { spanCharLimit, spanPromptCompletionSampleRate } = integrationConfig
+
+  const sampler = new Sampler(spanPromptCompletionSampleRate ?? 1.0)
+
+  return {
+    normalize: str => normalize(str, spanCharLimit),
+    isPromptCompletionSampled: () => isPromptCompletionSampled(sampler)
+  }
+}
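
A quick usage sketch of the new module; the behavior follows directly from the code above, and the require path assumes a local dd-trace checkout:

const makeUtilities = require('./packages/dd-trace/src/plugins/util/llm')

// No integration config: the limit defaults to 128 and the sample rate to 1.0.
const utils = makeUtilities('langchain', {})

utils.normalize('a\nb\tc')        // => 'a\\nb\\tc' — newlines and tabs escaped
utils.normalize('x'.repeat(200))  // => 128 'x' characters followed by '...'
utils.normalize('')               // => undefined — empty or non-string input is dropped
utils.isPromptCompletionSampled() // => true, always, at the default rate of 1.0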

packages/dd-trace/test/config.spec.js

Lines changed: 1 addition & 1 deletion

@@ -351,7 +351,7 @@ describe('Config', () => {
       { name: 'logInjection', value: false, origin: 'default' },
       { name: 'lookup', value: undefined, origin: 'default' },
       { name: 'openAiLogsEnabled', value: false, origin: 'default' },
-      { name: 'openaiSpanCharLimit', value: 128, origin: 'default' },
+      { name: 'openai.spanCharLimit', value: 128, origin: 'default' },
       { name: 'peerServiceMapping', value: {}, origin: 'default' },
       { name: 'plugins', value: true, origin: 'default' },
       { name: 'port', value: '8126', origin: 'default' },
packages/dd-trace/test/plugins/util/llm.spec.js

Lines changed: 80 additions & 0 deletions

@@ -0,0 +1,80 @@
+'use strict'
+
+require('../../setup/tap')
+
+const makeUtilities = require('../../../src/plugins/util/llm')
+
+describe('llm utils', () => {
+  let utils
+
+  describe('with default configuration', () => {
+    beforeEach(() => {
+      utils = makeUtilities('langchain', {})
+    })
+
+    it('should normalize text to 128 characters', () => {
+      const text = 'a'.repeat(256)
+      expect(utils.normalize(text)).to.equal('a'.repeat(128) + '...')
+    })
+
+    it('should return undefined for empty text', () => {
+      expect(utils.normalize('')).to.be.undefined
+    })
+
+    it('should return undefined for a non-string', () => {
+      expect(utils.normalize(42)).to.be.undefined
+    })
+
+    it('should replace special characters', () => {
+      expect(utils.normalize('a\nb\tc')).to.equal('a\\nb\\tc')
+    })
+
+    it('should always sample prompt completion', () => {
+      expect(utils.isPromptCompletionSampled()).to.be.true
+    })
+  })
+
+  describe('with custom configuration available', () => {
+    beforeEach(() => {
+      utils = makeUtilities('langchain', {
+        langchain: {
+          spanCharLimit: 100,
+          spanPromptCompletionSampleRate: 0.6
+        }
+      })
+    })
+
+    it('should normalize text to 100 characters', () => {
+      const text = 'a'.repeat(256)
+      expect(utils.normalize(text)).to.equal('a'.repeat(100) + '...')
+    })
+
+    describe('with a random value greater than 0.6', () => {
+      beforeEach(() => {
+        sinon.stub(Math, 'random').returns(0.7)
+      })
+
+      afterEach(() => {
+        Math.random.restore()
+      })
+
+      it('should not sample prompt completion', () => {
+        expect(utils.isPromptCompletionSampled()).to.be.false
+      })
+    })
+
+    describe('with a random value less than 0.6', () => {
+      beforeEach(() => {
+        sinon.stub(Math, 'random').returns(0.5)
+      })
+
+      afterEach(() => {
+        Math.random.restore()
+      })
+
+      it('should sample prompt completion', () => {
+        expect(utils.isPromptCompletionSampled()).to.be.true
+      })
+    })
+  })
+})
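
The Math.random stubs called out in the commit message are what make the sampling assertions deterministic. This leans on an assumption about Sampler's internals — that isSampled() draws from Math.random and compares the draw against the configured rate — which the stubbed values above only make sense under:

const sinon = require('sinon')

// Assumed decision rule: Math.random() < rate => sampled.
sinon.stub(Math, 'random').returns(0.7)
console.log(Math.random() < 0.6) // false — the 'not sampled' case above
Math.random.restore()

sinon.stub(Math, 'random').returns(0.5)
console.log(Math.random() < 0.6) // true — the 'sampled' case
Math.random.restore()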
