Skip to content

[Firebase AI] Add support for thought summaries #15096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 33 additions & 16 deletions FirebaseAI/Sources/Chat.swift
Original file line number Diff line number Diff line change
Expand Up @@ -149,29 +149,46 @@ public final class Chat: Sendable {
/// Combines the parts of `chunks` into a single `ModelContent` with the role `"model"`.
///
/// Consecutive text parts that carry no thought signature are concatenated. Thought summaries
/// and regular text are buffered separately so that they never end up in the same part. Every
/// other part is appended unchanged, in order.
///
/// - Parameter chunks: The content values whose parts should be aggregated.
/// - Returns: A `ModelContent` containing the aggregated parts.
private func aggregatedChunks(_ chunks: [ModelContent]) -> ModelContent {
  var parts: [any Part] = []
  var combinedText = ""
  var combinedThoughts = ""

  // Appends any buffered text as a part and resets the buffers. The loop below flushes before
  // switching between thoughts and regular text, so the two buffers are never both non-empty.
  func flush() {
    if !combinedThoughts.isEmpty {
      // Keep the thought flag on the combined part; without it the summary would be emitted as
      // regular text.
      parts.append(TextPart(combinedThoughts, isThought: true, thoughtSignature: nil))
      combinedThoughts = ""
    }
    if !combinedText.isEmpty {
      parts.append(TextPart(combinedText))
      combinedText = ""
    }
  }

  // Loop through all the parts, aggregating the text.
  for part in chunks.flatMap({ $0.parts }) {
    // Only text parts without a thought signature may be combined.
    if let textPart = part as? TextPart, part.thoughtSignature == nil {
      // Thought summaries must not be combined with regular text.
      if textPart.isThought {
        // If we were combining regular text, flush it before handling "thoughts".
        if !combinedText.isEmpty {
          flush()
        }
        combinedThoughts += textPart.text
      } else {
        // If we were combining "thoughts", flush them before handling regular text.
        if !combinedThoughts.isEmpty {
          flush()
        }
        combinedText += textPart.text
      }
    } else {
      // This part is not combinable (it is not text, or it carries a thought signature); flush
      // any pending text first so the original ordering is preserved.
      flush()
      parts.append(part)
    }
  }

  // Flush any remaining text.
  flush()

  return ModelContent(role: "model", parts: parts)
}
Expand Down
19 changes: 10 additions & 9 deletions FirebaseAI/Sources/GenerateContentResponse.swift
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,10 @@ public struct GenerateContentResponse: Sendable {
return nil
}
let textValues: [String] = candidate.content.parts.compactMap { part in
switch part {
case let textPart as TextPart:
return textPart.text
default:
guard let textPart = part as? TextPart, !part.isThought else {
return nil
}
return textPart.text
}
guard textValues.count > 0 else {
AILog.error(
Expand All @@ -89,12 +87,10 @@ public struct GenerateContentResponse: Sendable {
return []
}
return candidate.content.parts.compactMap { part in
switch part {
case let functionCallPart as FunctionCallPart:
return functionCallPart
default:
guard let functionCallPart = part as? FunctionCallPart, !part.isThought else {
return nil
}
return functionCallPart
}
}

Expand All @@ -107,7 +103,12 @@ public struct GenerateContentResponse: Sendable {
""")
return []
}
return candidate.content.parts.compactMap { $0 as? InlineDataPart }
return candidate.content.parts.compactMap { part in
guard let inlineDataPart = part as? InlineDataPart, !part.isThought else {
return nil
}
return inlineDataPart
}
}

/// Initializer for SwiftUI previews or tests.
Expand Down
127 changes: 96 additions & 31 deletions FirebaseAI/Sources/ModelContent.swift
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,34 @@ extension [ModelContent] {
}
}

/// A type describing data in media formats interpretable by an AI model. Each generative AI
/// request or response contains an `Array` of ``ModelContent``s, and each ``ModelContent`` value
/// may comprise multiple heterogeneous ``Part``s.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ModelContent: Equatable, Sendable {
/// Internal representation of a single content part: a payload plus optional thought metadata.
struct InternalPart: Equatable, Sendable {
  /// The payload of a part. Each part holds exactly one of these kinds of data.
  enum OneOfData: Equatable, Sendable {
    case text(String)
    case inlineData(InlineData)
    case fileData(FileData)
    case functionCall(FunctionCall)
    case functionResponse(FunctionResponse)
  }

  /// The payload carried by this part.
  let data: OneOfData

  /// Whether this part is a thought; `nil` when the flag is not specified.
  let isThought: Bool?

  /// The thought signature attached to this part, if any.
  let thoughtSignature: String?

  /// Creates a part from its payload and thought metadata.
  ///
  /// - Parameters:
  ///   - data: The payload carried by the part.
  ///   - isThought: Whether the part is a thought, or `nil` if unspecified.
  ///   - thoughtSignature: The thought signature for the part, or `nil` if there is none.
  init(_ data: OneOfData, isThought: Bool?, thoughtSignature: String?) {
    self.data = data
    self.isThought = isThought
    self.thoughtSignature = thoughtSignature
  }
}

/// A type describing data in media formats interpretable by an AI model. Each generative AI
/// request or response contains an `Array` of ``ModelContent``s, and each ``ModelContent`` value
/// may comprise multiple heterogeneous ``Part``s.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ModelContent: Equatable, Sendable {
/// The role of the entity creating the ``ModelContent``. For user-generated client requests,
/// for example, the role is `user`.
public let role: String?
Expand All @@ -52,17 +67,29 @@ public struct ModelContent: Equatable, Sendable {
public var parts: [any Part] {
var convertedParts = [any Part]()
for part in internalParts {
switch part {
switch part.data {
case let .text(text):
convertedParts.append(TextPart(text))
case let .inlineData(mimetype, data):
convertedParts.append(InlineDataPart(data: data, mimeType: mimetype))
case let .fileData(mimetype, uri):
convertedParts.append(FileDataPart(uri: uri, mimeType: mimetype))
convertedParts.append(
TextPart(text, isThought: part.isThought, thoughtSignature: part.thoughtSignature)
)
case let .inlineData(inlineData):
convertedParts.append(InlineDataPart(
inlineData, isThought: part.isThought, thoughtSignature: part.thoughtSignature
))
case let .fileData(fileData):
convertedParts.append(FileDataPart(
fileData,
isThought: part.isThought,
thoughtSignature: part.thoughtSignature
))
case let .functionCall(functionCall):
convertedParts.append(FunctionCallPart(functionCall))
convertedParts.append(FunctionCallPart(
functionCall, isThought: part.isThought, thoughtSignature: part.thoughtSignature
))
case let .functionResponse(functionResponse):
convertedParts.append(FunctionResponsePart(functionResponse))
convertedParts.append(FunctionResponsePart(
functionResponse, isThought: part.isThought, thoughtSignature: part.thoughtSignature
))
}
}
return convertedParts
Expand All @@ -78,17 +105,35 @@ public struct ModelContent: Equatable, Sendable {
for part in parts {
switch part {
case let textPart as TextPart:
convertedParts.append(.text(textPart.text))
convertedParts.append(InternalPart(
.text(textPart.text),
isThought: textPart._isThought,
thoughtSignature: textPart.thoughtSignature
))
case let inlineDataPart as InlineDataPart:
let inlineData = inlineDataPart.inlineData
convertedParts.append(.inlineData(mimetype: inlineData.mimeType, inlineData.data))
convertedParts.append(InternalPart(
.inlineData(inlineDataPart.inlineData),
isThought: inlineDataPart._isThought,
thoughtSignature: inlineDataPart.thoughtSignature
))
case let fileDataPart as FileDataPart:
let fileData = fileDataPart.fileData
convertedParts.append(.fileData(mimetype: fileData.mimeType, uri: fileData.fileURI))
convertedParts.append(InternalPart(
.fileData(fileDataPart.fileData),
isThought: fileDataPart._isThought,
thoughtSignature: fileDataPart.thoughtSignature
))
case let functionCallPart as FunctionCallPart:
convertedParts.append(.functionCall(functionCallPart.functionCall))
convertedParts.append(InternalPart(
.functionCall(functionCallPart.functionCall),
isThought: functionCallPart._isThought,
thoughtSignature: functionCallPart.thoughtSignature
))
case let functionResponsePart as FunctionResponsePart:
convertedParts.append(.functionResponse(functionResponsePart.functionResponse))
convertedParts.append(InternalPart(
.functionResponse(functionResponsePart.functionResponse),
isThought: functionResponsePart._isThought,
thoughtSignature: functionResponsePart.thoughtSignature
))
default:
fatalError()
}
Expand Down Expand Up @@ -121,7 +166,29 @@ extension ModelContent: Codable {
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ModelContent.InternalPart: Codable {
extension InternalPart: Codable {
  /// Keys for the thought metadata stored alongside the payload.
  enum CodingKeys: String, CodingKey {
    case isThought = "thought"
    case thoughtSignature
  }

  /// Decodes the payload first and then the optional thought metadata, both from `decoder`.
  public init(from decoder: Decoder) throws {
    let payload = try OneOfData(from: decoder)
    let metadata = try decoder.container(keyedBy: CodingKeys.self)
    let thoughtFlag = try metadata.decodeIfPresent(Bool.self, forKey: .isThought)
    let signature = try metadata.decodeIfPresent(String.self, forKey: .thoughtSignature)
    self.init(payload, isThought: thoughtFlag, thoughtSignature: signature)
  }

  /// Encodes the payload first and then the thought metadata, both into `encoder`; metadata
  /// values that are `nil` are omitted.
  public func encode(to encoder: Encoder) throws {
    try data.encode(to: encoder)
    var metadata = encoder.container(keyedBy: CodingKeys.self)
    try metadata.encodeIfPresent(isThought, forKey: .isThought)
    try metadata.encodeIfPresent(thoughtSignature, forKey: .thoughtSignature)
  }
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension InternalPart.OneOfData: Codable {
enum CodingKeys: String, CodingKey {
case text
case inlineData
Expand All @@ -135,10 +202,10 @@ extension ModelContent.InternalPart: Codable {
switch self {
case let .text(text):
try container.encode(text, forKey: .text)
case let .inlineData(mimetype, bytes):
try container.encode(InlineData(data: bytes, mimeType: mimetype), forKey: .inlineData)
case let .fileData(mimetype: mimetype, url):
try container.encode(FileData(fileURI: url, mimeType: mimetype), forKey: .fileData)
case let .inlineData(inlineData):
try container.encode(inlineData, forKey: .inlineData)
case let .fileData(fileData):
try container.encode(fileData, forKey: .fileData)
case let .functionCall(functionCall):
try container.encode(functionCall, forKey: .functionCall)
case let .functionResponse(functionResponse):
Expand All @@ -151,11 +218,9 @@ extension ModelContent.InternalPart: Codable {
if values.contains(.text) {
self = try .text(values.decode(String.self, forKey: .text))
} else if values.contains(.inlineData) {
let inlineData = try values.decode(InlineData.self, forKey: .inlineData)
self = .inlineData(mimetype: inlineData.mimeType, inlineData.data)
self = try .inlineData(values.decode(InlineData.self, forKey: .inlineData))
} else if values.contains(.fileData) {
let fileData = try values.decode(FileData.self, forKey: .fileData)
self = .fileData(mimetype: fileData.mimeType, uri: fileData.fileURI)
self = try .fileData(values.decode(FileData.self, forKey: .fileData))
} else if values.contains(.functionCall) {
self = try .functionCall(values.decode(FunctionCall.self, forKey: .functionCall))
} else if values.contains(.functionResponse) {
Expand Down
3 changes: 3 additions & 0 deletions FirebaseAI/Sources/Types/Internal/InternalPart.swift
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ struct FunctionResponse: Codable, Equatable, Sendable {
struct ErrorPart: Part, Error {
let error: Error

let isThought = false
let thoughtSignature: String? = nil

init(_ error: Error) {
self.error = error
}
Expand Down
Loading
Loading