Commit 19c5599

Version 1.3.2
1 parent 59b4176 commit 19c5599

481 files changed: 134152 additions, 60688 deletions


abacusai/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -4,4 +4,4 @@
 from .streaming_client import StreamingClient
 
 
-__version__ = "1.2.5"
+__version__ = "1.3.2"

abacusai/api_class/ai_agents.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ class WorkflowNodeInputSchema(ApiClass):
     A react-jsonschema-form conformant schema for workflow node input.
 
     Args:
-        json_schema (dict): The json schema for the input conformant to react-jsonschema-form specification. Must define keys like "title", "type" and "properties".
+        json_schema (dict): The json schema for the input conformant to react-jsonschema-form specification. Must define keys like "title", "type" and "properties". Supported elements - Checkbox, Radio Button, Dropdown, Textarea, Number, Date, File Upload. Not supported - Nested elements, arrays and other complex types.
         ui_schema (dict): The ui schema for the input conformant to react-jsonschema-form specification.
     """
     json_schema: dict
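
For reference, a minimal sketch of a schema pair that stays within the newly documented limits (flat properties only). The field names and widget choices are illustrative, not from the commit, and it assumes both documented args are settable through the dataclass constructor:

from abacusai.api_class import WorkflowNodeInputSchema

input_schema = WorkflowNodeInputSchema(
    json_schema={
        'title': 'Ticket Triage Input',
        'type': 'object',
        'properties': {
            'priority': {'title': 'Priority', 'type': 'string', 'enum': ['low', 'medium', 'high']},  # Dropdown
            'is_urgent': {'title': 'Urgent?', 'type': 'boolean'},  # Checkbox
            'details': {'title': 'Details', 'type': 'string'},     # Textarea via ui_schema
        },
    },
    ui_schema={'details': {'ui:widget': 'textarea'}},
)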

abacusai/api_class/connectors.py

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+import dataclasses
+
+from . import enums
+from .abstract import _ApiClassFactory
+from .dataset import DatasetConfig
+
+
+@dataclasses.dataclass
+class StreamingConnectorDatasetConfig(DatasetConfig):
+    """
+    An abstract class for dataset configs specific to streaming connectors.
+
+    Args:
+        streaming_connector_type (StreamingConnectorType): The type of streaming connector
+    """
+    streaming_connector_type: enums.StreamingConnectorType = dataclasses.field(default=None, repr=False, init=False)
+
+    @classmethod
+    def _get_builder(cls):
+        return _StreamingConnectorDatasetConfigFactory
+
+
+@dataclasses.dataclass
+class KafkaDatasetConfig(StreamingConnectorDatasetConfig):
+    """
+    Dataset config for Kafka Streaming Connector
+
+    Args:
+        topic (str): The kafka topic to consume
+    """
+    topic: str
+
+    def __post_init__(self):
+        self.streaming_connector_type = enums.StreamingConnectorType.KAFKA
+
+
+@dataclasses.dataclass
+class _StreamingConnectorDatasetConfigFactory(_ApiClassFactory):
+    config_abstract_class = StreamingConnectorDatasetConfig
+    config_class_key = 'streaming_connector_type'
+    config_class_map = {
+        enums.StreamingConnectorType.KAFKA: KafkaDatasetConfig,
+    }
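
The factory mirrors the application-connector pattern already in the SDK: config_class_key names the field whose enum value picks the concrete class out of config_class_map. A usage sketch (the topic name is illustrative, and it assumes the class is re-exported from abacusai.api_class like the other dataset configs):

from abacusai.api_class import KafkaDatasetConfig
from abacusai.api_class.enums import StreamingConnectorType

config = KafkaDatasetConfig(topic='clickstream-events')
# __post_init__ pins the connector type; callers never set it directly.
assert config.streaming_connector_type == StreamingConnectorType.KAFKA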

abacusai/api_class/dataset.py

Lines changed: 11 additions & 0 deletions
@@ -4,6 +4,17 @@
 from .enums import OcrMode
 
 
+@dataclasses.dataclass
+class DatasetConfig(ApiClass):
+    """
+    An abstract class for dataset configs
+
+    Args:
+        is_documentset (bool): Whether the dataset is a document set
+    """
+    is_documentset: bool = dataclasses.field(default=None)
+
+
 @dataclasses.dataclass
 class ParsingConfig(ApiClass):
     """

abacusai/api_class/dataset_application_connector.py

Lines changed: 15 additions & 17 deletions
@@ -1,28 +1,27 @@
 import dataclasses
 
 from . import enums
-from .abstract import ApiClass, _ApiClassFactory
+from .abstract import _ApiClassFactory
+from .dataset import DatasetConfig
 
 
 @dataclasses.dataclass
-class DatasetConfig(ApiClass):
+class ApplicationConnectorDatasetConfig(DatasetConfig):
     """
     An abstract class for dataset configs specific to application connectors.
 
     Args:
         application_connector_type(enums.ApplicationConnectorType): The type of application connector
-        is_documentset (bool): Whether the dataset is a document set
     """
     application_connector_type: enums.ApplicationConnectorType = dataclasses.field(default=None, repr=False, init=False)
-    is_documentset: bool = dataclasses.field(default=None)
 
     @classmethod
     def _get_builder(cls):
-        return _DatasetConfigFactory
+        return _ApplicationConnectorDatasetConfigFactory
 
 
 @dataclasses.dataclass
-class ConfluenceDatasetConfig(DatasetConfig):
+class ConfluenceDatasetConfig(ApplicationConnectorDatasetConfig):
     """
     Dataset config for Confluence Application Connector
     Args:
@@ -42,7 +41,7 @@ def __post_init__(self):
 
 
 @dataclasses.dataclass
-class GoogleAnalyticsDatasetConfig(DatasetConfig):
+class GoogleAnalyticsDatasetConfig(ApplicationConnectorDatasetConfig):
     """
     Dataset config for Google Analytics Application Connector
@@ -60,7 +59,7 @@ def __post_init__(self):
 
 
 @dataclasses.dataclass
-class GoogleDriveDatasetConfig(DatasetConfig):
+class GoogleDriveDatasetConfig(ApplicationConnectorDatasetConfig):
     """
     Dataset config for Google Drive Application Connector
@@ -80,7 +79,7 @@ def __post_init__(self):
 
 
 @dataclasses.dataclass
-class JiraDatasetConfig(DatasetConfig):
+class JiraDatasetConfig(ApplicationConnectorDatasetConfig):
     """
     Dataset config for Jira Application Connector
@@ -100,7 +99,7 @@ def __post_init__(self):
 
 
 @dataclasses.dataclass
-class OneDriveDatasetConfig(DatasetConfig):
+class OneDriveDatasetConfig(ApplicationConnectorDatasetConfig):
     """
     Dataset config for OneDrive Application Connector
@@ -120,13 +119,12 @@ def __post_init__(self):
 
 
 @dataclasses.dataclass
-class SharepointDatasetConfig(DatasetConfig):
+class SharepointDatasetConfig(ApplicationConnectorDatasetConfig):
     """
     Dataset config for Sharepoint Application Connector
 
     Args:
         location (str): The regex location of the files to fetch
-        is_documentset (bool): Whether the dataset is a document set
         csv_delimiter (str): If the file format is CSV, use a specific csv delimiter
         extract_bounding_boxes (bool): Signifies whether to extract bounding boxes out of the documents. Only valid if is_documentset is True
         merge_file_schemas (bool): Signifies if the merge file schema policy is enabled. Not applicable if is_documentset is True
@@ -141,7 +139,7 @@ def __post_init__(self):
 
 
 @dataclasses.dataclass
-class ZendeskDatasetConfig(DatasetConfig):
+class ZendeskDatasetConfig(ApplicationConnectorDatasetConfig):
     """
     Dataset config for Zendesk Application Connector
@@ -155,7 +153,7 @@ def __post_init__(self):
 
 
 @dataclasses.dataclass
-class AbacusUsageMetricsDatasetConfig(DatasetConfig):
+class AbacusUsageMetricsDatasetConfig(ApplicationConnectorDatasetConfig):
     """
     Dataset config for Abacus Usage Metrics Application Connector
@@ -172,7 +170,7 @@ def __post_init__(self):
 
 
 @dataclasses.dataclass
-class FreshserviceDatasetConfig(DatasetConfig):
+class FreshserviceDatasetConfig(ApplicationConnectorDatasetConfig):
     """
     Dataset config for Freshservice Application Connector
     """
@@ -182,8 +180,8 @@ def __post_init__(self):
 
 
 @dataclasses.dataclass
-class _DatasetConfigFactory(_ApiClassFactory):
-    config_abstract_class = DatasetConfig
+class _ApplicationConnectorDatasetConfigFactory(_ApiClassFactory):
+    config_abstract_class = ApplicationConnectorDatasetConfig
     config_class_key = 'application_connector_type'
     config_class_map = {
         enums.ApplicationConnectorType.CONFLUENCE: ConfluenceDatasetConfig,
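
For callers nothing changes: the concrete configs keep their constructors and now inherit is_documentset from the shared base instead of declaring it per class. A sketch using the Sharepoint config documented above (values are illustrative):

from abacusai.api_class import SharepointDatasetConfig

config = SharepointDatasetConfig(
    location='Shared Documents/contracts/.*\\.pdf',
    is_documentset=True,          # inherited from DatasetConfig
    extract_bounding_boxes=True,  # only valid when is_documentset is True
)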

abacusai/api_class/document_retriever.py

Lines changed: 2 additions & 6 deletions
@@ -7,7 +7,7 @@
 @dataclasses.dataclass
 class VectorStoreConfig(ApiClass):
     """
-    Configs for vector store indexing.
+    Config for indexing options of a document retriever. Default values of optional arguments are heuristically selected by the Abacus.AI platform based on the underlying data.
 
     Args:
         chunk_size (int): The size of text chunks in the vector store.
@@ -25,8 +25,4 @@ class VectorStoreConfig(ApiClass):
     prune_vectors: bool = dataclasses.field(default=None)
 
 
-@dataclasses.dataclass
-class DocumentRetrieverConfig(VectorStoreConfig):
-    """
-    Configs for document retriever. If any configuration value is not explicitly provided, Abacus.AI will automatically infer default values based on the data.
-    """
+DocumentRetrieverConfig = VectorStoreConfig
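
Because the empty subclass is replaced by a plain alias, both names now refer to the same class, so existing imports and isinstance checks keep working:

from abacusai.api_class import DocumentRetrieverConfig, VectorStoreConfig

assert DocumentRetrieverConfig is VectorStoreConfig
# chunk_size is documented above; the value here is illustrative.
config = DocumentRetrieverConfig(chunk_size=512)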

abacusai/api_class/enums.py

Lines changed: 6 additions & 9 deletions
@@ -362,7 +362,11 @@ class ApplicationConnectorType(ApiEnum):
     ABACUSUSAGEMETRICS = 'ABACUSUSAGEMETRICS'
     MICROSOFTAUTH = 'MICROSOFTAUTH'
     FRESHSERVICE = 'FRESHSERVICE'
-    ZENDESKSUNSHINEMESSAGING = 'zendesksunshinemessaging'
+    ZENDESKSUNSHINEMESSAGING = 'ZENDESKSUNSHINEMESSAGING'
+
+
+class StreamingConnectorType(ApiEnum):
+    KAFKA = 'KAFKA'
 
 
 class PythonFunctionArgumentType(ApiEnum):
@@ -428,7 +432,7 @@ class LLMName(ApiEnum):
     GEMINI_1_5_PRO = 'GEMINI_1_5_PRO'
     MIXTRAL_CHAT = 'MIXTRAL_CHAT'
     MISTRAL_MEDIUM = 'MISTRAL_MEDIUM'
-    ABACUS_SMAUG2 = 'ABACUS_SMAUG2'
+    ABACUS_SMAUG3 = 'ABACUS_SMAUG3'
 
 
 class MonitorAlertType(ApiEnum):
@@ -532,13 +536,6 @@ class DataType(ApiEnum):
     STRUCT = 'struct'
     NULL = 'null'
 
-    @classmethod
-    def from_str(cls, value):
-        if not value:
-            return None
-        default_map = {val.value: val for val in DataType}
-        return default_map[value.lower()]
-
 
 class AgentInterface(ApiEnum):
     # Duplicated in reainternal.enums, both should be kept in sync
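
Code that relied on the removed DataType.from_str helper can use the standard Enum lookup by value, which is nearly what the helper did. A sketch (note the stock Enum constructor raises ValueError rather than KeyError for unknown values):

from abacusai.api_class.enums import DataType

def data_type_from_str(value):
    # None-safe lookup by lowercase value, mirroring the removed classmethod.
    return DataType(value.lower()) if value else None

assert data_type_from_str('STRUCT') is DataType.STRUCT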

abacusai/api_class/model.py

Lines changed: 34 additions & 34 deletions
@@ -438,18 +438,19 @@ class ChatLLMTrainingConfig(TrainingConfig):
     Training config for the CHAT_LLM problem type
 
     Args:
-        document_retrievers (List[str]): List of document retriever names to use for the feature stores this model was trained with.
-        num_completion_tokens (int): Default for maximum number of tokens for chat answers. Reducing this will get faster responses which are more succinct
-        temperature (float): The generative LLM temperature
+        document_retrievers (List[str]): List of names of document retrievers to use as vector stores of information for RAG responses.
+        num_completion_tokens (int): Default for maximum number of tokens for chat answers. Reducing this will get faster responses which are more succinct.
+        temperature (float): The generative LLM temperature.
         retrieval_columns (list): Include the metadata column values in the retrieved search results.
         filter_columns (list): Allow users to filter the document retrievers on these metadata columns.
-        include_general_knowledge (bool): Allow the LLM to rely not just on search results, but to fall back on general knowledge.
+        include_general_knowledge (bool): Allow the LLM to rely not just on RAG search results, but to fall back on general knowledge. Disabled by default.
+        enable_web_search (bool) : Allow the LLM to use Web Search Engines to retrieve information for better results.
         behavior_instructions (str): Customize the overall role instructions for the LLM.
-        response_instructions (str): Customize instructions for what the LLM responses should look like.
-        enable_llm_rewrite (bool): Enable LLM rewrite for the ChatLLM. If None, LLM rewrite will happen automatically. Defaults to False.
+        response_instructions (str): Customized instructions for how the LLM should respond.
+        enable_llm_rewrite (bool): If enabled, an LLM will rewrite the RAG queries sent to document retriever. Disabled by default.
         column_filtering_instructions (str): Instructions for a LLM call to automatically generate filter expressions on document metadata to retrieve relevant documents for the conversation.
         keyword_requirement_instructions (str): Instructions for a LLM call to automatically generate keyword requirements to retrieve relevant documents for the conversation.
-        query_rewrite_instructions (str): Instructions for a LLM call to rewrite a search query.
+        query_rewrite_instructions (str): Special instructions for the LLM which rewrites the RAG query.
         max_search_results (int): Maximum number of search results in the retrieval augmentation step. If we know that the questions are likely to have snippets which are easily matched in the documents, then a lower number will help with accuracy.
         data_feature_group_ids: (List[str]): List of feature group IDs to use to possibly query for the ChatLLM. The created ChatLLM is commonly referred to as DataLLM.
         data_prompt_context (str): Prompt context for the data feature group IDs.
@@ -458,33 +459,32 @@ class ChatLLMTrainingConfig(TrainingConfig):
         search_score_cutoff (float): Minimum search score to consider a document as a valid search result.
         database_connector_id (str): Database connector ID to use for the ChatLLM.
         database_connector_tables (List[str]): List of tables to use from the database connector for the ChatLLM.
-        enable_code_execution (bool): Enable code execution in the ChatLLM.
-        metadata_columns (list): DEPRECATED. Include the metadata column values in the retrieved search results.
-        lookup_rewrite_instructions (str): DEPRECATED. Instructions for a LLM call to rewrite a search query.
-    """
-    document_retrievers: List[str] = None
-    num_completion_tokens: int = None
-    temperature: float = None
-    retrieval_columns: list = None
-    filter_columns: list = None
-    include_general_knowledge: bool = None
-    behavior_instructions: str = None
-    response_instructions: str = None
-    enable_llm_rewrite: bool = False
-    column_filtering_instructions: str = None
-    keyword_requirement_instructions: str = None
-    query_rewrite_instructions: str = None
-    max_search_results: int = None
-    data_feature_group_ids: List[str] = None
-    data_prompt_context: str = None
-    hide_generated_sql: bool = None
-    disable_data_summarization: bool = None
-    search_score_cutoff: float = None
-    database_connector_id: str = None
-    database_connector_tables: List[str] = None
-    enable_code_execution: bool = None
-    metadata_columns: list = None
-    lookup_rewrite_instructions: str = None
+        enable_code_execution (bool): Enable python code execution in the ChatLLM. This equips the LLM with a python kernel in which all its code is executed.
+    """
+    document_retrievers: List[str] = dataclasses.field(default=None)
+    num_completion_tokens: int = dataclasses.field(default=None)
+    temperature: float = dataclasses.field(default=None)
+    retrieval_columns: list = dataclasses.field(default=None)
+    filter_columns: list = dataclasses.field(default=None)
+    include_general_knowledge: bool = dataclasses.field(default=None)
+    enable_web_search: bool = dataclasses.field(default=None)
+    behavior_instructions: str = dataclasses.field(default=None)
+    response_instructions: str = dataclasses.field(default=None)
+    enable_llm_rewrite: bool = dataclasses.field(default=None)
+    column_filtering_instructions: str = dataclasses.field(default=None)
+    keyword_requirement_instructions: str = dataclasses.field(default=None)
+    query_rewrite_instructions: str = dataclasses.field(default=None)
+    max_search_results: int = dataclasses.field(default=None)
+    data_feature_group_ids: List[str] = dataclasses.field(default=None)
+    data_prompt_context: str = dataclasses.field(default=None)
+    hide_generated_sql: bool = dataclasses.field(default=None)
+    disable_data_summarization: bool = dataclasses.field(default=None)
+    search_score_cutoff: float = dataclasses.field(default=None)
+    database_connector_id: str = dataclasses.field(default=None)
+    database_connector_tables: List[str] = dataclasses.field(default=None)
+    enable_code_execution: bool = dataclasses.field(default=None)
+    metadata_columns: list = dataclasses.field(default=None, metadata={'deprecated': True})
+    lookup_rewrite_instructions: str = dataclasses.field(default=None, metadata={'deprecated': True})
 
     def __post_init__(self):
         self.problem_type = enums.ProblemType.CHAT_LLM
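
Taken together, a hedged usage sketch of the updated config; the field names come from the diff above, while the values are illustrative:

from abacusai.api_class import ChatLLMTrainingConfig

config = ChatLLMTrainingConfig(
    document_retrievers=['support_docs'],
    enable_web_search=True,      # new in this commit
    enable_code_execution=True,  # gives the LLM a python kernel for its code
    temperature=0.2,
)
# __post_init__ (unchanged) sets problem_type to ProblemType.CHAT_LLM.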

abacusai/batch_prediction.py

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ def __init__(self, client, batchPredictionId=None, createdAt=None, name=None, de
             BatchPredictionArgs, globalPredictionArgs)
         self.batch_prediction_args = client._build_class(getattr(
             api_class, batchPredictionArgsType, BatchPredictionArgs) if batchPredictionArgsType else BatchPredictionArgs, batchPredictionArgs)
-        self.deprecated_keys = {'explanations', 'global_prediction_args'}
+        self.deprecated_keys = {'global_prediction_args', 'explanations'}
 
     def __repr__(self):
         repr_dict = {f'batch_prediction_id': repr(self.batch_prediction_id), f'created_at': repr(self.created_at), f'name': repr(self.name), f'deployment_id': repr(self.deployment_id), f'file_connector_output_location': repr(self.file_connector_output_location), f'database_connector_id': repr(self.database_connector_id), f'database_output_configuration': repr(self.database_output_configuration), f'file_output_format': repr(self.file_output_format), f'connector_type': repr(self.connector_type), f'legacy_input_location': repr(self.legacy_input_location), f'output_feature_group_id': repr(self.output_feature_group_id), f'feature_group_table_name': repr(self.feature_group_table_name), f'output_feature_group_table_name': repr(self.output_feature_group_table_name), f'summary_feature_group_table_name': repr(self.summary_feature_group_table_name), f'csv_input_prefix': repr(

abacusai/batch_prediction_version.py

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ def __init__(self, client, batchPredictionVersion=None, batchPredictionId=None,
             BatchPredictionArgs, globalPredictionArgs)
         self.batch_prediction_args = client._build_class(getattr(
             api_class, batchPredictionArgsType, BatchPredictionArgs) if batchPredictionArgsType else BatchPredictionArgs, batchPredictionArgs)
-        self.deprecated_keys = {'explanations', 'global_prediction_args'}
+        self.deprecated_keys = {'global_prediction_args', 'explanations'}
 
     def __repr__(self):
         repr_dict = {f'batch_prediction_version': repr(self.batch_prediction_version), f'batch_prediction_id': repr(self.batch_prediction_id), f'status': repr(self.status), f'drift_monitor_status': repr(self.drift_monitor_status), f'deployment_id': repr(self.deployment_id), f'model_id': repr(self.model_id), f'model_version': repr(self.model_version), f'predictions_started_at': repr(self.predictions_started_at), f'predictions_completed_at': repr(self.predictions_completed_at), f'database_output_error': repr(self.database_output_error), f'total_predictions': repr(self.total_predictions), f'failed_predictions': repr(self.failed_predictions), f'database_connector_id': repr(self.database_connector_id), f'database_output_configuration': repr(self.database_output_configuration), f'file_connector_output_location': repr(self.file_connector_output_location), f'file_output_format': repr(self.file_output_format), f'connector_type': repr(self.connector_type), f'legacy_input_location': repr(self.legacy_input_location), f'error': repr(self.error), f'drift_monitor_error': repr(self.drift_monitor_error), f'monitor_warnings': repr(self.monitor_warnings), f'csv_input_prefix': repr(
