Skip to content

Additional logging statements for AI Quick Actions operations #1034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ads/aqua/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# Copyright (c) 2024 Oracle and/or its affiliates.
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import json
Expand Down Expand Up @@ -298,7 +298,7 @@ def get_config(self, model_id: str, config_file_name: str) -> Dict:
config = {}
artifact_path = get_artifact_path(oci_model.custom_metadata_list)
if not artifact_path:
logger.error(
logger.debug(
f"Failed to get artifact path from custom metadata for the model: {model_id}"
)
return config
Expand Down
57 changes: 54 additions & 3 deletions ads/aqua/common/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# Copyright (c) 2024 Oracle and/or its affiliates.
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
"""AQUA utils and constants."""

Expand All @@ -12,11 +12,12 @@
import re
import shlex
import subprocess
from dataclasses import fields
from datetime import datetime, timedelta
from functools import wraps
from pathlib import Path
from string import Template
from typing import List, Union
from typing import Any, List, Optional, Type, TypeVar, Union

import fsspec
import oci
Expand All @@ -30,6 +31,7 @@
)
from oci.data_science.models import JobRun, Model
from oci.object_storage.models import ObjectSummary
from pydantic import BaseModel, ValidationError

from ads.aqua.common.enums import (
InferenceContainerParamType,
Expand Down Expand Up @@ -74,6 +76,7 @@
from ads.model import DataScienceModel, ModelVersionSet

logger = logging.getLogger("ads.aqua")
T = TypeVar("T", bound=Union[BaseModel, Any])


class LifecycleStatus(str, metaclass=ExtendedEnumMeta):
Expand Down Expand Up @@ -788,7 +791,9 @@ def get_ocid_substring(ocid: str, key_len: int) -> str:
return ocid[-key_len:] if ocid and len(ocid) > key_len else ""


def upload_folder(os_path: str, local_dir: str, model_name: str, exclude_pattern: str = None) -> str:
def upload_folder(
os_path: str, local_dir: str, model_name: str, exclude_pattern: str = None
) -> str:
"""Upload the local folder to the object storage

Args:
Expand Down Expand Up @@ -1159,3 +1164,49 @@ def validate_cmd_var(cmd_var: List[str], overrides: List[str]) -> List[str]:

combined_cmd_var = cmd_var + overrides
return combined_cmd_var


def validate_dataclass_params(dataclass_type: Type[T], **kwargs: Any) -> Optional[T]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if there is some built-in solution to do this. It would be so easy to do with pydantic.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the error validation here covers both dataclasses and pydantic's BaseModel, since we use both in AQUA. We can simplify this once all dataclasses are migrated to pydantic.

"""Tries to initialize a dataclass or pydantic model with the provided keyword arguments and
converts errors about missing, unexpected or invalid arguments into an AquaValueError.

Parameters
----------
dataclass_type (Type[T]):
the dataclass or pydantic BaseModel type to instantiate.
kwargs (Any):
the keyword arguments used to initialize the instance.

Returns
-------
Optional[T]
instance of the given type if successfully initialized.

Raises
------
AquaValueError
if required parameters are missing, unexpected parameters are passed, or validation fails.
"""

try:
return dataclass_type(**kwargs)
except TypeError as ex:
error_message = str(ex)
allowed_params = ", ".join(
field.name for field in fields(dataclass_type)
).rstrip()
if "__init__() missing" in error_message:
missing_params = error_message.split("missing ")[1]
raise AquaValueError(
"Error: Missing required parameters: "
f"{missing_params}. Allowable parameters are: {allowed_params}."
) from ex
elif "__init__() got an unexpected keyword argument" in error_message:
unexpected_param = error_message.split("argument '")[1].rstrip("'")
raise AquaValueError(
"Error: Unexpected parameter: "
f"{unexpected_param}. Allowable parameters are: {allowed_params}."
) from ex
else:
raise AquaValueError(
"Invalid parameters. Allowable parameters are: " f"{allowed_params}."
) from ex
except ValidationError as ex:
custom_errors = {".".join(map(str, e["loc"])): e["msg"] for e in ex.errors()}
raise AquaValueError(
f"Invalid parameters. Error details: {custom_errors}."
) from ex
46 changes: 24 additions & 22 deletions ads/aqua/evaluation/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# Copyright (c) 2024 Oracle and/or its affiliates.
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import base64
import json
Expand Down Expand Up @@ -43,6 +43,7 @@
get_container_image,
is_valid_ocid,
upload_local_to_os,
validate_dataclass_params,
)
from ads.aqua.config.config import get_evaluation_service_config
from ads.aqua.constants import (
Expand Down Expand Up @@ -155,16 +156,9 @@ def create(
The instance of AquaEvaluationSummary.
"""
if not create_aqua_evaluation_details:
try:
create_aqua_evaluation_details = CreateAquaEvaluationDetails(**kwargs)
except Exception as ex:
custom_errors = {
".".join(map(str, e["loc"])): e["msg"]
for e in json.loads(ex.json())
}
raise AquaValueError(
f"Invalid create evaluation parameters. Error details: {custom_errors}."
) from ex
create_aqua_evaluation_details = validate_dataclass_params(
CreateAquaEvaluationDetails, **kwargs
)

if not is_valid_ocid(create_aqua_evaluation_details.evaluation_source_id):
raise AquaValueError(
Expand Down Expand Up @@ -199,11 +193,11 @@ def create(
eval_inference_configuration = (
container.spec.evaluation_configuration
)
except Exception:
except Exception as ex:
logger.debug(
f"Could not load inference config details for the evaluation source id: "
f"{create_aqua_evaluation_details.evaluation_source_id}. Please check if the container"
f" runtime has the correct SMC image information."
f" runtime has the correct SMC image information.\nError: {str(ex)}"
)
elif (
DataScienceResource.MODEL
Expand Down Expand Up @@ -289,7 +283,7 @@ def create(
f"Invalid experiment name. Please provide an experiment with `{Tags.AQUA_EVALUATION}` in tags."
)
except Exception:
logger.debug(
logger.info(
f"Model version set {experiment_model_version_set_name} doesn't exist. "
"Creating new model version set."
)
Expand Down Expand Up @@ -711,21 +705,27 @@ def get(self, eval_id) -> AquaEvaluationDetail:
try:
log = utils.query_resource(log_id, return_all=False)
log_name = log.display_name if log else ""
except Exception:
except Exception as ex:
logger.debug(f"Failed to get associated log name. Error: {ex}")
pass

if loggroup_id:
try:
loggroup = utils.query_resource(loggroup_id, return_all=False)
loggroup_name = loggroup.display_name if loggroup else ""
except Exception:
except Exception as ex:
logger.debug(f"Failed to get associated loggroup name. Error: {ex}")
pass

try:
introspection = json.loads(
self._get_attribute_from_model_metadata(resource, "ArtifactTestResults")
)
except Exception:
except Exception as ex:
logger.debug(
f"There was an issue loading the model attribute as json object for evaluation {eval_id}. "
f"Setting introspection to empty.\n Error:{ex}"
)
introspection = {}

summary = AquaEvaluationDetail(
Expand Down Expand Up @@ -878,13 +878,13 @@ def get_status(self, eval_id: str) -> dict:
try:
log_id = job_run_details.log_details.log_id
except Exception as e:
logger.debug(f"Failed to get associated log. {str(e)}")
logger.debug(f"Failed to get associated log.\nError: {str(e)}")
log_id = ""

try:
loggroup_id = job_run_details.log_details.log_group_id
except Exception as e:
logger.debug(f"Failed to get associated log. {str(e)}")
logger.debug(f"Failed to get associated log.\nError: {str(e)}")
loggroup_id = ""

loggroup_url = get_log_links(region=self.region, log_group_id=loggroup_id)
Expand Down Expand Up @@ -958,7 +958,7 @@ def load_metrics(self, eval_id: str) -> AquaEvalMetrics:
)
except Exception as e:
logger.debug(
"Failed to load `report.json` from evaluation artifact" f"{str(e)}"
f"Failed to load `report.json` from evaluation artifact.\nError: {str(e)}"
)
json_report = {}

Expand Down Expand Up @@ -1047,6 +1047,7 @@ def download_report(self, eval_id) -> AquaEvalReport:
return report

with tempfile.TemporaryDirectory() as temp_dir:
logger.info(f"Downloading evaluation artifact for {eval_id}.")
DataScienceModel.from_id(eval_id).download_artifact(
temp_dir,
auth=self._auth,
Expand Down Expand Up @@ -1200,6 +1201,7 @@ def _delete_job_and_model(job, model):
def load_evaluation_config(self, container: Optional[str] = None) -> Dict:
"""Loads evaluation config."""

logger.info("Loading evaluation container config.")
# retrieve the evaluation config by container family name
evaluation_config = get_evaluation_service_config(container)

Expand Down Expand Up @@ -1279,9 +1281,9 @@ def _get_source(
raise AquaRuntimeError(
f"Not supported source type: {resource_type}"
)
except Exception:
except Exception as ex:
logger.debug(
f"Failed to retrieve source information for evaluation {evaluation.identifier}."
f"Failed to retrieve source information for evaluation {evaluation.identifier}.\nError: {str(ex)}"
)
source_name = ""

Expand Down
21 changes: 14 additions & 7 deletions ads/aqua/extension/aqua_ws_msg_handler.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2024 Oracle and/or its affiliates.
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import traceback
import uuid
from abc import abstractmethod
from http.client import responses
from typing import List
Expand Down Expand Up @@ -34,7 +34,7 @@ def __init__(self, message: str):
self.telemetry = TelemetryClient(
bucket=AQUA_TELEMETRY_BUCKET, namespace=AQUA_TELEMETRY_BUCKET_NS
)
except:
except Exception:
pass

@staticmethod
Expand Down Expand Up @@ -66,24 +66,31 @@ def write_error(self, status_code, **kwargs):
"message": message,
"service_payload": service_payload,
"reason": reason,
"request_id": str(uuid.uuid4()),
}
exc_info = kwargs.get("exc_info")
if exc_info:
logger.error("".join(traceback.format_exception(*exc_info)))
logger.error(
f"Error Request ID: {reply['request_id']}\n"
f"Error: {''.join(traceback.format_exception(*exc_info))}"
)
e = exc_info[1]
if isinstance(e, HTTPError):
reply["message"] = e.log_message or message
reply["reason"] = e.reason
else:
logger.warning(reply["message"])

logger.error(
f"Error Request ID: {reply['request_id']}\n"
f"Error: {reply['message']} {reply['reason']}"
)
# telemetry may not be present if there is an error while initializing
if hasattr(self, "telemetry"):
aqua_api_details = kwargs.get("aqua_api_details", {})
self.telemetry.record_event_async(
category="aqua/error",
action=str(status_code),
value=reason,
**aqua_api_details
**aqua_api_details,
)
response = AquaWsError(
status=status_code,
Expand Down
21 changes: 12 additions & 9 deletions ads/aqua/extension/base_handler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2024 Oracle and/or its affiliates.
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/


Expand Down Expand Up @@ -35,7 +34,7 @@ def __init__(
self.telemetry = TelemetryClient(
bucket=AQUA_TELEMETRY_BUCKET, namespace=AQUA_TELEMETRY_BUCKET_NS
)
except:
except Exception:
pass

@staticmethod
Expand Down Expand Up @@ -82,19 +81,23 @@ def write_error(self, status_code, **kwargs):
"message": message,
"service_payload": service_payload,
"reason": reason,
"request_id": str(uuid.uuid4()),
}
exc_info = kwargs.get("exc_info")
if exc_info:
logger.error("".join(traceback.format_exception(*exc_info)))
logger.error(
f"Error Request ID: {reply['request_id']}\n"
f"Error: {''.join(traceback.format_exception(*exc_info))}"
)
e = exc_info[1]
if isinstance(e, HTTPError):
reply["message"] = e.log_message or message
reply["reason"] = e.reason if e.reason else reply["reason"]
reply["request_id"] = str(uuid.uuid4())
else:
reply["request_id"] = str(uuid.uuid4())

logger.warning(reply["message"])
logger.error(
f"Error Request ID: {reply['request_id']}\n"
f"Error: {reply['message']} {reply['reason']}"
)

# telemetry may not be present if there is an error while initializing
if hasattr(self, "telemetry"):
Expand All @@ -103,7 +106,7 @@ def write_error(self, status_code, **kwargs):
category="aqua/error",
action=str(status_code),
value=reason,
**aqua_api_details
**aqua_api_details,
)

self.finish(json.dumps(reply))
Expand Down
Loading