Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion QEfficient/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
from QEfficient.peft import QEffAutoPeftModelForCausalLM
from QEfficient.transformers.transform import transform
from QEfficient.utils import custom_format_warning
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")

# custom warning for the better logging experience
warnings.formatwarning = custom_format_warning
Expand Down
6 changes: 4 additions & 2 deletions QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import gc
import inspect
import logging
import shutil
import subprocess
import warnings
Expand Down Expand Up @@ -35,8 +34,9 @@
load_json,
)
from QEfficient.utils.export_utils import export_wrapper
from QEfficient.utils.logging_utils import QEFFLogger

logger = logging.getLogger(__name__)
logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class QEFFBaseModel(ABC):
Expand Down Expand Up @@ -326,6 +326,7 @@ def _export(
self.prefill_onnx_path = onnx_path
else:
self.onnx_path = onnx_path
logger.info("Model export is finished and saved at: %s", onnx_path)
return onnx_path

def get_onnx_path(
Expand Down Expand Up @@ -539,4 +540,5 @@ def _compile(
logger.info("Hashed parameters exported successfully.")

self.qpc_path = qpc_path
logger.info("Model compilation is finished and saved at: %s", qpc_path)
return qpc_path
4 changes: 3 additions & 1 deletion QEfficient/base/pytorch_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

from torch import nn

from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class PytorchTransform:
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/cloud/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
from QEfficient.base.common import QEFFCommonLoader
from QEfficient.utils import check_and_assign_cache_dir
from QEfficient.utils.custom_yaml import generate_custom_io
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")

# Specifically for Docker images.
ROOT_DIR = os.path.dirname(os.path.abspath(""))
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/cloud/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

from QEfficient.base.common import QEFFCommonLoader
from QEfficient.utils import check_and_assign_cache_dir, load_hf_processor, load_hf_tokenizer
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


# TODO: Remove after adding support for VLM's compile and execute
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/compile/compile_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
from QEfficient.compile.qnn_compiler import compile as qnn_compile
from QEfficient.utils import constants
from QEfficient.utils._utils import load_json, load_yaml
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


def create_and_dump_specializations(
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/compile/qnn_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
generate_qnn_specialization,
)
from QEfficient.utils.hash_utils import to_hashable
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class QNN:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
)

from QEfficient.diffusers.models.modeling_utils import compute_blocked_attention, get_attention_blocking_config
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


def qeff_apply_rotary_emb(
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/diffusers/pipelines/flux/pipeline_flux.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
set_module_device_ids,
)
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffFluxPipeline:
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/diffusers/pipelines/pipeline_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
from tqdm import tqdm

from QEfficient.utils._utils import load_json
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


def calculate_compressed_latent_dimension(height: int, width: int, vae_scale_factor: int) -> int:
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/diffusers/pipelines/wan/pipeline_wan.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@
)
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import constants
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffWanPipeline:
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/exporter/export_hf_to_cloud_ai_100.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from QEfficient.utils import load_hf_tokenizer
from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants
from QEfficient.utils.generate_inputs import InputHandler
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


def convert_to_cloud_bertstyle(
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/finetune/dataset/alpaca_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
import torch
from torch.utils.data import Dataset

from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")

PROMPT_DICT = {
"prompt_input": (
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/finetune/utils/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from QEfficient.finetune.configs.training import TrainConfig
from QEfficient.finetune.dataset.dataset_config import DATASET_PREPROC
from QEfficient.finetune.utils.helper import Peft_Method
from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")


def update_config(config, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/finetune/utils/plot_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

import matplotlib.pyplot as plt

from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")


def plot_metric(data, metric_name, x_label, y_label, title, colors):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/generation/embedding_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@

from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import constants
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class VisionHandler:
Expand Down
5 changes: 4 additions & 1 deletion QEfficient/generation/text_generation_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import padding_check_and_fix
from QEfficient.utils.constants import Constants
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger
from QEfficient.utils.sampler_utils import validate_sampler_inputs

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


@dataclass
class PerfMetrics:
Expand Down Expand Up @@ -1316,4 +1318,5 @@ def generate(
generated_ids=self._qaic_model.generated_ids,
perf_metrics=perf_metrics,
)
logger.info("Text generation finished")
return latency_stats
4 changes: 3 additions & 1 deletion QEfficient/generation/vlm_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@
)
from QEfficient.utils import LRUCache
from QEfficient.utils.constants import Constants
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class VisionLanguageGeneration(QEffTextGenerationBase):
Expand Down
4 changes: 2 additions & 2 deletions QEfficient/peft/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# ----------------------------------------------------------------------------

import hashlib
import logging
import warnings
from typing import List, Optional, Union

Expand All @@ -32,8 +31,9 @@
from QEfficient.utils import constants
from QEfficient.utils._utils import get_padding_shape_from_config
from QEfficient.utils.hash_utils import to_hashable
from QEfficient.utils.logging_utils import QEFFLogger

logger = logging.getLogger(__name__)
logger = QEFFLogger.get_logger("FT", loglevel="INFO")


class QEffAutoPeftModelForCausalLM(QEFFBaseModel):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/peft/lora/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
from QEfficient.peft.lora.pytorch_transforms import LoraModelInputsTransform, TargetModulesTransform
from QEfficient.utils import constants, get_padding_shape_from_config
from QEfficient.utils.hash_utils import to_hashable
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")


class QEffAutoLoraModelForCausalLM(QEFFAutoModelForCausalLM):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@
from QEfficient.transformers.cache_utils import QEffHybridCacheForGPTOSS
from QEfficient.transformers.modeling_attn_mask_utils import _create_causal_mask
from QEfficient.utils.constants import MIN_MASKED_ATTENTION_VALUE
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffGptOssExperts(GptOssExperts):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

from QEfficient.utils import constants
from QEfficient.utils._utils import IOInfo, get_padding_shape_from_config
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffInternEncoderWrapper(nn.Module):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/transformers/models/llava/modeling_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
)

from QEfficient.utils._utils import IOInfo
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")

BS = 1
FBS = 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@

from QEfficient.utils import constants
from QEfficient.utils._utils import IOInfo
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")

BS = constants.ONNX_EXPORT_EXAMPLE_BATCH_SIZE
FBS = constants.ONNX_EXPORT_EXAMPLE_FBS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

from QEfficient.utils import constants
from QEfficient.utils._utils import IOInfo, get_padding_shape_from_config
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


def custom_cumsum(tensor):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/transformers/models/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,11 @@
get_padding_shape_from_config,
)
from QEfficient.utils.check_ccl_specializations import process_ccl_specializations
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger
from QEfficient.utils.sampler_utils import get_sampling_inputs_and_outputs

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEFFTransformersBase(QEFFBaseModel):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
from QEfficient.utils import constants
from QEfficient.utils._utils import IOInfo, get_padding_shape_from_config
from QEfficient.utils.constants import MIN_MASKED_ATTENTION_VALUE
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


def qeff_apply_rotary_pos_emb(q, k, cos, sin, position_ids, mrope_section, unsqueeze_dim=1):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/transformers/quantizers/quantizer_awq.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
replace_linear_layer_with_target_layer,
replace_quantization_scales,
)
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffAwqConfig(AwqConfig):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
from transformers.utils.quantization_config import CompressedTensorsConfig, QuantizationConfigMixin, QuantizationMethod

from QEfficient.transformers.quantizers.quantizer_utils import get_keys_to_not_convert
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")

FP8_DTYPE = torch.float8_e4m3fn

Expand Down
4 changes: 3 additions & 1 deletion QEfficient/transformers/quantizers/quantizer_gptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
repack_zeros,
replace_linear_layer_with_target_layer,
)
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffGPTQConfig(GPTQConfig):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/transformers/quantizers/quantizer_mxfp4.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
from transformers.utils.quantization_config import Mxfp4Config

from QEfficient.transformers.quantizers.quantizer_utils import convert_moe_packed_tensors, get_keys_to_not_convert
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffMxfp4GptOssExperts(nn.Module):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/transformers/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
from QEfficient.base.modeling_qeff import QEFFBaseModel
from QEfficient.transformers.cache_utils import QEffDynamicCache
from QEfficient.transformers.modeling_utils import TransformersToQEffModulesDict
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


def replace_module_with_qeff_layers(model: nn.Module) -> None:
Expand Down
Loading
Loading