"""High-level API for model quantization."""

from typing import Optional, Dict, Any, Union, Tuple

import torch
from transformers import PreTrainedModel, AutoTokenizer

from ..quant.gguf import GGUFQuantizer, SUPPORTED_GGUF_BITS, SUPPORTED_GGUF_TYPES
from ..utils.logger import logger
from ..utils.memory_tracker import memory_tracker
from ..utils.benchmark import QuantizationBenchmark


class QuantizationAPI:
    """High-level API for model quantization with GGUF support."""

    @staticmethod
    def quantize_model(
        model_name_or_path: Union[str, PreTrainedModel],
        bits: int = 4,
        group_size: int = 128,
        quant_type: Optional[str] = None,
        use_packed: bool = True,
        cpu_offload: bool = False,
        desc_act: bool = False,
        desc_ten: bool = False,
        legacy_format: bool = False,
        batch_size: int = 4,
        device: Optional[str] = None,
        calibration_data: Optional[torch.Tensor] = None,
        benchmark: bool = True,
        benchmark_input_shape: Optional[Tuple[int, ...]] = None,
        benchmark_steps: int = 100
    ) -> Tuple[PreTrainedModel, Dict[str, Any]]:
        """
        Quantize a model using GGUF format with optional benchmarking.

        Args:
            model_name_or_path: Model identifier or a loaded model instance
            bits: Number of bits for quantization
            group_size: Size of quantization groups
            quant_type: GGUF quantization type
            use_packed: Whether to use packed format
            cpu_offload: Whether to offload to CPU during quantization
            desc_act: Whether to include activation descriptors
            desc_ten: Whether to include tensor descriptors
            legacy_format: Whether to use the legacy format
            batch_size: Batch size for processing
            device: Device to run quantization on
            calibration_data: Optional calibration tensor
            benchmark: Whether to run benchmarks after quantization
            benchmark_input_shape: Shape for benchmark inputs
            benchmark_steps: Number of benchmark inference steps

        Returns:
            Tuple of (quantized model, benchmark results)
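
        Example (illustrative sketch; the checkpoint below is an arbitrary
        public model id, not a default of this package):
            model, results = QuantizationAPI.quantize_model(
                "facebook/opt-125m",
                bits=4,
                group_size=128,
                benchmark=False,
            )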
        """
        try:
            logger.log_info(f"Starting model quantization with {bits} bits")
            memory_tracker.log_memory("quantization_start")

            # Validate parameters
            if bits not in SUPPORTED_GGUF_BITS:
                raise ValueError(f"Unsupported bits: {bits}. Supported values: {SUPPORTED_GGUF_BITS}")

            if quant_type and quant_type not in SUPPORTED_GGUF_TYPES.get(bits, []):
                raise ValueError(f"Unsupported quant_type: {quant_type} for {bits} bits")

            # Initialize quantizer
            quantizer = GGUFQuantizer(
                model_name=model_name_or_path,
                bits=bits,
                group_size=group_size,
                quant_type=quant_type,
                use_packed=use_packed,
                cpu_offload=cpu_offload,
                desc_act=desc_act,
                desc_ten=desc_ten,
                legacy_format=legacy_format,
                batch_size=batch_size,
                device=device
            )

            # Perform quantization
            logger.log_info("Starting quantization process")
            quantized_model = quantizer.quantize(calibration_data)
            memory_tracker.log_memory("quantization_complete")

            # Run benchmarks if requested
            benchmark_results = {}
            if benchmark:
                logger.log_info("Running benchmarks")
                if not benchmark_input_shape:
                    # Default shape based on model config
                    if hasattr(quantized_model.config, 'max_position_embeddings'):
                        seq_len = min(32, quantized_model.config.max_position_embeddings)
                    else:
                        seq_len = 32
                    benchmark_input_shape = (1, seq_len)

                benchmarker = QuantizationBenchmark(
                    model=quantized_model,
                    calibration_data=calibration_data,
                    input_shape=benchmark_input_shape,
                    num_inference_steps=benchmark_steps,
                    device=device
                )

                benchmark_results = benchmarker.run_all_benchmarks()
                memory_tracker.log_memory("benchmarking_complete")

                # Log benchmark summary
                logger.log_info("Benchmark Results:")
                for metric, value in benchmark_results.items():
                    logger.log_info(f"{metric}: {value}")

            return quantized_model, benchmark_results

        except Exception as e:
            logger.log_error(f"Quantization failed: {str(e)}")
            raise
        finally:
            memory_tracker.clear_memory()

    @staticmethod
    def save_quantized_model(
        model: PreTrainedModel,
        output_path: str,
        save_tokenizer: bool = True
    ):
        """
        Save a quantized model and optionally its tokenizer.

        Args:
            model: Quantized model to save
            output_path: Path to save the model
            save_tokenizer: Whether to save the tokenizer
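
        Example (illustrative; the output directory is a placeholder):
            QuantizationAPI.save_quantized_model(model, "./opt-125m-4bit")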
        """
        try:
            logger.log_info(f"Saving quantized model to {output_path}")
            memory_tracker.log_memory("save_start")

            # Save model
            model.save_pretrained(output_path)

            # Save tokenizer if requested
            if save_tokenizer and hasattr(model, 'config'):
                if hasattr(model.config, '_name_or_path'):
                    try:
                        tokenizer = AutoTokenizer.from_pretrained(
                            model.config._name_or_path,
                            trust_remote_code=True
                        )
                        tokenizer.save_pretrained(output_path)
                        logger.log_info("Tokenizer saved successfully")
                    except Exception as e:
                        logger.log_warning(f"Failed to save tokenizer: {e}")

            memory_tracker.log_memory("save_complete")
            logger.log_info("Model saved successfully")

        except Exception as e:
            logger.log_error(f"Failed to save model: {str(e)}")
            raise
        finally:
            memory_tracker.clear_memory()

    @staticmethod
    def convert_to_gguf(
        model: PreTrainedModel,
        output_path: str,
        quant_config: Optional[Dict[str, Any]] = None
    ):
        """
        Convert a quantized model to GGUF format.

        Args:
            model: Model to convert
            output_path: Path to save the GGUF file
            quant_config: Optional quantization configuration
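
        Example (illustrative; the file name and config values are placeholders):
            QuantizationAPI.convert_to_gguf(
                model,
                "model-q4.gguf",
                quant_config={"bits": 4, "group_size": 128, "use_packed": True},
            )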
        """
        try:
            logger.log_info(f"Converting model to GGUF format: {output_path}")
            memory_tracker.log_memory("conversion_start")

            # Get quantization config from model if not provided
            if not quant_config and hasattr(model.config, 'quantization_config'):
                quant_config = model.config.quantization_config

            # Create quantizer with existing or default config
            quantizer = GGUFQuantizer(
                model_name=model,
                bits=quant_config.get('bits', 4) if quant_config else 4,
                group_size=quant_config.get('group_size', 128) if quant_config else 128,
                quant_type=quant_config.get('quant_type', None) if quant_config else None,
                use_packed=quant_config.get('use_packed', True) if quant_config else True
            )

            # Convert to GGUF
            quantizer.convert_to_gguf(output_path)
            memory_tracker.log_memory("conversion_complete")
            logger.log_info("GGUF conversion completed successfully")

        except Exception as e:
            logger.log_error(f"GGUF conversion failed: {str(e)}")
            raise
        finally:
            memory_tracker.clear_memory()
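

# Minimal end-to-end sketch (illustrative only): the checkpoint name and output
# paths are placeholders, and the module's relative imports mean it must be run
# with `python -m` from its parent package.
if __name__ == "__main__":
    quantized, _ = QuantizationAPI.quantize_model(
        "facebook/opt-125m",
        bits=4,
        group_size=128,
        benchmark=False,
    )
    QuantizationAPI.save_quantized_model(quantized, "./opt-125m-4bit")
    QuantizationAPI.convert_to_gguf(quantized, "./opt-125m-4bit/model-q4.gguf")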