diff --git a/python/Cargo.toml b/python/Cargo.toml index 66d5871d8e..4019323d4f 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -42,6 +42,7 @@ project-url = { Repo = "https://github.com/delta-io/delta-rs" } requires-dist = [ "pyarrow>=4", 'numpy<1.20.0;python_version<="3.6"', + 'dataclasses;python_version<="3.6"', "pandas; extra =='pandas'", "mypy; extra == 'devel'", "isort; extra == 'devel'", diff --git a/python/Makefile b/python/Makefile index 34c9c9976d..71d9c7ef02 100644 --- a/python/Makefile +++ b/python/Makefile @@ -3,7 +3,7 @@ VENV := venv -.PHON: setup-venv +.PHONY: setup-venv setup-venv: ## Setup the virtualenv $(info --- Setup virtualenv ---) python -m venv $(VENV) diff --git a/python/deltalake/schema.py b/python/deltalake/schema.py index 9260d88f03..9ac5123098 100644 --- a/python/deltalake/schema.py +++ b/python/deltalake/schema.py @@ -1,4 +1,5 @@ import json +from dataclasses import dataclass from typing import Any, Dict, List, Optional import pyarrow @@ -7,23 +8,17 @@ # https://github.com/delta-io/delta-rs/issues/95 +@dataclass class DataType: """ Base class of all Delta data types. """ - def __init__(self, type_class: str): - self.type = type_class + type: str def __str__(self) -> str: return f"DataType({self.type})" - def __repr__(self) -> str: - return self.__str__() - - def __eq__(self, other: "DataType") -> bool: # type: ignore - return self.type == other.type - @classmethod def from_dict(cls, json_dict: Dict[str, Any]) -> "DataType": """ @@ -73,9 +68,15 @@ def from_dict(cls, json_dict: Dict[str, Any]) -> "DataType": return DataType(type_class) +@dataclass(init=False) class MapType(DataType): """Concrete class for map data types.""" + key_type: DataType + value_type: DataType + value_contains_null: bool + type: str + def __init__( self, key_type: "DataType", value_type: "DataType", value_contains_null: bool ): @@ -84,93 +85,67 @@ def __init__( self.value_type = value_type self.value_contains_null = value_contains_null - def __eq__(self, other: "DataType") -> bool: # type: ignore - return ( - isinstance(other, MapType) - and self.key_type == other.key_type - and self.value_type == other.value_type - and self.value_contains_null == other.value_contains_null - ) - def __str__(self) -> str: return f"DataType(map<{self.key_type}, {self.value_type}, {self.value_contains_null}>)" +@dataclass(init=False) class ArrayType(DataType): """Concrete class for array data types.""" + element_type: DataType + contains_null: bool + type: str + def __init__(self, element_type: DataType, contains_null: bool): super().__init__("array") self.element_type = element_type self.contains_null = contains_null - def __eq__(self, other: "DataType") -> bool: # type: ignore - return ( - isinstance(other, ArrayType) - and self.element_type == other.element_type - and self.contains_null == other.contains_null - ) - def __str__(self) -> str: return f"DataType(array<{self.element_type}> {self.contains_null})" +@dataclass(init=False) class StructType(DataType): """Concrete class for struct data types.""" + fields: List["Field"] + type: str + def __init__(self, fields: List["Field"]): super().__init__("struct") self.fields = fields - def __eq__(self, other: "DataType") -> bool: # type: ignore - return isinstance(other, StructType) and self.fields == other.fields - def __str__(self) -> str: field_strs = [str(f) for f in self.fields] return f"DataType(struct<{', '.join(field_strs)}>)" +@dataclass class Field: """Create a DeltaTable Field instance.""" - def __init__( - self, - name: str, - type: DataType, - nullable: bool, - metadata: Optional[Dict[str, str]] = None, - ): - self.type = type - self.name = name - self.nullable = nullable - self.metadata = metadata + name: str + type: DataType + nullable: bool + metadata: Optional[Dict[str, str]] = None def __str__(self) -> str: return f"Field({self.name}: {self.type} nullable({self.nullable}) metadata({self.metadata}))" - def __eq__(self, other: "Field") -> bool: # type: ignore - return ( - self.type == other.type - and self.name == other.name - and self.nullable == other.nullable - and self.metadata == other.metadata - ) - +@dataclass class Schema: """Create a DeltaTable Schema instance.""" - def __init__(self, fields: List[Field], json_value: Dict[str, Any]): - self.fields = fields - self.json_value = json_value + fields: List[Field] + json_value: Dict[str, Any] def __str__(self) -> str: field_strs = [str(f) for f in self.fields] return f"Schema({', '.join(field_strs)})" - def __repr__(self) -> str: - return self.__str__() - def json(self) -> Dict[str, Any]: return self.json_value diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 53e387159a..1d64acf020 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -1,4 +1,5 @@ import os +from dataclasses import dataclass from typing import Any, List, Optional, Tuple from urllib.parse import urlparse @@ -9,6 +10,7 @@ from .schema import Schema, pyarrow_schema_from_json +@dataclass(init=False) class Metadata: """Create a Metadata instance.""" @@ -55,6 +57,7 @@ def __str__(self) -> str: ) +@dataclass(init=False) class DeltaTable: """Create a DeltaTable instance."""