Skip to content

Commit

Permalink
Improve Python binding (#274)
Browse files Browse the repository at this point in the history
* Refactor Python binding
  • Loading branch information
fvaleye authored May 28, 2021
1 parent 9866f27 commit f6591b5
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 52 deletions.
1 change: 1 addition & 0 deletions python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ project-url = { Repo = "https://github.com/delta-io/delta-rs" }
requires-dist = [
"pyarrow>=4",
'numpy<1.20.0;python_version<="3.6"',
'dataclasses;python_version<="3.6"',
"pandas; extra =='pandas'",
"mypy; extra == 'devel'",
"isort; extra == 'devel'",
Expand Down
2 changes: 1 addition & 1 deletion python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
VENV := venv


.PHON: setup-venv
.PHONY: setup-venv
setup-venv: ## Setup the virtualenv
$(info --- Setup virtualenv ---)
python -m venv $(VENV)
Expand Down
77 changes: 26 additions & 51 deletions python/deltalake/schema.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import pyarrow
Expand All @@ -7,23 +8,17 @@
# https://github.com/delta-io/delta-rs/issues/95


@dataclass
class DataType:
"""
Base class of all Delta data types.
"""

def __init__(self, type_class: str):
self.type = type_class
type: str

def __str__(self) -> str:
return f"DataType({self.type})"

def __repr__(self) -> str:
return self.__str__()

def __eq__(self, other: "DataType") -> bool: # type: ignore
return self.type == other.type

@classmethod
def from_dict(cls, json_dict: Dict[str, Any]) -> "DataType":
"""
Expand Down Expand Up @@ -73,9 +68,15 @@ def from_dict(cls, json_dict: Dict[str, Any]) -> "DataType":
return DataType(type_class)


@dataclass(init=False)
class MapType(DataType):
"""Concrete class for map data types."""

key_type: DataType
value_type: DataType
value_contains_null: bool
type: str

def __init__(
self, key_type: "DataType", value_type: "DataType", value_contains_null: bool
):
Expand All @@ -84,93 +85,67 @@ def __init__(
self.value_type = value_type
self.value_contains_null = value_contains_null

def __eq__(self, other: "DataType") -> bool: # type: ignore
return (
isinstance(other, MapType)
and self.key_type == other.key_type
and self.value_type == other.value_type
and self.value_contains_null == other.value_contains_null
)

def __str__(self) -> str:
return f"DataType(map<{self.key_type}, {self.value_type}, {self.value_contains_null}>)"


# NOTE(review): this span is taken from a rendered diff of
# python/deltalake/schema.py in which the +/- markers and the indentation were
# stripped, so lines removed and lines added by the commit appear side by side.
@dataclass(init=False)
class ArrayType(DataType):
"""Concrete class for array data types."""

# Declared fields. init=False on the decorator means no constructor is
# generated from these declarations; the hand-written __init__ below is used.
element_type: DataType
contains_null: bool
type: str

def __init__(self, element_type: DataType, contains_null: bool):
# Hands the literal "array" to the DataType initializer, then stores both
# arguments unchanged on the instance.
super().__init__("array")
self.element_type = element_type
self.contains_null = contains_null

# Equal only when `other` is an ArrayType whose element_type and contains_null
# both match; any other object compares unequal instead of raising.
# NOTE(review): presumably this method is on the removed side of the diff,
# superseded by the comparison the dataclass decorator generates - confirm
# against the commit itself.
def __eq__(self, other: "DataType") -> bool: # type: ignore
return (
isinstance(other, ArrayType)
and self.element_type == other.element_type
and self.contains_null == other.contains_null
)

# Renders as "DataType(array<element_type> contains_null)".
def __str__(self) -> str:
return f"DataType(array<{self.element_type}> {self.contains_null})"


# NOTE(review): this span is taken from a rendered diff of
# python/deltalake/schema.py in which the +/- markers and the indentation were
# stripped, so lines removed and lines added by the commit appear side by side.
@dataclass(init=False)
class StructType(DataType):
"""Concrete class for struct data types."""

# Declared fields. "Field" is quoted because that class is defined further
# down in the same file. init=False means the hand-written __init__ is used.
fields: List["Field"]
type: str

def __init__(self, fields: List["Field"]):
# Hands the literal "struct" to the DataType initializer and keeps a
# reference to the caller's list (no copy is made).
super().__init__("struct")
self.fields = fields

# Equal only when `other` is a StructType and the two field lists compare equal.
# NOTE(review): presumably this method is on the removed side of the diff,
# superseded by the comparison the dataclass decorator generates - confirm
# against the commit itself.
def __eq__(self, other: "DataType") -> bool: # type: ignore
return isinstance(other, StructType) and self.fields == other.fields

# Renders as "DataType(struct<f1, f2, ...>)", using str() of every field.
def __str__(self) -> str:
field_strs = [str(f) for f in self.fields]
return f"DataType(struct<{', '.join(field_strs)}>)"


# NOTE(review): this span is taken from a rendered diff of
# python/deltalake/schema.py in which the +/- markers and the indentation were
# stripped, so lines removed and lines added by the commit appear side by side.
@dataclass
class Field:
"""Create a DeltaTable Field instance."""

# Hand-written constructor: stores the four arguments unchanged.
# NOTE(review): it takes the same names, types and default as the annotated
# declarations that follow it, so it is presumably the removed side of the
# diff - confirm against the commit itself.
def __init__(
self,
name: str,
type: DataType,
nullable: bool,
metadata: Optional[Dict[str, str]] = None,
):
self.type = type
self.name = name
self.nullable = nullable
self.metadata = metadata
# Dataclass field declarations; metadata is the only field with a default (None).
name: str
type: DataType
nullable: bool
metadata: Optional[Dict[str, str]] = None

# Renders as "Field(name: type nullable(...) metadata(...))".
def __str__(self) -> str:
return f"Field({self.name}: {self.type} nullable({self.nullable}) metadata({self.metadata}))"

# Compares type, name, nullable and metadata attribute by attribute. There is
# no isinstance guard, so an `other` lacking one of these attributes raises
# AttributeError instead of comparing unequal.
# NOTE(review): presumably on the removed side of the diff - confirm.
def __eq__(self, other: "Field") -> bool: # type: ignore
return (
self.type == other.type
and self.name == other.name
and self.nullable == other.nullable
and self.metadata == other.metadata
)


@dataclass
class Schema:
"""Create a DeltaTable Schema instance."""

def __init__(self, fields: List[Field], json_value: Dict[str, Any]):
self.fields = fields
self.json_value = json_value
fields: List[Field]
json_value: Dict[str, Any]

def __str__(self) -> str:
field_strs = [str(f) for f in self.fields]
return f"Schema({', '.join(field_strs)})"

def __repr__(self) -> str:
return self.__str__()

def json(self) -> Dict[str, Any]:
return self.json_value

Expand Down
3 changes: 3 additions & 0 deletions python/deltalake/table.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from dataclasses import dataclass
from typing import Any, List, Optional, Tuple
from urllib.parse import urlparse

Expand All @@ -9,6 +10,7 @@
from .schema import Schema, pyarrow_schema_from_json


@dataclass(init=False)
class Metadata:
"""Create a Metadata instance."""

Expand Down Expand Up @@ -55,6 +57,7 @@ def __str__(self) -> str:
)


@dataclass(init=False)
class DeltaTable:
"""Create a DeltaTable instance."""

Expand Down

0 comments on commit f6591b5

Please sign in to comment.