Skip to content

Commit

Permalink
Cap decimals to the maximum Parquet's precision
Browse files Browse the repository at this point in the history
  • Loading branch information
exAspArk committed Dec 11, 2024
1 parent e576386 commit bc4a862
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 11 deletions.
2 changes: 1 addition & 1 deletion src/init_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ var TEST_PG_SCHEMA_COLUMNS = []PgSchemaColumn{
ColumnName: "numeric_column",
DataType: "numeric",
UdtName: "numeric",
NumericPrecision: "10",
NumericPrecision: "40", // Will be capped to 38
NumericScale: "2",
Namespace: "pg_catalog",
},
Expand Down
2 changes: 1 addition & 1 deletion src/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"time"
)

const VERSION = "0.22.4"
const VERSION = "0.22.5"

func main() {
config := LoadConfig()
Expand Down
21 changes: 13 additions & 8 deletions src/pg_schema_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ const (
PARQUET_SCHEMA_REPETITION_TYPE_REQUIRED = "REQUIRED"
PARQUET_SCHEMA_REPETITION_TYPE_OPTIONAL = "OPTIONAL"

PARQUET_NAN = "NaN"
PARQUET_NAN = "NaN"
PARQUET_MAX_PRECISION = 38

// 0000-01-01 00:00:00 +0000 UTC
EPOCH_TIME_MS = -62167219200000
Expand Down Expand Up @@ -107,7 +108,7 @@ func (pgSchemaColumn PgSchemaColumn) ToParquetSchemaFieldMap() map[string]interf
func (pgSchemaColumn PgSchemaColumn) ToIcebergSchemaFieldMap() IcebergSchemaField {
icebergSchemaField := IcebergSchemaField{}

id, err := strconv.Atoi(pgSchemaColumn.OrdinalPosition)
id, err := StringToInt(pgSchemaColumn.OrdinalPosition)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -183,13 +184,17 @@ func (pgSchemaColumn *PgSchemaColumn) toParquetSchemaField() ParquetSchemaField
// Set other field properties
switch pgSchemaColumn.UdtName {
case "numeric":
parquetSchemaField.Scale = pgSchemaColumn.NumericScale
parquetSchemaField.Precision = pgSchemaColumn.NumericPrecision
scale, err := strconv.Atoi(pgSchemaColumn.NumericScale)
scale, err := StringToInt(pgSchemaColumn.NumericScale)
PanicIfError(err)
precision, err := strconv.Atoi(pgSchemaColumn.NumericPrecision)
precision, err := StringToInt(pgSchemaColumn.NumericPrecision)
PanicIfError(err)
parquetSchemaField.Length = strconv.Itoa(scale + precision)
if precision > PARQUET_MAX_PRECISION {
precision = PARQUET_MAX_PRECISION
}

parquetSchemaField.Scale = IntToString(scale)
parquetSchemaField.Precision = IntToString(precision)
parquetSchemaField.Length = IntToString(scale + precision)
case "uuid":
parquetSchemaField.Length = "36"
default:
Expand All @@ -215,7 +220,7 @@ func (pgSchemaColumn *PgSchemaColumn) parquetPrimitiveValue(value string) interf
trimmedValue := strings.TrimRight(value, " ")
return trimmedValue
case "int2", "int4":
intValue, err := strconv.Atoi(value)
intValue, err := StringToInt(value)
PanicIfError(err)
return int32(intValue)
case "int8":
Expand Down
7 changes: 6 additions & 1 deletion src/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"golang.org/x/crypto/pbkdf2"
"os"
"strconv"
)

func PanicIfError(err error, message ...string) {
Expand All @@ -32,7 +33,11 @@ func DeleteTemporaryFile(file *os.File) {
}

func IntToString(i int) string {
return fmt.Sprintf("%d", i)
return strconv.Itoa(i)
}

func StringToInt(s string) (int, error) {
return strconv.Atoi(s)
}

func StringToScramSha256(password string) string {
Expand Down

0 comments on commit bc4a862

Please sign in to comment.