Skip to content

Commit

Permalink
Adjust Parquet row group size
Browse files (browse the repository at this point in the history)
  • Loading branch information
exAspArk committed Dec 10, 2024
1 parent ba0818d commit 91d48b6
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 16 deletions.
4 changes: 1 addition & 3 deletions scripts/install.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#!/bin/bash

VERSION="0.22.1"

# Detect OS and architecture
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
ARCH=$(uname -m)
Expand All @@ -22,7 +20,7 @@ esac

# Set the download URL and binary name
BINARY_NAME="bemidb-${OS}-${ARCH}"
DOWNLOAD_URL="https://github.com/BemiHQ/BemiDB/releases/download/v$VERSION/$BINARY_NAME"
DOWNLOAD_URL="https://github.com/BemiHQ/BemiDB/releases/latest/download/$BINARY_NAME"

# Download the binary
echo "Downloading $DOWNLOAD_URL..."
Expand Down
2 changes: 1 addition & 1 deletion src/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"time"
)

const VERSION = "0.22.1"
const VERSION = "0.22.2"

func main() {
config := LoadConfig()
Expand Down
12 changes: 0 additions & 12 deletions src/storage.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,5 @@
package main

import (
"github.com/xitongsys/parquet-go/parquet"
)

// Parquet writer settings and the version-hint file name, as declared in
// storage.go before this commit.
const (
// NOTE(review): presumably the parallelism used when creating the Parquet
// writer; its use is not visible in this hunk — confirm.
PARQUET_PARALLEL_NUMBER = 4
// Row group size before this commit.
PARQUET_ROW_GROUP_SIZE = 128 * 1024 * 1024 // 128 MB
// ZSTD compression codec from the parquet-go package.
PARQUET_COMPRESSION_TYPE = parquet.CompressionCodec_ZSTD

// NOTE(review): presumably the name of a table version hint file; its use is
// not visible in this hunk — confirm.
VERSION_HINT_FILE_NAME = "version-hint.text"
)

// STORAGE_TYPES lists the two storage type identifiers declared elsewhere in
// the package (local and S3).
// NOTE(review): presumably used to validate the configured storage type — confirm.
var STORAGE_TYPES = []string{STORAGE_TYPE_LOCAL, STORAGE_TYPE_S3}

type ParquetFileStats struct {
Expand Down
14 changes: 14 additions & 0 deletions src/storage_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,21 @@ import (

"github.com/google/uuid"
"github.com/linkedin/goavro"
"github.com/xitongsys/parquet-go/parquet"
"github.com/xitongsys/parquet-go/reader"
"github.com/xitongsys/parquet-go/schema"
"github.com/xitongsys/parquet-go/source"
"github.com/xitongsys/parquet-go/writer"
)

// Parquet writer settings and the version-hint file name used by the storage
// code.
const (
// NOTE(review): presumably the parallelism used when creating the Parquet
// writer; its use is not visible in this hunk — confirm.
PARQUET_PARALLEL_NUMBER = 4
// Assigned to parquetWriter.RowGroupSize in WriteParquetFile.
PARQUET_ROW_GROUP_SIZE = 64 * 1024 * 1024 // 64 MB
// Assigned to parquetWriter.CompressionType in WriteParquetFile (ZSTD codec).
PARQUET_COMPRESSION_TYPE = parquet.CompressionCodec_ZSTD

// NOTE(review): presumably the name of a table version hint file; its use is
// not visible in this hunk — confirm.
VERSION_HINT_FILE_NAME = "version-hint.text"
)

// StorageBase is the receiver type for storage methods such as
// WriteParquetFile.
type StorageBase struct {
// config is passed to LogDebug by WriteParquetFile.
config *Config
}
Expand All @@ -43,6 +52,7 @@ func (storage *StorageBase) WriteParquetFile(fileWriter source.ParquetFile, pgSc

parquetWriter.RowGroupSize = PARQUET_ROW_GROUP_SIZE
parquetWriter.CompressionType = PARQUET_COMPRESSION_TYPE
totalRowCount := 0

rows := loadRows()
for len(rows) > 0 {
Expand All @@ -59,9 +69,13 @@ func (storage *StorageBase) WriteParquetFile(fileWriter source.ParquetFile, pgSc
}
recordCount++
}
totalRowCount += len(rows)
LogDebug(storage.config, "Wrote", totalRowCount, "rows to Parquet file...")

rows = loadRows()
}

LogDebug(storage.config, "Stopping Parquet writer...")
if err := parquetWriter.WriteStop(); err != nil {
return 0, fmt.Errorf("Failed to stop Parquet writer: %v", err)
}
Expand Down

0 comments on commit 91d48b6

Please sign in to comment.