Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[duckdb] Created a CLI tool to interact with DuckDBDaVinciRecordTransformer #1473

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions integrations/venice-duckdb/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies {

api project(':clients:da-vinci-client')
api project(':internal:venice-client-common')
api project(':clients:venice-thin-client')

api project(':internal:venice-common')

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package com.linkedin.venice.duckdb;

import static com.linkedin.venice.ConfigKeys.DA_VINCI_CURRENT_VERSION_BOOTSTRAPPING_SPEEDUP_ENABLED;

import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
import com.linkedin.d2.balancer.D2Client;
import com.linkedin.d2.balancer.D2ClientBuilder;
import com.linkedin.davinci.client.DaVinciClient;
import com.linkedin.davinci.client.DaVinciConfig;
import com.linkedin.davinci.client.DaVinciRecordTransformerConfig;
import com.linkedin.davinci.client.factory.CachingDaVinciClientFactory;
import com.linkedin.venice.D2.D2ClientUtils;
import com.linkedin.venice.utils.PropertyBuilder;
import com.linkedin.venice.utils.VeniceProperties;
import io.tehuti.metrics.MetricsRepository;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.commons.io.IOUtils;


public class DuckDBDaVinciRecordTransformerExample {
kvargha marked this conversation as resolved.
Show resolved Hide resolved
public static void main(String[] args) throws Exception {
DaVinciConfig clientConfig = new DaVinciConfig();
String clusterDiscoveryD2ServiceName = "venice-discovery";
MetricsRepository metricsRepository = new MetricsRepository();
String zkHosts = "zk-example-url.com:12913";
kvargha marked this conversation as resolved.
Show resolved Hide resolved

VeniceProperties backendConfig =
new PropertyBuilder().put(DA_VINCI_CURRENT_VERSION_BOOTSTRAPPING_SPEEDUP_ENABLED, true).build();

String outputDir = "/example/output/dir";
String storeName = "example_store_name";
kvargha marked this conversation as resolved.
Show resolved Hide resolved
Set<String> columnsToProject = Collections.emptySet();

String schema = IOUtils.toString(
Objects.requireNonNull(
DuckDBDaVinciRecordTransformerExample.class.getClassLoader().getResourceAsStream("ValueSchema.avsc")),
StandardCharsets.UTF_8);
Schema outputSchema = AvroCompatibilityHelper.parse(schema);
kvargha marked this conversation as resolved.
Show resolved Hide resolved

DaVinciRecordTransformerConfig recordTransformerConfig = new DaVinciRecordTransformerConfig(
(storeVersion, keySchema, inputValueSchema, outputValueSchema) -> new DuckDBDaVinciRecordTransformer(
storeVersion,
keySchema,
inputValueSchema,
outputValueSchema,
false,
outputDir,
storeName,
columnsToProject),
GenericRecord.class,
outputSchema);
clientConfig.setRecordTransformerConfig(recordTransformerConfig);

D2Client d2Client = new D2ClientBuilder().setZkHosts(zkHosts)
.setZkSessionTimeout(3, TimeUnit.SECONDS)
.setZkStartupTimeout(3, TimeUnit.SECONDS)
.build();
D2ClientUtils.startClient(d2Client);

try (CachingDaVinciClientFactory factory =
new CachingDaVinciClientFactory(d2Client, clusterDiscoveryD2ServiceName, metricsRepository, backendConfig)) {
DaVinciClient<Integer, Object> clientWithRecordTransformer =
factory.getAndStartGenericAvroClient(storeName, clientConfig);

// Data will get written to DucksDB
clientWithRecordTransformer.subscribeAll().get();
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"type" : "record",
"name" : "nameRecord",
"namespace" : "example.avro",
"fields" : [ {
"name" : "firstName",
"type" : "string",
"default" : "",
"custom_prop" : "custom_prop_value_2, custom_prop_value_1"
}, {
"name" : "lastName",
"type" : "string",
"default" : "",
"custom_prop" : "custom_prop_value_2, custom_prop_value_1"
}, {
"name" : "age",
"type" : "int",
"default" : -1,
"custom_prop" : "custom_prop_value_2, custom_prop_value_1"
} ]
}
Loading