Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1011,4 +1011,28 @@ <VOut> KErrorStreamX<K, V, K, VOut> processValuesCapturingErrors(
FixedKeyProcessorSupplier<? super K, ? super V, VOut> processorSupplier,
java.util.function.Predicate<Exception> errorFilter,
Named named, String... stateStoreNames);

/**
* Add lineage information to each record in the stream. This will add the following headers
* <ul>
* <li>{@link LineageHeaders#TOPIC_HEADER}</li>
* <li>{@link LineageHeaders#PARTITION_HEADER}</li>
* <li>{@link LineageHeaders#OFFSET_HEADER}</li>
* </ul>
*
* @return stream with added headers
*/
KStreamX<K, V> withLineage();

/**
* Add lineage information to each record in the stream. This will add the following headers
* <ul>
* <li>{@link LineageHeaders#TOPIC_HEADER}</li>
* <li>{@link LineageHeaders#PARTITION_HEADER}</li>
* <li>{@link LineageHeaders#OFFSET_HEADER}</li>
* </ul>
* @param named a {@link Named} config used to name the processor in the topology
* @return stream with added headers
*/
KStreamX<K, V> withLineage(Named named);
}
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,16 @@ public <VOut> KErrorStreamX<K, V, K, VOut> processValuesCapturingErrors(
ErrorCapturingValueProcessor.captureErrors(processorSupplier, errorFilter), named, stateStoreNames);
}

@Override
public KStreamX<K, V> withLineage() {
return this.processValues(LineageProcessor::new);
}

@Override
public KStreamX<K, V> withLineage(final Named named) {
return this.processValues(LineageProcessor::new, named);
}

private <KR, VR> KeyValueKErrorStreamX<K, V, KR, VR> mapCapturingErrorsInternal(
final KeyValueMapper<K, V, KeyValue<KR, ProcessedKeyValue<K, V, VR>>> mapper) {
final KStreamX<KR, ProcessedKeyValue<K, V, VR>> map = this.map(mapper);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,4 +429,55 @@ <VR, KO, VO> KTableX<K, VR> leftJoin(KTable<KO, VO> other, BiFunction<K, V, KO>
<VR, KO, VO> KTableX<K, VR> leftJoin(KTable<KO, VO> other, BiFunction<K, V, KO> foreignKeyExtractor,
ValueJoiner<V, VO, VR> joiner, TableJoined<K, KO> tableJoined,
MaterializedX<K, VR, KeyValueStore<Bytes, byte[]>> materialized);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the use case for adding lineage to a KTable?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can create a table directly via the StreamsBuilder. And because there is also the transformer API available, I added it for completeness

/**
* Add lineage information to each record in the stream. This will add the following headers
* <ul>
* <li>{@link LineageHeaders#TOPIC_HEADER}</li>
* <li>{@link LineageHeaders#PARTITION_HEADER}</li>
* <li>{@link LineageHeaders#OFFSET_HEADER}</li>
* </ul>
*
* @return stream with added headers
*/
KTableX<K, V> withLineage();

/**
* Add lineage information to each record in the stream. This will add the following headers
* <ul>
* <li>{@link LineageHeaders#TOPIC_HEADER}</li>
* <li>{@link LineageHeaders#PARTITION_HEADER}</li>
* <li>{@link LineageHeaders#OFFSET_HEADER}</li>
* </ul>
* @param named a {@link Named} config used to name the processor in the topology
* @return stream with added headers
*/
KTableX<K, V> withLineage(Named named);

/**
* Add lineage information to each record in the stream. This will add the following headers
* <ul>
* <li>{@link LineageHeaders#TOPIC_HEADER}</li>
* <li>{@link LineageHeaders#PARTITION_HEADER}</li>
* <li>{@link LineageHeaders#OFFSET_HEADER}</li>
* </ul>
* @param materialized an instance of {@link Materialized} used to describe how the state store of the resulting
* table should be materialized. Cannot be {@code null}
* @return stream with added headers
*/
KTableX<K, V> withLineage(MaterializedX<K, V, KeyValueStore<Bytes, byte[]>> materialized);

/**
* Add lineage information to each record in the stream. This will add the following headers
* <ul>
* <li>{@link LineageHeaders#TOPIC_HEADER}</li>
* <li>{@link LineageHeaders#PARTITION_HEADER}</li>
* <li>{@link LineageHeaders#OFFSET_HEADER}</li>
* </ul>
* @param materialized an instance of {@link Materialized} used to describe how the state store of the resulting
* table should be materialized. Cannot be {@code null}
* @param named a {@link Named} config used to name the processor in the topology
* @return stream with added headers
*/
KTableX<K, V> withLineage(MaterializedX<K, V, KeyValueStore<Bytes, byte[]>> materialized, Named named);
}
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,27 @@ public <VR, KO, VO> KTableX<K, VR> leftJoin(final KTable<KO, VO> other,
materialized.configure(this.context.getConfigurator()));
}

@Override
public KTableX<K, V> withLineage() {
return this.transformValues(LineageTransformer::new);
}

@Override
public KTableX<K, V> withLineage(final Named named) {
return this.transformValues(LineageTransformer::new, named);
}

@Override
public KTableX<K, V> withLineage(final MaterializedX<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
return this.transformValues(LineageTransformer::new, materialized);
}

@Override
public KTableX<K, V> withLineage(final MaterializedX<K, V, KeyValueStore<Bytes, byte[]>> materialized,
final Named named) {
return this.transformValues(LineageTransformer::new, materialized, named);
}

@Override
public String queryableStoreName() {
return this.wrapped.queryableStoreName();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* MIT License
*
* Copyright (c) 2025 bakdata
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package com.bakdata.kafka.streams.kstream;

import java.nio.charset.StandardCharsets;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.experimental.Accessors;
import org.apache.kafka.common.header.Headers;

/**
* Configure headers for data lineage of Kafka messages
*/
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class LineageHeaders {
private static final String LINEAGE_PREFIX = "lineage.";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should use a namespace that contains streams-bootstrap or make the header names configurable

/**
* Header indicating the topic the record was read from.
*/
public static final String TOPIC_HEADER = LINEAGE_PREFIX + "topic";
/**
* Header indicating the partition the record was read from.
*/
public static final String PARTITION_HEADER = LINEAGE_PREFIX + "partition";
/**
* Header indicating the offset the record was read from.
*/
public static final String OFFSET_HEADER = LINEAGE_PREFIX + "offset";

@Getter(AccessLevel.PACKAGE)
@Accessors(fluent = true)
private final @NonNull Headers headers;

LineageHeaders addTopicHeader(final String topic) {
if (topic == null) {
return this;
}
return new LineageHeaders(this.headers.add(TOPIC_HEADER, topic.getBytes(StandardCharsets.UTF_8)));
}

LineageHeaders addPartitionHeader(final int partition) {
if (partition < 0) {
return this;
}
//TODO serialize more compact as int? But then UI tools usually can't handle it
return new LineageHeaders(
this.headers.add(PARTITION_HEADER, Integer.toString(partition).getBytes(StandardCharsets.UTF_8)));
}

LineageHeaders addOffsetHeader(final long offset) {
if (offset < 0) {
return this;
}
//TODO serialize more compact as long? But then UI tools usually can't handle it
return new LineageHeaders(
this.headers.add(OFFSET_HEADER, Long.toString(offset).getBytes(StandardCharsets.UTF_8)));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* MIT License
*
* Copyright (c) 2025 bakdata
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package com.bakdata.kafka.streams.kstream;

import java.util.Optional;
import org.apache.kafka.common.header.Headers;
import org.apache.kafka.common.header.internals.RecordHeaders;
import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
import org.apache.kafka.streams.processor.api.FixedKeyProcessorContext;
import org.apache.kafka.streams.processor.api.FixedKeyRecord;
import org.apache.kafka.streams.processor.api.RecordMetadata;

class LineageProcessor<K, V> implements FixedKeyProcessor<K, V, V> {
private FixedKeyProcessorContext<K, V> context;

private static Headers addHeaders(final Headers headers, final RecordMetadata metadata) {
return new LineageHeaders(new RecordHeaders(headers))
.addTopicHeader(metadata.topic())
.addPartitionHeader(metadata.partition())
.addOffsetHeader(metadata.offset())
.headers();
}

@Override
public void init(final FixedKeyProcessorContext<K, V> context) {
this.context = context;
}

@Override
public void process(final FixedKeyRecord<K, V> rekord) {
final Optional<RecordMetadata> metadata = this.context.recordMetadata();
final Headers headers = rekord.headers();
final Headers newHeaders = metadata.map(m -> addHeaders(headers, m))
.orElse(headers);
this.context.forward(rekord.withHeaders(newHeaders));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* MIT License
*
* Copyright (c) 2025 bakdata
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package com.bakdata.kafka.streams.kstream;

import org.apache.kafka.streams.kstream.ValueTransformerWithKey;
import org.apache.kafka.streams.processor.ProcessorContext;

class LineageTransformer<K, V> implements ValueTransformerWithKey<K, V, V> {

private ProcessorContext context;

@Override
public void init(final ProcessorContext context) {
this.context = context;
}

@Override
public V transform(final K readOnlyKey, final V value) {
new LineageHeaders(this.context.headers())
.addTopicHeader(this.context.topic())
.addPartitionHeader(this.context.partition())
.addOffsetHeader(this.context.offset());
return value;
}

@Override
public void close() {
// do nothing
}
}
Loading
Loading