Skip to content

Commit cbaca2e

Browse files
committed
Merge remote-tracking branch 'origin/master' into load-indices
2 parents 668ffeb + 3eac7fa commit cbaca2e

File tree

169 files changed

+8506
-1622
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

169 files changed

+8506
-1622
lines changed

.github/workflows/build-and-test.yml

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,15 @@ jobs:
6464
- name: Free up disk space
6565
run: |
6666
sudo apt-get remove 'dotnet-*' azure-cli || true
67+
sudo rm -rf /usr/local/.ghcup || true
68+
sudo rm -rf /usr/share/dotnet || true
69+
sudo rm -rf /usr/share/swift || true
70+
sudo rm -rf /usr/local/julia* || true
71+
sudo rm -rf /usr/local/share/powershell || true
72+
sudo rm -rf /usr/share/miniconda || true
6773
sudo rm -rf /usr/local/lib/android/ || true
68-
sudo docker image prune -a -f || true
74+
sudo docker system prune -a -f || true
75+
df -h
6976
- uses: szenius/[email protected]
7077
with:
7178
timezoneLinux: ${{ matrix.timezone }}
@@ -89,6 +96,13 @@ jobs:
8996
distribution: "zulu"
9097
java-version: 17
9198
- uses: gradle/actions/setup-gradle@v4
99+
- name: Disk Space Analysis
100+
run: |
101+
echo "=== Disk Usage Overview ==="
102+
df -h
103+
104+
echo -e "\n=== Docker Disk Usage ==="
105+
docker system df -v
92106
- name: Gradle build (and test) for NOT metadata ingestion
93107
if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
94108
# datahub-schematron:cli excluded due to dependency on metadata-ingestion
@@ -112,6 +126,13 @@ jobs:
112126
-x :metadata-integration:java:datahub-schematron:cli:test
113127
env:
114128
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
129+
- name: Disk Space Analysis
130+
run: |
131+
echo "=== Disk Usage Overview ==="
132+
df -h
133+
134+
echo -e "\n=== Docker Disk Usage ==="
135+
docker system df -v
115136
- name: Gradle build (and test) for frontend
116137
if: ${{ matrix.command == 'frontend' && needs.setup.outputs.frontend_change == 'true' }}
117138
run: |

.github/workflows/docker-unified.yml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,11 +422,17 @@ jobs:
422422
MIXPANEL_PROJECT_ID: ${{ secrets.MIXPANEL_PROJECT_ID }}
423423
steps:
424424
- name: Free up disk space
425-
if: ${{ !contains(needs.setup.outputs.test_runner_type, 'depot') }}
426425
run: |
427426
sudo apt-get remove 'dotnet-*' azure-cli || true
427+
sudo rm -rf /usr/local/.ghcup || true
428+
sudo rm -rf /usr/share/dotnet || true
429+
sudo rm -rf /usr/share/swift || true
430+
sudo rm -rf /usr/local/julia* || true
431+
sudo rm -rf /usr/local/share/powershell || true
432+
sudo rm -rf /usr/share/miniconda || true
428433
sudo rm -rf /usr/local/lib/android/ || true
429-
sudo docker image prune -a -f || true
434+
sudo docker system prune -a -f || true
435+
df -h
430436
431437
- uses: actions/cache/restore@v4
432438
with:
@@ -468,6 +474,14 @@ jobs:
468474
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
469475
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
470476

477+
- name: Disk Space Analysis
478+
run: |
479+
echo "=== Disk Usage Overview ==="
480+
df -h
481+
482+
echo -e "\n=== Docker Disk Usage ==="
483+
docker system df -v
484+
471485
- name: build images
472486
if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
473487
run: |
@@ -482,6 +496,13 @@ jobs:
482496
depot pull --project ${{ env.DEPOT_PROJECT_ID }} ${{ needs.base_build.outputs.build_id }}
483497
docker images
484498
499+
- name: Disk Space Analysis
500+
run: |
501+
echo "=== Disk Usage Overview ==="
502+
df -h
503+
504+
echo -e "\n=== Docker Disk Usage ==="
505+
docker system df -v
485506
- name: run quickstart
486507
env:
487508
DATAHUB_TELEMETRY_ENABLED: false

build.gradle

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ buildscript {
4646
ext.neo4jApocVersion = '5.20.0'
4747
ext.testContainersVersion = '1.21.1'
4848
ext.elasticsearchVersion = '2.11.1' // ES 7.10, Opensearch 1.x, 2.x
49+
ext.elasticsearch8Version = '8.17.4' // ES 8.x Java client
4950
ext.jacksonVersion = '2.18.4'
5051
ext.jettyVersion = '12.0.21'
5152
// see also datahub-frontend/play.gradle
@@ -151,6 +152,8 @@ project.ext.externalDependency = [
151152
'ebeanDdl': 'io.ebean:ebean-ddl-generator:' + ebeanVersion,
152153
'ebeanQueryBean': 'io.ebean:querybean-generator:' + ebeanVersion,
153154
'elasticSearchRest': 'org.opensearch.client:opensearch-rest-high-level-client:' + elasticsearchVersion,
155+
// Multi-client shim dependencies
156+
'elasticsearch8Client': 'co.elastic.clients:elasticsearch-java:' + elasticsearch8Version,
154157
'findbugsAnnotations': 'com.google.code.findbugs:annotations:3.0.1',
155158
'graphqlJava': 'com.graphql-java:graphql-java:22.3',
156159
'graphqlJavaScalars': 'com.graphql-java:graphql-java-extended-scalars:22.0',

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
1414
import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants;
1515
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
16+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
1617
import java.util.List;
1718
import java.util.Map;
1819
import java.util.Optional;
@@ -24,7 +25,6 @@
2425
import org.opensearch.action.search.SearchRequest;
2526
import org.opensearch.action.search.SearchResponse;
2627
import org.opensearch.client.RequestOptions;
27-
import org.opensearch.client.RestHighLevelClient;
2828
import org.opensearch.index.query.BoolQueryBuilder;
2929
import org.opensearch.index.query.QueryBuilder;
3030
import org.opensearch.index.query.QueryBuilders;
@@ -45,7 +45,7 @@
4545
@RequiredArgsConstructor
4646
public class AnalyticsService {
4747

48-
private final RestHighLevelClient _elasticClient;
48+
private final SearchClientShim<?> _elasticClient;
4949
private final IndexConvention _indexConvention;
5050

5151
private static final String FILTERED = "filtered";

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NonBlockingConfigs.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
import com.linkedin.metadata.search.SearchService;
2020
import com.linkedin.metadata.search.elasticsearch.ElasticSearchService;
2121
import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO;
22+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
2223
import io.datahubproject.metadata.context.OperationContext;
23-
import org.opensearch.client.RestHighLevelClient;
2424
import org.springframework.beans.factory.annotation.Autowired;
2525
import org.springframework.beans.factory.annotation.Qualifier;
2626
import org.springframework.beans.factory.annotation.Value;
@@ -76,7 +76,7 @@ public NonBlockingSystemUpgrade backfillProcessInstancesHasRunEvents(
7676
final OperationContext opContext,
7777
EntityService<?> entityService,
7878
ElasticSearchService elasticSearchService,
79-
RestHighLevelClient restHighLevelClient,
79+
SearchClientShim<?> restHighLevelClient,
8080
@Value("${systemUpdate.processInstanceHasRunEvents.enabled}") final boolean enabled,
8181
@Value("${systemUpdate.processInstanceHasRunEvents.reprocess.enabled}")
8282
boolean reprocessEnabled,

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/LoadIndicesIndexManager.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import com.linkedin.metadata.models.AspectSpec;
44
import com.linkedin.metadata.models.registry.EntityRegistry;
55
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
6+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
7+
import com.linkedin.metadata.utils.elasticsearch.responses.GetIndexResponse;
68
import java.io.IOException;
79
import java.util.HashSet;
810
import java.util.Set;
@@ -11,9 +13,7 @@
1113
import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse;
1214
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
1315
import org.opensearch.client.RequestOptions;
14-
import org.opensearch.client.RestHighLevelClient;
1516
import org.opensearch.client.indices.GetIndexRequest;
16-
import org.opensearch.client.indices.GetIndexResponse;
1717
import org.opensearch.common.settings.Settings;
1818

1919
/**
@@ -26,7 +26,7 @@ public class LoadIndicesIndexManager {
2626
private static final String REFRESH_INTERVAL_SETTING = "index.refresh_interval";
2727
private static final String DISABLED_REFRESH_INTERVAL = "-1";
2828

29-
private final RestHighLevelClient searchClient;
29+
private final SearchClientShim<?> searchClient;
3030
private final IndexConvention indexConvention;
3131
private final EntityRegistry entityRegistry;
3232
private final String configuredRefreshInterval;
@@ -35,7 +35,7 @@ public class LoadIndicesIndexManager {
3535
private boolean indicesDiscovered = false;
3636

3737
public LoadIndicesIndexManager(
38-
RestHighLevelClient searchClient,
38+
SearchClientShim<?> searchClient,
3939
IndexConvention indexConvention,
4040
EntityRegistry entityRegistry,
4141
String configuredRefreshInterval) {
@@ -56,7 +56,7 @@ public Set<String> discoverDataHubIndices() throws IOException {
5656

5757
// Get all existing indices
5858
GetIndexRequest request = new GetIndexRequest("*");
59-
GetIndexResponse response = searchClient.indices().get(request, RequestOptions.DEFAULT);
59+
GetIndexResponse response = searchClient.getIndex(request, RequestOptions.DEFAULT);
6060
String[] allIndices = response.getIndices();
6161

6262
log.info("Found {} total indices in Elasticsearch", allIndices.length);
@@ -201,8 +201,7 @@ private String getIndexRefreshInterval(String indexName) throws IOException {
201201
.includeDefaults(true)
202202
.names(REFRESH_INTERVAL_SETTING);
203203

204-
GetSettingsResponse response =
205-
searchClient.indices().getSettings(request, RequestOptions.DEFAULT);
204+
GetSettingsResponse response = searchClient.getIndexSettings(request, RequestOptions.DEFAULT);
206205
return response.getSetting(indexName, REFRESH_INTERVAL_SETTING);
207206
}
208207

@@ -212,7 +211,7 @@ private void setIndexRefreshInterval(String indexName, String interval) throws I
212211
Settings settings = Settings.builder().put(REFRESH_INTERVAL_SETTING, interval).build();
213212
request.settings(settings);
214213

215-
searchClient.indices().putSettings(request, RequestOptions.DEFAULT);
214+
searchClient.updateIndexSettings(request, RequestOptions.DEFAULT);
216215
}
217216

218217
/** Returns true if refresh intervals are currently disabled. */

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/loadindices/config/LoadIndicesConfig.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
1212
import com.linkedin.metadata.service.UpdateIndicesService;
1313
import com.linkedin.metadata.systemmetadata.SystemMetadataService;
1414
import com.linkedin.metadata.timeseries.TimeseriesAspectService;
15+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
1516
import io.datahubproject.metadata.context.OperationContext;
1617
import io.ebean.Database;
1718
import javax.annotation.Nonnull;
1819
import lombok.extern.slf4j.Slf4j;
19-
import org.opensearch.client.RestHighLevelClient;
2020
import org.springframework.beans.factory.annotation.Qualifier;
2121
import org.springframework.beans.factory.annotation.Value;
2222
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
@@ -47,7 +47,7 @@ public KafkaEventProducer noOpKafkaEventProducer() {
4747
@Nonnull
4848
public LoadIndicesIndexManager createIndexManager(
4949
@Qualifier("systemOperationContext") final OperationContext systemOperationContext,
50-
@Qualifier("elasticSearchRestHighLevelClient") final RestHighLevelClient searchClient,
50+
@Qualifier("searchClientShim") SearchClientShim<?> searchClient,
5151
@Value("${elasticsearch.index.refreshIntervalSeconds:3}")
5252
final String configuredRefreshInterval)
5353
throws Exception {

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstances.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade;
66
import com.linkedin.metadata.entity.EntityService;
77
import com.linkedin.metadata.search.elasticsearch.ElasticSearchService;
8+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
89
import io.datahubproject.metadata.context.OperationContext;
910
import java.util.List;
10-
import org.opensearch.client.RestHighLevelClient;
1111

1212
public class BackfillDataProcessInstances implements NonBlockingSystemUpgrade {
1313

@@ -17,7 +17,7 @@ public BackfillDataProcessInstances(
1717
OperationContext opContext,
1818
EntityService<?> entityService,
1919
ElasticSearchService elasticSearchService,
20-
RestHighLevelClient restHighLevelClient,
20+
SearchClientShim<?> restHighLevelClient,
2121
boolean enabled,
2222
boolean reprocessEnabled,
2323
Integer batchSize,

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstancesHasRunEventsStep.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import com.linkedin.metadata.entity.EntityService;
1515
import com.linkedin.metadata.search.elasticsearch.ElasticSearchService;
1616
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
17+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
1718
import com.linkedin.upgrade.DataHubUpgradeState;
1819
import io.datahubproject.metadata.context.OperationContext;
1920
import java.io.IOException;
@@ -28,7 +29,6 @@
2829
import org.opensearch.action.search.SearchRequest;
2930
import org.opensearch.action.search.SearchResponse;
3031
import org.opensearch.client.RequestOptions;
31-
import org.opensearch.client.RestHighLevelClient;
3232
import org.opensearch.index.query.QueryBuilder;
3333
import org.opensearch.index.query.QueryBuilders;
3434
import org.opensearch.search.aggregations.Aggregation;
@@ -47,7 +47,7 @@ public class BackfillDataProcessInstancesHasRunEventsStep implements UpgradeStep
4747
private final OperationContext opContext;
4848
private final EntityService<?> entityService;
4949
private final ElasticSearchService elasticSearchService;
50-
private final RestHighLevelClient restHighLevelClient;
50+
private final SearchClientShim<?> restHighLevelClient;
5151

5252
private final boolean reprocessEnabled;
5353
private final Integer batchSize;
@@ -60,7 +60,7 @@ public BackfillDataProcessInstancesHasRunEventsStep(
6060
OperationContext opContext,
6161
EntityService<?> entityService,
6262
ElasticSearchService elasticSearchService,
63-
RestHighLevelClient restHighLevelClient,
63+
SearchClientShim<?> restHighLevelClient,
6464
boolean reprocessEnabled,
6565
Integer batchSize,
6666
Integer batchDelayMs,

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ public Function<UpgradeContext, UpgradeStepResult> executable() {
6363
boolean ack =
6464
esComponents
6565
.getSearchClient()
66-
.indices()
67-
.putSettings(request, RequestOptions.DEFAULT)
66+
.updateIndexSettings(request, RequestOptions.DEFAULT)
6867
.isAcknowledged();
6968
log.info(
7069
"Updated index {} with new settings. Settings: {}, Acknowledged: {}",

0 commit comments

Comments
 (0)