Skip to content

Commit d110b97

Browse files
[Improvement][Seatunnel-web][Hive-JDBC] Add Hive-JDBC into seatunnel-web (#218)
1 parent ddd25bf commit d110b97

File tree

11 files changed

+242
-48
lines changed

11 files changed

+242
-48
lines changed

seatunnel-datasource/seatunnel-datasource-client/src/main/java/org/apache/seatunnel/datasource/classloader/DatasourceLoadConfig.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ public class DatasourceLoadConfig {
7373
classLoaderFactoryName.put(
7474
"JDBC-TIDB",
7575
"org.apache.seatunnel.datasource.plugin.tidb.jdbc.TidbJdbcDataSourceFactory");
76+
classLoaderFactoryName.put(
77+
"JDBC-HIVE",
78+
"org.apache.seatunnel.datasource.plugin.hive.jdbc.HiveJdbcDataSourceFactory");
7679
classLoaderFactoryName.put(
7780
"KAFKA", "org.apache.seatunnel.datasource.plugin.kafka.KafkaDataSourceFactory");
7881
classLoaderFactoryName.put(
@@ -124,6 +127,7 @@ public class DatasourceLoadConfig {
124127
classLoaderJarName.put("JDBC-STARROCKS", "datasource-jdbc-starrocks-");
125128
classLoaderJarName.put("MONGODB", "datasource-mongodb-");
126129
classLoaderJarName.put("JDBC-DB2", "datasource-jdbc-db2-");
130+
classLoaderJarName.put("JDBC-HIVE", "datasource-jdbc-hive-");
127131
classLoaderJarName.put("FAKESOURCE", "datasource-fakesource-");
128132
classLoaderJarName.put("CONSOLE", "datasource-console-");
129133
}
@@ -138,6 +142,7 @@ public class DatasourceLoadConfig {
138142
"JDBC-Postgres",
139143
"JDBC-SQLServer",
140144
"JDBC-TiDB",
145+
"JDBC-Hive",
141146
"Kafka",
142147
"MySQL-CDC",
143148
"S3",

seatunnel-datasource/seatunnel-datasource-plugins/datasource-hive/pom.xml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
<artifactId>datasource-hive</artifactId>
2626

2727
<properties>
28-
<hive.exec.version>3.1.3</hive.exec.version>
2928
<guava.version>24.1-jre</guava.version>
3029
</properties>
3130

@@ -51,7 +50,7 @@
5150
<dependency>
5251
<groupId>org.apache.hive</groupId>
5352
<artifactId>hive-exec</artifactId>
54-
<version>${hive.exec.version}</version>
53+
<version>${hive.version}</version>
5554
<exclusions>
5655
<exclusion>
5756
<groupId>log4j</groupId>

seatunnel-datasource/seatunnel-datasource-plugins/datasource-jdbc-hive/pom.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,18 @@
3131
<version>${project.version}</version>
3232
<scope>provided</scope>
3333
</dependency>
34+
<dependency>
35+
<groupId>org.apache.hive</groupId>
36+
<artifactId>hive-jdbc</artifactId>
37+
<version>${hive.version}</version>
38+
<scope>provided</scope>
39+
</dependency>
40+
<dependency>
41+
<groupId>org.apache.hadoop</groupId>
42+
<artifactId>hadoop-common</artifactId>
43+
<version>${hadoop.version}</version>
44+
<scope>provided</scope>
45+
</dependency>
3446
</dependencies>
3547

3648
<build>

seatunnel-datasource/seatunnel-datasource-plugins/datasource-jdbc-hive/src/main/java/org/apache/seatunnel/datasource/plugin/hive/jdbc/HiveJdbcConstants.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@
2222
import java.util.Set;
2323

2424
public class HiveJdbcConstants {
25-
25+
public static final String PLUGIN_NAME = "JDBC-Hive";
2626
public static final Set<String> HIVE_SYSTEM_DATABASES =
27-
Sets.newHashSet(
28-
"information_schema", "mysql", "performance_schema", "sys", "test", "hivedb");
27+
Sets.newHashSet("information_schema", "performance_schema", "sys");
2928
}

seatunnel-datasource/seatunnel-datasource-plugins/datasource-jdbc-hive/src/main/java/org/apache/seatunnel/datasource/plugin/hive/jdbc/HiveJdbcDataSourceChannel.java

Lines changed: 63 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@
2424

2525
import org.apache.commons.collections4.MapUtils;
2626
import org.apache.commons.lang3.StringUtils;
27+
import org.apache.hadoop.conf.Configuration;
28+
import org.apache.hadoop.security.UserGroupInformation;
2729

2830
import lombok.NonNull;
2931
import lombok.extern.slf4j.Slf4j;
3032

3133
import java.io.IOException;
32-
import java.net.InetSocketAddress;
33-
import java.net.Socket;
3434
import java.sql.Connection;
3535
import java.sql.DatabaseMetaData;
3636
import java.sql.DriverManager;
@@ -41,6 +41,7 @@
4141
import java.util.HashMap;
4242
import java.util.List;
4343
import java.util.Map;
44+
import java.util.Properties;
4445

4546
@Slf4j
4647
public class HiveJdbcDataSourceChannel implements DataSourceChannel {
@@ -61,15 +62,15 @@ public List<String> getTables(
6162
Map<String, String> requestParams,
6263
String database,
6364
Map<String, String> option) {
64-
return getTables(pluginName, requestParams, database, option);
65+
return getTableNames(requestParams, database);
6566
}
6667

6768
@Override
6869
public List<String> getDatabases(
6970
@NonNull String pluginName, @NonNull Map<String, String> requestParams) {
7071
try {
71-
return getDataBaseNames(pluginName, requestParams);
72-
} catch (SQLException e) {
72+
return getDataBaseNames(requestParams);
73+
} catch (SQLException | IOException e) {
7374
log.error("Query Hive databases error, request params is {}", requestParams, e);
7475
throw new DataSourcePluginException("Query Hive databases error,", e);
7576
}
@@ -104,33 +105,69 @@ public Map<String, List<TableField>> getTableFields(
104105
}
105106

106107
protected boolean checkJdbcConnectivity(Map<String, String> requestParams) {
107-
try (Connection ignored = init(requestParams)) {
108+
try (Connection ignored = getHiveConnection(requestParams)) {
108109
return true;
109110
} catch (Exception e) {
110111
throw new DataSourcePluginException(
111112
"check jdbc connectivity failed, " + e.getMessage(), e);
112113
}
113114
}
114115

115-
protected Connection init(Map<String, String> requestParams) throws SQLException {
116+
protected Connection getHiveConnection(Map<String, String> requestParams)
117+
throws IOException, SQLException {
116118
if (MapUtils.isEmpty(requestParams)) {
117119
throw new DataSourcePluginException(
118120
"Hive jdbc request params is null, please check your config");
119121
}
120-
String url = requestParams.get(HiveJdbcOptionRule.URL.key());
121-
return DriverManager.getConnection(url);
122+
String driverClass =
123+
requestParams.getOrDefault(
124+
HiveJdbcOptionRule.DRIVER.key(), "org.apache.hive.jdbc.HiveDriver");
125+
try {
126+
Class.forName(driverClass);
127+
} catch (ClassNotFoundException e) {
128+
throw new DataSourcePluginException(
129+
"Hive jdbc driver " + driverClass + " not found", e);
130+
}
131+
Properties connProps = new Properties();
132+
boolean isKerberosEnabled =
133+
Boolean.parseBoolean(requestParams.get(HiveJdbcOptionRule.USE_KERBEROS.key()));
134+
if (isKerberosEnabled) {
135+
String krb5ConfPath = requestParams.get(HiveJdbcOptionRule.KRB5_PATH.key());
136+
if (StringUtils.isNotEmpty(krb5ConfPath)) {
137+
System.setProperty("java.security.krb5.conf", krb5ConfPath);
138+
}
139+
Configuration conf = new Configuration();
140+
conf.set("hadoop.security.authentication", "Kerberos");
141+
UserGroupInformation.setConfiguration(conf);
142+
String principal = requestParams.get(HiveJdbcOptionRule.KERBEROS_PRINCIPAL.key());
143+
connProps.setProperty("principal", principal);
144+
String keytabPath = requestParams.get(HiveJdbcOptionRule.KERBEROS_KEYTAB_PATH.key());
145+
UserGroupInformation.loginUserFromKeytab(principal, keytabPath);
146+
}
147+
148+
String user = requestParams.get(HiveJdbcOptionRule.USER.key());
149+
String password = requestParams.get(HiveJdbcOptionRule.PASSWORD.key());
150+
if (StringUtils.isNotEmpty(user)) {
151+
connProps.setProperty("user", user);
152+
}
153+
if (StringUtils.isNotEmpty(password)) {
154+
connProps.setProperty("password", password);
155+
}
156+
157+
String jdbcUrl = requestParams.get(HiveJdbcOptionRule.URL.key());
158+
return DriverManager.getConnection(jdbcUrl, connProps);
122159
}
123160

124-
protected List<String> getDataBaseNames(String pluginName, Map<String, String> requestParams)
125-
throws SQLException {
161+
protected List<String> getDataBaseNames(Map<String, String> requestParams)
162+
throws SQLException, IOException {
126163
List<String> dbNames = new ArrayList<>();
127-
try (Connection connection = init(requestParams);
128-
Statement statement = connection.createStatement(); ) {
129-
ResultSet re = statement.executeQuery("SHOW DATABASES;");
164+
try (Connection connection = getHiveConnection(requestParams);
165+
Statement statement = connection.createStatement()) {
166+
ResultSet re = statement.executeQuery("SHOW DATABASES");
130167
// filter system databases
131168
while (re.next()) {
132-
String dbName = re.getString("database");
133-
if (StringUtils.isNotBlank(dbName) && isNotSystemDatabase(pluginName, dbName)) {
169+
String dbName = re.getString("database_name");
170+
if (StringUtils.isNotBlank(dbName) && isNotSystemDatabase(dbName)) {
134171
dbNames.add(dbName);
135172
}
136173
}
@@ -140,25 +177,27 @@ protected List<String> getDataBaseNames(String pluginName, Map<String, String> r
140177

141178
protected List<String> getTableNames(Map<String, String> requestParams, String dbName) {
142179
List<String> tableNames = new ArrayList<>();
143-
try (Connection connection = init(requestParams); ) {
180+
try (Connection connection = getHiveConnection(requestParams)) {
144181
ResultSet resultSet =
145-
connection.getMetaData().getTables(dbName, null, null, new String[] {"TABLE"});
182+
connection
183+
.getMetaData()
184+
.getTables(dbName, dbName, null, new String[] {"TABLE"});
146185
while (resultSet.next()) {
147186
String tableName = resultSet.getString("TABLE_NAME");
148187
if (StringUtils.isNotBlank(tableName)) {
149188
tableNames.add(tableName);
150189
}
151190
}
152191
return tableNames;
153-
} catch (SQLException e) {
192+
} catch (SQLException | IOException e) {
154193
throw new DataSourcePluginException("get table names failed", e);
155194
}
156195
}
157196

158197
protected List<TableField> getTableFields(
159198
Map<String, String> requestParams, String dbName, String tableName) {
160199
List<TableField> tableFields = new ArrayList<>();
161-
try (Connection connection = init(requestParams); ) {
200+
try (Connection connection = getHiveConnection(requestParams)) {
162201
DatabaseMetaData metaData = connection.getMetaData();
163202
String primaryKey = getPrimaryKey(metaData, dbName, tableName);
164203
ResultSet resultSet = metaData.getColumns(dbName, null, tableName, null);
@@ -177,7 +216,7 @@ protected List<TableField> getTableFields(
177216
tableField.setNullable(isNullable);
178217
tableFields.add(tableField);
179218
}
180-
} catch (SQLException e) {
219+
} catch (SQLException | IOException e) {
181220
throw new DataSourcePluginException("get table fields failed", e);
182221
}
183222
return tableFields;
@@ -186,25 +225,14 @@ protected List<TableField> getTableFields(
186225
private String getPrimaryKey(DatabaseMetaData metaData, String dbName, String tableName)
187226
throws SQLException {
188227
ResultSet primaryKeysInfo = metaData.getPrimaryKeys(dbName, "%", tableName);
189-
while (primaryKeysInfo.next()) {
228+
if (primaryKeysInfo.next()) {
190229
return primaryKeysInfo.getString("COLUMN_NAME");
191230
}
192231
return null;
193232
}
194233

195-
@SuppressWarnings("checkstyle:MagicNumber")
196-
private static boolean checkHostConnectable(String host, int port) {
197-
try (Socket socket = new Socket()) {
198-
socket.connect(new InetSocketAddress(host, port), 1000);
199-
return true;
200-
} catch (IOException e) {
201-
return false;
202-
}
203-
}
204-
205-
private boolean isNotSystemDatabase(String pluginName, String dbName) {
206-
// FIXME,filters system databases
207-
return true;
234+
private boolean isNotSystemDatabase(String dbName) {
235+
return !HiveJdbcConstants.HIVE_SYSTEM_DATABASES.contains(dbName.toLowerCase());
208236
}
209237

210238
private boolean convertToBoolean(Object value) {

seatunnel-datasource/seatunnel-datasource-plugins/datasource-jdbc-hive/src/main/java/org/apache/seatunnel/datasource/plugin/hive/jdbc/HiveJdbcDataSourceFactory.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,17 @@
2828
public class HiveJdbcDataSourceFactory implements DataSourceFactory {
2929
@Override
3030
public String factoryIdentifier() {
31-
return "Hive-JDBC";
31+
return HiveJdbcConstants.PLUGIN_NAME;
3232
}
3333

3434
@Override
3535
public Set<DataSourcePluginInfo> supportedDataSources() {
3636
DataSourcePluginInfo dataSourcePluginInfo =
3737
DataSourcePluginInfo.builder()
38-
.name("Hive-JDBC")
38+
.name(HiveJdbcConstants.PLUGIN_NAME)
3939
.type(DatasourcePluginTypeEnum.DATABASE.getCode())
4040
.version("1.0.0")
41-
.icon("Hive-JDBC")
41+
.icon(HiveJdbcConstants.PLUGIN_NAME)
4242
.supportVirtualTables(false)
4343
.build();
4444
Set<DataSourcePluginInfo> dataSourceInfos = new HashSet<>();

seatunnel-datasource/seatunnel-datasource-plugins/datasource-jdbc-hive/src/main/java/org/apache/seatunnel/datasource/plugin/hive/jdbc/HiveJdbcOptionRule.java

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,65 @@ public class HiveJdbcOptionRule {
2525

2626
public static final Option<String> URL =
2727
Options.key("url")
28+
.stringType()
29+
.defaultValue("jdbc:hive2://localhost:10000/default")
30+
.withDescription(
31+
"The URL of the JDBC connection. Refer to a case: jdbc:hive2://localhost:10000/default");
32+
33+
public static final Option<String> DRIVER =
34+
Options.key("driver")
35+
.stringType()
36+
.defaultValue("org.apache.hive.jdbc.HiveDriver")
37+
.withDescription(
38+
"The jdbc class name used to connect to the remote data source");
39+
40+
public static final Option<String> USER =
41+
Options.key("user").stringType().noDefaultValue().withDescription("user");
42+
43+
public static final Option<String> PASSWORD =
44+
Options.key("password").stringType().noDefaultValue().withDescription("password");
45+
46+
public static final Option<Boolean> USE_KERBEROS =
47+
Options.key("use_kerberos")
48+
.booleanType()
49+
.defaultValue(false)
50+
.withDescription("Whether to enable Kerberos, default is false.");
51+
52+
public static final Option<String> KERBEROS_PRINCIPAL =
53+
Options.key("kerberos_principal")
54+
.stringType()
55+
.noDefaultValue()
56+
.withDescription(
57+
"When use kerberos, we should set kerberos principal such as 'test_user@xxx'. ");
58+
59+
public static final Option<String> KERBEROS_KEYTAB_PATH =
60+
Options.key("kerberos_keytab_path")
2861
.stringType()
2962
.noDefaultValue()
3063
.withDescription(
31-
"jdbc url, eg:"
32-
+ "jdbc:hive2://localhost:10000/default?useSSL=false&serverTimezone=UTC&useUnicode=true&characterEncoding=utf-8");
64+
"When use kerberos, we should set kerberos principal file path such as '/home/test/test_user.keytab'. ");
65+
66+
public static final Option<String> KRB5_PATH =
67+
Options.key("krb5_path")
68+
.stringType()
69+
.defaultValue("/etc/krb5.conf")
70+
.withDescription(
71+
"When use kerberos, we should set krb5 path file path such as '/seatunnel/krb5.conf' or use the default path '/etc/krb5.conf");
3372

3473
public static OptionRule optionRule() {
35-
return OptionRule.builder().required(URL).build();
74+
return OptionRule.builder()
75+
.required(URL)
76+
.required(DRIVER)
77+
.optional(USER)
78+
.optional(PASSWORD)
79+
.optional(USE_KERBEROS)
80+
.optional(KERBEROS_PRINCIPAL)
81+
.optional(KERBEROS_KEYTAB_PATH)
82+
.optional(KRB5_PATH)
83+
.build();
3684
}
3785

3886
public static OptionRule metadataRule() {
39-
// todo
4087
return OptionRule.builder().build();
4188
}
4289
}

seatunnel-datasource/seatunnel-datasource-plugins/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,9 @@
4949
<module>datasource-fakesource</module>
5050
<module>datasource-console</module>
5151
</modules>
52+
<properties>
53+
<hive.version>3.1.3</hive.version>
54+
<hadoop.version>3.1.0</hadoop.version>
55+
</properties>
5256

5357
</project>

seatunnel-server/seatunnel-app/src/main/java/org/apache/seatunnel/app/bean/engine/EngineDataType.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import java.util.Arrays;
3030
import java.util.List;
31+
import java.util.Locale;
3132
import java.util.Map;
3233
import java.util.function.Function;
3334
import java.util.stream.Collectors;
@@ -85,7 +86,7 @@ public static class SeaTunnelDataTypeConvertor
8586

8687
@Override
8788
public SeaTunnelDataType<?> toSeaTunnelType(String field, String connectorDataType) {
88-
return DATA_TYPE_MAP.get(connectorDataType).getRawType();
89+
return DATA_TYPE_MAP.get(connectorDataType.toLowerCase(Locale.ROOT)).getRawType();
8990
}
9091

9192
@Override

0 commit comments

Comments (0)