- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.dtstack.flinkx.emqx.decoder;
-
-import org.codehaus.jackson.map.ObjectMapper;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Collections;
-import java.util.Map;
-
-/**
- * Date: 2020/02/12
- * Company: www.dtstack.com
- *
- * @author tudou
- */
-public class JsonDecoder implements IDecode {
- private static Logger LOG = LoggerFactory.getLogger(JsonDecoder.class);
-
- private static ObjectMapper objectMapper = new ObjectMapper();
-
- private static final String KEY_MESSAGE = "message";
-
- @Override
- @SuppressWarnings("unchecked")
- public Map decode(final String message) {
- try {
- Map event = objectMapper.readValue(message, Map.class);
- if (!event.containsKey(KEY_MESSAGE)) {
- event.put(KEY_MESSAGE, message);
- }
- return event;
- } catch (Exception e) {
- LOG.error(e.getMessage());
- return Collections.singletonMap(KEY_MESSAGE, message);
- }
-
- }
-
-}
diff --git a/flinkx-emqx/flinkx-emqx-reader/pom.xml b/flinkx-emqx/flinkx-emqx-reader/pom.xml
index 0e106143fd..205e8a9c5c 100644
--- a/flinkx-emqx/flinkx-emqx-reader/pom.xml
+++ b/flinkx-emqx/flinkx-emqx-reader/pom.xml
@@ -53,6 +53,16 @@
+                                <relocation>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                                </relocation>
diff --git a/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/format/EmqxInputFormat.java b/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/format/EmqxInputFormat.java
index 5534601f51..7e92921dcc 100644
--- a/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/format/EmqxInputFormat.java
+++ b/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/format/EmqxInputFormat.java
@@ -17,16 +17,21 @@
*/
package com.dtstack.flinkx.emqx.format;
-import com.dtstack.flinkx.emqx.decoder.IDecode;
-import com.dtstack.flinkx.emqx.decoder.JsonDecoder;
-import com.dtstack.flinkx.emqx.decoder.PlainDecoder;
+import com.dtstack.flinkx.decoder.IDecode;
+import com.dtstack.flinkx.decoder.JsonDecoder;
+import com.dtstack.flinkx.decoder.PlainDecoder;
import com.dtstack.flinkx.inputformat.BaseRichInputFormat;
import com.dtstack.flinkx.util.ExceptionUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.core.io.GenericInputSplit;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.types.Row;
-import org.eclipse.paho.client.mqttv3.*;
+import org.eclipse.paho.client.mqttv3.IMqttDeliveryToken;
+import org.eclipse.paho.client.mqttv3.MqttCallback;
+import org.eclipse.paho.client.mqttv3.MqttClient;
+import org.eclipse.paho.client.mqttv3.MqttConnectOptions;
+import org.eclipse.paho.client.mqttv3.MqttException;
+import org.eclipse.paho.client.mqttv3.MqttMessage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/reader/EmqxReader.java b/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/reader/EmqxReader.java
index 83d70d80d1..32121eec6a 100644
--- a/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/reader/EmqxReader.java
+++ b/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/reader/EmqxReader.java
@@ -8,7 +8,13 @@
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.types.Row;
-import static com.dtstack.flinkx.emqx.EmqxConfigKeys.*;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_BROKER;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_CODEC;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_IS_CLEAN_SESSION;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_PASSWORD;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_QOS;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_TOPIC;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_USERNAME;
/**
* Date: 2020/02/12
diff --git a/flinkx-emqx/flinkx-emqx-writer/pom.xml b/flinkx-emqx/flinkx-emqx-writer/pom.xml
index a6117433f5..fcf861883a 100644
--- a/flinkx-emqx/flinkx-emqx-writer/pom.xml
+++ b/flinkx-emqx/flinkx-emqx-writer/pom.xml
@@ -53,6 +53,16 @@
+                                <relocation>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                                </relocation>
diff --git a/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/format/EmqxOutputFormat.java b/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/format/EmqxOutputFormat.java
index 79dec0f172..f1e87baba3 100644
--- a/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/format/EmqxOutputFormat.java
+++ b/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/format/EmqxOutputFormat.java
@@ -17,7 +17,7 @@
*/
package com.dtstack.flinkx.emqx.format;
-import com.dtstack.flinkx.emqx.decoder.JsonDecoder;
+import com.dtstack.flinkx.decoder.JsonDecoder;
import com.dtstack.flinkx.exception.WriteRecordException;
import com.dtstack.flinkx.outputformat.BaseRichOutputFormat;
import com.dtstack.flinkx.util.ExceptionUtil;
diff --git a/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/writer/EmqxWriter.java b/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/writer/EmqxWriter.java
index 0ba2a1a12d..52a311b76e 100644
--- a/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/writer/EmqxWriter.java
+++ b/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/writer/EmqxWriter.java
@@ -25,7 +25,12 @@
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.types.Row;
-import static com.dtstack.flinkx.emqx.EmqxConfigKeys.*;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_BROKER;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_IS_CLEAN_SESSION;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_PASSWORD;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_QOS;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_TOPIC;
+import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_USERNAME;
/**
* Date: 2020/02/12
diff --git a/flinkx-es/flinkx-es-reader/pom.xml b/flinkx-es/flinkx-es-reader/pom.xml
index 227dc25d10..8ccef94149 100644
--- a/flinkx-es/flinkx-es-reader/pom.xml
+++ b/flinkx-es/flinkx-es-reader/pom.xml
@@ -53,6 +53,20 @@
+                                <relocation>
+                                    <pattern>com.fasterxml.jackson.databind</pattern>
+                                    <shadedPattern>shade.es.com.fasterxml.jackson.databind</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                                </relocation>
diff --git a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormat.java b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormat.java
index 282e2ac5f8..1c54a5e115 100644
--- a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormat.java
+++ b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormat.java
@@ -25,7 +25,11 @@
import org.apache.flink.core.io.GenericInputSplit;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.types.Row;
-import org.elasticsearch.action.search.*;
+import org.elasticsearch.action.search.ClearScrollRequest;
+import org.elasticsearch.action.search.ClearScrollResponse;
+import org.elasticsearch.action.search.SearchRequest;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
diff --git a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormatBuilder.java b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormatBuilder.java
index 33f9e85d21..57584faeb4 100644
--- a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormatBuilder.java
+++ b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormatBuilder.java
@@ -18,6 +18,7 @@
package com.dtstack.flinkx.es.reader;
+import com.dtstack.flinkx.constants.ConstantValue;
import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder;
import java.util.List;
import java.util.Map;
@@ -103,5 +104,9 @@ protected void checkFormat() {
if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){
throw new UnsupportedOperationException("This plugin not support restore from failed state");
}
+
+ if (format.batchSize > ConstantValue.MAX_BATCH_SIZE) {
+            throw new IllegalArgumentException("Batch read size must not exceed [200000] records");
+ }
}
}
diff --git a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsReader.java b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsReader.java
index da95c5b0ee..e57fa881a4 100644
--- a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsReader.java
+++ b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsReader.java
@@ -28,6 +28,8 @@
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.types.Row;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
@@ -42,6 +44,8 @@
*/
public class EsReader extends BaseDataReader {
+ private static Logger LOG = LoggerFactory.getLogger(EsReader.class);
+
private String address;
private String username;
private String password;
@@ -87,7 +91,8 @@ public EsReader(DataTransferConfig config, StreamExecutionEnvironment env) {
columnValue.add((String) sm.get("value"));
columnName.add((String) sm.get("name"));
}
- System.out.println("init column finished");
+
+ LOG.info("init column finished");
} else if (!ConstantValue.STAR_SYMBOL.equals(columns.get(0)) || columns.size() != 1) {
throw new IllegalArgumentException("column argument error");
}
diff --git a/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReadDemo.java b/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReadDemo.java
deleted file mode 100644
index 06e304c46a..0000000000
--- a/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReadDemo.java
+++ /dev/null
@@ -1,90 +0,0 @@
-package com.dtstack.flinkx.es.reader.test;
-
-
-import com.google.gson.Gson;
-import org.apache.http.HttpHost;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.RestClient;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.SearchHits;
-import org.elasticsearch.search.aggregations.AggregationBuilder;
-import org.elasticsearch.search.aggregations.AggregationBuilders;
-import org.elasticsearch.search.aggregations.Aggregations;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.elasticsearch.index.query.QueryBuilders.termQuery;
-
-public class EsReadDemo {
-
- public static void searchAll() throws IOException {
- RestHighLevelClient client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("rdos1", 9200, "http"),
- new HttpHost("rdos2", 9200, "http")));
- SearchRequest searchRequest = new SearchRequest();
-// SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
-// searchSourceBuilder.query(QueryBuilders.matchAllQuery());
-// searchSourceBuilder().from(0);
-// searchSourceBuilder().size(100);
-
- SearchResponse searchResponse = client.search(searchRequest);
- SearchHits searchHits = searchResponse.getHits();
-
- for(SearchHit searchHit : searchHits) {
- Map source = searchHit.getSourceAsMap();
- System.out.println(source);
- }
-
-
- }
-
- public static void searchPart() throws IOException {
- RestHighLevelClient client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("rdos1", 9200, "http"),
- new HttpHost("rdos2", 9200, "http")));
- SearchRequest searchRequest = new SearchRequest();
- SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
-
- sourceBuilder.from(0);
- sourceBuilder.size(100);
-
-// QueryBuilder qb2 = QueryBuilders.wrapperQuery("{\"match_all\": {}}");
- Map map = new HashMap<>();
- Map match = new HashMap<>();
- map.put("match",match);
- match.put("col2", "hallo");
- Gson gson = new Gson();
-
- //QueryBuilder qb2 = QueryBuilders.wrapperQuery("{\"match\": {\"col2\":\"hallo\"}}");
- QueryBuilder qb2 = QueryBuilders.wrapperQuery(gson.toJson(map));
- sourceBuilder.query(qb2);
- searchRequest.source(sourceBuilder);
-
- SearchResponse searchResponse = client.search(searchRequest);
- System.out.println(searchResponse);
- SearchHits searchHits = searchResponse.getHits();
- for(SearchHit searchHit : searchHits.getHits()) {
- System.out.println(searchHit.docId() + ":" + searchHit.getSourceAsMap());
- }
- long total = searchHits.getTotalHits();
- System.out.println("total: " + total);
-
- client.close();
- }
-
-
- public static void main(String[] args) throws IOException {
- //searchAll();
- searchPart();
- //searchAll();
- }
-
-}
diff --git a/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReaderTest.java b/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReaderTest.java
deleted file mode 100644
index bf98581615..0000000000
--- a/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReaderTest.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package com.dtstack.flinkx.es.reader.test;
-
-import com.google.gson.Gson;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Created by softfly on 18/2/8.
- */
-public class EsReaderTest {
- public static void main(String[] args) {
- Gson gson = new Gson();
- Map map = new HashMap<>();
- map.put("xxx", 111);
- map.put("yyyy", "fff");
- String json = gson.toJson(map);
- System.out.println(json);
- }
-}
diff --git a/flinkx-es/flinkx-es-writer/pom.xml b/flinkx-es/flinkx-es-writer/pom.xml
index 1cdfab73fb..8e3e15a822 100644
--- a/flinkx-es/flinkx-es-writer/pom.xml
+++ b/flinkx-es/flinkx-es-writer/pom.xml
@@ -54,6 +54,20 @@
+                                <relocation>
+                                    <pattern>com.fasterxml.jackson.databind</pattern>
+                                    <shadedPattern>shade.es.com.fasterxml.jackson.databind</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                                </relocation>
diff --git a/flinkx-es/flinkx-es-writer/src/test/java/com/dtstack/flinkx/es/writer/test/EsDemo.java b/flinkx-es/flinkx-es-writer/src/test/java/com/dtstack/flinkx/es/writer/test/EsDemo.java
deleted file mode 100644
index 503c09fa7e..0000000000
--- a/flinkx-es/flinkx-es-writer/src/test/java/com/dtstack/flinkx/es/writer/test/EsDemo.java
+++ /dev/null
@@ -1,146 +0,0 @@
-package com.dtstack.flinkx.es.writer.test;
-
-import org.apache.http.HttpHost;
-import org.elasticsearch.action.bulk.BulkRequest;
-import org.elasticsearch.action.bulk.BulkResponse;
-import org.elasticsearch.action.index.IndexRequest;
-import org.elasticsearch.action.index.IndexResponse;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.action.support.WriteRequest;
-import org.elasticsearch.action.update.UpdateRequest;
-import org.elasticsearch.action.update.UpdateResponse;
-import org.elasticsearch.client.RestClient;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.common.xcontent.XContentType;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.builder.SearchSourceBuilder;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-public class EsDemo {
-
- public static void test1() throws Exception {
- RestHighLevelClient client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("rdos1", 9200, "http"),
- new HttpHost("rdos2", 9200, "http")));
-
- IndexRequest request = new IndexRequest(
- "nani222",
- "doc222",
- "id2");
-
- String jsonString = "{" +
- "\"user\":\"user2\"," +
- "\"postDate\":\"2014-01-30\"," +
- "\"message\":\"trying out Elasticsearch\"" +
- "}";
-
-
- request.source(jsonString, XContentType.JSON);
- IndexResponse response = client.index(request);
- System.out.println(response.getResult());
- client.close();
- }
-
- public static void test3() throws Exception {
- RestHighLevelClient client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("study", 9200, "http"),
- new HttpHost("study", 9201, "http")));
-
- IndexRequest request = new IndexRequest(
- "nani",
- "doc");
-
-// String jsonString = "{" +
-// "\"user\":\"xxxx\"," +
-// "\"postDate\":\"2013-01-30\"," +
-// "\"message\":\"trying out Elasticsearch\"" +
-// "}";
- Map jsonMap = new HashMap<>();
- jsonMap.put("xxx", "asfdasdf");
- jsonMap.put("zzz", "asdfsadf");
- request.source(jsonMap);
- IndexResponse response = client.index(request);
- System.out.println(response.getResult());
- client.close();
- }
-
- public static void test2() throws Exception {
- RestHighLevelClient client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("localhost", 9200, "http"),
- new HttpHost("localhost", 9201, "http")));
-
- UpdateRequest request = new UpdateRequest(
- "nani250",
- "doc",
- "2");
-
- String jsonString = "{" +
- "\"user\":\"kimchy\"," +
- "\"postDate\":\"2013-01-30\"," +
- "\"message\":\"trying out Elasticsearch\"" +
- "}";
-
- request.doc(jsonString, XContentType.JSON);
- UpdateResponse response = client.update(request);
- System.out.println(response.getResult());
- client.close();
- }
-
- public static void test4() throws IOException {
-
- RestHighLevelClient client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("study", 9200, "http"),
- new HttpHost("study", 9201, "http")));
- SearchRequest searchRequest = new SearchRequest();
- SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
- searchSourceBuilder.query(QueryBuilders.matchAllQuery());
-
- SearchResponse searchResponse = client.search(searchRequest);
- System.out.println(searchResponse.getTotalShards());
- }
-
- public static void test5() throws Exception {
- RestHighLevelClient client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("study", 9200, "http"),
- new HttpHost("study", 9201, "http")));
- BulkRequest bulkRequest = new BulkRequest();
-
- IndexRequest request = new IndexRequest("nani", "doc1");
- Map jsonMap = new HashMap<>();
- jsonMap.put("xxx", "8888");
- jsonMap.put("yyy", "9999");
-
- bulkRequest.add(request.source(jsonMap));
- // bulkRequest.setRefreshPolicy(null);
- // WriteRequest.RefreshPolicy;
-
- BulkResponse bulkResponse = client.bulk(bulkRequest);
-
- System.out.println(bulkResponse);
- }
-
- public static void test6() throws IOException {
- RestHighLevelClient client = new RestHighLevelClient(
- RestClient.builder(
- new HttpHost("study", 9200, "http"),
- new HttpHost("study", 9201, "http")));
- SearchRequest searchRequest = new SearchRequest();
- SearchResponse resp = client.search(searchRequest);
- resp.getAggregations();
- }
-
- public static void main(String[] args) throws Exception {
- test1();
- }
-
-
-}
diff --git a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpConfig.java b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpConfig.java
index fc58ca41e7..afc45b081a 100644
--- a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpConfig.java
+++ b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpConfig.java
@@ -59,7 +59,7 @@ public class FtpConfig implements Serializable {
public int timeout = FtpConfigConstants.DEFAULT_TIMEOUT;
- public long maxFileSize = 1024 * 1024 * 1024;
+ public long maxFileSize = 1024 * 1024 * 1024L;
public String getUsername() {
return username;
diff --git a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandler.java b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandler.java
index d201625068..4b99e46257 100644
--- a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandler.java
+++ b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandler.java
@@ -89,23 +89,14 @@ public void loginFtpServer(FtpConfig ftpConfig) {
}
@Override
- public void logoutFtpServer() {
+ public void logoutFtpServer() throws IOException{
if (ftpClient.isConnected()) {
try {
ftpClient.logout();
- } catch (IOException e) {
- LOG.error(DISCONNECT_FAIL_MESSAGE);
- throw new RuntimeException(e);
- }finally {
+ } finally {
if(ftpClient.isConnected()){
- try {
- ftpClient.disconnect();
- } catch (IOException e) {
- LOG.error(DISCONNECT_FAIL_MESSAGE);
- throw new RuntimeException(e);
- }
+ ftpClient.disconnect();
}
-
}
}
}
diff --git a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandlerFactory.java b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandlerFactory.java
index cc83e0343a..8c775f062d 100644
--- a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandlerFactory.java
+++ b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandlerFactory.java
@@ -19,6 +19,8 @@
package com.dtstack.flinkx.ftp;
+import org.apache.commons.lang3.StringUtils;
+
/**
* @author jiangbo
* @date 2019/11/21
@@ -28,8 +30,8 @@ public class FtpHandlerFactory {
public static IFtpHandler createFtpHandler(String protocolStr){
IFtpHandler ftpHandler;
- EProtocol protocol = EProtocol.getByName(protocolStr);
- if(EProtocol.SFTP.equals(protocol)) {
+ Protocol protocol = Protocol.getByName(protocolStr);
+ if(Protocol.SFTP.equals(protocol)) {
ftpHandler = new SftpHandler();
} else {
ftpHandler = new FtpHandler();
@@ -37,4 +39,16 @@ public static IFtpHandler createFtpHandler(String protocolStr){
return ftpHandler;
}
+
+ enum Protocol{
+ FTP, SFTP;
+
+ public static Protocol getByName(String name) {
+ if (StringUtils.isEmpty(name)) {
+ return SFTP;
+ }
+
+ return Protocol.valueOf(name.toUpperCase());
+ }
+ }
}
diff --git a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/IFtpHandler.java b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/IFtpHandler.java
index c8da4124b1..16faf8e6d8 100644
--- a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/IFtpHandler.java
+++ b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/IFtpHandler.java
@@ -19,6 +19,7 @@
package com.dtstack.flinkx.ftp;
+import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
@@ -40,8 +41,9 @@ public interface IFtpHandler {
/**
 * Log out of the server
+ * @throws IOException logout error
*/
- void logoutFtpServer();
+ void logoutFtpServer() throws IOException;
/**
* 判断给定的目录是否存在
diff --git a/flinkx-ftp/flinkx-ftp-reader/pom.xml b/flinkx-ftp/flinkx-ftp-reader/pom.xml
index 1771055c60..42ae5c1123 100644
--- a/flinkx-ftp/flinkx-ftp-reader/pom.xml
+++ b/flinkx-ftp/flinkx-ftp-reader/pom.xml
@@ -71,6 +71,16 @@ under the License.
+                                <relocation>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                                </relocation>
diff --git a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpInputFormat.java b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpInputFormat.java
index 6f0f4471d3..3b3b62013c 100644
--- a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpInputFormat.java
+++ b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpInputFormat.java
@@ -19,7 +19,12 @@
package com.dtstack.flinkx.ftp.reader;
import com.dtstack.flinkx.constants.ConstantValue;
-import com.dtstack.flinkx.ftp.*;
+import com.dtstack.flinkx.ftp.EProtocol;
+import com.dtstack.flinkx.ftp.FtpConfig;
+import com.dtstack.flinkx.ftp.FtpHandler;
+import com.dtstack.flinkx.ftp.FtpHandlerFactory;
+import com.dtstack.flinkx.ftp.IFtpHandler;
+import com.dtstack.flinkx.ftp.SftpHandler;
import com.dtstack.flinkx.inputformat.BaseRichInputFormat;
import com.dtstack.flinkx.reader.MetaColumn;
import com.dtstack.flinkx.util.StringUtil;
@@ -54,13 +59,7 @@ public class FtpInputFormat extends BaseRichInputFormat {
public void openInputFormat() throws IOException {
super.openInputFormat();
- if(EProtocol.SFTP.name().equalsIgnoreCase(ftpConfig.getProtocol())) {
- ftpHandler = new SftpHandler();
- } else if(EProtocol.FTP.name().equalsIgnoreCase(ftpConfig.getProtocol())){
- ftpHandler = new FtpHandler();
- } else {
- throw new RuntimeException("协议名称错误:" + ftpConfig.getProtocol());
- }
+ ftpHandler = FtpHandlerFactory.createFtpHandler(ftpConfig.getProtocol());
ftpHandler.loginFtpServer(ftpConfig);
}
@@ -157,5 +156,4 @@ public void closeInternal() throws IOException {
ftpHandler.logoutFtpServer();
}
}
-
}
diff --git a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpReader.java b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpReader.java
index 411389206d..c889b249cf 100644
--- a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpReader.java
+++ b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpReader.java
@@ -21,6 +21,7 @@
import com.dtstack.flinkx.config.DataTransferConfig;
import com.dtstack.flinkx.config.ReaderConfig;
import com.dtstack.flinkx.ftp.FtpConfig;
+import com.dtstack.flinkx.ftp.FtpConfigConstants;
import com.dtstack.flinkx.reader.BaseDataReader;
import com.dtstack.flinkx.reader.MetaColumn;
import com.dtstack.flinkx.util.StringUtil;
@@ -30,8 +31,6 @@
import java.util.List;
-import static com.dtstack.flinkx.ftp.FtpConfigConstants.*;
-
/**
* The reader plugin of Ftp
*
@@ -57,7 +56,7 @@ public FtpReader(DataTransferConfig config, StreamExecutionEnvironment env) {
ftpConfig.setDefaultPort();
}
- if(!DEFAULT_FIELD_DELIMITER.equals(ftpConfig.getFieldDelimiter())){
+ if(!FtpConfigConstants.DEFAULT_FIELD_DELIMITER.equals(ftpConfig.getFieldDelimiter())){
String fieldDelimiter = StringUtil.convertRegularExpr(ftpConfig.getFieldDelimiter());
ftpConfig.setFieldDelimiter(fieldDelimiter);
}
diff --git a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpSeqBufferedReader.java b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpSeqBufferedReader.java
index a19e566fa6..34755a4109 100644
--- a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpSeqBufferedReader.java
+++ b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpSeqBufferedReader.java
@@ -20,8 +20,13 @@
import com.dtstack.flinkx.ftp.IFtpHandler;
import com.dtstack.flinkx.ftp.FtpHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.util.Iterator;
/**
@@ -32,6 +37,8 @@
*/
public class FtpSeqBufferedReader {
+ private static Logger LOG = LoggerFactory.getLogger(FtpSeqBufferedReader.class);
+
private IFtpHandler ftpHandler;
private Iterator iter;
@@ -76,7 +83,8 @@ private void nextStream() throws IOException{
br = new BufferedReader(new InputStreamReader(in, charsetName));
for (int i = 0; i < fromLine; i++) {
- br.readLine();
+ String skipLine = br.readLine();
+ LOG.info("Skip line:{}", skipLine);
}
} else {
br = null;
diff --git a/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/FtpServerDemo.java b/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/FtpServerDemo.java
deleted file mode 100644
index ed9177c3b7..0000000000
--- a/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/FtpServerDemo.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package com.dtstack.flinkx.ftp.reader;
-
-import org.apache.commons.net.ftp.FTPClient;
-import org.apache.commons.net.ftp.FTPFile;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * Created by softfly on 17/11/22.
- */
-public class FtpServerDemo {
-
- public static void main(String[] args) throws IOException {
- FTPClient ftp = new FTPClient();
- String username = "test";
- String password = "qbI#5pNd";
- ftp.connect("node02", 21);
- ftp.login(username, password);
- FTPFile[] ftpFiles = ftp.listFiles();
- for(FTPFile ftpFile : ftpFiles) {
- System.out.println(ftpFile.getName());
- }
-
-
- String[] xxx = ftp.listNames();
-
- InputStream is1 = ftp.retrieveFileStream("hyf/ttt");
- ftp.getReply();
- InputStream is2 = ftp.retrieveFileStream("xxx");
- ftp.remoteRetrieve("/hyf/ttt");
- ftp.getReply();
-
- ftp.changeWorkingDirectory("/hyf");
- System.out.println(ftp.printWorkingDirectory());
-
- ftp.logout();
-
- }
-
-}
diff --git a/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/SftpServerDemo.java b/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/SftpServerDemo.java
deleted file mode 100644
index 6defecb105..0000000000
--- a/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/SftpServerDemo.java
+++ /dev/null
@@ -1,42 +0,0 @@
-package com.dtstack.flinkx.ftp.reader;
-
-import com.jcraft.jsch.*;
-
-import java.util.Properties;
-import java.util.Vector;
-
-/**
- * Created by softfly on 17/11/23.
- */
-public class SftpServerDemo {
-
- public static void main(String[] args) throws JSchException, SftpException {
- JSch jsch = new JSch();
-
- Session session = jsch.getSession("mysftp", "node02");
- session.setPassword("oh1986mygod");
- session.setPort(22);
- //session.setTimeout(10);
- Properties config = new Properties();
- config.put("StrictHostKeyChecking", "no");
-
- session.setConfig(config);
- session.connect();
-
- ChannelSftp channelSftp = (ChannelSftp) session.openChannel("sftp"); // 打开SFTP通道
- channelSftp.connect(); // 建立SFTP通道的连接
-
- Vector vector = channelSftp.ls("/");
-
- for(int i = 0; i < vector.size(); ++i) {
- ChannelSftp.LsEntry le = (ChannelSftp.LsEntry) vector.get(i);
- System.out.println(le.getFilename() );
- System.out.println(le.getLongname());
- }
-
-
- //session.disconnect();
-
- }
-
-}
diff --git a/flinkx-ftp/flinkx-ftp-writer/pom.xml b/flinkx-ftp/flinkx-ftp-writer/pom.xml
index 61d8ffaede..5c5b2bb6f7 100644
--- a/flinkx-ftp/flinkx-ftp-writer/pom.xml
+++ b/flinkx-ftp/flinkx-ftp-writer/pom.xml
@@ -72,6 +72,16 @@ under the License.
+                                <relocation>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                                </relocation>
diff --git a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java
index aeddb31bcb..4d66e893b1 100644
--- a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java
+++ b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java
@@ -19,22 +19,27 @@
package com.dtstack.flinkx.ftp.writer;
import com.dtstack.flinkx.exception.WriteRecordException;
-import com.dtstack.flinkx.ftp.*;
+import com.dtstack.flinkx.ftp.FtpConfig;
+import com.dtstack.flinkx.ftp.FtpHandlerFactory;
+import com.dtstack.flinkx.ftp.IFtpHandler;
import com.dtstack.flinkx.outputformat.BaseFileOutputFormat;
+import com.dtstack.flinkx.util.ExceptionUtil;
+import com.dtstack.flinkx.util.GsonUtil;
import com.dtstack.flinkx.util.StringUtil;
import com.dtstack.flinkx.util.SysUtil;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.types.Row;
+import java.io.BufferedWriter;
import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Arrays;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.Collections;
import java.util.List;
+import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
-import static com.dtstack.flinkx.ftp.FtpConfigConstants.SFTP_PROTOCOL;
-
/**
* The OutputFormat Implementation which reads data from ftp servers.
*
@@ -54,21 +59,18 @@ public class FtpOutputFormat extends BaseFileOutputFormat {
private transient IFtpHandler ftpHandler;
- private transient OutputStream os;
+ private static final int FILE_NAME_PART_SIZE = 3;
private static final String DOT = ".";
private static final String FILE_SUFFIX = ".csv";
private static final String OVERWRITE_MODE = "overwrite";
+ private transient BufferedWriter writer;
@Override
protected void openSource() throws IOException {
- if(SFTP_PROTOCOL.equalsIgnoreCase(ftpConfig.getProtocol())) {
- ftpHandler = new SftpHandler();
- } else {
- ftpHandler = new FtpHandler();
- }
+ ftpHandler = FtpHandlerFactory.createFtpHandler(ftpConfig.getProtocol());
ftpHandler.loginFtpServer(ftpConfig);
}
@@ -106,7 +108,7 @@ public boolean test(String file) {
}
String[] splits = fileName.split("\\.");
- if (splits.length == 3) {
+ if (splits.length == FILE_NAME_PART_SIZE) {
return Integer.parseInt(splits[2]) <= fileIndex;
}
@@ -125,11 +127,16 @@ public boolean test(String file) {
protected void nextBlock(){
super.nextBlock();
- if (os != null){
+ if (writer != null){
return;
}
-
- os = ftpHandler.getOutputStream(tmpPath + SP + currentBlockFileName);
+ String path = tmpPath + SP + currentBlockFileName;
+ try {
+ writer = new BufferedWriter(new OutputStreamWriter(ftpHandler.getOutputStream(path), ftpConfig.getEncoding()));
+ } catch (UnsupportedEncodingException e) {
+ LOG.error("exception when create BufferedWriter, path = {}, e = {}", path, ExceptionUtil.getErrorMessage(e));
+ throw new RuntimeException(e);
+ }
blockIndex++;
}
@@ -156,29 +163,42 @@ public void moveTemporaryDataBlockFileToDirectory(){
@Override
public void writeSingleRecordToFile(Row row) throws WriteRecordException {
- if(os == null){
- nextBlock();
- }
-
- String line = StringUtil.row2string(row, columnTypes, ftpConfig.getFieldDelimiter(), columnNames);
try {
- byte[] bytes = line.getBytes(ftpConfig.getEncoding());
- this.os.write(bytes);
- this.os.write(NEWLINE);
- this.os.flush();
+ if(writer == null){
+ nextBlock();
+ }
+
+ String line = StringUtil.row2string(row, columnTypes, ftpConfig.getFieldDelimiter());
+ this.writer.write(line);
+ this.writer.write(NEWLINE);
if(restoreConfig.isRestore()){
lastRow = row;
rowsOfCurrentBlock++;
}
- } catch(Exception ex) {
- throw new WriteRecordException(ex.getMessage(), ex);
+ } catch(Exception e) {
+ LOG.error("error happened when write single record to file, row = {}, columnTypes = {}, e = {}", row, GsonUtil.GSON.toJson(columnTypes), ExceptionUtil.getErrorMessage(e));
+ throw new WriteRecordException(e.getMessage(), e);
}
}
@Override
- protected void createFinishedTag() throws IOException {
- LOG.info("Subtask [{}] finished, create dir {}", taskNumber, finishedPath);
+ protected void createFinishedTag() {
+ LOG.info("SubTask [{}] finished, create dir {}", taskNumber, finishedPath);
+ String path = outputFilePath + SP + FINISHED_SUBDIR;
+ if(taskNumber == 0){
+ ftpHandler.mkDirRecursive(path);
+ }
+ final int maxRetryTime = 15;
+ int i = 0;
+ try {
+ while(!(ftpHandler.isDirExist(path) || i > maxRetryTime)){
+ i++;
+ TimeUnit.MILLISECONDS.sleep(10);
+ }
+ }catch (Exception e){
+ LOG.error("exception when createFinishedTag, path = {}, e = {}", path, ExceptionUtil.getErrorMessage(e));
+ }
ftpHandler.mkDirRecursive(finishedPath);
}
@@ -238,7 +258,7 @@ protected void waitForAllTasksToFinish(){
if (i == maxRetryTime) {
ftpHandler.deleteAllFilesInDir(finishedPath, null);
- throw new RuntimeException("timeout when gathering finish tags for each subtasks");
+ throw new RuntimeException("timeout when gathering finish tags for each subTasks");
}
}
@@ -246,7 +266,7 @@ protected void waitForAllTasksToFinish(){
protected void coverageData(){
boolean cleanPath = restoreConfig.isRestore() && OVERWRITE_MODE.equalsIgnoreCase(ftpConfig.getWriteMode()) && !SP.equals(ftpConfig.getPath());
if(cleanPath){
- ftpHandler.deleteAllFilesInDir(ftpConfig.getPath(), Arrays.asList(tmpPath));
+ ftpHandler.deleteAllFilesInDir(ftpConfig.getPath(), Collections.singletonList(tmpPath));
}
}
@@ -286,15 +306,15 @@ protected void moveAllTemporaryDataFileToDirectory() throws IOException {
@Override
protected void closeSource() throws IOException {
- if (os != null){
- os.flush();
- os.close();
- os = null;
+ if (writer != null){
+ writer.flush();
+ writer.close();
+ writer = null;
}
}
@Override
- protected void clearTemporaryDataFiles() throws IOException {
+ protected void clearTemporaryDataFiles() {
ftpHandler.deleteAllFilesInDir(tmpPath, null);
LOG.info("Delete .data dir:{}", tmpPath);
@@ -307,6 +327,12 @@ public void flushDataInternal() throws IOException {
closeSource();
}
+ @Override
+ public void closeInternal() throws IOException {
+ closeSource();
+ super.closeInternal();
+ }
+
@Override
public float getDeviation() {
return 1.0F;
@@ -316,4 +342,9 @@ public float getDeviation() {
protected String getExtension() {
return ".csv";
}
+
+ @Override
+ protected void writeMultipleRecordsInternal() throws Exception {
+ notSupportBatchWrite("FtpWriter");
+ }
}
diff --git a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormatBuilder.java b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormatBuilder.java
index dee19b9e1c..dc7bcae4f9 100644
--- a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormatBuilder.java
+++ b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormatBuilder.java
@@ -19,9 +19,7 @@
package com.dtstack.flinkx.ftp.writer;
import com.dtstack.flinkx.ftp.FtpConfig;
-import com.dtstack.flinkx.ftp.FtpConfigConstants;
import com.dtstack.flinkx.outputformat.FileOutputFormatBuilder;
-import org.apache.commons.lang.StringUtils;
import java.util.List;
/**
@@ -53,7 +51,7 @@ public void setFtpConfig(FtpConfig ftpConfig){
@Override
protected void checkFormat() {
-
+ notSupportBatchWrite("FtpWriter");
}
}
diff --git a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpWriter.java b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpWriter.java
index edfb666f4a..9588e3a3c0 100644
--- a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpWriter.java
+++ b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpWriter.java
@@ -30,7 +30,7 @@
import java.util.List;
import java.util.Map;
-import static com.dtstack.flinkx.ftp.FtpConfigConstants.*;
+import static com.dtstack.flinkx.ftp.FtpConfigConstants.DEFAULT_FIELD_DELIMITER;
/**
* The Writer Plugin of Ftp
diff --git a/flinkx-gbase/flinkx-gbase-reader/pom.xml b/flinkx-gbase/flinkx-gbase-reader/pom.xml
index 3082049696..9a7903f7fc 100644
--- a/flinkx-gbase/flinkx-gbase-reader/pom.xml
+++ b/flinkx-gbase/flinkx-gbase-reader/pom.xml
@@ -62,8 +62,12 @@
                                     <shadedPattern>shade.gbasereader.io.netty</shadedPattern>
-                                    <pattern>com.google</pattern>
-                                    <shadedPattern>shade.gbasereader.com.google</shadedPattern>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
diff --git a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java
index 7361843030..eb1b86a8a4 100644
--- a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java
+++ b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java
@@ -18,6 +18,7 @@
package com.dtstack.flinkx.gbase.format;
import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat;
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputSplit;
import com.dtstack.flinkx.rdb.util.DbUtil;
import com.dtstack.flinkx.util.ClassUtil;
import org.apache.commons.lang3.StringUtils;
@@ -47,7 +48,9 @@ public void openInternal(InputSplit inputSplit) throws IOException {
String startLocation = incrementConfig.getStartLocation();
if (incrementConfig.isPolling()) {
- endLocationAccumulator.add(Long.parseLong(startLocation));
+ if (StringUtils.isNotEmpty(startLocation)) {
+ endLocationAccumulator.add(Long.parseLong(startLocation));
+ }
isTimestamp = "timestamp".equalsIgnoreCase(incrementConfig.getColumnType());
} else if ((incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc())) {
getMaxValue(inputSplit);
@@ -61,6 +64,10 @@ public void openInternal(InputSplit inputSplit) throws IOException {
fetchSize = Integer.MIN_VALUE;
querySql = buildQuerySql(inputSplit);
+ JdbcInputSplit jdbcInputSplit = (JdbcInputSplit) inputSplit;
+ if (null != jdbcInputSplit.getStartLocation()) {
+ startLocation = jdbcInputSplit.getStartLocation();
+ }
executeQuery(startLocation);
columnCount = resultSet.getMetaData().getColumnCount();
diff --git a/flinkx-gbase/flinkx-gbase-writer/pom.xml b/flinkx-gbase/flinkx-gbase-writer/pom.xml
index da6f21b4a2..d28e0af276 100644
--- a/flinkx-gbase/flinkx-gbase-writer/pom.xml
+++ b/flinkx-gbase/flinkx-gbase-writer/pom.xml
@@ -62,8 +62,12 @@
                                     <shadedPattern>shade.gbasewriter.io.netty</shadedPattern>
-                                    <pattern>com.google</pattern>
-                                    <shadedPattern>shade.gbasewriter.com.google</shadedPattern>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
diff --git a/flinkx-greenplum/flinkx-greenplum-core/pom.xml b/flinkx-greenplum/flinkx-greenplum-core/pom.xml
new file mode 100644
index 0000000000..3a79da783d
--- /dev/null
+++ b/flinkx-greenplum/flinkx-greenplum-core/pom.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>flinkx-greenplum</artifactId>
+        <groupId>com.dtstack.flinkx</groupId>
+        <version>1.6</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>flinkx-greenplum-core</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.dtstack.flinkx</groupId>
+            <artifactId>flinkx-postgresql-core</artifactId>
+            <version>1.6</version>
+        </dependency>
+        <dependency>
+            <groupId>com.pivotal</groupId>
+            <artifactId>greenplum-jdbc</artifactId>
+            <version>5.1.4</version>
+        </dependency>
+        <dependency>
+            <groupId>org.postgresql</groupId>
+            <artifactId>postgresql</artifactId>
+            <version>42.2.2</version>
+        </dependency>
+    </dependencies>
+
+    <repositories>
+        <repository>
+            <id>HandChina RDC</id>
+            <name>HandChina RDC</name>
+            <url>http://nexus.saas.hand-china.com/content/repositories/rdc/</url>
+        </repository>
+    </repositories>
+</project>
\ No newline at end of file
diff --git a/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMeta.java b/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMeta.java
new file mode 100644
index 0000000000..95dcb1b887
--- /dev/null
+++ b/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMeta.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.flinkx.greenplum;
+
+import com.dtstack.flinkx.enums.EDatabaseType;
+import com.dtstack.flinkx.postgresql.PostgresqlDatabaseMeta;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * The class of Greenplum database prototype
+ *
+ * @Company: www.dtstack.com
+ * @author kunni@dtstack.com
+ */
+
+public class GreenplumDatabaseMeta extends PostgresqlDatabaseMeta {
+
+ @Override
+    public String getUpsertStatement(List<String> column, String table, Map<String, List<String>> updateKey) {
+ throw new UnsupportedOperationException("Greenplum not support update mode");
+ }
+
+ @Override
+ public EDatabaseType getDatabaseType() {
+ return EDatabaseType.Greenplum;
+ }
+
+ @Override
+ public String getDriverClass() {
+ return "com.pivotal.jdbc.GreenplumDriver";
+ }
+}
diff --git a/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMetaInsert.java b/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMetaInsert.java
new file mode 100644
index 0000000000..cb811c6ef0
--- /dev/null
+++ b/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMetaInsert.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.flinkx.greenplum;
+
+/**
+ * when use insert mode, use org.postgresql.Driver
+ *
+ * @Company: www.dtstack.com
+ * @author kunni@dtstack.com
+ */
+
+public class GreenplumDatabaseMetaInsert extends GreenplumDatabaseMeta{
+ @Override
+ public String getDriverClass() {
+ return "org.postgresql.Driver";
+ }
+}
diff --git a/flinkx-greenplum/flinkx-greenplum-reader/pom.xml b/flinkx-greenplum/flinkx-greenplum-reader/pom.xml
new file mode 100644
index 0000000000..0fb695ce58
--- /dev/null
+++ b/flinkx-greenplum/flinkx-greenplum-reader/pom.xml
@@ -0,0 +1,112 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>flinkx-greenplum</artifactId>
+        <groupId>com.dtstack.flinkx</groupId>
+        <version>1.6</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>flinkx-greenplum-reader</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.dtstack.flinkx</groupId>
+            <artifactId>flinkx-greenplum-core</artifactId>
+            <version>1.6</version>
+        </dependency>
+        <dependency>
+            <groupId>com.dtstack.flinkx</groupId>
+            <artifactId>flinkx-postgresql-reader</artifactId>
+            <version>1.6</version>
+        </dependency>
+        <dependency>
+            <groupId>com.dtstack.flinkx</groupId>
+            <artifactId>flinkx-rdb-reader</artifactId>
+            <version>1.6</version>
+            <scope>provided</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.1.0</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <createDependencyReducedPom>false</createDependencyReducedPom>
+                            <artifactSet>
+                                <excludes>
+                                    <exclude>org.slf4j:slf4j-api</exclude>
+                                    <exclude>log4j:log4j</exclude>
+                                    <exclude>ch.qos.logback:*</exclude>
+                                </excludes>
+                            </artifactSet>
+                            <filters>
+                                <filter>
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                            <relocations>
+                                <relocation>
+                                    <pattern>io.netty</pattern>
+                                    <shadedPattern>shade.greenplumreader.io.netty</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                                </relocation>
+                            </relocations>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <artifactId>maven-antrun-plugin</artifactId>
+                <version>1.2</version>
+                <executions>
+                    <execution>
+                        <id>copy-resources</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>run</goal>
+                        </goals>
+                        <configuration>
+                            <!-- ... -->
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumInputFormat.java b/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumInputFormat.java
new file mode 100644
index 0000000000..c1fba4eb0b
--- /dev/null
+++ b/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumInputFormat.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.flinkx.greenplum.format;
+
+import com.dtstack.flinkx.postgresql.format.PostgresqlInputFormat;
+
+/**Simple inheritance of PostgresqlInputFormat
+ *
+ * @Company: www.dtstack.com
+ * @author kunni@dtstack.com
+ */
+
+public class GreenplumInputFormat extends PostgresqlInputFormat {
+}
diff --git a/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/reader/GreenplumReader.java b/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/reader/GreenplumReader.java
new file mode 100644
index 0000000000..09f547469f
--- /dev/null
+++ b/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/reader/GreenplumReader.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.flinkx.greenplum.reader;
+
+import com.dtstack.flinkx.config.DataTransferConfig;
+import com.dtstack.flinkx.greenplum.format.GreenplumInputFormat;
+import com.dtstack.flinkx.inputformat.BaseRichInputFormat;
+import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter;
+import com.dtstack.flinkx.postgresql.reader.PostgresqlQuerySqlBuilder;
+import com.dtstack.flinkx.rdb.datareader.JdbcDataReader;
+import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder;
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormatBuilder;
+import com.dtstack.flinkx.greenplum.GreenplumDatabaseMeta;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.types.Row;
+
+/**
+ * The reader plugin for Greenplum database
+ *
+ * @Company: www.dtstack.com
+ * @author kunni@dtstack.com
+ */
+
+public class GreenplumReader extends JdbcDataReader {
+ public GreenplumReader(DataTransferConfig config, StreamExecutionEnvironment env) {
+ super(config, env);
+ setDatabaseInterface(new GreenplumDatabaseMeta());
+ setTypeConverterInterface(new PostgresqlTypeConverter());
+ }
+
+ @Override
+ protected JdbcInputFormatBuilder getBuilder() {
+ return new JdbcInputFormatBuilder(new GreenplumInputFormat());
+ }
+
+ @Override
+    public DataStream<Row> readData() {
+ JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(new GreenplumInputFormat());
+ builder.setDriverName(databaseInterface.getDriverClass());
+ builder.setDbUrl(dbUrl);
+ builder.setUsername(username);
+ builder.setPassword(password);
+ builder.setBytes(bytes);
+ builder.setMonitorUrls(monitorUrls);
+ builder.setTable(table);
+ builder.setDatabaseInterface(databaseInterface);
+ builder.setTypeConverter(typeConverter);
+ builder.setMetaColumn(metaColumns);
+ builder.setFetchSize(fetchSize == 0 ? databaseInterface.getFetchSize() : fetchSize);
+ builder.setQueryTimeOut(queryTimeOut == 0 ? databaseInterface.getQueryTimeout() : queryTimeOut);
+ builder.setIncrementConfig(incrementConfig);
+ builder.setSplitKey(splitKey);
+ builder.setNumPartitions(numPartitions);
+ builder.setCustomSql(customSql);
+ builder.setRestoreConfig(restoreConfig);
+ builder.setHadoopConfig(hadoopConfig);
+ builder.setTestConfig(testConfig);
+
+ QuerySqlBuilder sqlBuilder = new PostgresqlQuerySqlBuilder(this);
+ builder.setQuery(sqlBuilder.buildSql());
+
+ BaseRichInputFormat format = builder.finish();
+ return createInput(format, (databaseInterface.getDatabaseType() + "reader").toLowerCase());
+ }
+
+}
diff --git a/flinkx-greenplum/flinkx-greenplum-writer/pom.xml b/flinkx-greenplum/flinkx-greenplum-writer/pom.xml
new file mode 100644
index 0000000000..db6738d9e5
--- /dev/null
+++ b/flinkx-greenplum/flinkx-greenplum-writer/pom.xml
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>flinkx-greenplum</artifactId>
+        <groupId>com.dtstack.flinkx</groupId>
+        <version>1.6</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>flinkx-greenplum-writer</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.dtstack.flinkx</groupId>
+            <artifactId>flinkx-rdb-writer</artifactId>
+            <version>1.6</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.dtstack.flinkx</groupId>
+            <artifactId>flinkx-greenplum-core</artifactId>
+            <version>1.6</version>
+        </dependency>
+        <dependency>
+            <groupId>com.dtstack.flinkx</groupId>
+            <artifactId>flinkx-postgresql-writer</artifactId>
+            <version>1.6</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.1.0</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <createDependencyReducedPom>false</createDependencyReducedPom>
+                            <artifactSet>
+                                <excludes>
+                                    <exclude>org.slf4j:slf4j-api</exclude>
+                                    <exclude>log4j:log4j</exclude>
+                                    <exclude>ch.qos.logback:*</exclude>
+                                </excludes>
+                            </artifactSet>
+                            <filters>
+                                <filter>
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                            <relocations>
+                                <relocation>
+                                    <pattern>io.netty</pattern>
+                                    <shadedPattern>shade.greenplumwriter.io.netty</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.common</pattern>
+                                    <shadedPattern>shade.core.com.google.common</shadedPattern>
+                                </relocation>
+                                <relocation>
+                                    <pattern>com.google.thirdparty</pattern>
+                                    <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                                </relocation>
+                            </relocations>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <artifactId>maven-antrun-plugin</artifactId>
+                <version>1.2</version>
+                <executions>
+                    <execution>
+                        <id>copy-resources</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>run</goal>
+                        </goals>
+                        <configuration>
+                            <!-- ... -->
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumOutputFormat.java b/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumOutputFormat.java
new file mode 100644
index 0000000000..a1d02138d2
--- /dev/null
+++ b/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumOutputFormat.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.flinkx.greenplum.format;
+
+import com.dtstack.flinkx.postgresql.format.PostgresqlOutputFormat;
+
+/**Simple inheritance of PostgresqlOutputFormat
+ *
+ * Date: 2020/6/4
+ * Company: www.dtstack.com
+ * @author kunni@dtstack.com
+ */
+
+public class GreenplumOutputFormat extends PostgresqlOutputFormat {
+}
diff --git a/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/writer/GreenplumWriter.java b/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/writer/GreenplumWriter.java
new file mode 100644
index 0000000000..8d3267968b
--- /dev/null
+++ b/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/writer/GreenplumWriter.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.flinkx.greenplum.writer;
+
+import com.dtstack.flinkx.config.DataTransferConfig;
+import com.dtstack.flinkx.greenplum.format.GreenplumOutputFormat;
+import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter;
+import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter;
+import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormatBuilder;
+import com.dtstack.flinkx.greenplum.GreenplumDatabaseMetaInsert;
+import org.apache.commons.lang.StringUtils;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.types.Row;
+
+/**
+ * The writer plugin for Greenplum database
+ *
+ * @Company: www.dtstack.com
+ * @author kunni@dtstack.com
+ */
+
+public class GreenplumWriter extends JdbcDataWriter {
+
+ public static final String INSERT_SQL_MODE_TYPE = "copy";
+ public static final String DATABASE_NAME = ";DatabaseName=";
+ public static final String JDBC_POSTGRESQL_PREFIX = "jdbc:postgresql";
+ public static final String JDBC_GREENPLUM_PREFIX = "jdbc:pivotal:greenplum";
+
+ public GreenplumWriter(DataTransferConfig config) {
+ super(config);
+        // insert mode is fixed to "copy" for Greenplum
+ insertSqlMode = INSERT_SQL_MODE_TYPE;
+ dbUrl = changeToPostgresqlUrl();
+ setDatabaseInterface(new GreenplumDatabaseMetaInsert());
+ setTypeConverterInterface(new PostgresqlTypeConverter());
+ }
+
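+    /**
+     * Rewrite the Greenplum JDBC url (jdbc:pivotal:greenplum://host:port;DatabaseName=db)
+     * into the PostgreSQL form (jdbc:postgresql://host:port/db), so the PostgreSQL driver
+     * and type converter can be reused for Greenplum.
+     */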
+ String changeToPostgresqlUrl(){
+ dbUrl = StringUtils.replaceOnce(dbUrl, JDBC_GREENPLUM_PREFIX, JDBC_POSTGRESQL_PREFIX);
+ dbUrl = StringUtils.replaceOnce(dbUrl, DATABASE_NAME, "/");
+ return dbUrl;
+ }
+
+ @Override
+    public DataStreamSink<?> writeData(DataStream<Row> dataSet) {
+ GreenplumOutputFormat greenplumOutputFormat = new GreenplumOutputFormat();
+ JdbcOutputFormatBuilder builder = new JdbcOutputFormatBuilder(greenplumOutputFormat);
+ builder.setDriverName(databaseInterface.getDriverClass());
+ builder.setDbUrl(dbUrl);
+ builder.setUsername(username);
+ builder.setPassword(password);
+ builder.setBatchInterval(batchSize);
+ builder.setMonitorUrls(monitorUrls);
+ builder.setPreSql(preSql);
+ builder.setPostSql(postSql);
+ builder.setErrors(errors);
+ builder.setErrorRatio(errorRatio);
+ builder.setDirtyPath(dirtyPath);
+ builder.setDirtyHadoopConfig(dirtyHadoopConfig);
+ builder.setSrcCols(srcCols);
+ builder.setDatabaseInterface(databaseInterface);
+ builder.setMode(mode);
+ builder.setTable(table);
+ builder.setColumn(column);
+ builder.setFullColumn(fullColumn);
+ builder.setUpdateKey(updateKey);
+ builder.setTypeConverter(typeConverter);
+ builder.setRestoreConfig(restoreConfig);
+ builder.setInsertSqlMode(insertSqlMode);
+
+        DataStreamSink<?> dataStreamSink = createOutput(dataSet, builder.finish());
+ String sinkName = (databaseInterface.getDatabaseType() + "writer").toLowerCase();
+ dataStreamSink.name(sinkName);
+ return dataStreamSink;
+ }
+}
diff --git a/flinkx-greenplum/pom.xml b/flinkx-greenplum/pom.xml
new file mode 100644
index 0000000000..5c3fb85de9
--- /dev/null
+++ b/flinkx-greenplum/pom.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>flinkx-all</artifactId>
+        <groupId>com.dtstack.flinkx</groupId>
+        <version>1.6</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>flinkx-greenplum</artifactId>
+    <packaging>pom</packaging>
+    <modules>
+        <module>flinkx-greenplum-reader</module>
+        <module>flinkx-greenplum-core</module>
+        <module>flinkx-greenplum-writer</module>
+    </modules>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.dtstack.flinkx</groupId>
+            <artifactId>flinkx-core</artifactId>
+            <version>1.6</version>
+            <scope>provided</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>com.dtstack.flinkx</groupId>
+            <artifactId>flinkx-rdb-core</artifactId>
+            <version>1.6</version>
+            <scope>provided</scope>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseConfigConstants.java b/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseConfigConstants.java
index d83d83b3ee..d7f0d61ba6 100644
--- a/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseConfigConstants.java
+++ b/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseConfigConstants.java
@@ -44,7 +44,7 @@ public class HbaseConfigConstants {
public static final String DEFAULT_NULL_MODE = "skip";
- public static final long DEFAULT_WRITE_BUFFER_SIZE = 8 * 1024 * 1024;
+ public static final long DEFAULT_WRITE_BUFFER_SIZE = 8 * 1024 * 1024L;
public static final boolean DEFAULT_WAL_FLAG = false;
diff --git a/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseHelper.java b/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseHelper.java
index 0bce2c0c69..3c88f828f5 100644
--- a/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseHelper.java
+++ b/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseHelper.java
@@ -27,15 +27,17 @@
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.*;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.BufferedMutator;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.security.PrivilegedAction;
-import java.util.Arrays;
-import java.util.List;
import java.util.Map;
/**
@@ -51,21 +53,9 @@ public class HbaseHelper {
private final static String AUTHENTICATION_TYPE = "Kerberos";
private final static String KEY_HBASE_SECURITY_AUTHENTICATION = "hbase.security.authentication";
private final static String KEY_HBASE_SECURITY_AUTHORIZATION = "hbase.security.authorization";
- private final static String KEY_HBASE_MASTER_KERBEROS_PRINCIPAL = "hbase.master.kerberos.principal";
- private final static String KEY_HBASE_MASTER_KEYTAB_FILE = "hbase.master.keytab.file";
- private final static String KEY_HBASE_REGIONSERVER_KEYTAB_FILE = "hbase.regionserver.keytab.file";
- private final static String KEY_HBASE_REGIONSERVER_KERBEROS_PRINCIPAL = "hbase.regionserver.kerberos.principal";
-
- private static List KEYS_KERBEROS_REQUIRED = Arrays.asList(
- KEY_HBASE_SECURITY_AUTHENTICATION,
- KEY_HBASE_MASTER_KERBEROS_PRINCIPAL,
- KEY_HBASE_MASTER_KEYTAB_FILE,
- KEY_HBASE_REGIONSERVER_KEYTAB_FILE,
- KEY_HBASE_REGIONSERVER_KERBEROS_PRINCIPAL
- );
public static org.apache.hadoop.hbase.client.Connection getHbaseConnection(Map hbaseConfigMap) {
- Validate.isTrue(hbaseConfigMap != null && hbaseConfigMap.size() !=0, "hbaseConfig不能为空Map结构!");
+        Validate.isTrue(MapUtils.isNotEmpty(hbaseConfigMap), "hbaseConfig不能为空Map结构!");
if(openKerberos(hbaseConfigMap)){
return getConnectionWithKerberos(hbaseConfigMap);
@@ -81,43 +71,43 @@ public static org.apache.hadoop.hbase.client.Connection getHbaseConnection(Map hbaseConfigMap){
- for (String key : KEYS_KERBEROS_REQUIRED) {
- if(StringUtils.isEmpty(MapUtils.getString(hbaseConfigMap, key))){
- throw new IllegalArgumentException(String.format("Must provide [%s] when authentication is Kerberos", key));
- }
+ try {
+ UserGroupInformation ugi = getUgi(hbaseConfigMap);
+            return ugi.doAs(new PrivilegedAction<Connection>() {
+ @Override
+ public Connection run() {
+ try {
+ Configuration hConfiguration = getConfig(hbaseConfigMap);
+ return ConnectionFactory.createConnection(hConfiguration);
+ } catch (IOException e) {
+ LOG.error("Get connection fail with config:{}", hbaseConfigMap);
+ throw new RuntimeException(e);
+ }
+ }
+ });
+ } catch (Exception e){
+ throw new RuntimeException("Login kerberos error", e);
}
+ }
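+
+    /**
+     * Load the keytab and krb5.conf referenced in the hbase config, log in to Kerberos
+     * and return the resulting UserGroupInformation.
+     */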
+    public static UserGroupInformation getUgi(Map<String, Object> hbaseConfigMap) throws IOException{
String keytabFileName = KerberosUtil.getPrincipalFileName(hbaseConfigMap);
keytabFileName = KerberosUtil.loadFile(hbaseConfigMap, keytabFileName);
- String principal = KerberosUtil.findPrincipalFromKeytab(keytabFileName);
+ String principal = KerberosUtil.getPrincipal(hbaseConfigMap, keytabFileName);
KerberosUtil.loadKrb5Conf(hbaseConfigMap);
Configuration conf = FileSystemUtil.getConfiguration(hbaseConfigMap, null);
- UserGroupInformation ugi;
- try {
- ugi = KerberosUtil.loginAndReturnUgi(conf, principal, keytabFileName);
- } catch (Exception e){
- throw new RuntimeException("Login kerberos error", e);
- }
-
- return ugi.doAs(new PrivilegedAction() {
- @Override
- public Connection run() {
- try {
- Configuration hConfiguration = getConfig(hbaseConfigMap);
- return ConnectionFactory.createConnection(hConfiguration);
- } catch (IOException e) {
- LOG.error("Get connection fail with config:{}", hbaseConfigMap);
- throw new RuntimeException(e);
- }
- }
- });
+ return KerberosUtil.loginAndReturnUgi(conf, principal, keytabFileName);
}
public static Configuration getConfig(Map hbaseConfigMap){
Configuration hConfiguration = HBaseConfiguration.create();
+ if (MapUtils.isEmpty(hbaseConfigMap)) {
+ return hConfiguration;
+ }
+
for (Map.Entry entry : hbaseConfigMap.entrySet()) {
if(entry.getValue() != null && !(entry.getValue() instanceof Map)){
hConfiguration.set(entry.getKey(), entry.getValue().toString());
diff --git a/flinkx-hbase/flinkx-hbase-core/src/test/java/com/dtstack/flinkx/hbase/test/HbaseHelperTest.java b/flinkx-hbase/flinkx-hbase-core/src/test/java/com/dtstack/flinkx/hbase/test/HbaseHelperTest.java
deleted file mode 100644
index c1a67b5750..0000000000
--- a/flinkx-hbase/flinkx-hbase-core/src/test/java/com/dtstack/flinkx/hbase/test/HbaseHelperTest.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package com.dtstack.flinkx.hbase.test;
-
-import com.dtstack.flinkx.hbase.HbaseHelper;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.*;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * @author jiangbo
- * @date 2019/8/29
- */
-public class HbaseHelperTest {
-
- public static void main(String[] args) throws Exception{
- Map sftpConf = new HashMap<>();
- sftpConf.put("host", "172.16.10.79");
- sftpConf.put("port", "22");
- sftpConf.put("username", "root");
- sftpConf.put("password", "abc123");
-
- Map hbaseConfig = new HashMap<>();
- hbaseConfig.put("hbase.security.authorization", "true");
- hbaseConfig.put("hbase.security.authentication", "kerberos");
- hbaseConfig.put("hbase.master.kerberos.principal", "hbase/cdh01@HADOOP.COM");
- hbaseConfig.put("hbase.master.keytab.file", "D:\\cdh_cluster\\cdh2\\hbase.keytab");
- hbaseConfig.put("hbase.regionserver.kerberos.principal", "hbase/cdh01@HADOOP.COM");
- hbaseConfig.put("hbase.regionserver.keytab.file", "D:\\cdh_cluster\\cdh2\\hbase.keytab");
- hbaseConfig.put("java.security.krb5.conf", "D:\\cdh_cluster\\cdh2\\krb5.conf");
- hbaseConfig.put("useLocalFile", "true");
-// hbaseConfig.put("sftpConf", sftpConf);
-// hbaseConfig.put("remoteDir", "/home/sftp/keytab/jiangbo");
-
-// hbaseConfig.put("hbase.zookeeper.quorum", "cdh01:2181,cdh02:2181,cdh03:2181");
- hbaseConfig.put("hbase.zookeeper.quorum", "172.16.10.201:2181");
- hbaseConfig.put("hbase.rpc.timeout", "60000");
- hbaseConfig.put("ipc.socket.timeout", "20000");
- hbaseConfig.put("hbase.client.retries.number", "3");
- hbaseConfig.put("hbase.client.pause", "100");
- hbaseConfig.put("zookeeper.recovery.retry", "3");
-
- Connection connection = HbaseHelper.getHbaseConnection(hbaseConfig);
- Table table = connection.getTable(TableName.valueOf("tb1"));
-
- ResultScanner rs = table.getScanner(new Scan());
- Result result = rs.next();
- if(result != null){
- System.out.println(result.getRow());
- }
-
- HbaseHelper.getRegionLocator(connection, "tb1");
-
- connection.close();
- }
-}
diff --git a/flinkx-hbase/flinkx-hbase-reader/pom.xml b/flinkx-hbase/flinkx-hbase-reader/pom.xml
index f77a9613ca..5cdc499262 100644
--- a/flinkx-hbase/flinkx-hbase-reader/pom.xml
+++ b/flinkx-hbase/flinkx-hbase-reader/pom.xml
@@ -65,6 +65,16 @@
+                        <relocations>
+                            <relocation>
+                                <pattern>com.google.common</pattern>
+                                <shadedPattern>shade.core.com.google.common</shadedPattern>
+                            </relocation>
+                            <relocation>
+                                <pattern>com.google.thirdparty</pattern>
+                                <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                            </relocation>
+                        </relocations>
diff --git a/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseInputFormat.java b/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseInputFormat.java
index 495a89c4e6..f17383ca87 100644
--- a/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseInputFormat.java
+++ b/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseInputFormat.java
@@ -29,16 +29,24 @@
import org.apache.flink.types.Row;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.*;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
+import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import com.google.common.collect.Maps;
+import org.apache.hadoop.security.UserGroupInformation;
/**
@@ -87,7 +95,17 @@ public void openInputFormat() throws IOException {
@Override
public InputSplit[] createInputSplitsInternal(int minNumSplits) throws IOException {
try (Connection connection = HbaseHelper.getHbaseConnection(hbaseConfig)) {
- return split(connection, tableName, startRowkey, endRowkey, isBinaryRowkey);
+ if(HbaseHelper.openKerberos(hbaseConfig)) {
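+                // When Kerberos is enabled, compute the region splits inside the authenticated user context.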
+ UserGroupInformation ugi = HbaseHelper.getUgi(hbaseConfig);
+                return ugi.doAs(new PrivilegedAction<HbaseInputSplit[]>() {
+ @Override
+ public HbaseInputSplit[] run() {
+ return split(connection, tableName, startRowkey, endRowkey, isBinaryRowkey);
+ }
+ });
+ } else {
+ return split(connection, tableName, startRowkey, endRowkey, isBinaryRowkey);
+ }
}
}
@@ -132,9 +150,10 @@ private List doSplit(byte[] startRowkeyByte,
            // the current region is the last region
            // if the start key of the last region is greater than the user-specified userEndKey, the last region should not be included
            // note: if the user sets userEndKey to "", this check must not apply; an empty userEndKey means reading up to the largest region
- if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0
+ boolean isSkip = Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0
&& (endRowkeyByte.length != 0 && (Bytes.compareTo(
- regionStartKey, endRowkeyByte) > 0))) {
+ regionStartKey, endRowkeyByte) > 0));
+ if (isSkip) {
continue;
}
diff --git a/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseReader.java b/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseReader.java
index 04c5150990..b1d4186132 100644
--- a/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseReader.java
+++ b/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseReader.java
@@ -26,6 +26,8 @@
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.types.Row;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
@@ -39,6 +41,8 @@
*/
public class HbaseReader extends BaseDataReader {
+ private static Logger LOG = LoggerFactory.getLogger(HbaseReader.class);
+
protected List columnName;
protected List columnType;
protected List columnValue;
@@ -82,7 +86,8 @@ public HbaseReader(DataTransferConfig config, StreamExecutionEnvironment env) {
columnValue.add((String) sm.get("value"));
columnFormat.add((String) sm.get("format"));
}
- System.out.println("init column finished");
+
+ LOG.info("init column finished");
} else{
throw new IllegalArgumentException("column argument error");
}
diff --git a/flinkx-hbase/flinkx-hbase-reader/src/test/java/com/dtstack/flinkx/hbase/reader/HbaseSplitDemo.java b/flinkx-hbase/flinkx-hbase-reader/src/test/java/com/dtstack/flinkx/hbase/reader/HbaseSplitDemo.java
deleted file mode 100644
index 47f153621b..0000000000
--- a/flinkx-hbase/flinkx-hbase-reader/src/test/java/com/dtstack/flinkx/hbase/reader/HbaseSplitDemo.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package com.dtstack.flinkx.hbase.reader;
-
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import java.io.IOException;
-
-/**
- * Created by softfly on 17/7/25.
- */
-public class HbaseSplitDemo {
-
- private static void split() {
-
- }
-
- public static void main(String[] args) throws IOException {
-
- org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
- conf.set("hbase.zookeeper.property.clientPort", "2181");
- conf.set("hbase.zookeeper.quorum", "172.16.1.151" );
- conf.set("zookeeper.znode.parent", "/hbase2");
-
- Connection conn = ConnectionFactory.createConnection(conf);
- //Table table = conn.getTable(TableName.valueOf("tb2"));
-
- RegionLocator regionLocator = conn.getRegionLocator(TableName.valueOf("tb2"));
- regionLocator.getStartEndKeys();
-
- }
-
-}
diff --git a/flinkx-hbase/flinkx-hbase-writer/pom.xml b/flinkx-hbase/flinkx-hbase-writer/pom.xml
index d406061b54..d5dd35eba4 100644
--- a/flinkx-hbase/flinkx-hbase-writer/pom.xml
+++ b/flinkx-hbase/flinkx-hbase-writer/pom.xml
@@ -54,6 +54,16 @@
+                        <relocations>
+                            <relocation>
+                                <pattern>com.google.common</pattern>
+                                <shadedPattern>shade.core.com.google.common</shadedPattern>
+                            </relocation>
+                            <relocation>
+                                <pattern>com.google.thirdparty</pattern>
+                                <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                            </relocation>
+                        </relocations>
diff --git a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormat.java b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormat.java
index a3e75a9caa..d4db300d08 100644
--- a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormat.java
+++ b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormat.java
@@ -18,6 +18,7 @@
package com.dtstack.flinkx.hbase.writer;
+import com.dtstack.flinkx.constants.ConstantValue;
import com.dtstack.flinkx.enums.ColumnType;
import com.dtstack.flinkx.exception.WriteRecordException;
import com.dtstack.flinkx.hbase.HbaseHelper;
@@ -27,6 +28,7 @@
import com.dtstack.flinkx.util.DateUtil;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.Validate;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.types.Row;
@@ -34,10 +36,12 @@
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.security.UserGroupInformation;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
+import java.security.PrivilegedAction;
import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@@ -93,8 +97,40 @@ public class HbaseOutputFormat extends BaseRichOutputFormat {
private transient ThreadLocal timeMillisecondFormatThreadLocal;
+ private boolean openKerberos = false;
+
@Override
public void configure(Configuration parameters) {
+ }
+
+ @Override
+ public void openInternal(int taskNumber, int numTasks) throws IOException {
+ openKerberos = HbaseHelper.openKerberos(hbaseConfig);
+ if (openKerberos) {
+ sleepRandomTime();
+
+ UserGroupInformation ugi = HbaseHelper.getUgi(hbaseConfig);
+            ugi.doAs(new PrivilegedAction<Object>() {
+ @Override
+ public Object run() {
+ openConnection();
+ return null;
+ }
+ });
+ } else {
+ openConnection();
+ }
+ }
+
+ private void sleepRandomTime() {
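+        // Sleep 5-15 seconds before connecting; presumably to stagger concurrent Kerberos logins across parallel subtasks.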
+ try {
+ Thread.sleep(5000L + (long)(10000 * Math.random()));
+ } catch (Exception exception) {
+            LOG.warn("sleepRandomTime interrupted", exception);
+ }
+ }
+
+ public void openConnection() {
LOG.info("HbaseOutputFormat configure start");
nameMaps = Maps.newConcurrentMap();
nameByteMaps = Maps.newConcurrentMap();
@@ -103,9 +139,9 @@ public void configure(Configuration parameters) {
Validate.isTrue(hbaseConfig != null && hbaseConfig.size() !=0, "hbaseConfig不能为空Map结构!");
try {
- connection = HbaseHelper.getHbaseConnection(hbaseConfig);
-
org.apache.hadoop.conf.Configuration hConfiguration = HbaseHelper.getConfig(hbaseConfig);
+ connection = ConnectionFactory.createConnection(hConfiguration);
+
bufferedMutator = connection.getBufferedMutator(
new BufferedMutatorParams(TableName.valueOf(tableName))
.pool(HTable.getDefaultExecutor(hConfiguration))
@@ -129,11 +165,6 @@ public void configure(Configuration parameters) {
LOG.info("HbaseOutputFormat configure end");
}
- @Override
- public void openInternal(int taskNumber, int numTasks) throws IOException {
-
- }
-
@Override
public void writeSingleRecordInternal(Row record) throws WriteRecordException {
int i = 0;
@@ -159,17 +190,19 @@ public void writeSingleRecordInternal(Row record) throws WriteRecordException {
String name = columnNames.get(i);
String[] cfAndQualifier = nameMaps.get(name);
byte[][] cfAndQualifierBytes = nameByteMaps.get(name);
- if(cfAndQualifier == null || cfAndQualifierBytes==null){
- String promptInfo = "Hbasewriter 中,column 的列配置格式应该是:列族:列名. 您配置的列错误:" + name;
+ if(cfAndQualifier == null || cfAndQualifierBytes == null){
cfAndQualifier = name.split(":");
- Validate.isTrue(cfAndQualifier != null && cfAndQualifier.length == 2
- && org.apache.commons.lang3.StringUtils.isNotBlank(cfAndQualifier[0])
- && org.apache.commons.lang3.StringUtils.isNotBlank(cfAndQualifier[1]), promptInfo);
- nameMaps.put(name,cfAndQualifier);
- cfAndQualifierBytes = new byte[2][];
- cfAndQualifierBytes[0] = Bytes.toBytes(cfAndQualifier[0]);
- cfAndQualifierBytes[1] = Bytes.toBytes(cfAndQualifier[1]);
- nameByteMaps.put(name,cfAndQualifierBytes);
+ if(cfAndQualifier.length == 2
+ && StringUtils.isNotBlank(cfAndQualifier[0])
+ && StringUtils.isNotBlank(cfAndQualifier[1])){
+ nameMaps.put(name,cfAndQualifier);
+ cfAndQualifierBytes = new byte[2][];
+ cfAndQualifierBytes[0] = Bytes.toBytes(cfAndQualifier[0]);
+ cfAndQualifierBytes[1] = Bytes.toBytes(cfAndQualifier[1]);
+ nameByteMaps.put(name,cfAndQualifierBytes);
+ } else {
+ throw new IllegalArgumentException("Hbasewriter 中,column 的列配置格式应该是:列族:列名. 您配置的列错误:" + name);
+ }
}
ColumnType columnType = ColumnType.getType(type);
@@ -180,8 +213,6 @@ public void writeSingleRecordInternal(Row record) throws WriteRecordException {
cfAndQualifierBytes[0],
cfAndQualifierBytes[1],
columnBytes);
- }else{
- continue;
}
}
@@ -195,20 +226,21 @@ public void writeSingleRecordInternal(Row record) throws WriteRecordException {
}
private SimpleDateFormat getSimpleDateFormat(String sign){
- SimpleDateFormat format = null;
- if("sss".equalsIgnoreCase(sign)){
+ SimpleDateFormat format;
+ if(ConstantValue.TIME_SECOND_SUFFIX.equals(sign)){
format = timeSecondFormatThreadLocal.get();
if(format == null){
format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
timeSecondFormatThreadLocal.set(format);
}
- }else if("SSS".equalsIgnoreCase(sign)){
+ } else {
format = timeMillisecondFormatThreadLocal.get();
if(format == null){
format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS");
timeMillisecondFormatThreadLocal.set(format);
}
}
+
return format;
}
@@ -219,7 +251,7 @@ protected String recordConvertDetailErrorMessage(int pos, Row row) {
@Override
protected void writeMultipleRecordsInternal() throws Exception {
- throw new IllegalArgumentException();
+ notSupportBatchWrite("HbaseWriter");
}
private byte[] getRowkey(Row record) throws Exception{
@@ -249,8 +281,8 @@ public long getVersion(Row record){
if(record.getField(index) == null){
throw new IllegalArgumentException("null verison column!");
}
- SimpleDateFormat dfSeconds = getSimpleDateFormat("sss");
- SimpleDateFormat dfMs = getSimpleDateFormat("SSS");
+ SimpleDateFormat dfSeconds = getSimpleDateFormat(ConstantValue.TIME_SECOND_SUFFIX);
+ SimpleDateFormat dfMs = getSimpleDateFormat(ConstantValue.TIME_MILLISECOND_SUFFIX);
Object column = record.getField(index);
if(column instanceof Long){
Long longValue = (Long) column;
@@ -370,7 +402,7 @@ private byte[] intToBytes(Object column) {
if(column instanceof Integer) {
intValue = (Integer) column;
} else if(column instanceof Long) {
- intValue = Integer.valueOf(((Long)column).intValue());
+ intValue = ((Long) column).intValue();
} else if(column instanceof Double) {
intValue = ((Double) column).intValue();
} else if(column instanceof Float) {
@@ -378,7 +410,7 @@ private byte[] intToBytes(Object column) {
} else if(column instanceof Short) {
intValue = ((Short) column).intValue();
} else if(column instanceof Boolean) {
- intValue = ((Boolean) column).booleanValue() ? 1 : 0;
+ intValue = (Boolean) column ? 1 : 0;
} else if(column instanceof String) {
intValue = Integer.valueOf((String) column);
} else {
@@ -401,7 +433,7 @@ private byte[] longToBytes(Object column) {
} else if(column instanceof Short) {
longValue = ((Short) column).longValue();
} else if(column instanceof Boolean) {
- longValue = ((Boolean) column).booleanValue() ? 1L : 0L;
+ longValue = (Boolean) column ? 1L : 0L;
} else if(column instanceof String) {
longValue = Long.valueOf((String) column);
}else if (column instanceof Timestamp){
@@ -426,7 +458,7 @@ private byte[] doubleToBytes(Object column) {
} else if(column instanceof Short) {
doubleValue = ((Short) column).doubleValue();
} else if(column instanceof Boolean) {
- doubleValue = ((Boolean) column).booleanValue() ? 1.0 : 0.0;
+ doubleValue = (Boolean) column ? 1.0 : 0.0;
} else if(column instanceof String) {
doubleValue = Double.valueOf((String) column);
} else {
@@ -449,7 +481,7 @@ private byte[] floatToBytes(Object column) {
} else if(column instanceof Short) {
floatValue = ((Short) column).floatValue();
} else if(column instanceof Boolean) {
- floatValue = ((Boolean) column).booleanValue() ? 1.0f : 0.0f;
+ floatValue = (Boolean) column ? 1.0f : 0.0f;
} else if(column instanceof String) {
floatValue = Float.valueOf((String) column);
} else {
@@ -472,7 +504,7 @@ private byte[] shortToBytes(Object column) {
} else if(column instanceof Short) {
shortValue = (Short) column;
} else if(column instanceof Boolean) {
- shortValue = ((Boolean) column).booleanValue() ? (short) 1 : (short) 0 ;
+ shortValue = (Boolean) column ? (short) 1 : (short) 0 ;
} else if(column instanceof String) {
shortValue = Short.valueOf((String) column);
} else {
@@ -484,15 +516,15 @@ private byte[] shortToBytes(Object column) {
private byte[] boolToBytes(Object column) {
Boolean booleanValue = null;
if(column instanceof Integer) {
- booleanValue = (Integer)column == 0 ? false : true;
+ booleanValue = (Integer) column != 0;
} else if(column instanceof Long) {
- booleanValue = (Long) column == 0L ? false : true;
+ booleanValue = (Long) column != 0L;
} else if(column instanceof Double) {
- booleanValue = (Double) column == 0.0 ? false : true;
+            booleanValue = Double.compare((Double) column, 0.0) != 0;
} else if(column instanceof Float) {
- booleanValue = (Float) column == 0.0f ? false : true;
+            booleanValue = Float.compare((Float) column, 0.0f) != 0;
} else if(column instanceof Short) {
- booleanValue = (Short) column == 0 ? false : true;
+ booleanValue = (Short) column != 0;
} else if(column instanceof Boolean) {
booleanValue = (Boolean) column;
} else if(column instanceof String) {
diff --git a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormatBuilder.java b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormatBuilder.java
index 4da4c10754..ec96e04c17 100644
--- a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormatBuilder.java
+++ b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormatBuilder.java
@@ -110,5 +110,7 @@ protected void checkFormat() {
if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){
throw new UnsupportedOperationException("This plugin not support restore from failed state");
}
+
+ notSupportBatchWrite("HbaseWriter");
}
}
diff --git a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseWriter.java b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseWriter.java
index 42091f62d3..3cb5bce6ce 100644
--- a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseWriter.java
+++ b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseWriter.java
@@ -31,8 +31,23 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import static com.dtstack.flinkx.hbase.HbaseConfigConstants.*;
-import static com.dtstack.flinkx.hbase.HbaseConfigKeys.*;
+
+import static com.dtstack.flinkx.hbase.HbaseConfigConstants.DEFAULT_WAL_FLAG;
+import static com.dtstack.flinkx.hbase.HbaseConfigConstants.DEFAULT_WRITE_BUFFER_SIZE;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_COLUMN_NAME;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_COLUMN_TYPE;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_ENCODING;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_HBASE_CONFIG;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_NULL_MODE;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_ROW_KEY_COLUMN;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_ROW_KEY_COLUMN_INDEX;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_ROW_KEY_COLUMN_VALUE;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_TABLE;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_VERSION_COLUMN;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_VERSION_COLUMN_INDEX;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_VERSION_COLUMN_VALUE;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_WAL_FLAG;
+import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_WRITE_BUFFER_SIZE;
/**
* The Writer plugin of HBase
diff --git a/flinkx-hbase/flinkx-hbase-writer/src/test/java/com.dtstack.flinkx.hbase.writer/RowKeyFunctionTest.java b/flinkx-hbase/flinkx-hbase-writer/src/test/java/com.dtstack.flinkx.hbase.writer/RowKeyFunctionTest.java
index dbf12dfa2e..75143994f5 100644
--- a/flinkx-hbase/flinkx-hbase-writer/src/test/java/com.dtstack.flinkx.hbase.writer/RowKeyFunctionTest.java
+++ b/flinkx-hbase/flinkx-hbase-writer/src/test/java/com.dtstack.flinkx.hbase.writer/RowKeyFunctionTest.java
@@ -18,7 +18,10 @@
package com.dtstack.flinkx.hbase.writer;
-import com.dtstack.flinkx.hbase.writer.function.*;
+import com.dtstack.flinkx.hbase.writer.function.FunctionParser;
+import com.dtstack.flinkx.hbase.writer.function.FunctionTree;
+import com.dtstack.flinkx.hbase.writer.function.Md5Function;
+import com.dtstack.flinkx.hbase.writer.function.StringFunction;
import org.junit.Assert;
import org.junit.Test;
diff --git a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/ECompressType.java b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/ECompressType.java
index 913c0fd41c..2f0c7ec101 100644
--- a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/ECompressType.java
+++ b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/ECompressType.java
@@ -19,7 +19,6 @@
package com.dtstack.flinkx.hdfs;
import org.apache.commons.lang.StringUtils;
-import org.apache.parquet.hadoop.metadata.CompressionCodecName;
/**
* @author jiangbo
diff --git a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsConfigKeys.java b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsConfigKeys.java
index fc6ced4a25..a4b44360e3 100644
--- a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsConfigKeys.java
+++ b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsConfigKeys.java
@@ -60,4 +60,6 @@ public class HdfsConfigKeys {
public static final String KEY_FLUSH_INTERVAL = "flushInterval";
+ public static final String KEY_ENABLE_DICTIONARY = "enableDictionary";
+
}
diff --git a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsUtil.java b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsUtil.java
index b678812f68..0acaa924b8 100644
--- a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsUtil.java
+++ b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsUtil.java
@@ -19,15 +19,20 @@
package com.dtstack.flinkx.hdfs;
import com.dtstack.flinkx.enums.ColumnType;
-import com.dtstack.flinkx.util.DateUtil;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.io.*;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.parquet.io.api.Binary;
-import java.sql.Date;
-import java.text.SimpleDateFormat;
/**
* Utilities for HdfsReader and HdfsWriter
@@ -39,62 +44,13 @@ public class HdfsUtil {
public static final String NULL_VALUE = "\\N";
- public static Object string2col(String str, String type, SimpleDateFormat customDateFormat) {
- if (str == null || str.length() == 0){
- return null;
- }
-
- if(type == null){
- return str;
- }
+ private static final long NANO_SECONDS_PER_DAY = 86400_000_000_000L;
- ColumnType columnType = ColumnType.fromString(type.toUpperCase());
- Object ret;
- switch(columnType) {
- case TINYINT:
- ret = Byte.valueOf(str.trim());
- break;
- case SMALLINT:
- ret = Short.valueOf(str.trim());
- break;
- case INT:
- ret = Integer.valueOf(str.trim());
- break;
- case BIGINT:
- ret = Long.valueOf(str.trim());
- break;
- case FLOAT:
- ret = Float.valueOf(str.trim());
- break;
- case DOUBLE:
- case DECIMAL:
- ret = Double.valueOf(str.trim());
- break;
- case STRING:
- case VARCHAR:
- case CHAR:
- if(customDateFormat != null){
- ret = DateUtil.columnToDate(str,customDateFormat);
- ret = DateUtil.timestampToString((Date)ret);
- } else {
- ret = str;
- }
- break;
- case BOOLEAN:
- ret = Boolean.valueOf(str.trim().toLowerCase());
- break;
- case DATE:
- ret = DateUtil.columnToDate(str,customDateFormat);
- break;
- case TIMESTAMP:
- ret = DateUtil.columnToTimestamp(str,customDateFormat);
- break;
- default:
- throw new IllegalArgumentException("Unsupported field type:" + type);
- }
+ private static final long JULIAN_EPOCH_OFFSET_DAYS = 2440588;
- return ret;
- }
+ private static final double SCALE_TWO = 2.0;
+ private static final double SCALE_TEN = 10.0;
+ private static final int BIT_SIZE = 8;
public static Object getWritableValue(Object writable) {
        Class<?> clz = writable.getClass();
@@ -166,4 +122,78 @@ public static ObjectInspector columnTypeToObjectInspetor(ColumnType columnType)
return objectInspector;
}
+
+ public static Binary decimalToBinary(final HiveDecimal hiveDecimal, int prec, int scale) {
+ byte[] decimalBytes = hiveDecimal.setScale(scale).unscaledValue().toByteArray();
+
+ // Estimated number of bytes needed.
+ int precToBytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
+ if (precToBytes == decimalBytes.length) {
+ // No padding needed.
+ return Binary.fromReusedByteArray(decimalBytes);
+ }
+
+ byte[] tgt = new byte[precToBytes];
+ if (hiveDecimal.signum() == -1) {
+ // For negative number, initializing bits to 1
+ for (int i = 0; i < precToBytes; i++) {
+ tgt[i] |= 0xFF;
+ }
+ }
+
+ // Padding leading zeroes/ones.
+ System.arraycopy(decimalBytes, 0, tgt, precToBytes - decimalBytes.length, decimalBytes.length);
+ return Binary.fromReusedByteArray(tgt);
+ }
+
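+    // Smallest number of bytes whose signed two's-complement range can hold 10^precision distinct values.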
+ public static int computeMinBytesForPrecision(int precision){
+ int numBytes = 1;
+ while (Math.pow(SCALE_TWO, BIT_SIZE * numBytes - 1.0) < Math.pow(SCALE_TEN, precision)) {
+ numBytes += 1;
+ }
+ return numBytes;
+ }
+
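+    /**
+     * Encode epoch milliseconds into the 12-byte INT96-style parquet timestamp layout:
+     * 8 little-endian bytes of nanoseconds within the day followed by 4 little-endian bytes of the Julian day number.
+     */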
+ public static byte[] longToByteArray(long data){
+ long nano = data * 1000_000;
+
+ int julianDays = (int) ((nano / NANO_SECONDS_PER_DAY) + JULIAN_EPOCH_OFFSET_DAYS);
+ byte[] julianDaysBytes = getBytes(julianDays);
+ flip(julianDaysBytes);
+
+ long lastDayNanos = nano % NANO_SECONDS_PER_DAY;
+ byte[] lastDayNanosBytes = getBytes(lastDayNanos);
+ flip(lastDayNanosBytes);
+
+ byte[] dst = new byte[12];
+
+ System.arraycopy(lastDayNanosBytes, 0, dst, 0, 8);
+ System.arraycopy(julianDaysBytes, 0, dst, 8, 4);
+
+ return dst;
+ }
+
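+    // Big-endian encoding of a long into 8 bytes; callers flip() the result to get little-endian order.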
+ private static byte[] getBytes(long i) {
+ byte[] bytes=new byte[8];
+ bytes[0]=(byte)((i >> 56) & 0xFF);
+ bytes[1]=(byte)((i >> 48) & 0xFF);
+ bytes[2]=(byte)((i >> 40) & 0xFF);
+ bytes[3]=(byte)((i >> 32) & 0xFF);
+ bytes[4]=(byte)((i >> 24) & 0xFF);
+ bytes[5]=(byte)((i >> 16) & 0xFF);
+ bytes[6]=(byte)((i >> 8) & 0xFF);
+ bytes[7]=(byte)(i & 0xFF);
+ return bytes;
+ }
+
+    /**
+     * Reverse the byte array in place (big-endian -> little-endian).
+     * @param bytes the array to reverse
+     */
+    private static void flip(byte[] bytes) {
+        for (int i = 0, j = bytes.length - 1; i < j; i++, j--) {
+            byte tmp = bytes[i];
+            bytes[i] = bytes[j];
+            bytes[j] = tmp;
+        }
+    }
+                        <relocations>
+                            <relocation>
+                                <pattern>com.google.common</pattern>
+                                <shadedPattern>shade.core.com.google.common</shadedPattern>
+                            </relocation>
+                            <relocation>
+                                <pattern>com.google.thirdparty</pattern>
+                                <shadedPattern>shade.core.com.google.thirdparty</shadedPattern>
+                            </relocation>
+                        </relocations>
diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/BaseHdfsInputFormat.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/BaseHdfsInputFormat.java
index f6d802cf1b..e5e6a8ee37 100644
--- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/BaseHdfsInputFormat.java
+++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/BaseHdfsInputFormat.java
@@ -21,10 +21,15 @@
import com.dtstack.flinkx.inputformat.BaseRichInputFormat;
import com.dtstack.flinkx.reader.MetaColumn;
import com.dtstack.flinkx.util.FileSystemUtil;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.security.UserGroupInformation;
+import java.io.File;
import java.io.IOException;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -36,6 +41,8 @@
*/
public abstract class BaseHdfsInputFormat extends BaseRichInputFormat {
+ private static final String PARTITION_SPLIT_CHAR = "=";
+
protected Map hadoopConfig;
protected List metaColumns;
@@ -61,14 +68,25 @@ public abstract class BaseHdfsInputFormat extends BaseRichInputFormat {
protected Object value;
- protected boolean isFileEmpty = false;
-
protected String filterRegex;
+ protected transient UserGroupInformation ugi;
+
+ protected boolean openKerberos;
+
+ protected String currentPartition;
+
+ protected transient FileSystem fs;
+
@Override
public void openInputFormat() throws IOException {
super.openInputFormat();
conf = buildConfig();
+
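+        // Log in once up front when Kerberos is enabled, so the record readers can wrap HDFS access in ugi.doAs().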
+ openKerberos = FileSystemUtil.isOpenKerberos(hadoopConfig);
+ if (openKerberos) {
+ ugi = FileSystemUtil.getUGI(hadoopConfig, defaultFs);
+ }
}
protected JobConf buildConfig() {
@@ -81,7 +99,7 @@ protected JobConf buildConfig() {
@Override
public boolean reachedEnd() throws IOException {
- return isFileEmpty || !recordReader.next(key, value);
+ return !recordReader.next(key, value);
}
@Override
@@ -91,4 +109,27 @@ public void closeInternal() throws IOException {
}
}
+ /**
+     * Extract the current partition name/value pairs from the parent directories of the given HDFS path
+     * and fill them into the partition meta columns.
+     * @param path hdfs file path
+ */
+ public void findCurrentPartition(Path path){
+        Map<String, String> map = new HashMap<>(16);
+ String pathStr = path.getParent().toString();
+ int index;
+ while((index = pathStr.lastIndexOf(PARTITION_SPLIT_CHAR)) > 0){
+ int i = pathStr.lastIndexOf(File.separator);
+ String name = pathStr.substring(i + 1, index);
+ String value = pathStr.substring(index + 1);
+ map.put(name, value);
+ pathStr = pathStr.substring(0, i);
+ }
+
+ for (MetaColumn column : metaColumns) {
+ if(column.getPart()){
+ column.setValue(map.get(column.getName()));
+ }
+ }
+ }
+
}
diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormat.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormat.java
index a87d704310..6f48d4e404 100644
--- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormat.java
+++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormat.java
@@ -22,21 +22,25 @@
import com.dtstack.flinkx.hdfs.HdfsUtil;
import com.dtstack.flinkx.reader.MetaColumn;
import com.dtstack.flinkx.util.FileSystemUtil;
+import com.dtstack.flinkx.util.StringUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.types.Row;
-import org.apache.hadoop.fs.*;
-import org.apache.hadoop.hive.ql.io.orc.*;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
+import org.apache.hadoop.hive.ql.io.orc.OrcSplit;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import java.io.*;
+import java.security.PrivilegedAction;
import java.util.*;
+import java.util.concurrent.atomic.AtomicBoolean;
/**
* The subclass of HdfsInputFormat which handles orc files
@@ -46,99 +50,102 @@
*/
public class HdfsOrcInputFormat extends BaseHdfsInputFormat {
- private transient OrcSerde orcSerde;
-
private transient String[] fullColNames;
- private transient String[] fullColTypes;
-
private transient StructObjectInspector inspector;
    private transient List<? extends StructField> fields;
private static final String COMPLEX_FIELD_TYPE_SYMBOL_REGEX = ".*(<|>|\\{|}|[|]).*";
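+    // Guards the lazy, one-time construction of the ORC schema inspector from the first split's file.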
+ private AtomicBoolean isInit = new AtomicBoolean(false);
+
@Override
- public void openInputFormat() throws IOException{
+ public void openInputFormat() throws IOException {
super.openInputFormat();
+ inputFormat = new OrcInputFormat();
+ }
+
+ @Override
+ public void openInternal(InputSplit inputSplit) throws IOException {
+ HdfsOrcInputSplit hdfsOrcInputSplit = (HdfsOrcInputSplit) inputSplit;
+ OrcSplit orcSplit = hdfsOrcInputSplit.getOrcSplit();
- FileSystem fs;
try {
- fs = FileSystemUtil.getFileSystem(hadoopConfig, defaultFs);
+ if (!isInit.get()) {
+ init(orcSplit.getPath());
+ isInit.set(true);
+ }
} catch (Exception e) {
- throw new RuntimeException(e);
+ throw new IOException("初始化[inspector]出错", e);
}
- orcSerde = new OrcSerde();
- inputFormat = new OrcInputFormat();
- org.apache.hadoop.hive.ql.io.orc.Reader reader = null;
- try {
- OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf);
- readerOptions.filesystem(fs);
-
- Path path = new Path(inputPath);
- String typeStruct = null;
-
- if(fs.isDirectory(path)) {
- RemoteIterator iterator = fs.listFiles(path, true);
- while(iterator.hasNext()) {
- FileStatus fileStatus = iterator.next();
- if(fileStatus.isFile() && fileStatus.getLen() > 49) {
- Path subPath = fileStatus.getPath();
- reader = OrcFile.createReader(subPath, readerOptions);
- typeStruct = reader.getObjectInspector().getTypeName();
- if(StringUtils.isNotEmpty(typeStruct)) {
- break;
- }
+ if (openKerberos) {
+            ugi.doAs(new PrivilegedAction<Object>() {
+ @Override
+ public Object run() {
+ try {
+ openOrcReader(inputSplit);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
}
- }
- if(reader == null) {
- //throw new RuntimeException("orcfile dir is empty!");
- LOG.error("orc file {} is empty!", inputPath);
- isFileEmpty = true;
- return;
+ return null;
}
+ });
+ } else {
+ openOrcReader(inputSplit);
+ }
+ }
- } else {
- reader = OrcFile.createReader(path, readerOptions);
- typeStruct = reader.getObjectInspector().getTypeName();
- }
+ private void openOrcReader(InputSplit inputSplit) throws IOException{
+ numReadCounter = getRuntimeContext().getLongCounter("numRead");
+ HdfsOrcInputSplit hdfsOrcInputSplit = (HdfsOrcInputSplit) inputSplit;
+ OrcSplit orcSplit = hdfsOrcInputSplit.getOrcSplit();
+ recordReader = inputFormat.getRecordReader(orcSplit, conf, Reporter.NULL);
+ key = recordReader.createKey();
+ value = recordReader.createValue();
+ fields = inspector.getAllStructFieldRefs();
+ }
- if (StringUtils.isEmpty(typeStruct)) {
- throw new RuntimeException("can't retrieve type struct from " + path);
- }
+ private void init(Path path) throws Exception {
+ OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf);
+ readerOptions.filesystem(fs);
+ org.apache.hadoop.hive.ql.io.orc.Reader reader = OrcFile.createReader(path, readerOptions);
+ String typeStruct = reader.getObjectInspector().getTypeName();
- int startIndex = typeStruct.indexOf("<") + 1;
- int endIndex = typeStruct.lastIndexOf(">");
- typeStruct = typeStruct.substring(startIndex, endIndex);
+ if (StringUtils.isEmpty(typeStruct)) {
+ throw new RuntimeException("can't retrieve type struct from " + path);
+ }
- if(typeStruct.matches(COMPLEX_FIELD_TYPE_SYMBOL_REGEX)){
- throw new RuntimeException("Field types such as array, map, and struct are not supported.");
- }
+ int startIndex = typeStruct.indexOf("<") + 1;
+ int endIndex = typeStruct.lastIndexOf(">");
+ typeStruct = typeStruct.substring(startIndex, endIndex);
- List cols = parseColumnAndType(typeStruct);
+ if(typeStruct.matches(COMPLEX_FIELD_TYPE_SYMBOL_REGEX)){
+ throw new RuntimeException("Field types such as array, map, and struct are not supported.");
+ }
- fullColNames = new String[cols.size()];
- fullColTypes = new String[cols.size()];
+        List<String> cols = parseColumnAndType(typeStruct);
- for(int i = 0; i < cols.size(); ++i) {
- String[] temp = cols.get(i).split(":");
- fullColNames[i] = temp[0];
- fullColTypes[i] = temp[1];
- }
+ fullColNames = new String[cols.size()];
+ String[] fullColTypes = new String[cols.size()];
- Properties p = new Properties();
- p.setProperty("columns", StringUtils.join(fullColNames, ","));
- p.setProperty("columns.types", StringUtils.join(fullColTypes, ":"));
- orcSerde.initialize(conf, p);
+ for(int i = 0; i < cols.size(); ++i) {
+ String[] temp = cols.get(i).split(":");
+ fullColNames[i] = temp[0];
+ fullColTypes[i] = temp[1];
+ }
- this.inspector = (StructObjectInspector) orcSerde.getObjectInspector();
+ Properties p = new Properties();
+ p.setProperty("columns", StringUtils.join(fullColNames, ","));
+ p.setProperty("columns.types", StringUtils.join(fullColTypes, ":"));
- } catch (Throwable e) {
- throw new RuntimeException(e);
- }
+ OrcSerde orcSerde = new OrcSerde();
+ orcSerde.initialize(conf, p);
+
+ this.inspector = (StructObjectInspector) orcSerde.getObjectInspector();
}
    private List<String> parseColumnAndType(String typeStruct){
@@ -175,12 +182,25 @@ private List parseColumnAndType(String typeStruct){
@Override
public HdfsOrcInputSplit[] createInputSplitsInternal(int minNumSplits) throws IOException {
- try {
- FileSystemUtil.getFileSystem(hadoopConfig, defaultFs);
- } catch (Exception e) {
- throw new IOException(e);
+ if (FileSystemUtil.isOpenKerberos(hadoopConfig)) {
+ UserGroupInformation ugi = FileSystemUtil.getUGI(hadoopConfig, defaultFs);
+ LOG.info("user:{}, ", ugi.getShortUserName());
+            return ugi.doAs(new PrivilegedAction<HdfsOrcInputSplit[]>() {
+ @Override
+ public HdfsOrcInputSplit[] run() {
+ try {
+ return createOrcSplit(minNumSplits);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ });
+ } else {
+ return createOrcSplit(minNumSplits);
}
+ }
+ private HdfsOrcInputSplit[] createOrcSplit(int minNumSplits) throws IOException{
JobConf jobConf = FileSystemUtil.getJobConf(hadoopConfig, defaultFs);
org.apache.hadoop.mapred.FileInputFormat.setInputPaths(jobConf, inputPath);
org.apache.hadoop.mapred.FileInputFormat.setInputPathFilter(buildConfig(), HdfsPathFilter.class);
@@ -204,24 +224,6 @@ public HdfsOrcInputSplit[] createInputSplitsInternal(int minNumSplits) throws IO
return null;
}
-
- @Override
- public void openInternal(InputSplit inputSplit) throws IOException {
-
- if(isFileEmpty){
- return;
- }
-
- numReadCounter = getRuntimeContext().getLongCounter("numRead");
- HdfsOrcInputSplit hdfsOrcInputSplit = (HdfsOrcInputSplit) inputSplit;
- OrcSplit orcSplit = hdfsOrcInputSplit.getOrcSplit();
- recordReader = inputFormat.getRecordReader(orcSplit, conf, Reporter.NULL);
- key = recordReader.createKey();
- value = recordReader.createValue();
- fields = inspector.getAllStructFieldRefs();
- }
-
-
@Override
public Row nextRecordInternal(Row row) throws IOException {
if(metaColumns.size() == 1 && ConstantValue.STAR_SYMBOL.equals(metaColumns.get(0).getName())){
@@ -239,17 +241,17 @@ public Row nextRecordInternal(Row row) throws IOException {
MetaColumn metaColumn = metaColumns.get(i);
Object val = null;
- if(metaColumn.getIndex() != -1){
+ if(metaColumn.getValue() != null){
+ val = metaColumn.getValue();
+ }else if(metaColumn.getIndex() != -1){
val = inspector.getStructFieldData(value, fields.get(metaColumn.getIndex()));
if (val == null && metaColumn.getValue() != null){
val = metaColumn.getValue();
}
- } else if(metaColumn.getValue() != null){
- val = metaColumn.getValue();
}
if(val instanceof String || val instanceof org.apache.hadoop.io.Text){
- val = HdfsUtil.string2col(String.valueOf(val),metaColumn.getType(),metaColumn.getTimeFormat());
+ val = StringUtil.string2col(String.valueOf(val), metaColumn.getType(), metaColumn.getTimeFormat());
} else if(val != null){
val = HdfsUtil.getWritableValue(val);
}
@@ -291,5 +293,4 @@ public int getSplitNumber() {
return splitNumber;
}
}
-
}
diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsParquetInputFormat.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsParquetInputFormat.java
index 16fb276495..565c0a49b2 100644
--- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsParquetInputFormat.java
+++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsParquetInputFormat.java
@@ -20,9 +20,9 @@
import com.dtstack.flinkx.constants.ConstantValue;
import com.dtstack.flinkx.enums.ColumnType;
-import com.dtstack.flinkx.hdfs.HdfsUtil;
import com.dtstack.flinkx.reader.MetaColumn;
import com.dtstack.flinkx.util.FileSystemUtil;
+import com.dtstack.flinkx.util.StringUtil;
import com.google.common.collect.Lists;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
@@ -43,6 +43,7 @@
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
+import java.security.PrivilegedAction;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Iterator;
@@ -75,6 +76,8 @@ public class HdfsParquetInputFormat extends BaseHdfsInputFormat {
private static final long NANOS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1);
+ private static final int TIMESTAMP_BINARY_LENGTH = 12;
+
@Override
protected void openInternal(InputSplit inputSplit) throws IOException {
currentSplitFilePaths = ((HdfsParquetSplit)inputSplit).getPaths();
@@ -82,14 +85,33 @@ protected void openInternal(InputSplit inputSplit) throws IOException {
private boolean nextLine() throws IOException{
if (currentFileReader == null && currentFileIndex <= currentSplitFilePaths.size()-1){
- nextFile();
+ if (openKerberos) {
+            ugi.doAs(new PrivilegedAction<Object>() {
+ @Override
+ public Object run() {
+ try {
+ nextFile();
+ return null;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ });
+ } else {
+ nextFile();
+ }
}
if (currentFileReader == null){
return false;
}
- currentLine = currentFileReader.read();
+ if (openKerberos) {
+ currentLine = nextLineWithKerberos();
+ } else {
+ currentLine = currentFileReader.read();
+ }
+
if (fullColNames == null && currentLine != null){
fullColNames = new ArrayList<>();
fullColTypes = new ArrayList<>();
@@ -117,11 +139,24 @@ private boolean nextLine() throws IOException{
return currentLine != null;
}
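+    // Read the next parquet record inside the Kerberos user context so the HDFS access is authenticated.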
+ private Group nextLineWithKerberos() {
+        return ugi.doAs(new PrivilegedAction<Group>() {
+ @Override
+ public Group run() {
+ try {
+ return currentFileReader.read();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ });
+ }
+
private void nextFile() throws IOException{
- String path = currentSplitFilePaths.get(currentFileIndex);
- ParquetReader.Builder reader = ParquetReader.builder(new GroupReadSupport(), new Path(path)).withConf(conf);
+ Path path = new Path(currentSplitFilePaths.get(currentFileIndex));
+ findCurrentPartition(path);
+        ParquetReader.Builder<Group> reader = ParquetReader.builder(new GroupReadSupport(), path).withConf(conf);
currentFileReader = reader.build();
-
currentFileIndex++;
}
@@ -139,22 +174,20 @@ protected Row nextRecordInternal(Row row) throws IOException {
MetaColumn metaColumn = metaColumns.get(i);
Object val = null;
- if(metaColumn.getIndex() != -1){
- if (metaColumn.getIndex() < currentLine.getType().getFieldCount()) {
- if(currentLine.getFieldRepetitionCount(metaColumn.getIndex()) > 0){
- val = getData(currentLine,metaColumn.getType(),metaColumn.getIndex());
- }
+ if (metaColumn.getValue() != null){
+ val = metaColumn.getValue();
+ }else if(metaColumn.getIndex() != -1){
+ if(currentLine.getFieldRepetitionCount(metaColumn.getIndex()) > 0){
+ val = getData(currentLine,metaColumn.getType(),metaColumn.getIndex());
+ }
- if (val == null && metaColumn.getValue() != null){
- val = metaColumn.getValue();
- }
+ if (val == null && metaColumn.getValue() != null){
+ val = metaColumn.getValue();
}
- } else if (metaColumn.getValue() != null){
- val = metaColumn.getValue();
}
if(val instanceof String){
- val = HdfsUtil.string2col(String.valueOf(val),metaColumn.getType(),metaColumn.getTimeFormat());
+ val = StringUtil.string2col(String.valueOf(val), metaColumn.getType(), metaColumn.getTimeFormat());
}
row.setField(i,val);
@@ -169,7 +202,7 @@ public boolean reachedEnd() throws IOException {
return !nextLine();
}
- private Object getData(Group currentLine,String type,int index){
+ public Object getData(Group currentLine,String type,int index){
Object data = null;
ColumnType columnType = ColumnType.fromString(type);
@@ -302,13 +335,12 @@ private static List getAllPartitionPath(String tableLocation, FileSystem
private String getTypeName(String method){
String typeName;
switch (method){
+ case "getBoolean":
case "getInteger" : typeName = "int";break;
case "getInt96" : typeName = "bigint";break;
case "getFloat" : typeName = "float";break;
case "getDouble" : typeName = "double";break;
case "getBinary" : typeName = "binary";break;
- case "getString" : typeName = "string";break;
- case "getBoolean" : typeName = "int";break;
default:typeName = "string";
}
@@ -319,11 +351,11 @@ private String getTypeName(String method){
* @param timestampBinary
* @return
*/
- private long getTimestampMillis(Binary timestampBinary)
- {
- if (timestampBinary.length() != 12) {
+ private long getTimestampMillis(Binary timestampBinary) {
+ if (timestampBinary.length() != TIMESTAMP_BINARY_LENGTH) {
return 0;
}
+
byte[] bytes = timestampBinary.getBytes();
long timeOfDayNanos = Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]);
diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsReader.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsReader.java
index 5e3cea2a94..d4291741f0 100644
--- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsReader.java
+++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsReader.java
@@ -43,9 +43,9 @@ public class HdfsReader extends BaseDataReader {
protected String fileType;
protected String path;
protected String fieldDelimiter;
- private List metaColumns;
+ protected List metaColumns;
protected Map hadoopConfig;
- private String filterRegex;
+ protected String filterRegex;
public HdfsReader(DataTransferConfig config, StreamExecutionEnvironment env) {
super(config, env);
diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsTextInputFormat.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsTextInputFormat.java
index 2ba2b1c363..f7d2dac2b7 100644
--- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsTextInputFormat.java
+++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsTextInputFormat.java
@@ -22,25 +22,25 @@
import com.dtstack.flinkx.hdfs.HdfsUtil;
import com.dtstack.flinkx.reader.MetaColumn;
import com.dtstack.flinkx.util.FileSystemUtil;
-import jodd.util.StringUtil;
+import com.dtstack.flinkx.util.StringUtil;
import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.commons.lang3.StringUtils;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.types.Row;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.security.UserGroupInformation;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
-import java.nio.charset.Charset;
-import java.nio.charset.UnsupportedCharsetException;
-import java.util.Map;
+import java.security.PrivilegedAction;
/**
* The subclass of HdfsInputFormat which handles text files
@@ -59,12 +59,25 @@ public void openInputFormat() throws IOException {
@Override
public InputSplit[] createInputSplitsInternal(int minNumSplits) throws IOException {
- try {
- FileSystemUtil.getFileSystem(hadoopConfig, defaultFs);
- } catch (Exception e) {
- throw new IOException(e);
+ if (FileSystemUtil.isOpenKerberos(hadoopConfig)) {
+ UserGroupInformation ugi = FileSystemUtil.getUGI(hadoopConfig, defaultFs);
+ LOG.info("user:{}, ", ugi.getShortUserName());
+ return ugi.doAs(new PrivilegedAction() {
+ @Override
+ public InputSplit[] run() {
+ try {
+ return createTextSplit(minNumSplits);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ });
+ } else {
+ return createTextSplit(minNumSplits);
}
+ }
+ private InputSplit[] createTextSplit(int minNumSplits) throws IOException{
JobConf jobConf = buildConfig();
org.apache.hadoop.mapred.FileInputFormat.setInputPathFilter(jobConf, HdfsPathFilter.class);
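Both the split creation above and openInternal below wrap the actual HDFS work in UserGroupInformation.doAs when Kerberos is enabled. FileSystemUtil.isOpenKerberos and getUGI are the FlinkX helpers used in this patch; the runAs wrapper below is only an illustrative sketch of the pattern in isolation:

import org.apache.hadoop.security.UserGroupInformation;

import java.security.PrivilegedAction;
import java.util.concurrent.Callable;

public class KerberosAware {
    /**
     * Runs the action as the supplied Kerberos principal when a UGI is present,
     * otherwise runs it directly. Checked exceptions are rethrown as RuntimeException,
     * mirroring the PrivilegedAction blocks in HdfsTextInputFormat.
     */
    public static <T> T runAs(UserGroupInformation ugi, Callable<T> action) {
        if (ugi == null) {
            try {
                return action.call();
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
        return ugi.doAs((PrivilegedAction<T>) () -> {
            try {
                return action.call();
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
    }
}

With such a helper, the split creation above would collapse to something like runAs(maybeNullUgi, () -> createTextSplit(minNumSplits)).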
@@ -88,8 +101,30 @@ public InputSplit[] createInputSplitsInternal(int minNumSplits) throws IOExcepti
@Override
public void openInternal(InputSplit inputSplit) throws IOException {
+
+ if(openKerberos){
+ ugi.doAs(new PrivilegedAction() {
+ @Override
+ public Object run() {
+ try {
+ openHdfsTextReader(inputSplit);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+
+ return null;
+ }
+ });
+ }else{
+ openHdfsTextReader(inputSplit);
+ }
+
+ }
+
+ private void openHdfsTextReader(InputSplit inputSplit) throws IOException{
HdfsTextInputSplit hdfsTextInputSplit = (HdfsTextInputSplit) inputSplit;
org.apache.hadoop.mapred.InputSplit fileSplit = hdfsTextInputSplit.getTextSplit();
+ findCurrentPartition(((FileSplit) fileSplit).getPath());
recordReader = inputFormat.getRecordReader(fileSplit, conf, Reporter.NULL);
key = new LongWritable();
value = new Text();
@@ -98,7 +133,7 @@ public void openInternal(InputSplit inputSplit) throws IOException {
@Override
public Row nextRecordInternal(Row row) throws IOException {
String line = new String(((Text)value).getBytes(), 0, ((Text)value).getLength(), charsetName);
- String[] fields = line.split(delimiter);
+ String[] fields = StringUtils.splitPreserveAllTokens(line, delimiter);
if (metaColumns.size() == 1 && ConstantValue.STAR_SYMBOL.equals(metaColumns.get(0).getName())){
row = new Row(fields.length);
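Switching from String.split to StringUtils.splitPreserveAllTokens matters for rows that end with empty columns: String.split drops trailing empty strings, so the field count would no longer match the schema. A small comparison using org.apache.commons.lang3.StringUtils, as imported above:

import org.apache.commons.lang3.StringUtils;

public class SplitComparison {
    public static void main(String[] args) {
        String line = "1,foo,,";

        // java.lang.String#split discards trailing empty fields
        String[] naive = line.split(",");
        System.out.println(naive.length);          // 2 -> "1", "foo"

        // splitPreserveAllTokens keeps every field, including the empty trailing ones
        String[] preserved = StringUtils.splitPreserveAllTokens(line, ",");
        System.out.println(preserved.length);      // 4 -> "1", "foo", "", ""
    }
}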
@@ -121,7 +156,7 @@ public Row nextRecordInternal(Row row) throws IOException {
}
if(value != null){
- value = HdfsUtil.string2col(String.valueOf(value),metaColumn.getType(),metaColumn.getTimeFormat());
+ value = StringUtil.string2col(String.valueOf(value), metaColumn.getType(),metaColumn.getTimeFormat());
}
row.setField(i, value);
@@ -135,69 +170,7 @@ public Row nextRecordInternal(Row row) throws IOException {
public boolean reachedEnd() throws IOException {
key = new LongWritable();
value = new Text();
- return isFileEmpty || !recordReader.next(key, value);
- }
-
-
- public static class HdfsTextInputFormatBuilder {
-
- private HdfsTextInputFormat format;
-
- private HdfsTextInputFormatBuilder() {
- format = new HdfsTextInputFormat();
- }
-
- public HdfsTextInputFormatBuilder setHadoopConfig(Map hadoopConfig) {
- format.hadoopConfig = hadoopConfig;
- return this;
- }
-
- public HdfsTextInputFormatBuilder setInputPaths(String inputPaths) {
- format.inputPath = inputPaths;
- return this;
- }
-
- public HdfsTextInputFormatBuilder setBytes(long bytes) {
- format.bytes = bytes;
- return this;
- }
-
- public HdfsTextInputFormatBuilder setMonitorUrls(String monitorUrls) {
- format.monitorUrls = monitorUrls;
- return this;
- }
-
- public HdfsTextInputFormatBuilder setDelimiter(String delimiter) {
- if(delimiter == null) {
- delimiter = "\\001";
- }
- format.delimiter = delimiter;
- return this;
- }
-
- public HdfsTextInputFormatBuilder setDefaultFs(String defaultFs) {
- format.defaultFs = defaultFs;
- return this;
- }
-
- public HdfsTextInputFormatBuilder setcharsetName (String charsetName) {
- if(StringUtil.isNotEmpty(charsetName)) {
- if(!Charset.isSupported(charsetName)) {
- throw new UnsupportedCharsetException("The charset " + charsetName + " is not supported.");
- }
- this.format.charsetName = charsetName;
- }
-
- return this;
- }
-
- public HdfsTextInputFormat finish() {
- return format;
- }
- }
-
- public static HdfsTextInputFormatBuilder buildHdfsTextInputFormat() {
- return new HdfsTextInputFormatBuilder();
+ return !recordReader.next(key, value);
}
static class HdfsTextInputSplit implements InputSplit {
@@ -229,5 +202,4 @@ public int getSplitNumber() {
return splitNumber;
}
}
-
}
\ No newline at end of file
diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
new file mode 100644
index 0000000000..a01ea427df
--- /dev/null
+++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -0,0 +1,1221 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.ValidReadTxnList;
+import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.io.*;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.StringUtils;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+/**
+ * A MapReduce/Hive input format for ORC files.
+ *
+ * This class implements both the classic InputFormat, which stores the rows
+ * directly, and AcidInputFormat, which stores a series of events with the
+ * following schema:
+ *
+ * class AcidEvent<ROW> {
+ * enum ACTION {INSERT, UPDATE, DELETE}
+ * ACTION operation;
+ * long originalTransaction;
+ * int bucket;
+ * long rowId;
+ * long currentTransaction;
+ * ROW row;
+ * }
+ *
+ * Each AcidEvent object corresponds to an update event. The
+ * originalTransaction, bucket, and rowId are the unique identifier for the row.
+ * The operation and currentTransaction are the operation and the transaction
+ * that added this event. Insert and update events include the entire row, while
+ * delete events have null for row.
+ */
+public class OrcInputFormat implements InputFormat,
+ InputFormatChecker, VectorizedInputFormatInterface,
+ AcidInputFormat,
+ CombineHiveInputFormat.AvoidSplitCombination {
+
+ static final HadoopShims SHIMS = ShimLoader.getHadoopShims();
+ static final String MIN_SPLIT_SIZE =
+ SHIMS.getHadoopConfNames().get("MAPREDMINSPLITSIZE");
+ static final String MAX_SPLIT_SIZE =
+ SHIMS.getHadoopConfNames().get("MAPREDMAXSPLITSIZE");
+ static final String SARG_PUSHDOWN = "sarg.pushdown";
+ private static final Log LOG = LogFactory.getLog(OrcInputFormat.class);
+ private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
+ private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
+
+ private static final PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ private static final String CLASS_NAME = ReaderImpl.class.getName();
+
+ /**
+ * When picking the hosts for a split that crosses block boundaries,
+   * drop any host that has fewer than MIN_INCLUDED_LOCATION times the
+ * number of bytes available on the host with the most.
+ * If host1 has 10MB of the split, host2 has 20MB, and host3 has 18MB the
+ * split will contain host2 (100% of host2) and host3 (90% of host2). Host1
+ * with 50% will be dropped.
+ */
+ private static final double MIN_INCLUDED_LOCATION = 0.80;
+
+ /**
+ * Get the root column for the row. In ACID format files, it is offset by
+ * the extra metadata columns.
+ * @param isOriginal is the file in the original format?
+ * @return the column number for the root of row.
+ */
+ private static int getRootColumn(boolean isOriginal) {
+ return isOriginal ? 0 : (OrcRecordUpdater.ROW + 1);
+ }
+
+ public static RecordReader createReaderFromFile(Reader file,
+ Configuration conf,
+ long offset, long length
+ ) throws IOException {
+ Reader.Options options = new Reader.Options().range(offset, length);
+ boolean isOriginal =
+ !file.hasMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME);
+ List types = file.getTypes();
+ setIncludedColumns(options, types, conf, isOriginal);
+ setSearchArgument(options, types, conf, isOriginal);
+ return file.rowsOptions(options);
+ }
+
+ /**
+ * Recurse down into a type subtree turning on all of the sub-columns.
+ * @param types the types of the file
+ * @param result the global view of columns that should be included
+ * @param typeId the root of tree to enable
+ * @param rootColumn the top column
+ */
+ private static void includeColumnRecursive(List types,
+ boolean[] result,
+ int typeId,
+ int rootColumn) {
+ result[typeId - rootColumn] = true;
+ OrcProto.Type type = types.get(typeId);
+ int children = type.getSubtypesCount();
+ for(int i=0; i < children; ++i) {
+ includeColumnRecursive(types, result, type.getSubtypes(i), rootColumn);
+ }
+ }
+
+ /**
+ * Take the configuration and figure out which columns we need to include.
+ * @param options the options to update
+ * @param types the types for the file
+ * @param conf the configuration
+ * @param isOriginal is the file in the original format?
+ */
+ static void setIncludedColumns(Reader.Options options,
+ List types,
+ Configuration conf,
+ boolean isOriginal) {
+ int rootColumn = getRootColumn(isOriginal);
+ if (!ColumnProjectionUtils.isReadAllColumns(conf)) {
+ int numColumns = types.size() - rootColumn;
+ boolean[] result = new boolean[numColumns];
+ result[0] = true;
+ OrcProto.Type root = types.get(rootColumn);
+ List included = ColumnProjectionUtils.getReadColumnIDs(conf);
+ for(int i=0; i < root.getSubtypesCount(); ++i) {
+ if (included.contains(i)) {
+ includeColumnRecursive(types, result, root.getSubtypes(i),
+ rootColumn);
+ }
+ }
+ options.include(result);
+ } else {
+ options.include(null);
+ }
+ }
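The include mask built above is indexed by type id relative to the root column, and enabling a top-level column also enables all of its nested sub-columns through includeColumnRecursive. A toy illustration over a flattened type tree; the Node class is a stand-in for OrcProto.Type, just enough to show the recursion:

import java.util.Arrays;
import java.util.List;

public class IncludeMaskDemo {
    /** Minimal stand-in for OrcProto.Type: only the subtype ids matter for the recursion. */
    static class Node {
        final List<Integer> subtypes;
        Node(Integer... subtypes) { this.subtypes = Arrays.asList(subtypes); }
    }

    static void include(List<Node> types, boolean[] result, int typeId, int rootColumn) {
        result[typeId - rootColumn] = true;
        for (int child : types.get(typeId).subtypes) {
            include(types, result, child, rootColumn);
        }
    }

    public static void main(String[] args) {
        // struct<a:int, b:struct<c:int, d:int>> flattened into type ids 0..4
        List<Node> types = Arrays.asList(
                new Node(1, 2),  // 0: root struct
                new Node(),      // 1: a
                new Node(3, 4),  // 2: b
                new Node(),      // 3: b.c
                new Node());     // 4: b.d

        boolean[] result = new boolean[types.size()];
        result[0] = true;              // the root is always read
        include(types, result, 2, 0);  // project only the top-level column "b"
        System.out.println(Arrays.toString(result)); // [true, false, true, true, true]
    }
}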
+
+ static void setSearchArgument(Reader.Options options,
+ List types,
+ Configuration conf,
+ boolean isOriginal) {
+ int rootColumn = getRootColumn(isOriginal);
+ String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+ String sargPushdown = conf.get(SARG_PUSHDOWN);
+ String columnNamesString =
+ conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
+ if ((sargPushdown == null && serializedPushdown == null)
+ || columnNamesString == null) {
+ LOG.debug("No ORC pushdown predicate");
+ options.searchArgument(null, null);
+ } else {
+ SearchArgument sarg;
+ if (serializedPushdown != null) {
+ sarg = SearchArgumentFactory.create
+ (Utilities.deserializeExpression(serializedPushdown));
+ } else {
+ sarg = SearchArgumentFactory.create(sargPushdown);
+ }
+ LOG.info("ORC pushdown predicate: " + sarg);
+ String[] neededColumnNames = columnNamesString.split(",");
+ String[] columnNames = new String[types.size() - rootColumn];
+ boolean[] includedColumns = options.getInclude();
+ int i = 0;
+ for(int columnId: types.get(rootColumn).getSubtypesList()) {
+ if (includedColumns == null || includedColumns[columnId - rootColumn]) {
+ // this is guaranteed to be positive because types only have children
+ // ids greater than their own id.
+ columnNames[columnId - rootColumn] = neededColumnNames[i++];
+ }
+ }
+ options.searchArgument(sarg, columnNames);
+ }
+ }
+
+ /**
+ * Get the list of input {@link Path}s for the map-reduce job.
+ *
+ * @param conf The configuration of the job
+ * @return the list of input {@link Path}s for the map-reduce job.
+ */
+ static Path[] getInputPaths(Configuration conf) throws IOException {
+ String dirs = conf.get("mapred.input.dir");
+ if (dirs == null) {
+ throw new IOException("Configuration mapred.input.dir is not defined.");
+ }
+ String [] list = StringUtils.split(dirs);
+ Path[] result = new Path[list.length];
+ for (int i = 0; i < list.length; i++) {
+ result[i] = new Path(StringUtils.unEscapeString(list[i]));
+ }
+ return result;
+ }
+
+ static List generateSplitsInfo(Configuration conf)
+ throws IOException {
+ // use threads to resolve directories into splits
+ Context context = new Context(conf);
+ for(Path dir: getInputPaths(conf)) {
+ FileSystem fs = dir.getFileSystem(conf);
+ context.schedule(new FileGenerator(context, fs, dir));
+ }
+ context.waitForTasks();
+ // deal with exceptions
+ if (!context.errors.isEmpty()) {
+ List errors =
+ new ArrayList(context.errors.size());
+ for(Throwable th: context.errors) {
+ if (th instanceof IOException) {
+ errors.add((IOException) th);
+ } else {
+ throw new RuntimeException("serious problem", th);
+ }
+ }
+ throw new InvalidInputException(errors);
+ }
+ if (context.cacheStripeDetails) {
+ LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/"
+ + context.numFilesCounter.get());
+ }
+ return context.splits;
+ }
+
+ static Path findOriginalBucket(FileSystem fs,
+ Path directory,
+ int bucket) throws IOException {
+ for(FileStatus stat: fs.listStatus(directory)) {
+ String name = stat.getPath().getName();
+ String numberPart = name.substring(0, name.indexOf('_'));
+ if (org.apache.commons.lang3.StringUtils.isNumeric(numberPart) &&
+ Integer.parseInt(numberPart) == bucket) {
+ return stat.getPath();
+ }
+ }
+ throw new IllegalArgumentException("Can't find bucket " + bucket + " in " +
+ directory);
+ }
+
+ @Override
+ public boolean shouldSkipCombine(Path path,
+ Configuration conf) throws IOException {
+ return (conf.get(AcidUtils.CONF_ACID_KEY) != null) || AcidUtils.isAcid(path, conf);
+ }
+
+ @Override
+ public boolean validateInput(FileSystem fs, HiveConf conf,
+ ArrayList files
+ ) throws IOException {
+
+ if (Utilities.isVectorMode(conf)) {
+ return new VectorizedOrcInputFormat().validateInput(fs, conf, files);
+ }
+
+ if (files.size() <= 0) {
+ return false;
+ }
+ for (FileStatus file : files) {
+ try {
+ OrcFile.createReader(file.getPath(),
+ OrcFile.readerOptions(conf).filesystem(fs));
+ } catch (IOException e) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public InputSplit[] getSplits(JobConf job,
+ int numSplits) throws IOException {
+ perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
+ List result = generateSplitsInfo(job);
+ perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
+ return result.toArray(new InputSplit[result.size()]);
+ }
+
+ @SuppressWarnings("unchecked")
+ private org.apache.hadoop.mapred.RecordReader
+ createVectorizedReader(InputSplit split, JobConf conf, Reporter reporter
+ ) throws IOException {
+ return (org.apache.hadoop.mapred.RecordReader)
+ new VectorizedOrcInputFormat().getRecordReader(split, conf, reporter);
+ }
+
+ @Override
+ public org.apache.hadoop.mapred.RecordReader
+ getRecordReader(InputSplit inputSplit, JobConf conf,
+ Reporter reporter) throws IOException {
+ boolean vectorMode = Utilities.isVectorMode(conf);
+
+ // if HiveCombineInputFormat gives us FileSplits instead of OrcSplits,
+ // we know it is not ACID. (see a check in CombineHiveInputFormat.getSplits() that assures this)
+ if (inputSplit.getClass() == FileSplit.class) {
+ if (vectorMode) {
+ return createVectorizedReader(inputSplit, conf, reporter);
+ }
+ return new OrcRecordReader(OrcFile.createReader(
+ ((FileSplit) inputSplit).getPath(),
+ OrcFile.readerOptions(conf)), conf, (FileSplit) inputSplit);
+ }
+
+ OrcSplit split = (OrcSplit) inputSplit;
+ reporter.setStatus(inputSplit.toString());
+
+ Options options = new Options(conf).reporter(reporter);
+ final RowReader inner = getReader(inputSplit, options);
+
+
+ /*Even though there are no delta files, we still need to produce row ids so that an
+ * UPDATE or DELETE statement would work on a table which didn't have any previous updates*/
+ if (split.isOriginal() && split.getDeltas().isEmpty()) {
+ if (vectorMode) {
+ return createVectorizedReader(inputSplit, conf, reporter);
+ } else {
+ return new NullKeyRecordReader(inner, conf);
+ }
+ }
+
+ if (vectorMode) {
+ return (org.apache.hadoop.mapred.RecordReader)
+ new VectorizedOrcAcidRowReader(inner, conf, (FileSplit) inputSplit);
+ }
+ return new NullKeyRecordReader(inner, conf);
+ }
+
+ @Override
+ public RowReader getReader(InputSplit inputSplit,
+ Options options) throws IOException {
+ final OrcSplit split = (OrcSplit) inputSplit;
+ final Path path = split.getPath();
+ Path root;
+ if (split.hasBase()) {
+ if (split.isOriginal()) {
+ root = path.getParent();
+ } else {
+ root = path.getParent().getParent();
+ }
+ } else {
+ root = path;
+ }
+ final Path[] deltas = AcidUtils.deserializeDeltas(root, split.getDeltas());
+ final Configuration conf = options.getConfiguration();
+ final Reader reader;
+ final int bucket;
+ Reader.Options readOptions = new Reader.Options();
+ readOptions.range(split.getStart(), split.getLength());
+ if (split.hasBase()) {
+ bucket = AcidUtils.parseBaseBucketFilename(split.getPath(), conf)
+ .getBucket();
+ reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+ final List types = reader.getTypes();
+ setIncludedColumns(readOptions, types, conf, split.isOriginal());
+ setSearchArgument(readOptions, types, conf, split.isOriginal());
+ } else {
+ bucket = (int) split.getStart();
+ reader = null;
+ }
+ String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY,
+ Long.MAX_VALUE + ":");
+ ValidTxnList validTxnList = new ValidReadTxnList(txnString);
+ final OrcRawRecordMerger records =
+ new OrcRawRecordMerger(conf, true, reader, split.isOriginal(), bucket,
+ validTxnList, readOptions, deltas);
+ return new RowReader() {
+ OrcStruct innerRecord = records.createValue();
+
+ @Override
+ public ObjectInspector getObjectInspector() {
+ return ((StructObjectInspector) records.getObjectInspector())
+ .getAllStructFieldRefs().get(OrcRecordUpdater.ROW)
+ .getFieldObjectInspector();
+ }
+
+ @Override
+ public boolean next(RecordIdentifier recordIdentifier,
+ OrcStruct orcStruct) throws IOException {
+ boolean result;
+ // filter out the deleted records
+ do {
+ result = records.next(recordIdentifier, innerRecord);
+ } while (result &&
+ OrcRecordUpdater.getOperation(innerRecord) ==
+ OrcRecordUpdater.DELETE_OPERATION);
+ if (result) {
+ // swap the fields with the passed in orcStruct
+ orcStruct.linkFields(OrcRecordUpdater.getRow(innerRecord));
+ }
+ return result;
+ }
+
+ @Override
+ public RecordIdentifier createKey() {
+ return records.createKey();
+ }
+
+ @Override
+ public OrcStruct createValue() {
+ return new OrcStruct(records.getColumns());
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return records.getPos();
+ }
+
+ @Override
+ public void close() throws IOException {
+ records.close();
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ return records.getProgress();
+ }
+ };
+ }
+
+ @Override
+ public RawReader getRawReader(Configuration conf,
+ boolean collapseEvents,
+ int bucket,
+ ValidTxnList validTxnList,
+ Path baseDirectory,
+ Path[] deltaDirectory
+ ) throws IOException {
+ Reader reader = null;
+ boolean isOriginal = false;
+ if (baseDirectory != null) {
+ Path bucketFile;
+ if (baseDirectory.getName().startsWith(AcidUtils.BASE_PREFIX)) {
+ bucketFile = AcidUtils.createBucketFile(baseDirectory, bucket);
+ } else {
+ isOriginal = true;
+ bucketFile = findOriginalBucket(baseDirectory.getFileSystem(conf),
+ baseDirectory, bucket);
+ }
+ reader = OrcFile.createReader(bucketFile, OrcFile.readerOptions(conf));
+ }
+ return new OrcRawRecordMerger(conf, collapseEvents, reader, isOriginal,
+ bucket, validTxnList, new Reader.Options(), deltaDirectory);
+ }
+
+ private static class OrcRecordReader
+ implements org.apache.hadoop.mapred.RecordReader,
+ StatsProvidingRecordReader {
+ private final RecordReader reader;
+ private final long offset;
+ private final long length;
+ private final int numColumns;
+ private final Reader file;
+ private final SerDeStats stats;
+ private float progress = 0.0f;
+
+
+ OrcRecordReader(Reader file, Configuration conf,
+ FileSplit split) throws IOException {
+ List types = file.getTypes();
+ this.file = file;
+ numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount();
+ this.offset = split.getStart();
+ this.length = split.getLength();
+ this.reader = createReaderFromFile(file, conf, offset, length);
+ this.stats = new SerDeStats();
+ }
+
+ @Override
+ public boolean next(NullWritable key, OrcStruct value) throws IOException {
+ if (reader.hasNext()) {
+ reader.next(value);
+ progress = reader.getProgress();
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public NullWritable createKey() {
+ return NullWritable.get();
+ }
+
+ @Override
+ public OrcStruct createValue() {
+ return new OrcStruct(numColumns);
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return offset + (long) (progress * length);
+ }
+
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ return progress;
+ }
+
+ @Override
+ public SerDeStats getStats() {
+ stats.setRawDataSize(file.getRawDataSize());
+ stats.setRowCount(file.getNumberOfRows());
+ return stats;
+ }
+ }
+
+ /**
+ * The global information about the split generation that we pass around to
+ * the different worker threads.
+ */
+ static class Context {
+ private static Cache footerCache;
+ private final Configuration conf;
+ private final ExecutorService threadPool;
+ private final List splits =
+ new ArrayList(10000);
+ private final int numBuckets;
+ private final List errors = new ArrayList();
+ private final long maxSize;
+ private final long minSize;
+ private final boolean footerInSplits;
+ private final boolean cacheStripeDetails;
+ private final AtomicInteger cacheHitCounter = new AtomicInteger(0);
+ private final AtomicInteger numFilesCounter = new AtomicInteger(0);
+ private Throwable fatalError = null;
+ private ValidTxnList transactionList;
+
+ /**
+ * A count of the number of threads that may create more work for the
+ * thread pool.
+ */
+ private int schedulers = 0;
+
+ Context(Configuration conf) {
+ this.conf = conf;
+ minSize = conf.getLong(MIN_SPLIT_SIZE, DEFAULT_MIN_SPLIT_SIZE);
+ maxSize = conf.getLong(MAX_SPLIT_SIZE, DEFAULT_MAX_SPLIT_SIZE);
+ footerInSplits = HiveConf.getBoolVar(conf,
+ ConfVars.HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS);
+ numBuckets =
+ Math.max(conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0), 0);
+ LOG.debug("Number of buckets specified by conf file is " + numBuckets);
+ int cacheStripeDetailsSize = HiveConf.getIntVar(conf,
+ ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE);
+ int numThreads = HiveConf.getIntVar(conf,
+ ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS);
+
+ cacheStripeDetails = (cacheStripeDetailsSize > 0);
+
+ threadPool = Executors.newFixedThreadPool(numThreads,
+ new ThreadFactoryBuilder().setDaemon(true)
+ .setNameFormat("ORC_GET_SPLITS #%d").build());
+
+ synchronized (Context.class) {
+ if (footerCache == null && cacheStripeDetails) {
+ footerCache = CacheBuilder.newBuilder().concurrencyLevel(numThreads)
+ .initialCapacity(cacheStripeDetailsSize).softValues().build();
+ }
+ }
+ String value = conf.get(ValidTxnList.VALID_TXNS_KEY,
+ Long.MAX_VALUE + ":");
+ transactionList = new ValidReadTxnList(value);
+ }
+
+ int getSchedulers() {
+ return schedulers;
+ }
+
+ /**
+ * Get the Nth split.
+ * @param index if index >= 0, count from the front, otherwise count from
+ * the back.
+ * @return the Nth file split
+ */
+ OrcSplit getResult(int index) {
+ if (index >= 0) {
+ return splits.get(index);
+ } else {
+ return splits.get(splits.size() + index);
+ }
+ }
+
+ List getErrors() {
+ return errors;
+ }
+
+ /**
+ * Add a unit of work.
+ * @param runnable the object to run
+ */
+ synchronized void schedule(Runnable runnable) {
+ if (fatalError == null) {
+ if (runnable instanceof FileGenerator ||
+ runnable instanceof SplitGenerator) {
+ schedulers += 1;
+ }
+ threadPool.execute(runnable);
+ } else {
+ throw new RuntimeException("serious problem", fatalError);
+ }
+ }
+
+ /**
+ * Mark a worker that may generate more work as done.
+ */
+ synchronized void decrementSchedulers() {
+ schedulers -= 1;
+ if (schedulers == 0) {
+ notify();
+ }
+ }
+
+ synchronized void notifyOnNonIOException(Throwable th) {
+ fatalError = th;
+ notify();
+ }
+
+ /**
+ * Wait until all of the tasks are done. It waits until all of the
+ * threads that may create more work are done and then shuts down the
+ * thread pool and waits for the final threads to finish.
+ */
+ synchronized void waitForTasks() {
+ try {
+ while (schedulers != 0) {
+ wait();
+ if (fatalError != null) {
+ threadPool.shutdownNow();
+ throw new RuntimeException("serious problem", fatalError);
+ }
+ }
+ threadPool.shutdown();
+ threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
+ } catch (InterruptedException ie) {
+ throw new IllegalStateException("interrupted", ie);
+ }
+ }
+ }
+
+ /**
+ * Given a directory, get the list of files and blocks in those files.
+ * A thread is used for each directory.
+ */
+ static final class FileGenerator implements Runnable {
+ private final Context context;
+ private final FileSystem fs;
+ private final Path dir;
+
+ FileGenerator(Context context, FileSystem fs, Path dir) {
+ this.context = context;
+ this.fs = fs;
+ this.dir = dir;
+ }
+
+ private void scheduleSplits(FileStatus file,
+ boolean isOriginal,
+ boolean hasBase,
+ List deltas) throws IOException{
+ FileInfo info = null;
+ if (context.cacheStripeDetails) {
+ info = verifyCachedFileInfo(file);
+ }
+ new SplitGenerator(context, fs, file, info, isOriginal, deltas,
+ hasBase).schedule();
+ }
+
+ /**
+ * For each path, get the list of files and blocks that they consist of.
+ */
+ @Override
+ public void run() {
+ try {
+ AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir,
+ context.conf, context.transactionList);
+ List deltas =
+ AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories());
+ Path base = dirInfo.getBaseDirectory();
+ List original = dirInfo.getOriginalFiles();
+
+ boolean[] covered = new boolean[context.numBuckets];
+ boolean isOriginal = base == null;
+
+ // if we have a base to work from
+ if (base != null || !original.isEmpty()) {
+
+ // find the base files (original or new style)
+ List children = original;
+ if (base != null) {
+ children = SHIMS.listLocatedStatus(fs, base,
+ AcidUtils.hiddenFileFilter);
+ }
+
+ // for each child, schedule splits and mark off the bucket
+ for(FileStatus child: children) {
+              // updated by tudou on 2020-05-29, redmine: http://redmine.prod.dtstack.cn/issues/26286
+              // make sure the file length on HDFS is greater than 0 (the file is not empty); otherwise an IndexOutOfBoundsException is thrown in org.apache.hadoop.hive.ql.io.orc.ReaderImpl.extractMetaInfoFromFooter:362
+ if(child.getLen() > 0){
+ AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename
+ (child.getPath(), context.conf);
+ scheduleSplits(child, isOriginal, true, deltas);
+ int b = opts.getBucket();
+ // If the bucket is in the valid range, mark it as covered.
+ // I wish Hive actually enforced bucketing all of the time.
+ if (b >= 0 && b < covered.length) {
+ covered[b] = true;
+ }
+ }
+ }
+ }
+
+ // Generate a split for any buckets that weren't covered.
+ // This happens in the case where a bucket just has deltas and no
+ // base.
+ if (!deltas.isEmpty()) {
+ for (int b = 0; b < context.numBuckets; ++b) {
+ if (!covered[b]) {
+ synchronized (context.splits) {
+ context.splits.add(new OrcSplit(dir, b, 0, new String[0], null,
+ false, false, deltas));
+ }
+ }
+ }
+ }
+ } catch (Throwable th) {
+ if (!(th instanceof IOException)) {
+ LOG.error("Unexpected Exception", th);
+ }
+ synchronized (context.errors) {
+ context.errors.add(th);
+ }
+ if (!(th instanceof IOException)) {
+ context.notifyOnNonIOException(th);
+ }
+ } finally {
+ context.decrementSchedulers();
+ }
+ }
+
+ private FileInfo verifyCachedFileInfo(FileStatus file) {
+ context.numFilesCounter.incrementAndGet();
+ FileInfo fileInfo = Context.footerCache.getIfPresent(file.getPath());
+ if (fileInfo != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Info cached for path: " + file.getPath());
+ }
+ if (fileInfo.modificationTime == file.getModificationTime() &&
+ fileInfo.size == file.getLen()) {
+ // Cached copy is valid
+ context.cacheHitCounter.incrementAndGet();
+ return fileInfo;
+ } else {
+ // Invalidate
+ Context.footerCache.invalidate(file.getPath());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Meta-Info for : " + file.getPath() +
+ " changed. CachedModificationTime: "
+ + fileInfo.modificationTime + ", CurrentModificationTime: "
+ + file.getModificationTime()
+ + ", CachedLength: " + fileInfo.size + ", CurrentLength: " +
+ file.getLen());
+ }
+ }
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Info not cached for path: " + file.getPath());
+ }
+ }
+ return null;
+ }
+ }
+
+ /**
+ * Split the stripes of a given file into input splits.
+ * A thread is used for each file.
+ */
+ static final class SplitGenerator implements Runnable {
+ private final Context context;
+ private final FileSystem fs;
+ private final FileStatus file;
+ private final long blockSize;
+ private final TreeMap locations;
+ private final FileInfo fileInfo;
+ private final boolean isOriginal;
+ private final List deltas;
+ private final boolean hasBase;
+ private List stripes;
+ private ReaderImpl.FileMetaInfo fileMetaInfo;
+ private Metadata metadata;
+ private List types;
+ private OrcFile.WriterVersion writerVersion;
+
+ SplitGenerator(Context context, FileSystem fs,
+ FileStatus file, FileInfo fileInfo,
+ boolean isOriginal,
+ List deltas,
+ boolean hasBase) throws IOException {
+ this.context = context;
+ this.fs = fs;
+ this.file = file;
+ this.blockSize = file.getBlockSize();
+ this.fileInfo = fileInfo;
+ locations = SHIMS.getLocationsWithOffset(fs, file);
+ this.isOriginal = isOriginal;
+ this.deltas = deltas;
+ this.hasBase = hasBase;
+ }
+
+ /**
+ * Compute the number of bytes that overlap between the two ranges.
+ * @param offset1 start of range1
+ * @param length1 length of range1
+ * @param offset2 start of range2
+ * @param length2 length of range2
+ * @return the number of bytes in the overlap range
+ */
+ static long getOverlap(long offset1, long length1,
+ long offset2, long length2) {
+ long end1 = offset1 + length1;
+ long end2 = offset2 + length2;
+ if (end2 <= offset1 || end1 <= offset2) {
+ return 0;
+ } else {
+ return Math.min(end1, end2) - Math.max(offset1, offset2);
+ }
+ }
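getOverlap is plain interval intersection; a few sample values make the boundary behaviour explicit (ranges that only touch have zero overlap). The helper below simply restates the same formula so it can be run on its own:

public class OverlapDemo {
    // same formula as SplitGenerator.getOverlap
    static long overlap(long offset1, long length1, long offset2, long length2) {
        long end1 = offset1 + length1;
        long end2 = offset2 + length2;
        return (end2 <= offset1 || end1 <= offset2) ? 0 : Math.min(end1, end2) - Math.max(offset1, offset2);
    }

    public static void main(String[] args) {
        System.out.println(overlap(0, 100, 50, 100)); // 50 : [0,100) and [50,150) share 50 bytes
        System.out.println(overlap(0, 50, 50, 50));   // 0  : the ranges only touch at offset 50
        System.out.println(overlap(10, 80, 0, 200));  // 80 : one range fully contains the other
    }
}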
+
+ Path getPath() {
+ return file.getPath();
+ }
+
+ void schedule() throws IOException {
+ if(locations.size() == 1 && file.getLen() < context.maxSize) {
+ String[] hosts = locations.firstEntry().getValue().getHosts();
+ synchronized (context.splits) {
+ context.splits.add(new OrcSplit(file.getPath(), 0, file.getLen(),
+ hosts, fileMetaInfo, isOriginal, hasBase, deltas));
+ }
+ } else {
+ // if it requires a compute task
+ context.schedule(this);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "splitter(" + file.getPath() + ")";
+ }
+
+ /**
+ * Create an input split over the given range of bytes. The location of the
+ * split is based on where the majority of the byte are coming from. ORC
+ * files are unlikely to have splits that cross between blocks because they
+ * are written with large block sizes.
+ * @param offset the start of the split
+ * @param length the length of the split
+ * @param fileMetaInfo file metadata from footer and postscript
+ * @throws IOException
+ */
+ void createSplit(long offset, long length,
+ ReaderImpl.FileMetaInfo fileMetaInfo) throws IOException {
+ String[] hosts;
+ Map.Entry startEntry = locations.floorEntry(offset);
+ BlockLocation start = startEntry.getValue();
+ if (offset + length <= start.getOffset() + start.getLength()) {
+ // handle the single block case
+ hosts = start.getHosts();
+ } else {
+ Map.Entry endEntry = locations.floorEntry(offset + length);
+ BlockLocation end = endEntry.getValue();
+ //get the submap
+ NavigableMap navigableMap = locations.subMap(startEntry.getKey(),
+ true, endEntry.getKey(), true);
+ // Calculate the number of bytes in the split that are local to each
+ // host.
+ Map sizes = new HashMap();
+ long maxSize = 0;
+ for (BlockLocation block : navigableMap.values()) {
+ long overlap = getOverlap(offset, length, block.getOffset(),
+ block.getLength());
+ if (overlap > 0) {
+ for(String host: block.getHosts()) {
+ LongWritable val = sizes.get(host);
+ if (val == null) {
+ val = new LongWritable();
+ sizes.put(host, val);
+ }
+ val.set(val.get() + overlap);
+ maxSize = Math.max(maxSize, val.get());
+ }
+ } else {
+ throw new IOException("File " + file.getPath().toString() +
+ " should have had overlap on block starting at " + block.getOffset());
+ }
+ }
+ // filter the list of locations to those that have at least 80% of the
+ // max
+ long threshold = (long) (maxSize * MIN_INCLUDED_LOCATION);
+ List hostList = new ArrayList();
+ // build the locations in a predictable order to simplify testing
+ for(BlockLocation block: navigableMap.values()) {
+ for(String host: block.getHosts()) {
+ if (sizes.containsKey(host)) {
+ if (sizes.get(host).get() >= threshold) {
+ hostList.add(host);
+ }
+ sizes.remove(host);
+ }
+ }
+ }
+ hosts = new String[hostList.size()];
+ hostList.toArray(hosts);
+ }
+ synchronized (context.splits) {
+ context.splits.add(new OrcSplit(file.getPath(), offset, length,
+ hosts, fileMetaInfo, isOriginal, hasBase, deltas));
+ }
+ }
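For a split that spans several blocks, createSplit keeps only the hosts holding at least MIN_INCLUDED_LOCATION (80%) of the bytes held by the best host, as described in the class comment. A compact sketch of just that filtering step, using plain long counts instead of LongWritable:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SplitHostFilter {
    private static final double MIN_INCLUDED_LOCATION = 0.80;

    static List<String> pickHosts(Map<String, Long> bytesPerHost) {
        long maxSize = bytesPerHost.values().stream().mapToLong(Long::longValue).max().orElse(0L);
        long threshold = (long) (maxSize * MIN_INCLUDED_LOCATION);
        List<String> hosts = new ArrayList<>();
        for (Map.Entry<String, Long> e : bytesPerHost.entrySet()) {
            if (e.getValue() >= threshold) {
                hosts.add(e.getKey());
            }
        }
        return hosts;
    }

    public static void main(String[] args) {
        Map<String, Long> sizes = new LinkedHashMap<>();
        sizes.put("host1", 10L * 1024 * 1024);   // 50% of the best host -> dropped
        sizes.put("host2", 20L * 1024 * 1024);   // best host -> kept
        sizes.put("host3", 18L * 1024 * 1024);   // 90% -> kept
        System.out.println(pickHosts(sizes));    // [host2, host3]
    }
}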
+
+ /**
+ * Divide the adjacent stripes in the file into input splits based on the
+ * block size and the configured minimum and maximum sizes.
+ */
+ @Override
+ public void run() {
+ try {
+ populateAndCacheStripeDetails();
+
+ // figure out which stripes we need to read
+ boolean[] includeStripe = null;
+ // we can't eliminate stripes if there are deltas because the
+ // deltas may change the rows making them match the predicate.
+ if (deltas.isEmpty()) {
+ Reader.Options options = new Reader.Options();
+ setIncludedColumns(options, types, context.conf, isOriginal);
+ setSearchArgument(options, types, context.conf, isOriginal);
+ // only do split pruning if HIVE-8732 has been fixed in the writer
+ if (options.getSearchArgument() != null &&
+ writerVersion != OrcFile.WriterVersion.ORIGINAL) {
+ SearchArgument sarg = options.getSearchArgument();
+ List sargLeaves = sarg.getLeaves();
+ List stripeStats = metadata.getStripeStatistics();
+ int[] filterColumns = RecordReaderImpl.mapSargColumns(sargLeaves,
+ options.getColumnNames(), getRootColumn(isOriginal));
+
+ if (stripeStats != null) {
+            // eliminate stripes that don't satisfy the predicate condition
+ includeStripe = new boolean[stripes.size()];
+ for(int i=0; i < stripes.size(); ++i) {
+ includeStripe[i] = (i >= stripeStats.size()) ||
+ isStripeSatisfyPredicate(stripeStats.get(i), sarg,
+ filterColumns);
+ if (LOG.isDebugEnabled() && !includeStripe[i]) {
+ LOG.debug("Eliminating ORC stripe-" + i + " of file '" +
+ file.getPath() + "' as it did not satisfy " +
+ "predicate condition.");
+ }
+ }
+ }
+ }
+ }
+
+ // if we didn't have predicate pushdown, read everything
+ if (includeStripe == null) {
+ includeStripe = new boolean[stripes.size()];
+ Arrays.fill(includeStripe, true);
+ }
+
+ long currentOffset = -1;
+ long currentLength = 0;
+ int idx = -1;
+ for(StripeInformation stripe: stripes) {
+ idx++;
+
+ if (!includeStripe[idx]) {
+ // create split for the previous unfinished stripe
+ if (currentOffset != -1) {
+ createSplit(currentOffset, currentLength, fileMetaInfo);
+ currentOffset = -1;
+ }
+ continue;
+ }
+
+ // if we are working on a stripe, over the min stripe size, and
+ // crossed a block boundary, cut the input split here.
+ if (currentOffset != -1 && currentLength > context.minSize &&
+ (currentOffset / blockSize != stripe.getOffset() / blockSize)) {
+ createSplit(currentOffset, currentLength, fileMetaInfo);
+ currentOffset = -1;
+ }
+ // if we aren't building a split, start a new one.
+ if (currentOffset == -1) {
+ currentOffset = stripe.getOffset();
+ currentLength = stripe.getLength();
+ } else {
+ currentLength =
+ (stripe.getOffset() + stripe.getLength()) - currentOffset;
+ }
+ if (currentLength >= context.maxSize) {
+ createSplit(currentOffset, currentLength, fileMetaInfo);
+ currentOffset = -1;
+ }
+ }
+ if (currentOffset != -1) {
+ createSplit(currentOffset, currentLength, fileMetaInfo);
+ }
+ } catch (Throwable th) {
+ if (!(th instanceof IOException)) {
+ LOG.error("Unexpected Exception", th);
+ }
+ synchronized (context.errors) {
+ context.errors.add(th);
+ }
+ if (!(th instanceof IOException)) {
+ context.notifyOnNonIOException(th);
+ }
+ } finally {
+ context.decrementSchedulers();
+ }
+ }
+
+ private void populateAndCacheStripeDetails() {
+ try {
+ Reader orcReader;
+ if (fileInfo != null) {
+ stripes = fileInfo.stripeInfos;
+ fileMetaInfo = fileInfo.fileMetaInfo;
+ metadata = fileInfo.metadata;
+ types = fileInfo.types;
+ writerVersion = fileInfo.writerVersion;
+ // For multiple runs, in case sendSplitsInFooter changes
+ if (fileMetaInfo == null && context.footerInSplits) {
+ orcReader = OrcFile.createReader(file.getPath(),
+ OrcFile.readerOptions(context.conf).filesystem(fs));
+ fileInfo.fileMetaInfo = ((ReaderImpl) orcReader).getFileMetaInfo();
+ fileInfo.metadata = orcReader.getMetadata();
+ fileInfo.types = orcReader.getTypes();
+ fileInfo.writerVersion = orcReader.getWriterVersion();
+ }
+ } else {
+ orcReader = OrcFile.createReader(file.getPath(),
+ OrcFile.readerOptions(context.conf).filesystem(fs));
+ stripes = orcReader.getStripes();
+ metadata = orcReader.getMetadata();
+ types = orcReader.getTypes();
+ writerVersion = orcReader.getWriterVersion();
+ fileMetaInfo = context.footerInSplits ?
+ ((ReaderImpl) orcReader).getFileMetaInfo() : null;
+ if (context.cacheStripeDetails) {
+ // Populate into cache.
+ Context.footerCache.put(file.getPath(),
+ new FileInfo(file.getModificationTime(), file.getLen(), stripes,
+ metadata, types, fileMetaInfo, writerVersion));
+ }
+ }
+ } catch (Throwable th) {
+ if (!(th instanceof IOException)) {
+ LOG.error("Unexpected Exception", th);
+ }
+ synchronized (context.errors) {
+ context.errors.add(th);
+ }
+ if (!(th instanceof IOException)) {
+ context.notifyOnNonIOException(th);
+ }
+ }
+ }
+
+ private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
+ SearchArgument sarg,
+ int[] filterColumns) {
+ List predLeaves = sarg.getLeaves();
+ TruthValue[] truthValues = new TruthValue[predLeaves.size()];
+ for (int pred = 0; pred < truthValues.length; pred++) {
+ if (filterColumns[pred] != -1) {
+
+ // column statistics at index 0 contains only the number of rows
+ ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+ truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred));
+ } else {
+
+          // partition column case.
+ // partition filter will be evaluated by partition pruner so
+ // we will not evaluate partition filter here.
+ truthValues[pred] = TruthValue.YES_NO_NULL;
+ }
+ }
+ return sarg.evaluate(truthValues).isNeeded();
+ }
+ }
+
+ /**
+ * FileInfo.
+ *
+ * Stores information relevant to split generation for an ORC File.
+ *
+ */
+ private static class FileInfo {
+ long modificationTime;
+ long size;
+ List stripeInfos;
+ ReaderImpl.FileMetaInfo fileMetaInfo;
+ Metadata metadata;
+ List types;
+ private OrcFile.WriterVersion writerVersion;
+
+
+ FileInfo(long modificationTime, long size,
+ List stripeInfos,
+ Metadata metadata, List types,
+ ReaderImpl.FileMetaInfo fileMetaInfo,
+ OrcFile.WriterVersion writerVersion) {
+ this.modificationTime = modificationTime;
+ this.size = size;
+ this.stripeInfos = stripeInfos;
+ this.fileMetaInfo = fileMetaInfo;
+ this.metadata = metadata;
+ this.types = types;
+ this.writerVersion = writerVersion;
+ }
+ }
+
+ /**
+ * Return a RecordReader that is compatible with the Hive 0.12 reader
+ * with NullWritable for the key instead of RecordIdentifier.
+ */
+ public static final class NullKeyRecordReader implements AcidRecordReader {
+ private final RecordIdentifier id;
+ private final RowReader inner;
+
+ private NullKeyRecordReader(RowReader inner, Configuration conf) {
+ this.inner = inner;
+ id = inner.createKey();
+ }
+
+ public RecordIdentifier getRecordIdentifier() {
+ return id;
+ }
+
+ @Override
+ public boolean next(NullWritable nullWritable,
+ OrcStruct orcStruct) throws IOException {
+ return inner.next(id, orcStruct);
+ }
+
+ @Override
+ public NullWritable createKey() {
+ return NullWritable.get();
+ }
+
+ @Override
+ public OrcStruct createValue() {
+ return inner.createValue();
+ }
+
+ @Override
+ public long getPos() throws IOException {
+ return inner.getPos();
+ }
+
+ @Override
+ public void close() throws IOException {
+ inner.close();
+ }
+
+ @Override
+ public float getProgress() throws IOException {
+ return inner.getProgress();
+ }
+ }
+
+
+}
diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/test/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormatTest.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/test/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormatTest.java
new file mode 100644
index 0000000000..6df5ceb002
--- /dev/null
+++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/test/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormatTest.java
@@ -0,0 +1,22 @@
+package com.dtstack.flinkx.hdfs.reader;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * @author jiangbo
+ * @date 2020/3/16
+ */
+public class HdfsOrcInputFormatTest {
+
+ @Test
+ public void testParseColumnAndType() {
+ HdfsOrcInputFormat hdfsOrcInputFormat = new HdfsOrcInputFormat();
+
+ String struct = "int,float(10,2),char(12)";
+ List result = hdfsOrcInputFormat.parseColumnAndType(struct);
+        Assert.assertEquals(3, result.size());
+ }
+}
\ No newline at end of file
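The test expects "int,float(10,2),char(12)" to yield three entries, so the parser cannot be a plain comma split (which would give four). One way to implement such a parser, purely illustrative and not necessarily how HdfsOrcInputFormat.parseColumnAndType actually does it, is to split only on commas outside parentheses:

import java.util.ArrayList;
import java.util.List;

public class TypeListParser {
    /** Splits on commas that are not nested inside parentheses. */
    static List<String> parse(String struct) {
        List<String> parts = new ArrayList<>();
        int depth = 0;
        StringBuilder current = new StringBuilder();
        for (char c : struct.toCharArray()) {
            if (c == '(') {
                depth++;
            } else if (c == ')') {
                depth--;
            }
            if (c == ',' && depth == 0) {
                parts.add(current.toString());
                current.setLength(0);
            } else {
                current.append(c);
            }
        }
        parts.add(current.toString());
        return parts;
    }

    public static void main(String[] args) {
        System.out.println(parse("int,float(10,2),char(12)"));
        // [int, float(10,2), char(12)] -> size 3, matching the test's expectation
    }
}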
diff --git a/flinkx-hdfs/flinkx-hdfs-writer/pom.xml b/flinkx-hdfs/flinkx-hdfs-writer/pom.xml
index 89f059b99d..ff3e37a7d6 100644
--- a/flinkx-hdfs/flinkx-hdfs-writer/pom.xml
+++ b/flinkx-hdfs/flinkx-hdfs-writer/pom.xml
@@ -98,6 +98,16 @@ under the License.
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/BaseHdfsOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/BaseHdfsOutputFormat.java
index 38ff34f667..f611487af5 100644
--- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/BaseHdfsOutputFormat.java
+++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/BaseHdfsOutputFormat.java
@@ -43,6 +43,8 @@
*/
public abstract class BaseHdfsOutputFormat extends BaseFileOutputFormat {
+ private static final int FILE_NAME_PART_SIZE = 3;
+
protected int rowGroupSize;
protected FileSystem fs;
@@ -66,14 +68,29 @@ public abstract class BaseHdfsOutputFormat extends BaseFileOutputFormat {
protected Configuration conf;
+ protected boolean enableDictionary;
+
protected transient Map decimalColInfo;
@Override
protected void openInternal(int taskNumber, int numTasks) throws IOException {
+        // sleep for a short random time here to avoid the reader and writer, or several tasks on the same taskmanager, authenticating with kerberos at the same moment
+ if (FileSystemUtil.isOpenKerberos(hadoopConfig)) {
+ sleepRandomTime();
+ }
+
initColIndices();
super.openInternal(taskNumber, numTasks);
}
+ private void sleepRandomTime() {
+ try {
+ Thread.sleep(5000L + (long)(10000 * Math.random()));
+ } catch (Exception exception) {
+ LOG.warn("", exception);
+ }
+ }
+
@Override
protected void checkOutputDir() {
try{
@@ -120,7 +137,7 @@ protected void waitForActionFinishedBeforeWrite() {
n++;
}
} catch (Exception e){
-
+ LOG.warn("Call method waitForActionFinishedBeforeWrite error", e);
}
}
@@ -142,7 +159,7 @@ public boolean accept(Path path) {
}
String[] splits = fileName.split("\\.");
- if (splits.length == 3) {
+ if (splits.length == FILE_NAME_PART_SIZE) {
return Integer.parseInt(splits[2]) > fileIndex;
}
@@ -308,4 +325,8 @@ protected void moveAllTemporaryDataFileToDirectory() throws IOException {
}
}
+ @Override
+ protected void writeMultipleRecordsInternal() throws Exception {
+ notSupportBatchWrite("HdfsWriter");
+ }
}
diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOrcOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOrcOutputFormat.java
index 90f84054d5..a112570a66 100644
--- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOrcOutputFormat.java
+++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOrcOutputFormat.java
@@ -34,7 +34,11 @@
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.compress.*;
+import org.apache.hadoop.io.compress.BZip2Codec;
+import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.hadoop.io.compress.Lz4Codec;
+import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
@@ -213,17 +217,7 @@ private void getData(List recordList, int index, Row row) throws WriteRe
recordList.add(Integer.valueOf(rowData));
break;
case BIGINT:
- if (column instanceof Timestamp){
- column=((Timestamp) column).getTime();
- recordList.add(column);
- break;
- }
- BigInteger data = new BigInteger(rowData);
- if (data.compareTo(new BigInteger(String.valueOf(Long.MAX_VALUE))) > 0){
- recordList.add(data);
- } else {
- recordList.add(Long.valueOf(rowData));
- }
+ recordList.add(getBigint(column, rowData));
break;
case FLOAT:
recordList.add(Float.valueOf(rowData));
@@ -232,16 +226,7 @@ private void getData(List recordList, int index, Row row) throws WriteRe
recordList.add(Double.valueOf(rowData));
break;
case DECIMAL:
- ColumnTypeUtil.DecimalInfo decimalInfo = decimalColInfo.get(fullColumnNames.get(index));
- HiveDecimal hiveDecimal = HiveDecimal.create(new BigDecimal(rowData));
- hiveDecimal = HiveDecimal.enforcePrecisionScale(hiveDecimal, decimalInfo.getPrecision(), decimalInfo.getScale());
- if(hiveDecimal == null){
- String msg = String.format("第[%s]个数据数据[%s]precision和scale和元数据不匹配:decimal(%s, %s)", index, decimalInfo.getPrecision(), decimalInfo.getScale(), rowData);
- throw new WriteRecordException(msg, new IllegalArgumentException());
- }
-
- HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(hiveDecimal);
- recordList.add(hiveDecimalWritable);
+ recordList.add(getDecimalWritable(index, rowData));
break;
case STRING:
case VARCHAR:
@@ -270,6 +255,32 @@ private void getData(List recordList, int index, Row row) throws WriteRe
}
}
+ private Object getBigint(Object column, String rowData) {
+ if (column instanceof Timestamp){
+ column = ((Timestamp) column).getTime();
+ return column;
+ }
+
+ BigInteger data = new BigInteger(rowData);
+ if (data.compareTo(new BigInteger(String.valueOf(Long.MAX_VALUE))) > 0){
+ return data;
+ } else {
+ return Long.valueOf(rowData);
+ }
+ }
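getBigint keeps java.sql.Timestamp columns as epoch millis, stays in Long for values that fit, and only falls back to BigInteger when the value exceeds Long.MAX_VALUE. A short usage illustration of the three paths (the demo method restates the same logic so it runs standalone):

import java.math.BigInteger;
import java.sql.Timestamp;

public class BigintConversionDemo {
    static Object toBigint(Object column, String rowData) {
        if (column instanceof Timestamp) {
            return ((Timestamp) column).getTime();               // epoch millis
        }
        BigInteger data = new BigInteger(rowData);
        return data.compareTo(BigInteger.valueOf(Long.MAX_VALUE)) > 0 ? data : Long.valueOf(rowData);
    }

    public static void main(String[] args) {
        Timestamp ts = Timestamp.valueOf("2020-02-12 00:00:00");
        System.out.println(toBigint(ts, ts.toString()));          // millis since epoch
        System.out.println(toBigint("42", "42"));                 // Long 42
        System.out.println(toBigint("9223372036854775808",
                "9223372036854775808"));                          // BigInteger, larger than Long.MAX_VALUE
    }
}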
+
+ private HiveDecimalWritable getDecimalWritable(int index, String rowData) throws WriteRecordException {
+ ColumnTypeUtil.DecimalInfo decimalInfo = decimalColInfo.get(fullColumnNames.get(index));
+ HiveDecimal hiveDecimal = HiveDecimal.create(new BigDecimal(rowData));
+ hiveDecimal = HiveDecimal.enforcePrecisionScale(hiveDecimal, decimalInfo.getPrecision(), decimalInfo.getScale());
+ if(hiveDecimal == null){
+ String msg = String.format("第[%s]个数据数据[%s]precision和scale和元数据不匹配:decimal(%s, %s)", index, decimalInfo.getPrecision(), decimalInfo.getScale(), rowData);
+ throw new WriteRecordException(msg, new IllegalArgumentException());
+ }
+
+ return new HiveDecimalWritable(hiveDecimal);
+ }
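HiveDecimal.enforcePrecisionScale returns null when the value cannot be represented with the target precision and scale, which is why getDecimalWritable converts that into a WriteRecordException instead of silently truncating. A small illustration, assuming the standard org.apache.hadoop.hive.common.type.HiveDecimal behaviour:

import org.apache.hadoop.hive.common.type.HiveDecimal;

import java.math.BigDecimal;

public class DecimalEnforceDemo {
    public static void main(String[] args) {
        // decimal(5, 2) leaves 3 digits before the point, so 123.456 fits (the scale is rounded)
        HiveDecimal ok = HiveDecimal.enforcePrecisionScale(
                HiveDecimal.create(new BigDecimal("123.456")), 5, 2);
        System.out.println(ok);        // 123.46

        // decimal(4, 2) leaves only 2 digits before the point -> null, i.e. a dirty record
        HiveDecimal tooWide = HiveDecimal.enforcePrecisionScale(
                HiveDecimal.create(new BigDecimal("123.456")), 4, 2);
        System.out.println(tooWide);   // null
    }
}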
+
@Override
protected String recordConvertDetailErrorMessage(int pos, Row row) {
return "\nHdfsOrcOutputFormat [" + jobName + "] writeRecord error: when converting field[" + fullColumnNames.get(pos) + "] in Row(" + row + ")";
diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormatBuilder.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormatBuilder.java
index ec30366e16..0d8b40b951 100644
--- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormatBuilder.java
+++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormatBuilder.java
@@ -83,6 +83,10 @@ public void setDefaultFs(String defaultFs) {
format.defaultFs = defaultFs;
}
+ public void setEnableDictionary(boolean enableDictionary) {
+ format.enableDictionary = enableDictionary;
+ }
+
@Override
protected void checkFormat() {
super.checkFormat();
diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsParquetOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsParquetOutputFormat.java
index 7c4cd0e1fa..2dcb955c74 100644
--- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsParquetOutputFormat.java
+++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsParquetOutputFormat.java
@@ -21,13 +21,13 @@
import com.dtstack.flinkx.enums.ColumnType;
import com.dtstack.flinkx.exception.WriteRecordException;
import com.dtstack.flinkx.hdfs.ECompressType;
+import com.dtstack.flinkx.hdfs.HdfsUtil;
import com.dtstack.flinkx.util.ColumnTypeUtil;
import com.dtstack.flinkx.util.DateUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.types.Row;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
@@ -38,12 +38,16 @@
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.io.api.Binary;
-import org.apache.parquet.schema.*;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Types;
import java.io.IOException;
import java.math.BigDecimal;
import java.sql.Timestamp;
-import java.util.*;
+import java.util.Date;
+import java.util.HashMap;
/**
* The subclass of HdfsOutputFormat writing parquet files
@@ -59,22 +63,8 @@ public class HdfsParquetOutputFormat extends BaseHdfsOutputFormat {
private MessageType schema;
- private static Calendar cal = Calendar.getInstance();
-
- private static final long NANO_SECONDS_PER_DAY = 86400_000_000_000L;
-
- private static final long JULIAN_EPOCH_OFFSET_DAYS = 2440588;
-
private static ColumnTypeUtil.DecimalInfo PARQUET_DEFAULT_DECIMAL_INFO = new ColumnTypeUtil.DecimalInfo(10, 0);
- static {
- try {
- cal.setTime(DateUtil.getDateFormatter().parse("1970-01-01"));
- } catch (Exception e){
- throw new RuntimeException("Init calendar fail:",e);
- }
- }
-
@Override
protected void openSource() throws IOException{
super.openSource();
@@ -101,6 +91,7 @@ protected void nextBlock(){
.withCompressionCodec(getCompressType())
.withConf(conf)
.withType(schema)
+ .withDictionaryEncoding(enableDictionary)
.withRowGroupSize(rowGroupSize);
writer = builder.build();
@@ -162,7 +153,7 @@ public void writeSingleRecordToFile(Row row) throws WriteRecordException {
try {
for (; i < fullColumnNames.size(); i++) {
Object valObj = row.getField(colIndices[i]);
- if(valObj == null){
+ if(valObj == null || valObj.toString().length() == 0){
continue;
}
@@ -233,7 +224,7 @@ private void addDataToGroup(Group group, Object valObj, int i) throws Exception{
case "boolean" : group.add(colName,Boolean.parseBoolean(val));break;
case "timestamp" :
Timestamp ts = DateUtil.columnToTimestamp(valObj,null);
- byte[] dst = longToByteArray(ts.getTime());
+ byte[] dst = HdfsUtil.longToByteArray(ts.getTime());
group.add(colName, Binary.fromConstantByteArray(dst));
break;
case "decimal" :
@@ -246,7 +237,7 @@ private void addDataToGroup(Group group, Object valObj, int i) throws Exception{
throw new WriteRecordException(msg, new IllegalArgumentException());
}
- group.add(colName,decimalToBinary(hiveDecimal, decimalInfo.getPrecision(), decimalInfo.getScale()));
+ group.add(colName, HdfsUtil.decimalToBinary(hiveDecimal, decimalInfo.getPrecision(), decimalInfo.getScale()));
break;
case "date" :
Date date = DateUtil.columnToDate(valObj,null);
@@ -256,29 +247,6 @@ private void addDataToGroup(Group group, Object valObj, int i) throws Exception{
}
}
- private Binary decimalToBinary(final HiveDecimal hiveDecimal, int prec,int scale) {
- byte[] decimalBytes = hiveDecimal.setScale(scale).unscaledValue().toByteArray();
-
- // Estimated number of bytes needed.
- int precToBytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
- if (precToBytes == decimalBytes.length) {
- // No padding needed.
- return Binary.fromReusedByteArray(decimalBytes);
- }
-
- byte[] tgt = new byte[precToBytes];
- if (hiveDecimal.signum() == -1) {
- // For negative number, initializing bits to 1
- for (int i = 0; i < precToBytes; i++) {
- tgt[i] |= 0xFF;
- }
- }
-
- // Padding leading zeroes/ones.
- System.arraycopy(decimalBytes, 0, tgt, precToBytes - decimalBytes.length, decimalBytes.length);
- return Binary.fromReusedByteArray(tgt);
- }
-
@Override
protected String recordConvertDetailErrorMessage(int pos, Row row) {
return "\nHdfsParquetOutputFormat [" + jobName + "] writeRecord error: when converting field[" + fullColumnNames.get(pos) + "] in Row(" + row + ")";
@@ -292,6 +260,7 @@ protected void closeSource() throws IOException {
}
private MessageType buildSchema(){
+ decimalColInfo = new HashMap<>(16);
Types.MessageTypeBuilder typeBuilder = Types.buildMessage();
for (int i = 0; i < fullColumnNames.size(); i++) {
String name = fullColumnNames.get(i);
@@ -317,10 +286,10 @@ private MessageType buildSchema(){
.as(OriginalType.DECIMAL)
.precision(decimalInfo.getPrecision())
.scale(decimalInfo.getScale())
- .length(computeMinBytesForPrecision(decimalInfo.getPrecision()))
+ .length(HdfsUtil.computeMinBytesForPrecision(decimalInfo.getPrecision()))
.named(name);
- decimalColInfo = Collections.singletonMap(name, decimalInfo);
+ decimalColInfo.put(name, decimalInfo);
} else {
typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BINARY).named(name);
}
@@ -330,55 +299,4 @@ private MessageType buildSchema(){
return typeBuilder.named("Pair");
}
-
- private int computeMinBytesForPrecision(int precision){
- int numBytes = 1;
- while (Math.pow(2.0, 8 * numBytes - 1) < Math.pow(10.0, precision)) {
- numBytes += 1;
- }
- return numBytes;
- }
-
- private static byte[] longToByteArray(long data){
- long nano = data * 1000_000;
-
- int julianDays = (int) ((nano / NANO_SECONDS_PER_DAY) + JULIAN_EPOCH_OFFSET_DAYS);
- byte[] julianDaysBytes = getBytes(julianDays);
- flip(julianDaysBytes);
-
- long lastDayNanos = nano % NANO_SECONDS_PER_DAY;
- byte[] lastDayNanosBytes = getBytes(lastDayNanos);
- flip(lastDayNanosBytes);
-
- byte[] dst = new byte[12];
-
- System.arraycopy(lastDayNanosBytes, 0, dst, 0, 8);
- System.arraycopy(julianDaysBytes, 0, dst, 8, 4);
-
- return dst;
- }
-
- private static byte[] getBytes(long i) {
- byte[] bytes=new byte[8];
- bytes[0]=(byte)((i >> 56) & 0xFF);
- bytes[1]=(byte)((i >> 48) & 0xFF);
- bytes[2]=(byte)((i >> 40) & 0xFF);
- bytes[3]=(byte)((i >> 32) & 0xFF);
- bytes[4]=(byte)((i >> 24) & 0xFF);
- bytes[5]=(byte)((i >> 16) & 0xFF);
- bytes[6]=(byte)((i >> 8) & 0xFF);
- bytes[7]=(byte)(i & 0xFF);
- return bytes;
- }
-
- /**
- * @param bytes
- */
- private static void flip(byte[] bytes) {
- for(int i=0,j=bytes.length-1;i writeData(DataStream dataSet) {
builder.setRestoreConfig(restoreConfig);
builder.setMaxFileSize(maxFileSize);
builder.setFlushBlockInterval(flushInterval);
+ builder.setEnableDictionary(enableDictionary);
return createOutput(dataSet, builder.finish());
}
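
The HdfsParquetOutputFormat hunk above moves the INT96 timestamp and decimal helpers into HdfsUtil. For reference, the packing that the removed longToByteArray/getBytes/flip trio performed can be written more compactly with a little-endian ByteBuffer; the sketch below is illustrative only (the class name is made up) and is not the HdfsUtil implementation:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public final class Int96TimestampSketch {
    private static final long NANOS_PER_DAY = 86_400_000_000_000L;
    private static final long JULIAN_EPOCH_OFFSET_DAYS = 2_440_588L;

    /**
     * Packs epoch milliseconds into Parquet's 12-byte INT96 timestamp layout:
     * 8 bytes nanos-of-day (little-endian) followed by 4 bytes Julian day (little-endian).
     */
    public static byte[] toInt96(long epochMillis) {
        long nanos = epochMillis * 1_000_000L;
        int julianDay = (int) (nanos / NANOS_PER_DAY + JULIAN_EPOCH_OFFSET_DAYS);
        long nanosOfDay = nanos % NANOS_PER_DAY;
        return ByteBuffer.allocate(12)
                .order(ByteOrder.LITTLE_ENDIAN)
                .putLong(nanosOfDay)
                .putInt(julianDay)
                .array();
    }
}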
diff --git a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/TimePartitionFormat.java b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/TimePartitionFormat.java
index 0de6b79e84..33917f375d 100644
--- a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/TimePartitionFormat.java
+++ b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/TimePartitionFormat.java
@@ -29,9 +29,9 @@
public class TimePartitionFormat {
- private static final long CONSTANT_TWO_DAY_TIME = 1000 * 60 * 60 * 24 * 2;
- private static final long CONSTANT_TWO_HOUR_TIME = 1000 * 60 * 60 * 2;
- private static final long CONSTANT_TWO_MINUTE_TIME = 1000 * 60 * 2;
+ private static final long CONSTANT_TWO_DAY_TIME = 1000 * 60 * 60 * 24 * 2L;
+ private static final long CONSTANT_TWO_HOUR_TIME = 1000 * 60 * 60 * 2L;
+ private static final long CONSTANT_TWO_MINUTE_TIME = 1000 * 60 * 2L;
private static PartitionEnum partitionEnum;
diff --git a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/AbstractHiveMetadataParser.java b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/AbstractHiveMetadataParser.java
index 21cb769d04..10d49b992c 100644
--- a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/AbstractHiveMetadataParser.java
+++ b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/AbstractHiveMetadataParser.java
@@ -25,7 +25,9 @@
import java.util.List;
import java.util.Map;
-import static com.dtstack.flinkx.hive.EStoreType.*;
+import static com.dtstack.flinkx.hive.EStoreType.ORC;
+import static com.dtstack.flinkx.hive.EStoreType.PARQUET;
+import static com.dtstack.flinkx.hive.EStoreType.TEXT;
/**
* @author jiangbo
diff --git a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveDbUtil.java b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveDbUtil.java
index 2c36ff3c74..4cb1fc6d09 100644
--- a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveDbUtil.java
+++ b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveDbUtil.java
@@ -21,6 +21,7 @@
import com.dtstack.flinkx.authenticate.KerberosUtil;
import com.dtstack.flinkx.util.ExceptionUtil;
import com.dtstack.flinkx.util.FileSystemUtil;
+import com.dtstack.flinkx.util.RetryUtil;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.collections.MapUtils;
@@ -55,6 +56,8 @@ public final class HiveDbUtil {
public static final String SQLSTATE_CANNOT_ACQUIRE_CONNECT = "08004";
+ public static final int JDBC_PART_SIZE = 2;
+
public static final String JDBC_REGEX = "[\\?|;|#]";
public static final String KEY_VAL_DELIMITER = "=";
public static final String PARAM_DELIMITER = "&";
@@ -103,7 +106,7 @@ private static Connection getConnectionWithKerberos(ConnectionInfo connectionInf
String keytabFileName = KerberosUtil.getPrincipalFileName(connectionInfo.getHiveConf());
keytabFileName = KerberosUtil.loadFile(connectionInfo.getHiveConf(), keytabFileName);
- String principal = KerberosUtil.findPrincipalFromKeytab(keytabFileName);
+ String principal = KerberosUtil.getPrincipal(connectionInfo.getHiveConf(), keytabFileName);
KerberosUtil.loadKrb5Conf(connectionInfo.getHiveConf());
Configuration conf = FileSystemUtil.getConfiguration(connectionInfo.getHiveConf(), null);
@@ -126,7 +129,7 @@ public Connection run(){
private static boolean openKerberos(final String jdbcUrl){
String[] splits = jdbcUrl.split(JDBC_REGEX);
- if (splits.length != 2) {
+ if (splits.length != JDBC_PART_SIZE) {
return false;
}
@@ -232,10 +235,10 @@ private static Connection getHiveConnection(String url, Properties prop) throws
url = String.format("jdbc:hive2://%s:%s/%s", host, port, param);
Connection connection = DriverManager.getConnection(url, prop);
if (StringUtils.isNotEmpty(db)) {
- try {
- connection.createStatement().execute("use " + db);
+ try (Statement statement = connection.createStatement()) {
+ statement.execute("use " + db);
} catch (SQLException e) {
- if (connection != null) {
+ if (null != connection) {
connection.close();
}
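
The openKerberos change above replaces the magic number 2 with JDBC_PART_SIZE; splitting a HiveServer2 URL on JDBC_REGEX yields the base URL plus the parameter segment that may carry a Kerberos principal. A minimal illustration (the URL is hypothetical):

public class JdbcUrlSplitSketch {
    // Same pattern as HiveDbUtil.JDBC_REGEX in this diff
    private static final String JDBC_REGEX = "[\\?|;|#]";

    public static void main(String[] args) {
        String url = "jdbc:hive2://host01:10000/default;principal=hive/host01@EXAMPLE.COM";
        String[] parts = url.split(JDBC_REGEX);
        // parts.length == 2 (JDBC_PART_SIZE), and parts[1] holds the "principal=..." segment
        System.out.println(parts.length + " -> " + parts[1]);
    }
}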
diff --git a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveUtil.java b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveUtil.java
index 7e0c0193fa..75209ebdfa 100644
--- a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveUtil.java
+++ b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveUtil.java
@@ -28,7 +28,8 @@
import java.util.List;
import java.util.Map;
-import static com.dtstack.flinkx.hive.EStoreType.*;
+import static com.dtstack.flinkx.hive.EStoreType.ORC;
+import static com.dtstack.flinkx.hive.EStoreType.TEXT;
/**
* @author toutian
@@ -52,26 +53,27 @@ public class HiveUtil {
private HiveDbUtil.ConnectionInfo connectionInfo;
- enum HiveReleaseVersion{
- /**
- * apache hive 1.x
- */
- APACHE_1,
-
- /**
- * apache hive 2.x
- */
- APACHE_2,
-
- /**
- * cdh hive 1.x
- */
- CDH_1,
-
- /**
- * cdh hive 2.x
- */
- CDH_2
+ public static String getHiveColumnType(String originType) {
+ originType = originType.trim();
+ int indexOfBrackets = originType.indexOf(LEFT_BRACKETS);
+ if (indexOfBrackets > -1) {
+ String params = originType.substring(indexOfBrackets);
+ int index = params.indexOf(",");
+ int right = Integer.parseInt(params.substring(index+1, params.length()-1).trim());
+ if(right == 0){
+ int left = Integer.parseInt(params.substring(1, index).trim());
+ if(left <= 4){
+ return "SMALLINT";
+ }else if(left <= 9){
+ return "INT";
+ }else if(left <= 18){
+ return "BIGINT";
+ }
+ }
+ return "DECIMAL" + params;
+ } else {
+ return convertType(originType);
+ }
}
public HiveUtil() {
@@ -176,70 +178,6 @@ private AbstractHiveMetadataParser getMetadataParser(HiveReleaseVersion hiveVers
}
}
- public HiveReleaseVersion getHiveVersion(Connection connection){
- HiveReleaseVersion version = HiveReleaseVersion.APACHE_2;
- try {
- ResultSet resultSet = connection.createStatement().executeQuery("select version()");
- if (resultSet.next()) {
- String versionMsg = resultSet.getString(1);
- if (versionMsg.contains("cdh")){
- // 结果示例:2.1.1-cdh6.3.1 re8d55f408b4f9aa2648bc9e34a8f802d53d6aab3
- if (versionMsg.startsWith("2")) {
- version = HiveReleaseVersion.CDH_2;
- } else if(versionMsg.startsWith("1")){
- version = HiveReleaseVersion.CDH_1;
- }
- } else {
- // FIXME spark thrift server不支持 version()函数,所以使用默认的版本
- }
- }
- } catch (Exception ignore) {
- }
-
- return version;
- }
-
- public static String getCreateTableHql(TableInfo tableInfo) {
- //不要使用create table if not exist,可能以后会在业务逻辑中判断表是否已经存在
- StringBuilder fieldsb = new StringBuilder("CREATE TABLE %s (");
- for (int i = 0; i < tableInfo.getColumns().size(); i++) {
- fieldsb.append(String.format("`%s` %s", tableInfo.getColumns().get(i), tableInfo.getColumnTypes().get(i)));
- if (i != tableInfo.getColumns().size() - 1) {
- fieldsb.append(",");
- }
- }
- fieldsb.append(") ");
- if (!tableInfo.getPartitions().isEmpty()) {
- fieldsb.append(" PARTITIONED BY (");
- for (String partitionField : tableInfo.getPartitions()) {
- fieldsb.append(String.format("`%s` string", partitionField));
- }
- fieldsb.append(") ");
- }
- if (TEXT.name().equalsIgnoreCase(tableInfo.getStore())) {
- fieldsb.append(" ROW FORMAT DELIMITED FIELDS TERMINATED BY '");
- fieldsb.append(tableInfo.getDelimiter());
- fieldsb.append("' LINES TERMINATED BY '\\n' STORED AS TEXTFILE ");
- } else if(ORC.name().equalsIgnoreCase(tableInfo.getStore())) {
- fieldsb.append(" STORED AS ORC ");
- }else{
- fieldsb.append(" STORED AS PARQUET ");
- }
- return fieldsb.toString();
- }
-
- public static String getHiveColumnType(String originType) {
- originType = originType.trim();
- int indexOfBrackets = originType.indexOf(LEFT_BRACKETS);
- if (indexOfBrackets > -1) {
- String type = originType.substring(0, indexOfBrackets);
- String params = originType.substring(indexOfBrackets);
- return convertType(type) + params;
- } else {
- return convertType(originType);
- }
- }
-
private static String convertType(String type) {
switch (type.toUpperCase()) {
case "BIT":
@@ -258,6 +196,8 @@ private static String convertType(String type) {
case "INT8":
type = "INT";
break;
+ case "NUMERIC":
+ case "NUMBER":
case "BIGINT":
type = "BIGINT";
break;
@@ -272,8 +212,6 @@ private static String convertType(String type) {
case "BINARY_DOUBLE":
type = "DOUBLE";
break;
- case "NUMERIC":
- case "NUMBER":
case "DECIMAL":
type = "DECIMAL";
break;
@@ -310,4 +248,102 @@ private static String convertType(String type) {
}
return type;
}
+
+ public static String getCreateTableHql(TableInfo tableInfo) {
+ // Do not use CREATE TABLE IF NOT EXISTS; business logic may later need to check whether the table already exists
+ StringBuilder fieldsb = new StringBuilder("CREATE TABLE %s (");
+ for (int i = 0; i < tableInfo.getColumns().size(); i++) {
+ fieldsb.append(String.format("`%s` %s", tableInfo.getColumns().get(i), tableInfo.getColumnTypes().get(i)));
+ if (i != tableInfo.getColumns().size() - 1) {
+ fieldsb.append(",");
+ }
+ }
+ fieldsb.append(") ");
+ if (!tableInfo.getPartitions().isEmpty()) {
+ fieldsb.append(" PARTITIONED BY (");
+ for (String partitionField : tableInfo.getPartitions()) {
+ fieldsb.append(String.format("`%s` string", partitionField));
+ }
+ fieldsb.append(") ");
+ }
+ if (TEXT.name().equalsIgnoreCase(tableInfo.getStore())) {
+ fieldsb.append(" ROW FORMAT DELIMITED FIELDS TERMINATED BY '");
+ fieldsb.append(tableInfo.getDelimiter());
+ fieldsb.append("' LINES TERMINATED BY '\\n' STORED AS TEXTFILE ");
+ } else if(ORC.name().equalsIgnoreCase(tableInfo.getStore())) {
+ fieldsb.append(" STORED AS ORC ");
+ }else{
+ fieldsb.append(" STORED AS PARQUET ");
+ }
+ return fieldsb.toString();
+ }
+
+ public HiveReleaseVersion getHiveVersion(Connection connection){
+ HiveReleaseVersion version = HiveReleaseVersion.APACHE_2;
+ try (ResultSet resultSet = connection.createStatement().executeQuery("select version()")) {
+ if (resultSet.next()) {
+ String versionMsg = resultSet.getString(1);
+ if (versionMsg.contains(HiveReleaseVersion.CDH_1.getName())){
+ // Example result: 2.1.1-cdh6.3.1 re8d55f408b4f9aa2648bc9e34a8f802d53d6aab3
+ if (versionMsg.startsWith(HiveReleaseVersion.CDH_2.getVersion())) {
+ version = HiveReleaseVersion.CDH_2;
+ } else if(versionMsg.startsWith(HiveReleaseVersion.CDH_1.getVersion())){
+ version = HiveReleaseVersion.CDH_1;
+ }
+ } else {
+ // Spark Thrift Server does not support the version() function, so the default version is used
+ }
+ }
+ } catch (Exception ignore) {
+ }
+
+ return version;
+ }
+
+ enum HiveReleaseVersion{
+ /**
+ * apache hive 1.x
+ */
+ APACHE_1("apache", "1"),
+
+ /**
+ * apache hive 2.x
+ */
+ APACHE_2("apache", "2"),
+
+ /**
+ * cdh hive 1.x
+ */
+ CDH_1("cdh", "1"),
+
+ /**
+ * cdh hive 2.x
+ */
+ CDH_2("cdh", "2");
+
+ private String name;
+
+ private String version;
+
+ HiveReleaseVersion(String name, String version) {
+ this.name = name;
+ this.version = version;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getVersion() {
+ return version;
+ }
+
+ public void setVersion(String version) {
+ this.version = version;
+ }
+ }
}
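
Based on the relocated getHiveColumnType above, zero-scale decimals are now narrowed to integral Hive types, while NUMERIC/NUMBER without parameters fall through to convertType() and map to BIGINT. A small demo of the expected mappings, inferred from the code in this hunk:

import com.dtstack.flinkx.hive.util.HiveUtil;

public class HiveColumnTypeDemo {
    public static void main(String[] args) {
        // DECIMAL/NUMBER with scale 0 collapses to the smallest integral type that fits
        System.out.println(HiveUtil.getHiveColumnType("DECIMAL(4,0)"));  // SMALLINT
        System.out.println(HiveUtil.getHiveColumnType("NUMBER(9,0)"));   // INT
        System.out.println(HiveUtil.getHiveColumnType("NUMERIC(18,0)")); // BIGINT
        // Non-zero scale keeps precision and scale as a Hive DECIMAL
        System.out.println(HiveUtil.getHiveColumnType("DECIMAL(20,2)")); // DECIMAL(20,2)
        // Without parameters, NUMBER now maps to BIGINT in convertType()
        System.out.println(HiveUtil.getHiveColumnType("NUMBER"));        // BIGINT
    }
}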
diff --git a/flinkx-hive/flinkx-hive-core/src/test/java/com/dtstack/flinkx/hive/test/DBUtilTest.java b/flinkx-hive/flinkx-hive-core/src/test/java/com/dtstack/flinkx/hive/test/DBUtilTest.java
deleted file mode 100644
index 0b7c2e533d..0000000000
--- a/flinkx-hive/flinkx-hive-core/src/test/java/com/dtstack/flinkx/hive/test/DBUtilTest.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package com.dtstack.flinkx.hive.test;
-
-import com.dtstack.flinkx.hive.util.HiveDbUtil;
-
-import java.sql.Connection;
-import java.sql.ResultSet;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * @author jiangbo
- * @date 2019/8/29
- */
-public class DBUtilTest {
-
- public static void main(String[] args) throws Exception{
- Map sftpConf = new HashMap<>();
- sftpConf.put("host", "172.16.10.79");
- sftpConf.put("port", "22");
- sftpConf.put("username", "root");
- sftpConf.put("password", "abc123");
-
- Map hiveConf = new HashMap<>();
- hiveConf.put("hive.server2.authentication.kerberos.principal", "hive/cdh02@HADOOP.COM");
- hiveConf.put("hive.server2.authentication.kerberos.keytab", "D:\\cdh_cluster\\hive.keytab");
- hiveConf.put("java.security.krb5.conf", "D:\\cdh_cluster\\krb5.conf");
- hiveConf.put("useLocalFile", "true");
- hiveConf.put("sftpConf", sftpConf);
- hiveConf.put("remoteDir", "/home/sftp/keytab/jiangbo");
-
- HiveDbUtil.ConnectionInfo connectionInfo = new HiveDbUtil.ConnectionInfo();
- connectionInfo.setJdbcUrl("jdbc:hive2://172.16.10.75:10000/default;principal=hive/cdh02@HADOOP.COM");
- connectionInfo.setUsername("");
- connectionInfo.setPassword("");
- connectionInfo.setHiveConf(hiveConf);
-
- Connection connection = HiveDbUtil.getConnection(connectionInfo);
- ResultSet rs = connection.createStatement().executeQuery("show tables");
- while (rs.next()) {
- System.out.println(rs.getObject(2));
- }
-
- connection.close();
- }
-}
diff --git a/flinkx-hive/flinkx-hive-writer/pom.xml b/flinkx-hive/flinkx-hive-writer/pom.xml
index 25357e479f..e93421f32a 100644
--- a/flinkx-hive/flinkx-hive-writer/pom.xml
+++ b/flinkx-hive/flinkx-hive-writer/pom.xml
@@ -92,6 +92,16 @@ under the License.
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java
index 217163c261..67731f0503 100644
--- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java
+++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java
@@ -155,7 +155,7 @@ public FormatState getFormatState() {
}
private Map flushOutputFormat() {
- Map formatStateMap = new HashMap<>();
+ Map formatStateMap = new HashMap<>(outputFormats.size());
Iterator> entryIterator = outputFormats.entrySet().iterator();
while (entryIterator.hasNext()) {
Map.Entry entry = entryIterator.next();
@@ -178,6 +178,7 @@ private Map flushOutputFormat() {
@Override
protected void writeMultipleRecordsInternal() throws Exception {
+ notSupportBatchWrite("HiveWriter");
}
@Override
@@ -187,6 +188,11 @@ public void writeRecord(Row row) throws IOException {
Map event = null;
if (row.getField(0) instanceof Map) {
event = (Map) row.getField(0);
+
+ if (null != event && event.containsKey("message")) {
+ event = MapUtils.getMap(event, "message");
+ }
+
tablePath = PathConverterUtil.regaxByRules(event, tableBasePath, distributeTableMapping);
fromLogData = true;
} else {
@@ -210,11 +216,11 @@ public void writeRecord(Row row) throws IOException {
//row contains the nested map payload plus the channel, while rowData holds only the plain data; add the size difference here
if (fromLogData && bytesWriteCounter != null) {
- bytesWriteCounter.add(row.toString().length() - rowData.toString().length());
+ bytesWriteCounter.add((long)row.toString().length() - rowData.toString().length());
}
} catch (Exception e) {
// Dirty data produced by the write has already been handled by hdfsOutputFormat, so only log the exception here
- if (numWriteCounter.getLocalValue() % 1000 == 0) {
+ if (numWriteCounter.getLocalValue() % LOG_PRINT_INTERNAL == 0) {
LOG.warn("写入hdfs异常:", e);
}
}
diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormatBuilder.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormatBuilder.java
index f8c69886b1..529d8d743b 100644
--- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormatBuilder.java
+++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormatBuilder.java
@@ -131,9 +131,12 @@ protected void checkFormat() {
if (this.format.tableBasePath == null || this.format.tableBasePath.length() == 0) {
throw new IllegalArgumentException("No tableBasePath supplied.");
}
+
if (this.format.tableInfos.isEmpty()){
throw new IllegalArgumentException("No tableInfos supplied.");
}
+
+ notSupportBatchWrite("HiveWriter");
}
}
diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveWriter.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveWriter.java
index 5168b850a6..a3483caaea 100644
--- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveWriter.java
+++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveWriter.java
@@ -19,31 +19,34 @@
import com.dtstack.flinkx.config.DataTransferConfig;
import com.dtstack.flinkx.config.WriterConfig;
+import com.dtstack.flinkx.constants.ConstantValue;
import com.dtstack.flinkx.hive.TableInfo;
import com.dtstack.flinkx.hive.TimePartitionFormat;
import com.dtstack.flinkx.hive.util.HiveUtil;
import com.dtstack.flinkx.writer.BaseDataWriter;
import com.dtstack.flinkx.writer.WriteMode;
-import com.google.gson.Gson;
+import com.google.gson.internal.LinkedTreeMap;
+import com.google.gson.reflect.TypeToken;
import org.apache.commons.collections.MapUtils;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.types.Row;
import parquet.hadoop.ParquetWriter;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import static com.dtstack.flinkx.hdfs.HdfsConfigKeys.KEY_ROW_GROUP_SIZE;
import static com.dtstack.flinkx.hive.HiveConfigKeys.*;
+import static com.dtstack.flinkx.util.GsonUtil.GSON;
/**
* @author toutian
*/
public class HiveWriter extends BaseDataWriter {
+ private String readerName;
+
private String defaultFs;
private String fileType;
@@ -80,10 +83,9 @@ public class HiveWriter extends BaseDataWriter {
private boolean autoCreateTable;
- private Gson gson = new Gson();
-
public HiveWriter(DataTransferConfig config) {
super(config);
+ readerName = config.getJob().getContent().get(0).getReader().getName();
WriterConfig writerConfig = config.getJob().getContent().get(0).getWriter();
hadoopConfig = (Map) writerConfig.getParameter().getVal(KEY_HADOOP_CONFIG);
defaultFs = writerConfig.getParameter().getStringVal(KEY_DEFAULT_FS);
@@ -95,9 +97,9 @@ public HiveWriter(DataTransferConfig config) {
partition = writerConfig.getParameter().getStringVal(KEY_PARTITION, "pt");
delimiter = writerConfig.getParameter().getStringVal(KEY_FIELD_DELIMITER, "\u0001");
charSet = writerConfig.getParameter().getStringVal(KEY_CHARSET_NAME);
- maxFileSize = writerConfig.getParameter().getLongVal(KEY_MAX_FILE_SIZE, 1024 * 1024 * 1024);
+ maxFileSize = writerConfig.getParameter().getLongVal(KEY_MAX_FILE_SIZE, ConstantValue.STORE_SIZE_G);
compress = writerConfig.getParameter().getStringVal(KEY_COMPRESS);
- bufferSize = writerConfig.getParameter().getLongVal(KEY_BUFFER_SIZE, 128 * 1024 * 1024);
+ bufferSize = writerConfig.getParameter().getLongVal(KEY_BUFFER_SIZE, 128 * ConstantValue.STORE_SIZE_M);
rowGroupSize = writerConfig.getParameter().getIntVal(KEY_ROW_GROUP_SIZE, ParquetWriter.DEFAULT_BLOCK_SIZE);
mode = writerConfig.getParameter().getStringVal(KEY_WRITE_MODE, WriteMode.APPEND.name());
@@ -120,18 +122,18 @@ public HiveWriter(DataTransferConfig config) {
}
}
+ /**
+ * Mapping for distributed (sharded) tables.
+ * distributeTableMapping maps table name -> group name.
+ * tableInfos maps table name -> its table definition.
+ */
private void formatHiveDistributeInfo(String distributeTable) {
- /**
- * 分表的映射关系
- * distributeTableMapping 的数据结构为
- * tableInfos的数据结构为
- */
distributeTableMapping = new HashMap<>(32);
if (StringUtils.isNotBlank(distributeTable)) {
- Map distributeTableMap = gson.fromJson(distributeTable, Map.class);
- for (Map.Entry entry : distributeTableMap.entrySet()) {
+ Map> distributeTableMap = GSON.fromJson(distributeTable, new TypeToken>>(){}.getType());
+ for (Map.Entry> entry : distributeTableMap.entrySet()) {
String groupName = entry.getKey();
- List groupTables = (List) entry.getValue();
+ List groupTables = entry.getValue();
for (String tableName : groupTables) {
distributeTableMapping.put(tableName, groupName);
}
@@ -142,10 +144,12 @@ private void formatHiveDistributeInfo(String distributeTable) {
private void formatHiveTableInfo(String tablesColumn) {
tableInfos = new HashMap<>(16);
if (StringUtils.isNotEmpty(tablesColumn)) {
- Map tableColumnMap = gson.fromJson(tablesColumn, Map.class);
- for (Map.Entry entry : tableColumnMap.entrySet()) {
+ Map>> tableColumnMap = GSON.fromJson(tablesColumn, new TypeToken> >>(){}.getType());
+ List
-
- junit
- junit
- 4.12
- test
-
\ No newline at end of file
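
The HiveWriter hunk above switches from a raw Gson Map parse to a TypeToken so the distributeTable JSON keeps its nested structure (the generic type parameters were dropped in this excerpt). A self-contained sketch of the same pattern, with a made-up payload, assuming the shape is group name -> list of table names:

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import java.util.List;
import java.util.Map;

public class DistributeTableParseSketch {
    public static void main(String[] args) {
        String distributeTable = "{\"fact_orders\":[\"orders_0\",\"orders_1\"]}";
        Map<String, List<String>> byGroup = new Gson().fromJson(
                distributeTable, new TypeToken<Map<String, List<String>>>() {}.getType());
        // Invert the mapping the same way formatHiveDistributeInfo does: table name -> group name
        byGroup.forEach((group, tables) ->
                tables.forEach(table -> System.out.println(table + " -> " + group)));
    }
}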
diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java
index 928580a166..4eb6ec865a 100644
--- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java
+++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java
@@ -26,13 +26,22 @@
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Type;
-import org.apache.kudu.client.*;
+import org.apache.kudu.client.AsyncKuduClient;
+import org.apache.kudu.client.AsyncKuduScanner;
+import org.apache.kudu.client.KuduClient;
+import org.apache.kudu.client.KuduPredicate;
+import org.apache.kudu.client.KuduScanToken;
+import org.apache.kudu.client.KuduTable;
import java.io.IOException;
import java.math.BigDecimal;
import java.security.PrivilegedExceptionAction;
import java.sql.Timestamp;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
diff --git a/flinkx-kudu/flinkx-kudu-reader/pom.xml b/flinkx-kudu/flinkx-kudu-reader/pom.xml
index c8a99fea22..f9a1580d71 100644
--- a/flinkx-kudu/flinkx-kudu-reader/pom.xml
+++ b/flinkx-kudu/flinkx-kudu-reader/pom.xml
@@ -49,8 +49,12 @@
shade.kudureader.io.netty
- com.google
- shade.kudureader.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java
index f0ddce2857..6be5434a52 100644
--- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java
+++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java
@@ -26,7 +26,11 @@
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.types.Row;
import org.apache.kudu.Type;
-import org.apache.kudu.client.*;
+import org.apache.kudu.client.KuduClient;
+import org.apache.kudu.client.KuduScanToken;
+import org.apache.kudu.client.KuduScanner;
+import org.apache.kudu.client.RowResult;
+import org.apache.kudu.client.RowResultIterator;
import java.io.IOException;
import java.util.List;
diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java
index 48d0e43fc5..4959731c76 100644
--- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java
+++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java
@@ -19,6 +19,7 @@
package com.dtstack.flinkx.kudu.reader;
+import com.dtstack.flinkx.constants.ConstantValue;
import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder;
import com.dtstack.flinkx.kudu.core.KuduConfig;
import com.dtstack.flinkx.reader.MetaColumn;
@@ -50,5 +51,9 @@ protected void checkFormat() {
if (format.columns == null || format.columns.size() == 0){
throw new IllegalArgumentException("columns can not be empty");
}
+
+ if (format.kuduConfig.getBatchSizeBytes() > ConstantValue.STORE_SIZE_G) {
+ throw new IllegalArgumentException("批量读取字节数必须小于[1G]");
+ }
}
}
diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java
index 7b09c9048d..604b920a6e 100644
--- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java
+++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java
@@ -32,7 +32,19 @@
import java.util.List;
-import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.*;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_ADMIN_OPERATION_TIMEOUT;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_AUTHENTICATION;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_BATCH_SIZE_BYTES;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_BOSS_COUNT;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_FILTER;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_KEYTABFILE;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_MASTER_ADDRESSES;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_OPERATION_TIMEOUT;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_PRINCIPAL;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_QUERY_TIMEOUT;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_READ_MODE;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_TABLE;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_WORKER_COUNT;
/**
* @author jiangbo
diff --git a/flinkx-kudu/flinkx-kudu-writer/pom.xml b/flinkx-kudu/flinkx-kudu-writer/pom.xml
index 742fae63ab..256505e5ab 100644
--- a/flinkx-kudu/flinkx-kudu-writer/pom.xml
+++ b/flinkx-kudu/flinkx-kudu-writer/pom.xml
@@ -49,8 +49,12 @@
shade.kuduwriter.io.netty
- com.google
- shade.kuduwriter.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java
index df7a521186..69f346dcba 100644
--- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java
+++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java
@@ -28,7 +28,12 @@
import com.dtstack.flinkx.util.ExceptionUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.types.Row;
-import org.apache.kudu.client.*;
+import org.apache.kudu.client.KuduClient;
+import org.apache.kudu.client.KuduException;
+import org.apache.kudu.client.KuduSession;
+import org.apache.kudu.client.KuduTable;
+import org.apache.kudu.client.Operation;
+import org.apache.kudu.client.SessionConfiguration;
import java.io.IOException;
import java.util.List;
diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java
index 265e5610c2..2cb5532262 100644
--- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java
+++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java
@@ -32,7 +32,16 @@
import java.util.List;
-import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.*;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_ADMIN_OPERATION_TIMEOUT;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_AUTHENTICATION;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_BOSS_COUNT;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_FLUSH_MODE;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_KEYTABFILE;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_MASTER_ADDRESSES;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_OPERATION_TIMEOUT;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_PRINCIPAL;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_TABLE;
+import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_WORKER_COUNT;
/**
* @author jiangbo
diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClassLoaderType.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClassLoaderType.java
index 540db38056..d287c44a70 100644
--- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClassLoaderType.java
+++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClassLoaderType.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package com.dtstack.flinkx.launcher;
/**
diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClusterClientFactory.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClusterClientFactory.java
index 1b823c77eb..c5318c00eb 100644
--- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClusterClientFactory.java
+++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClusterClientFactory.java
@@ -40,8 +40,6 @@
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.util.ConverterUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.File;
import java.net.InetSocketAddress;
@@ -97,7 +95,7 @@ public static ClusterClient createYarnClient(Options launcherOptions) {
ApplicationId applicationId;
if (StringUtils.isEmpty(launcherOptions.getAppId())) {
- applicationId = getAppIdFromYarn(yarnClient);
+ applicationId = getAppIdFromYarn(yarnClient, launcherOptions);
if(applicationId != null && StringUtils.isEmpty(applicationId.toString())) {
throw new RuntimeException("No flink session found on yarn cluster.");
}
@@ -156,7 +154,7 @@ private static AbstractYarnClusterDescriptor getClusterDescriptor(Options launch
return yarnClusterDescriptor;
}
- private static ApplicationId getAppIdFromYarn(YarnClient yarnClient) throws Exception{
+ private static ApplicationId getAppIdFromYarn(YarnClient yarnClient, Options launcherOptions) throws Exception{
Set set = new HashSet<>();
set.add("Apache Flink");
EnumSet enumSet = EnumSet.noneOf(YarnApplicationState.class);
@@ -175,6 +173,10 @@ private static ApplicationId getAppIdFromYarn(YarnClient yarnClient) throws Exce
continue;
}
+ if(!report.getQueue().equals(launcherOptions.getQueue())) {
+ continue;
+ }
+
int thisMemory = report.getApplicationResourceUsageReport().getNeededResources().getMemory();
int thisCores = report.getApplicationResourceUsageReport().getNeededResources().getVirtualCores();
diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java
index 91a4bd1409..c5c19fc9f4 100644
--- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java
+++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java
@@ -25,6 +25,7 @@
import com.dtstack.flinkx.launcher.perjob.PerJobSubmitter;
import com.dtstack.flinkx.options.OptionParser;
import com.dtstack.flinkx.options.Options;
+import com.dtstack.flinkx.util.JsonModifyUtil;
import com.dtstack.flinkx.util.SysUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.client.program.ClusterClient;
@@ -41,6 +42,7 @@
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
/**
@@ -94,8 +96,25 @@ public static void main(String[] args) throws Exception {
String mode = launcherOptions.getMode();
List argList = optionParser.getProgramExeArgList();
+
+ // Convert argList into a HashMap so parameter values can be looked up by name
+ HashMap temp = new HashMap<>(16);
+ for (int i = 0; i < argList.size(); i += 2) {
+ temp.put(argList.get(i), argList.get(i + 1));
+ }
+ // Replace the corresponding values in the job JSON
+ HashMap parameter = JsonModifyUtil.CommandTransform(temp.get("-p"));
+ temp.put("-job", JsonModifyUtil.JsonValueReplace(temp.get("-job"), parameter));
+
+ // Clear the list and refill it with the modified parameter values
+ argList.clear();
+ for (int i = 0; i < temp.size(); i++) {
+ argList.add(temp.keySet().toArray()[i].toString());
+ argList.add(temp.values().toArray()[i].toString());
+ }
+
if(mode.equals(ClusterMode.local.name())) {
- String[] localArgs = argList.toArray(new String[argList.size()]);
+ String[] localArgs = argList.toArray(new String[0]);
com.dtstack.flinkx.Main.main(localArgs);
} else {
String pluginRoot = launcherOptions.getPluginRoot();
@@ -237,9 +256,8 @@ public boolean accept(File dir, String name) {
}
private static String readJob(String job) {
- try {
- File file = new File(job);
- FileInputStream in = new FileInputStream(file);
+ File file = new File(job);
+ try (FileInputStream in = new FileInputStream(file)) {
byte[] fileContent = new byte[(int) file.length()];
in.read(fileContent);
in.close();
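
JsonModifyUtil is not shown in this excerpt, so its exact behavior is an assumption; the sketch below only illustrates the intent of the new -p handling in Launcher with a simple ${name}-style substitution (placeholder syntax, class name, and JSON are hypothetical):

import java.util.HashMap;
import java.util.Map;

public class JobJsonReplaceSketch {
    /** Assumed ${name}-style substitution; JsonModifyUtil may differ in detail. */
    static String replacePlaceholders(String jobJson, Map<String, String> params) {
        String result = jobJson;
        for (Map.Entry<String, String> e : params.entrySet()) {
            result = result.replace("${" + e.getKey() + "}", e.getValue());
        }
        return result;
    }

    public static void main(String[] args) {
        Map<String, String> params = new HashMap<>();
        params.put("table", "orders");
        String job = "{\"reader\":{\"table\":\"${table}\"}}";
        System.out.println(replacePlaceholders(job, params)); // {"reader":{"table":"orders"}}
    }
}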
diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/YarnConfLoader.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/YarnConfLoader.java
index 6707c8f138..b88eb078b8 100644
--- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/YarnConfLoader.java
+++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/YarnConfLoader.java
@@ -18,6 +18,7 @@
package com.dtstack.flinkx.launcher;
+import com.dtstack.flinkx.constants.ConstantValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -42,7 +43,7 @@ public static YarnConfiguration getYarnConf(String yarnConfDir) {
if(dir.exists() && dir.isDirectory()) {
File[] xmlFileList = new File(yarnConfDir).listFiles((dir1, name) -> {
- if(name.endsWith(".xml")){
+ if(name.endsWith(ConstantValue.FILE_SUFFIX_XML)){
return true;
}
return false;
diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/FlinkPerJobResourceUtil.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/FlinkPerJobResourceUtil.java
index 1d64181e81..508098067d 100644
--- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/FlinkPerJobResourceUtil.java
+++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/FlinkPerJobResourceUtil.java
@@ -52,10 +52,10 @@ public static ClusterSpecification createClusterSpecification(Properties conProp
int slotsPerTaskManager = 1;
if(conProp != null){
- if(conProp.contains(JOBMANAGER_MEMORY_MB)){
+ if(conProp.containsKey(JOBMANAGER_MEMORY_MB)){
jobmanagerMemoryMb = Math.max(MIN_JM_MEMORY, ValueUtil.getInt(conProp.getProperty(JOBMANAGER_MEMORY_MB)));
}
- if(conProp.contains(TASKMANAGER_MEMORY_MB)){
+ if(conProp.containsKey(TASKMANAGER_MEMORY_MB)){
taskmanagerMemoryMb = Math.max(MIN_JM_MEMORY, ValueUtil.getInt(conProp.getProperty(TASKMANAGER_MEMORY_MB)));
}
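
The contains -> containsKey change above fixes a real pitfall: java.util.Properties inherits Hashtable#contains, which matches values rather than keys. A quick demonstration (the property key below is illustrative, not the actual JOBMANAGER_MEMORY_MB constant):

import java.util.Properties;

public class PropertiesContainsDemo {
    public static void main(String[] args) {
        Properties conProp = new Properties();
        conProp.setProperty("jobmanager.memory.mb", "2048");
        // Hashtable#contains searches VALUES, so the key lookup silently fails
        System.out.println(conProp.contains("jobmanager.memory.mb"));    // false
        // containsKey is what the memory override actually needs
        System.out.println(conProp.containsKey("jobmanager.memory.mb")); // true
    }
}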
diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/PerJobSubmitter.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/PerJobSubmitter.java
index d9025e2ed9..81c6387395 100644
--- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/PerJobSubmitter.java
+++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/PerJobSubmitter.java
@@ -21,7 +21,7 @@
import com.dtstack.flinkx.launcher.ClassLoaderType;
import com.dtstack.flinkx.launcher.YarnConfLoader;
import com.dtstack.flinkx.options.Options;
-import com.dtstack.flinkx.util.MapUtil;
+import com.dtstack.flinkx.util.GsonUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.client.deployment.ClusterSpecification;
import org.apache.flink.client.program.ClusterClient;
@@ -64,7 +64,7 @@ public static String submit(Options options, File jarFile, String[] programArgs)
Configuration flinkConfig = StringUtils.isEmpty(options.getFlinkconf()) ? new Configuration() : GlobalConfiguration.loadConfiguration(options.getFlinkconf());
flinkConfig.setString("classloader.resolve-order", "child-first");
- Properties conProp = MapUtil.jsonStrToObject(options.getConfProp(), Properties.class);
+ Properties conProp = GsonUtil.GSON.fromJson(options.getConfProp(), Properties.class);
ClusterSpecification clusterSpecification = FlinkPerJobResourceUtil.createClusterSpecification(conProp);
clusterSpecification.setCreateProgramDelay(true);
clusterSpecification.setConfiguration(flinkConfig);
diff --git a/flinkx-launcher/src/test/java/bigdata/FlinkConfigTest.java b/flinkx-launcher/src/test/java/bigdata/FlinkConfigTest.java
deleted file mode 100644
index b974511769..0000000000
--- a/flinkx-launcher/src/test/java/bigdata/FlinkConfigTest.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package bigdata;
-
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.configuration.GlobalConfiguration;
-import org.apache.flink.configuration.JobManagerOptions;
-
-import java.io.File;
-
-/**
- * Created by softfly on 18/4/24.
- */
-public class FlinkConfigTest {
- public static void main(String[] args) {
- //Configuration config = GlobalConfiguration.loadConfiguration("/hadoop/flink-1.4.0/conf");
- //System.out.println(config.getString(JobManagerOptions.ADDRESS));
- String msg = "xxx" + File.separator;
- System.out.println(msg);
- }
-}
diff --git a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbClientUtil.java b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbClientUtil.java
index 26438d51fe..b5d78ffecc 100644
--- a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbClientUtil.java
+++ b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbClientUtil.java
@@ -19,7 +19,13 @@
package com.dtstack.flinkx.mongodb;
-import com.mongodb.*;
+import com.mongodb.AuthenticationMechanism;
+import com.mongodb.MongoClient;
+import com.mongodb.MongoClientOptions;
+import com.mongodb.MongoClientURI;
+import com.mongodb.MongoCredential;
+import com.mongodb.ServerAddress;
+import com.mongodb.WriteConcern;
import com.mongodb.client.MongoCursor;
import org.apache.commons.lang.StringUtils;
import org.bson.Document;
diff --git a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java
index 425dc21aa4..c419a2672e 100644
--- a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java
+++ b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java
@@ -29,9 +29,11 @@
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
-import java.util.*;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
-import static com.dtstack.flinkx.enums.ColumnType.*;
+import static com.dtstack.flinkx.enums.ColumnType.getType;
/**
* Utilities for mongodb database connection and data format conversion
diff --git a/flinkx-mongodb/flinkx-mongodb-reader/pom.xml b/flinkx-mongodb/flinkx-mongodb-reader/pom.xml
index 09814f6c3f..a9caa60251 100644
--- a/flinkx-mongodb/flinkx-mongodb-reader/pom.xml
+++ b/flinkx-mongodb/flinkx-mongodb-reader/pom.xml
@@ -56,8 +56,12 @@
shade.mongodbreader.io.netty
- com.google
- shade.mongodbreader.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormat.java b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormat.java
index 3f3ec2d863..ac8e65d705 100644
--- a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormat.java
+++ b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormat.java
@@ -23,6 +23,7 @@
import com.dtstack.flinkx.mongodb.MongodbClientUtil;
import com.dtstack.flinkx.mongodb.MongodbConfig;
import com.dtstack.flinkx.reader.MetaColumn;
+import com.dtstack.flinkx.util.ExceptionUtil;
import com.dtstack.flinkx.util.StringUtil;
import com.mongodb.BasicDBObject;
import com.mongodb.MongoClient;
@@ -37,7 +38,8 @@
import org.bson.conversions.Bson;
import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.List;
/**
* Read plugin for reading static data
@@ -149,6 +151,9 @@ public InputSplit[] createInputSplitsInternal(int minNumSplits) throws IOExcepti
if(size * minNumSplits < docNum){
splits.add(new MongodbInputSplit((int)(size * minNumSplits), (int)(docNum - size * minNumSplits)));
}
+ } catch (Exception e){
+ LOG.error("error to create inputSplits, e = {}", ExceptionUtil.getErrorMessage(e));
+ throw e;
} finally {
MongodbClientUtil.close(client, null);
}
diff --git a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormatBuilder.java b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormatBuilder.java
index 918fb6b8f3..e1661838c2 100644
--- a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormatBuilder.java
+++ b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormatBuilder.java
@@ -18,6 +18,7 @@
package com.dtstack.flinkx.mongodb.reader;
+import com.dtstack.flinkx.constants.ConstantValue;
import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder;
import com.dtstack.flinkx.mongodb.MongodbConfig;
import com.dtstack.flinkx.reader.MetaColumn;
@@ -55,5 +56,9 @@ protected void checkFormat() {
if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){
throw new UnsupportedOperationException("This plugin not support restore from failed state");
}
+
+ if (format.mongodbConfig.getFetchSize() > ConstantValue.MAX_BATCH_SIZE) {
+ throw new IllegalArgumentException("批量读取条数必须小于[200000]条");
+ }
}
}
diff --git a/flinkx-mongodb/flinkx-mongodb-writer/pom.xml b/flinkx-mongodb/flinkx-mongodb-writer/pom.xml
index 1d81ef36ef..a411920d71 100644
--- a/flinkx-mongodb/flinkx-mongodb-writer/pom.xml
+++ b/flinkx-mongodb/flinkx-mongodb-writer/pom.xml
@@ -56,8 +56,12 @@
shade.mongodbwriter.io.netty
- com.google
- shade.mongodbwriter.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-mysql/flinkx-mysql-dreader/pom.xml b/flinkx-mysql/flinkx-mysql-dreader/pom.xml
index 047e273350..21e5e39d4f 100644
--- a/flinkx-mysql/flinkx-mysql-dreader/pom.xml
+++ b/flinkx-mysql/flinkx-mysql-dreader/pom.xml
@@ -62,8 +62,12 @@
shade.mysqldreader.io.netty
- com.google
- shade.mysqldreader.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java
index 9eae9b3dee..60181af17c 100644
--- a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java
+++ b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java
@@ -29,7 +29,6 @@
import java.util.ArrayList;
import java.util.Collections;
-import java.util.List;
/**
* @author toutian
@@ -47,8 +46,8 @@ protected DistributedJdbcInputFormatBuilder getBuilder(){
}
@Override
- protected List buildConnections(){
- List sourceList = new ArrayList<>(connectionConfigs.size());
+ protected ArrayList buildConnections(){
+ ArrayList sourceList = new ArrayList<>(connectionConfigs.size());
for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) {
String curUsername = (connectionConfig.getUsername() == null || connectionConfig.getUsername().length() == 0)
? username : connectionConfig.getUsername();
diff --git a/flinkx-mysql/flinkx-mysql-reader/pom.xml b/flinkx-mysql/flinkx-mysql-reader/pom.xml
index 8856cdf063..a52536b7ec 100644
--- a/flinkx-mysql/flinkx-mysql-reader/pom.xml
+++ b/flinkx-mysql/flinkx-mysql-reader/pom.xml
@@ -62,9 +62,13 @@
shade.mysqlreader.io.netty
- com.google
- shade.mysqlreader.com.google
-
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java
index 9836df273a..148d4bbb95 100644
--- a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java
+++ b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java
@@ -18,11 +18,10 @@
package com.dtstack.flinkx.mysql.format;
import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat;
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputSplit;
import com.dtstack.flinkx.rdb.util.DbUtil;
import com.dtstack.flinkx.util.ClassUtil;
import com.dtstack.flinkx.util.DateUtil;
-import com.dtstack.flinkx.util.ExceptionUtil;
-import com.google.gson.Gson;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.core.io.InputSplit;
@@ -51,7 +50,9 @@ public void openInternal(InputSplit inputSplit) throws IOException {
String startLocation = incrementConfig.getStartLocation();
if (incrementConfig.isPolling()) {
- endLocationAccumulator.add(Long.parseLong(startLocation));
+ if (StringUtils.isNotEmpty(startLocation)) {
+ endLocationAccumulator.add(Long.parseLong(startLocation));
+ }
isTimestamp = "timestamp".equalsIgnoreCase(incrementConfig.getColumnType());
} else if ((incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc())) {
getMaxValue(inputSplit);
@@ -64,8 +65,10 @@ public void openInternal(InputSplit inputSplit) throws IOException {
}
querySql = buildQuerySql(inputSplit);
- //MySQL流式读取
- fetchSize = Integer.MIN_VALUE;
+ JdbcInputSplit jdbcInputSplit = (JdbcInputSplit) inputSplit;
+ if (null != jdbcInputSplit.getStartLocation()) {
+ startLocation = jdbcInputSplit.getStartLocation();
+ }
executeQuery(startLocation);
columnCount = resultSet.getMetaData().getColumnCount();
boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("(");
@@ -112,7 +115,6 @@ public Row nextRecordInternal(Row row) throws IOException {
}
return super.nextRecordInternal(row);
}catch (Exception e) {
- LOG.error("error to get next record, row = {}, descColumnTypeList = {}, e = {}", row, new Gson().toJson(descColumnTypeList), ExceptionUtil.getErrorMessage(e));
throw new IOException("Couldn't read data - " + e.getMessage(), e);
}
}
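
The MysqlInputFormat hunk above drops the unconditional fetchSize = Integer.MIN_VALUE. For context, that sentinel is what makes MySQL Connector/J stream a result set row by row instead of buffering it in memory; a minimal sketch of the pattern (not FlinkX code):

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class MysqlStreamingFetchSketch {
    /** Streams a large MySQL result set instead of materializing it in memory. */
    static void streamAll(Connection connection, String sql) throws Exception {
        try (PreparedStatement ps = connection.prepareStatement(
                sql, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
            // Connector/J switches to row-by-row streaming only for this sentinel value
            ps.setFetchSize(Integer.MIN_VALUE);
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    // process the current row
                }
            }
        }
    }
}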
diff --git a/flinkx-mysql/flinkx-mysql-reader/src/test/java/bigdata/TestMysqlPk.java b/flinkx-mysql/flinkx-mysql-reader/src/test/java/bigdata/TestMysqlPk.java
deleted file mode 100644
index 50dff8f343..0000000000
--- a/flinkx-mysql/flinkx-mysql-reader/src/test/java/bigdata/TestMysqlPk.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package bigdata;
-
-import com.dtstack.flinkx.mysql.MySqlDatabaseMeta;
-import com.dtstack.flinkx.rdb.util.DbUtil;
-
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.SQLException;
-import java.util.Map;
-
-
-public class TestMysqlPk {
- public static void main(String[] args) throws ClassNotFoundException, SQLException {
- MySqlDatabaseMeta databaseMeta = new MySqlDatabaseMeta();
- Class.forName(databaseMeta.getDriverClass());
- Connection conn = DriverManager.getConnection("jdbc:mysql://172.16.8.104:3306/test?useCursorFetch=true", "dtstack", "abc123");
- //List list = databaseMeta.listUniqueKeys("sb250", conn);
- //System.out.println(list);
- Map map = DbUtil.getPrimaryOrUniqueKeys("sb252", conn);
- System.out.println(map);
- }
-}
diff --git a/flinkx-mysql/flinkx-mysql-writer/pom.xml b/flinkx-mysql/flinkx-mysql-writer/pom.xml
index 82e3aa55c4..94e5d95a50 100644
--- a/flinkx-mysql/flinkx-mysql-writer/pom.xml
+++ b/flinkx-mysql/flinkx-mysql-writer/pom.xml
@@ -62,8 +62,12 @@
shade.mysqlwriter.io.netty
- com.google
- shade.mysqlwriter.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-odps/flinkx-odps-core/src/main/java/com/dtstack/flinkx/odps/OdpsUtil.java b/flinkx-odps/flinkx-odps-core/src/main/java/com/dtstack/flinkx/odps/OdpsUtil.java
index dd54b0ba18..1cb857e3ec 100644
--- a/flinkx-odps/flinkx-odps-core/src/main/java/com/dtstack/flinkx/odps/OdpsUtil.java
+++ b/flinkx-odps/flinkx-odps-core/src/main/java/com/dtstack/flinkx/odps/OdpsUtil.java
@@ -56,7 +56,7 @@ public class OdpsUtil {
public static int MAX_RETRY_TIME = 3;
- public static final long BUFFER_SIZE_DEFAULT = 64 * 1024 * 1024;
+ public static final long BUFFER_SIZE_DEFAULT = 64 * 1024 * 1024L;
public static Odps initOdps(Map odpsConfig) {
String odpsServer = odpsConfig.get(OdpsConfigKeys.KEY_ODPS_SERVER);
diff --git a/flinkx-odps/flinkx-odps-reader/pom.xml b/flinkx-odps/flinkx-odps-reader/pom.xml
index 866fd622fe..8007e473c0 100644
--- a/flinkx-odps/flinkx-odps-reader/pom.xml
+++ b/flinkx-odps/flinkx-odps-reader/pom.xml
@@ -50,6 +50,16 @@
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
diff --git a/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsInputFormatBuilder.java b/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsInputFormatBuilder.java
index 1bd4db402b..eba6fc7175 100644
--- a/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsInputFormatBuilder.java
+++ b/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsInputFormatBuilder.java
@@ -19,11 +19,11 @@
package com.dtstack.flinkx.odps.reader;
import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder;
+import com.dtstack.flinkx.odps.OdpsConfigKeys;
import com.dtstack.flinkx.reader.MetaColumn;
import java.util.List;
import java.util.Map;
-import static com.dtstack.flinkx.odps.OdpsConfigKeys.*;
/**
* The Builder of OdpsInputFormat
@@ -41,7 +41,7 @@ public OdpsInputFormatBuilder() {
public void setOdpsConfig(Map odpsConfig) {
format.odpsConfig = odpsConfig;
- format.projectName = odpsConfig.get(KEY_PROJECT);
+ format.projectName = odpsConfig.get(OdpsConfigKeys.KEY_PROJECT);
}
public void setTableName(String tableName) {
diff --git a/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsReader.java b/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsReader.java
index 4a076ce122..f45ee2d76a 100644
--- a/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsReader.java
+++ b/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsReader.java
@@ -29,7 +29,9 @@
import java.util.List;
import java.util.Map;
-import static com.dtstack.flinkx.odps.OdpsConfigKeys.*;
+import static com.dtstack.flinkx.odps.OdpsConfigKeys.KEY_ODPS_CONFIG;
+import static com.dtstack.flinkx.odps.OdpsConfigKeys.KEY_PARTITION;
+import static com.dtstack.flinkx.odps.OdpsConfigKeys.KEY_TABLE;
/**
* The reader plugin of Odps
diff --git a/flinkx-odps/flinkx-odps-writer/pom.xml b/flinkx-odps/flinkx-odps-writer/pom.xml
index b268bf43b1..f1622adc0b 100644
--- a/flinkx-odps/flinkx-odps-writer/pom.xml
+++ b/flinkx-odps/flinkx-odps-writer/pom.xml
@@ -55,6 +55,16 @@
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
diff --git a/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormat.java b/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormat.java
index 7e6510104d..cdf8b9c6e8 100644
--- a/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormat.java
+++ b/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormat.java
@@ -116,7 +116,7 @@ public void writeSingleRecordInternal(Row row) throws WriteRecordException{
@Override
protected void writeMultipleRecordsInternal() throws Exception {
- throw new UnsupportedOperationException();
+ notSupportBatchWrite("OdpsWriter");
}
private Record row2record(Row row, String[] columnTypes) throws WriteRecordException {
diff --git a/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormatBuilder.java b/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormatBuilder.java
index 95c7dad246..ee3d21d10a 100644
--- a/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormatBuilder.java
+++ b/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormatBuilder.java
@@ -76,5 +76,7 @@ protected void checkFormat() {
if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){
throw new UnsupportedOperationException("This plugin not support restore from failed state");
}
+
+ notSupportBatchWrite("OdpsWriter");
}
}
diff --git a/flinkx-oracle/flinkx-oracle-core/src/main/java/com/dtstack/flinkx/oracle/OracleDatabaseMeta.java b/flinkx-oracle/flinkx-oracle-core/src/main/java/com/dtstack/flinkx/oracle/OracleDatabaseMeta.java
index 5033bf64af..bc4b88f3ba 100644
--- a/flinkx-oracle/flinkx-oracle-core/src/main/java/com/dtstack/flinkx/oracle/OracleDatabaseMeta.java
+++ b/flinkx-oracle/flinkx-oracle-core/src/main/java/com/dtstack/flinkx/oracle/OracleDatabaseMeta.java
@@ -36,7 +36,7 @@ public class OracleDatabaseMeta extends BaseDatabaseMeta {
public String quoteTable(String table) {
table = table.replace("\"","");
String[] part = table.split("\\.");
- if(part.length == 2) {
+ if(part.length == DB_TABLE_PART_SIZE) {
table = getStartQuote() + part[0] + getEndQuote() + "." + getStartQuote() + part[1] + getEndQuote();
} else {
table = getStartQuote() + table + getEndQuote();
diff --git a/flinkx-oracle/flinkx-oracle-reader/pom.xml b/flinkx-oracle/flinkx-oracle-reader/pom.xml
index 52ff1880be..225d8e2554 100644
--- a/flinkx-oracle/flinkx-oracle-reader/pom.xml
+++ b/flinkx-oracle/flinkx-oracle-reader/pom.xml
@@ -62,8 +62,12 @@
shade.oracleReader.io.netty
- com.google.guava
- shade.oracleReader.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-oracle/flinkx-oracle-reader/src/test/java/com/dtstack/flinkx/oracle/reader/test/OracleLocalTest.java b/flinkx-oracle/flinkx-oracle-reader/src/test/java/com/dtstack/flinkx/oracle/reader/test/OracleLocalTest.java
deleted file mode 100644
index d4227ff774..0000000000
--- a/flinkx-oracle/flinkx-oracle-reader/src/test/java/com/dtstack/flinkx/oracle/reader/test/OracleLocalTest.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package com.dtstack.flinkx.oracle.reader.test;
-
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.SQLException;
-
-/**
- * Created by softfly on 18/2/1.
- */
-public class OracleLocalTest {
- public static void main(String[] args) throws ClassNotFoundException, SQLException {
- final String DRIVER = "oracle.jdbc.OracleDriver";
- final String URL = "jdbc:oracle:thin:dev/pass1234@172.16.8.121:1521:dtstack";
- final String USER = "dev";
- final String PASSWORD = "pass1234";
- Connection connection = null;
- Class.forName(DRIVER);
- connection = DriverManager.getConnection(URL);
-
-
- }
-}
diff --git a/flinkx-oracle/flinkx-oracle-writer/pom.xml b/flinkx-oracle/flinkx-oracle-writer/pom.xml
index c6fc7b4e2b..6e9c1850e7 100644
--- a/flinkx-oracle/flinkx-oracle-writer/pom.xml
+++ b/flinkx-oracle/flinkx-oracle-writer/pom.xml
@@ -63,8 +63,12 @@
shade.oracleWriter.io.netty
- com.google.guava
- shade.oracleWriter.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java b/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java
index ed77a77ea4..98840d5b10 100644
--- a/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java
+++ b/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java
@@ -18,11 +18,16 @@
package com.dtstack.flinkx.oracle.format;
import com.dtstack.flinkx.enums.ColumnType;
+import com.dtstack.flinkx.oracle.OracleDatabaseMeta;
import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat;
import com.dtstack.flinkx.util.DateUtil;
import org.apache.flink.types.Row;
-import java.sql.*;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
@@ -63,7 +68,7 @@ protected List probeFullColumns(String table, Connection dbConn) throws
String schema =null;
String[] parts = table.split("\\.");
- if(parts.length == 2) {
+ if(parts.length == OracleDatabaseMeta.DB_TABLE_PART_SIZE) {
schema = parts[0].toUpperCase();
table = parts[1];
}
@@ -79,24 +84,26 @@ protected List probeFullColumns(String table, Connection dbConn) throws
@Override
protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException {
Map> map = new HashMap<>(16);
- PreparedStatement ps = dbConn.prepareStatement(String.format(GET_INDEX_SQL,table));
- ResultSet rs = ps.executeQuery();
- while(rs.next()) {
- String indexName = rs.getString("INDEX_NAME");
- if(!map.containsKey(indexName)) {
- map.put(indexName,new ArrayList<>());
+ try (PreparedStatement ps = dbConn.prepareStatement(String.format(GET_INDEX_SQL, table));
+ ResultSet rs = ps.executeQuery()) {
+ while(rs.next()) {
+ String indexName = rs.getString("INDEX_NAME");
+ if(!map.containsKey(indexName)) {
+ map.put(indexName,new ArrayList<>());
+ }
+ map.get(indexName).add(rs.getString("COLUMN_NAME"));
}
- map.get(indexName).add(rs.getString("COLUMN_NAME"));
- }
- Map> retMap = new HashMap<>((map.size()<<2)/3);
- for(Map.Entry> entry: map.entrySet()) {
- String k = entry.getKey();
- List v = entry.getValue();
- if(v!=null && v.size() != 0 && v.get(0) != null) {
- retMap.put(k, v);
+
+ Map> retMap = new HashMap<>((map.size()<<2)/3);
+ for(Map.Entry> entry: map.entrySet()) {
+ String k = entry.getKey();
+ List v = entry.getValue();
+ if(v!=null && v.size() != 0 && v.get(0) != null) {
+ retMap.put(k, v);
+ }
}
+ return retMap;
}
- return retMap;
}
}
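The probePrimaryKeys change above is a try-with-resources refactor: the PreparedStatement and ResultSet used to be left unclosed. A minimal sketch of the pattern, simplified and not the actual FlinkX class (the SQL text and column names are placeholders):

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class IndexProbeSketch {
    // Group index columns by index name; both resources close automatically, even on failure.
    public static Map<String, List<String>> probeIndexes(Connection conn, String table) throws SQLException {
        Map<String, List<String>> map = new HashMap<>(16);
        String sql = String.format("SELECT INDEX_NAME, COLUMN_NAME FROM all_ind_columns WHERE table_name = '%s'", table);
        try (PreparedStatement ps = conn.prepareStatement(sql);
             ResultSet rs = ps.executeQuery()) {
            while (rs.next()) {
                map.computeIfAbsent(rs.getString("INDEX_NAME"), k -> new ArrayList<>())
                        .add(rs.getString("COLUMN_NAME"));
            }
        }
        return map;
    }
}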
diff --git a/flinkx-pgwal/flinkx-pgwal-reader/pom.xml b/flinkx-pgwal/flinkx-pgwal-reader/pom.xml
index 182ea07067..04f327d64e 100644
--- a/flinkx-pgwal/flinkx-pgwal-reader/pom.xml
+++ b/flinkx-pgwal/flinkx-pgwal-reader/pom.xml
@@ -33,6 +33,16 @@
false
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
diff --git a/flinkx-phoenix/flinkx-phoenix-reader/pom.xml b/flinkx-phoenix/flinkx-phoenix-reader/pom.xml
index 180385dc01..b067a02a7e 100644
--- a/flinkx-phoenix/flinkx-phoenix-reader/pom.xml
+++ b/flinkx-phoenix/flinkx-phoenix-reader/pom.xml
@@ -56,6 +56,16 @@
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
diff --git a/flinkx-phoenix/flinkx-phoenix-reader/src/main/java/com/dtstack/flinkx/phoenix/format/PhoenixInputFormat.java b/flinkx-phoenix/flinkx-phoenix-reader/src/main/java/com/dtstack/flinkx/phoenix/format/PhoenixInputFormat.java
index 6d6b416559..89bdc80968 100644
--- a/flinkx-phoenix/flinkx-phoenix-reader/src/main/java/com/dtstack/flinkx/phoenix/format/PhoenixInputFormat.java
+++ b/flinkx-phoenix/flinkx-phoenix-reader/src/main/java/com/dtstack/flinkx/phoenix/format/PhoenixInputFormat.java
@@ -29,7 +29,6 @@
import java.io.IOException;
import java.sql.SQLException;
-import java.sql.Statement;
import static com.dtstack.flinkx.rdb.util.DbUtil.clobToString;
@@ -65,7 +64,7 @@ public void openInternal(InputSplit inputSplit) throws IOException {
// Some drivers require auto-commit to be disabled before the fetchSize parameter takes effect
dbConn.setAutoCommit(false);
- Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
+ statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
statement.setFetchSize(0);
diff --git a/flinkx-phoenix/flinkx-phoenix-writer/pom.xml b/flinkx-phoenix/flinkx-phoenix-writer/pom.xml
index b97dab27c8..1dc16435a8 100644
--- a/flinkx-phoenix/flinkx-phoenix-writer/pom.xml
+++ b/flinkx-phoenix/flinkx-phoenix-writer/pom.xml
@@ -56,6 +56,16 @@
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
diff --git a/flinkx-polardb/flinkx-polardb-dreader/pom.xml b/flinkx-polardb/flinkx-polardb-dreader/pom.xml
index d477787cac..26060ebcf6 100644
--- a/flinkx-polardb/flinkx-polardb-dreader/pom.xml
+++ b/flinkx-polardb/flinkx-polardb-dreader/pom.xml
@@ -56,6 +56,14 @@
com.mysql
shade.polardb.com.mysql
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
diff --git a/flinkx-polardb/flinkx-polardb-dreader/src/main/java/com/dtstack/flinkx/polardbd/reader/PolardbdReader.java b/flinkx-polardb/flinkx-polardb-dreader/src/main/java/com/dtstack/flinkx/polardbd/reader/PolardbdReader.java
index b7b3e95855..5e21aec025 100644
--- a/flinkx-polardb/flinkx-polardb-dreader/src/main/java/com/dtstack/flinkx/polardbd/reader/PolardbdReader.java
+++ b/flinkx-polardb/flinkx-polardb-dreader/src/main/java/com/dtstack/flinkx/polardbd/reader/PolardbdReader.java
@@ -21,7 +21,6 @@
import com.dtstack.flinkx.mysqld.reader.MysqldReader;
import com.dtstack.flinkx.polardbd.PolardbDatabaseMeta;
import com.dtstack.flinkx.polardbd.format.PolardbdInputFormat;
-import com.dtstack.flinkx.rdb.inputformat.DistributedJdbcInputFormat;
import com.dtstack.flinkx.rdb.inputformat.DistributedJdbcInputFormatBuilder;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
diff --git a/flinkx-polardb/flinkx-polardb-reader/pom.xml b/flinkx-polardb/flinkx-polardb-reader/pom.xml
index 093bdb5152..77ca955ae3 100644
--- a/flinkx-polardb/flinkx-polardb-reader/pom.xml
+++ b/flinkx-polardb/flinkx-polardb-reader/pom.xml
@@ -55,6 +55,14 @@
com.mysql
shade.polardb.com.mysql
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
diff --git a/flinkx-polardb/flinkx-polardb-writer/pom.xml b/flinkx-polardb/flinkx-polardb-writer/pom.xml
index d088128085..4ca3fb7511 100644
--- a/flinkx-polardb/flinkx-polardb-writer/pom.xml
+++ b/flinkx-polardb/flinkx-polardb-writer/pom.xml
@@ -56,6 +56,14 @@
com.mysql
shade.polardb.com.mysql
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
diff --git a/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlDatabaseMeta.java b/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlDatabaseMeta.java
index 985b99efaa..4af93b9480 100644
--- a/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlDatabaseMeta.java
+++ b/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlDatabaseMeta.java
@@ -20,10 +20,7 @@
import com.dtstack.flinkx.enums.EDatabaseType;
import com.dtstack.flinkx.rdb.BaseDatabaseMeta;
-import org.apache.commons.lang3.StringUtils;
-import java.util.ArrayList;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
diff --git a/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlTypeConverter.java b/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlTypeConverter.java
index 190ff818b1..04f5616f23 100644
--- a/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlTypeConverter.java
+++ b/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlTypeConverter.java
@@ -19,8 +19,10 @@
package com.dtstack.flinkx.postgresql;
import com.dtstack.flinkx.rdb.type.TypeConverterInterface;
+import org.apache.commons.lang3.StringUtils;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
/**
@@ -35,9 +37,9 @@ public class PostgresqlTypeConverter implements TypeConverterInterface {
private List byteTypes = Arrays.asList("bytea","bit varying");
- private List bitTypes = Arrays.asList("bit");
+ private List bitTypes = Collections.singletonList("bit");
- private List doubleTypes = Arrays.asList("money");
+ private List doubleTypes = Collections.singletonList("money");
private List intTypes = Arrays.asList("int","int2","int4","int8");
@@ -46,19 +48,24 @@ public Object convert(Object data,String typeName) {
if (data == null){
return null;
}
-
+ String dataValue = data.toString();
+ if(stringTypes.contains(typeName)){
+ return dataValue;
+ }
+ if(StringUtils.isBlank(dataValue)){
+ return null;
+ }
if(doubleTypes.contains(typeName)){
- data = Double.parseDouble(String.valueOf(data));
+ if(StringUtils.startsWith(dataValue, "$")){
+ dataValue = StringUtils.substring(dataValue, 1);
+ }
+ data = Double.parseDouble(dataValue);
} else if(bitTypes.contains(typeName)){
//
- } else if(stringTypes.contains(typeName)){
- data = String.valueOf(data);
- } else if(byteTypes.contains(typeName)){
- data = Byte.valueOf(String.valueOf(data));
+ }else if(byteTypes.contains(typeName)){
+ data = Byte.valueOf(dataValue);
} else if(intTypes.contains(typeName)){
- if(data instanceof String){
- data = Integer.parseInt(data.toString());
- }
+ data = Integer.parseInt(dataValue);
}
return data;
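The converter change above makes three behaviors explicit: values for string-typed columns are returned as-is, blank values for every other type become null, and PostgreSQL money values may carry a leading "$" that must be stripped before parsing. A rough standalone sketch of the money branch (assumed behavior, not the actual FlinkX class; values with thousands separators such as "$1,000.50" would still fail to parse, exactly as in the patch):

import org.apache.commons.lang3.StringUtils;

public class MoneyConvertSketch {
    // Mirrors only the leading-dollar case handled by the patch.
    static Double convertMoney(String raw) {
        if (StringUtils.isBlank(raw)) {
            return null;                                  // blank non-string values become null
        }
        String value = raw;
        if (StringUtils.startsWith(value, "$")) {
            value = StringUtils.substring(value, 1);      // drop the currency symbol
        }
        return Double.parseDouble(value);
    }

    public static void main(String[] args) {
        System.out.println(convertMoney("$12.34"));       // 12.34
        System.out.println(convertMoney("   "));          // null
    }
}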
diff --git a/flinkx-postgresql/flinkx-postgresql-reader/pom.xml b/flinkx-postgresql/flinkx-postgresql-reader/pom.xml
index f103f55d85..6e3b8612d6 100644
--- a/flinkx-postgresql/flinkx-postgresql-reader/pom.xml
+++ b/flinkx-postgresql/flinkx-postgresql-reader/pom.xml
@@ -63,8 +63,12 @@
shade.postgresqlreader.io.netty
- com.google
- shade.postgresqlreader.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java
index 5ba10faad7..790f66f7e9 100644
--- a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java
+++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java
@@ -19,6 +19,7 @@
package com.dtstack.flinkx.postgresql.format;
import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat;
+import com.dtstack.flinkx.rdb.inputformat.JdbcInputSplit;
import com.dtstack.flinkx.rdb.util.DbUtil;
import com.dtstack.flinkx.util.ClassUtil;
import org.apache.commons.collections.CollectionUtils;
@@ -49,7 +50,9 @@ public void openInternal(InputSplit inputSplit) throws IOException {
String startLocation = incrementConfig.getStartLocation();
if (incrementConfig.isPolling()) {
- endLocationAccumulator.add(Long.parseLong(startLocation));
+ if (StringUtils.isNotEmpty(startLocation)) {
+ endLocationAccumulator.add(Long.parseLong(startLocation));
+ }
isTimestamp = "timestamp".equalsIgnoreCase(incrementConfig.getColumnType());
} else if ((incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc())) {
getMaxValue(inputSplit);
@@ -62,6 +65,10 @@ public void openInternal(InputSplit inputSplit) throws IOException {
}
querySql = buildQuerySql(inputSplit);
+ JdbcInputSplit jdbcInputSplit = (JdbcInputSplit) inputSplit;
+ if (null != jdbcInputSplit.getStartLocation()) {
+ startLocation = jdbcInputSplit.getStartLocation();
+ }
executeQuery(startLocation);
columnCount = resultSet.getMetaData().getColumnCount();
boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("(");
diff --git a/flinkx-postgresql/flinkx-postgresql-writer/pom.xml b/flinkx-postgresql/flinkx-postgresql-writer/pom.xml
index 2264b0dddb..7f6857ea04 100644
--- a/flinkx-postgresql/flinkx-postgresql-writer/pom.xml
+++ b/flinkx-postgresql/flinkx-postgresql-writer/pom.xml
@@ -62,8 +62,12 @@
shade.postgresqlwriter.io.netty
- com.google
- shade.postgresqlwriter.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-pulsar/flinkx-pulsar-writer/pom.xml b/flinkx-pulsar/flinkx-pulsar-writer/pom.xml
index 72c8493094..88396482f4 100644
--- a/flinkx-pulsar/flinkx-pulsar-writer/pom.xml
+++ b/flinkx-pulsar/flinkx-pulsar-writer/pom.xml
@@ -11,11 +11,8 @@
flinkx-pulsar-writer
-
-
-
org.apache.maven.plugins
maven-shade-plugin
diff --git a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/Constants.java b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/Constants.java
index b12d1f8ece..28cd09eaad 100644
--- a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/Constants.java
+++ b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/Constants.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package com.dtstack.flinkx.pulsar.writer;
/**
diff --git a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormat.java b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormat.java
index 55aeee7dda..fdfab032da 100644
--- a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormat.java
+++ b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormat.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package com.dtstack.flinkx.pulsar.writer;
import com.dtstack.flinkx.exception.WriteRecordException;
@@ -92,7 +109,7 @@ protected void emit(Map event) throws IOException {
}
@Override
- protected void writeMultipleRecordsInternal() throws Exception {
+ protected void writeMultipleRecordsInternal() {
throw new UnsupportedOperationException();
}
diff --git a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormatBuilder.java b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormatBuilder.java
index 637a97e67a..c6e35cc79c 100644
--- a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormatBuilder.java
+++ b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormatBuilder.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package com.dtstack.flinkx.pulsar.writer;
import com.dtstack.flinkx.outputformat.BaseRichOutputFormatBuilder;
diff --git a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarWriter.java b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarWriter.java
index ae84b857f1..2068128992 100644
--- a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarWriter.java
+++ b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarWriter.java
@@ -1,8 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package com.dtstack.flinkx.pulsar.writer;
import com.dtstack.flinkx.config.DataTransferConfig;
import com.dtstack.flinkx.writer.BaseDataWriter;
-import static com.dtstack.flinkx.pulsar.writer.Constants.*;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.types.Row;
@@ -10,6 +26,8 @@
import java.util.List;
import java.util.Map;
+import static com.dtstack.flinkx.pulsar.writer.Constants.*;
+
/**
* @author: pierre
@@ -22,6 +40,7 @@ public class PulsarWriter extends BaseDataWriter {
protected List tableFields;
protected Map producerSettings;
+ @SuppressWarnings("unchecked")
public PulsarWriter(DataTransferConfig config){
super(config);
topic = config.getJob().getContent().get(0).getWriter().getParameter().getStringVal(KEY_TOPIC);
diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/BaseDatabaseMeta.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/BaseDatabaseMeta.java
index ad667ceb13..758be23ace 100644
--- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/BaseDatabaseMeta.java
+++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/BaseDatabaseMeta.java
@@ -21,7 +21,11 @@
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import java.io.Serializable;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
/**
* Abstract base parent class of other database prototype implementations
@@ -31,6 +35,8 @@
*/
public abstract class BaseDatabaseMeta implements DatabaseInterface, Serializable {
+ public static final int DB_TABLE_PART_SIZE = 2;
+
@Override
public String getStartQuote() {
return "\"";
diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DbUtil.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DbUtil.java
index 26269c7c34..88477b16b3 100644
--- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DbUtil.java
+++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DbUtil.java
@@ -18,21 +18,17 @@
package com.dtstack.flinkx.rdb.util;
import com.dtstack.flinkx.constants.ConstantValue;
-import com.dtstack.flinkx.rdb.DatabaseInterface;
import com.dtstack.flinkx.rdb.ParameterValuesProvider;
-import com.dtstack.flinkx.reader.MetaColumn;
import com.dtstack.flinkx.util.ClassUtil;
import com.dtstack.flinkx.util.ExceptionUtil;
import com.dtstack.flinkx.util.SysUtil;
import com.dtstack.flinkx.util.TelnetUtil;
-import com.google.gson.Gson;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.util.CollectionUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
-import java.math.BigDecimal;
import java.sql.*;
import java.util.ArrayList;
import java.util.HashMap;
@@ -41,9 +37,9 @@
import java.util.regex.Pattern;
/**
+ *
* Utilities for relational database connection and sql execution
* company: www.dtstack.com
- *
* @author huyifan_zju@
*/
public class DbUtil {
@@ -72,6 +68,9 @@ public class DbUtil {
*/
private static int NANOS_LENGTH = 19;
+ public static int NANOS_PART_LENGTH = 9;
+ private static int FORMAT_TIME_NANOS_LENGTH = 29;
+
/**
* Regex for splitting the JDBC URL, used to extract the connection parameters after the '?'
*/
@@ -93,16 +92,15 @@ public class DbUtil {
/**
* Obtain a JDBC connection (10s timeout)
- *
- * @param url url
- * @param username account name
- * @param password password
+ * @param url url
+ * @param username account name
+ * @param password password
* @return
* @throws SQLException
*/
private static Connection getConnectionInternal(String url, String username, String password) throws SQLException {
Connection dbConn;
- synchronized (ClassUtil.LOCK_STR) {
+ synchronized (ClassUtil.LOCK_STR){
DriverManager.setLoginTimeout(10);
// telnet
@@ -120,10 +118,9 @@ private static Connection getConnectionInternal(String url, String username, Str
/**
* Obtain a JDBC connection (retry up to 3 times)
- *
- * @param url url
- * @param username account name
- * @param password password
+ * @param url url
+ * @param username account name
+ * @param password password
* @return
* @throws SQLException
*/
@@ -136,8 +133,10 @@ public static Connection getConnection(String url, String username, String passw
for (int i = 0; i < MAX_RETRY_TIMES && failed; ++i) {
try {
dbConn = getConnectionInternal(url, username, password);
- dbConn.createStatement().execute("select 111");
- failed = false;
+ try (Statement statement = dbConn.createStatement()){
+ statement.execute("select 111");
+ failed = false;
+ }
} catch (Exception e) {
if (dbConn != null) {
dbConn.close();
@@ -157,10 +156,9 @@ public static Connection getConnection(String url, String username, String passw
/**
* Close connection resources
- *
- * @param rs ResultSet
- * @param stmt Statement
- * @param conn Connection
+ * @param rs ResultSet
+ * @param stmt Statement
+ * @param conn Connection
* @param commit
*/
public static void closeDbResources(ResultSet rs, Statement stmt, Connection conn, boolean commit) {
@@ -182,7 +180,7 @@ public static void closeDbResources(ResultSet rs, Statement stmt, Connection con
if (null != conn) {
try {
- if (commit) {
+ if(commit){
commit(conn);
}
@@ -195,76 +193,49 @@ public static void closeDbResources(ResultSet rs, Statement stmt, Connection con
/**
* Commit the transaction manually
- *
* @param conn Connection
*/
- public static void commit(Connection conn) {
+ public static void commit(Connection conn){
try {
- if (!conn.isClosed() && !conn.getAutoCommit()) {
+ if (!conn.isClosed() && !conn.getAutoCommit()){
conn.commit();
}
- } catch (SQLException e) {
+ } catch (SQLException e){
LOG.warn("commit error:{}", ExceptionUtil.getErrorMessage(e));
}
}
/**
* Execute a batch of SQL statements
- *
* @param dbConn Connection
* @param sqls list of SQL statements
*/
public static void executeBatch(Connection dbConn, List sqls) {
- if (sqls == null || sqls.size() == 0) {
+ if(sqls == null || sqls.size() == 0) {
return;
}
- try {
- Statement stmt = dbConn.createStatement();
- for (String sql : sqls) {
+ try (Statement stmt = dbConn.createStatement()) {
+ for(String sql : sqls) {
stmt.addBatch(sql);
}
stmt.executeBatch();
} catch (SQLException e) {
- throw new RuntimeException("execute batch sql error:{}", e);
+ throw new RuntimeException("execute batch sql error:{}",e);
} finally {
commit(dbConn);
}
}
- /**
- * Get the primary keys and unique indexes of a table
- *
- * @param table table name
- * @param dbConn database connection
- * @return
- * @throws SQLException
- */
- public static Map> getPrimaryOrUniqueKeys(String table, Connection dbConn) throws SQLException {
- Map> keyMap = new HashMap<>(16);
- DatabaseMetaData meta = dbConn.getMetaData();
- ResultSet rs = meta.getIndexInfo(null, null, table, true, false);
- while (rs.next()) {
- String pkName = rs.getString(6);
- String columnName = rs.getString(9);
- if (!keyMap.containsKey(pkName)) {
- keyMap.put(pkName, new ArrayList<>());
- }
- keyMap.get(pkName).add(columnName);
- }
- return keyMap;
- }
-
/**
* Encapsulate the channel order
- *
* @param channels
* @return
*/
- public static Object[][] getParameterValues(final int channels) {
+ public static Object[][] getParameterValues(final int channels){
ParameterValuesProvider provider = () -> {
Integer[][] parameters = new Integer[channels][];
- for (int i = 0; i < channels; ++i) {
+ for(int i = 0; i < channels; ++i) {
parameters[i] = new Integer[2];
parameters[i][0] = channels;
parameters[i][1] = i;
@@ -295,62 +266,20 @@ public static List analyzeColumnType(ResultSet resultSet){
return columnTypeList;
}
- /**
- * Set a value for a placeholder
- *
- * @param param parameter value
- * @param statement PreparedStatement
- * @param i placeholder position
- * @throws SQLException
- */
- public static void setParameterValue(Object param, PreparedStatement statement, int i) throws SQLException {
- if (param instanceof String) {
- statement.setString(i + 1, (String) param);
- } else if (param instanceof Long) {
- statement.setLong(i + 1, (Long) param);
- } else if (param instanceof Integer) {
- statement.setInt(i + 1, (Integer) param);
- } else if (param instanceof Double) {
- statement.setDouble(i + 1, (Double) param);
- } else if (param instanceof Boolean) {
- statement.setBoolean(i + 1, (Boolean) param);
- } else if (param instanceof Float) {
- statement.setFloat(i + 1, (Float) param);
- } else if (param instanceof BigDecimal) {
- statement.setBigDecimal(i + 1, (BigDecimal) param);
- } else if (param instanceof Byte) {
- statement.setByte(i + 1, (Byte) param);
- } else if (param instanceof Short) {
- statement.setShort(i + 1, (Short) param);
- } else if (param instanceof Date) {
- statement.setDate(i + 1, (Date) param);
- } else if (param instanceof Time) {
- statement.setTime(i + 1, (Time) param);
- } else if (param instanceof Timestamp) {
- statement.setTimestamp(i + 1, (Timestamp) param);
- } else if (param instanceof Array) {
- statement.setArray(i + 1, (Array) param);
- } else {
- //extends with other types if needed
- throw new IllegalArgumentException("open() failed. Parameter " + i + " of type " + param.getClass() + " is not handled (yet).");
- }
- }
-
/**
* Convert a Clob to a String
- *
- * @param obj clob
+ * @param obj clob
* @return
* @throws Exception
*/
- public static Object clobToString(Object obj) throws Exception {
+ public static Object clobToString(Object obj) throws Exception{
String dataStr;
- if (obj instanceof Clob) {
- Clob clob = (Clob) obj;
+ if(obj instanceof Clob){
+ Clob clob = (Clob)obj;
BufferedReader bf = new BufferedReader(clob.getCharacterStream());
StringBuilder stringBuilder = new StringBuilder();
String line;
- while ((line = bf.readLine()) != null) {
+ while ((line = bf.readLine()) != null){
stringBuilder.append(line);
}
dataStr = stringBuilder.toString();
@@ -363,34 +292,32 @@ public static Object clobToString(Object obj) throws Exception {
/**
* Get the nanosecond-precision time string
- *
- * @param timeStr
+ * @param timeStr 2020-03-23 11:03:22.000000000
* @return
*/
- public static String getNanosTimeStr(String timeStr) {
- if (timeStr.length() < 29) {
- timeStr += StringUtils.repeat("0", 29 - timeStr.length());
+ public static String getNanosTimeStr(String timeStr){
+ if(timeStr.length() < FORMAT_TIME_NANOS_LENGTH){
+ timeStr += StringUtils.repeat("0",FORMAT_TIME_NANOS_LENGTH - timeStr.length());
}
return timeStr;
}
/**
* Convert the boundary location time to the corresponding nanoseconds
- *
* @param startLocation boundary location (start/end)
* @return
*/
- public static int getNanos(long startLocation) {
+ public static int getNanos(long startLocation){
String timeStr = String.valueOf(startLocation);
int nanos;
- if (timeStr.length() == SECOND_LENGTH) {
+ if (timeStr.length() == SECOND_LENGTH){
nanos = 0;
- } else if (timeStr.length() == MILLIS_LENGTH) {
- nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH, MILLIS_LENGTH)) * 1000000;
- } else if (timeStr.length() == MICRO_LENGTH) {
- nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH, MICRO_LENGTH)) * 1000;
- } else if (timeStr.length() == NANOS_LENGTH) {
- nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH, NANOS_LENGTH));
+ } else if (timeStr.length() == MILLIS_LENGTH){
+ nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH,MILLIS_LENGTH)) * 1000000;
+ } else if (timeStr.length() == MICRO_LENGTH){
+ nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH,MICRO_LENGTH)) * 1000;
+ } else if (timeStr.length() == NANOS_LENGTH){
+ nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH,NANOS_LENGTH));
} else {
throw new IllegalArgumentException("Unknown time unit:startLocation=" + startLocation);
}
@@ -400,20 +327,19 @@ public static int getNanos(long startLocation) {
/**
* Convert the boundary location time to the corresponding milliseconds
- *
- * @param startLocation boundary location (start/end)
+ * @param startLocation boundary location (start/end)
* @return
*/
- public static long getMillis(long startLocation) {
+ public static long getMillis(long startLocation){
String timeStr = String.valueOf(startLocation);
long millisSecond;
- if (timeStr.length() == SECOND_LENGTH) {
+ if (timeStr.length() == SECOND_LENGTH){
millisSecond = startLocation * 1000;
- } else if (timeStr.length() == MILLIS_LENGTH) {
+ } else if (timeStr.length() == MILLIS_LENGTH){
millisSecond = startLocation;
- } else if (timeStr.length() == MICRO_LENGTH) {
+ } else if (timeStr.length() == MICRO_LENGTH){
millisSecond = startLocation / 1000;
- } else if (timeStr.length() == NANOS_LENGTH) {
+ } else if (timeStr.length() == NANOS_LENGTH){
millisSecond = startLocation / 1000000;
} else {
throw new IllegalArgumentException("Unknown time unit:startLocation=" + startLocation);
@@ -424,24 +350,23 @@ public static long getMillis(long startLocation) {
/**
* Format the JDBC connection URL
- *
- * @param dbUrl original JDBC URL
- * @param extParamMap additional parameters to append
- * @return formatted JDBC connection URL string
+ * @param dbUrl original JDBC URL
+ * @param extParamMap additional parameters to append
+ * @return formatted JDBC connection URL string
*/
- public static String formatJdbcUrl(String dbUrl, Map extParamMap) {
+ public static String formatJdbcUrl(String dbUrl, Map extParamMap){
String[] splits = DB_PATTERN.split(dbUrl);
- Map paramMap = new HashMap<>(16);
- if (splits.length > 1) {
+ Map paramMap = new HashMap<>(16);
+ if(splits.length > 1) {
String[] pairs = splits[1].split("&");
- for (String pair : pairs) {
+ for(String pair : pairs) {
String[] leftRight = pair.split("=");
paramMap.put(leftRight[0], leftRight[1]);
}
}
- if (!CollectionUtil.isNullOrEmpty(extParamMap)) {
+ if(!CollectionUtil.isNullOrEmpty(extParamMap)){
paramMap.putAll(extParamMap);
}
paramMap.put("useCursorFetch", "true");
@@ -450,8 +375,8 @@ public static String formatJdbcUrl(String dbUrl, Map extParamMap
StringBuffer sb = new StringBuffer(dbUrl.length() + 128);
sb.append(splits[0]).append("?");
int index = 0;
- for (Map.Entry entry : paramMap.entrySet()) {
- if (index != 0) {
+ for(Map.Entry entry : paramMap.entrySet()) {
+ if(index != 0) {
sb.append("&");
}
sb.append(entry.getKey()).append("=").append(entry.getValue());
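The getMillis/getNanos cleanup above keeps the same trick of inferring the unit of a numeric boundary location from its digit count. Only NANOS_LENGTH = 19 is visible in this hunk; the 10/13/16 values below for seconds, milliseconds and microseconds are assumed from the surrounding arithmetic. A standalone sketch:

public class EpochUnitSketch {
    private static final int SECOND_LENGTH = 10;   // e.g. 1584931402        (assumed)
    private static final int MILLIS_LENGTH = 13;   // e.g. 1584931402123     (assumed)
    private static final int MICRO_LENGTH  = 16;   //      1584931402123456  (assumed)
    private static final int NANOS_LENGTH  = 19;   // matches the constant in DbUtil

    // Normalize a boundary location of unknown precision to milliseconds.
    static long toMillis(long location) {
        int len = String.valueOf(location).length();
        if (len == SECOND_LENGTH) {
            return location * 1000;
        } else if (len == MILLIS_LENGTH) {
            return location;
        } else if (len == MICRO_LENGTH) {
            return location / 1000;
        } else if (len == NANOS_LENGTH) {
            return location / 1_000_000;
        }
        throw new IllegalArgumentException("Unknown time unit: startLocation=" + location);
    }

    public static void main(String[] args) {
        System.out.println(toMillis(1584931402L));           // 1584931402000
        System.out.println(toMillis(1584931402123456789L));  // 1584931402123
    }
}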
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java
index 4faa32e0ce..f4442111d7 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java
@@ -107,8 +107,8 @@ protected DistributedJdbcInputFormatBuilder getBuilder(){
throw new RuntimeException("子类必须覆盖getBuilder方法");
}
- protected List buildConnections(){
- List sourceList = new ArrayList<>(connectionConfigs.size());
+ protected ArrayList buildConnections(){
+ ArrayList sourceList = new ArrayList<>(connectionConfigs.size());
for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) {
String curUsername = (StringUtils.isBlank(connectionConfig.getUsername())) ? username : connectionConfig.getUsername();
String curPassword = (StringUtils.isBlank(connectionConfig.getPassword())) ? password : connectionConfig.getPassword();
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java
index 2d33a29161..6e9534b554 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java
@@ -30,7 +30,11 @@
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.types.Row;
-import java.io.*;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
@@ -65,7 +69,7 @@ public class DistributedJdbcInputFormat extends BaseRichInputFormat {
protected List descColumnTypeList;
- protected List sourceList;
+ protected ArrayList sourceList;
protected transient int sourceIndex;
@@ -197,7 +201,7 @@ protected void closeCurrentSource(){
@Override
protected void closeInternal() throws IOException {
-
+ closeCurrentSource();
}
@Override
@@ -208,7 +212,7 @@ public InputSplit[] createInputSplitsInternal(int minPart) throws IOException {
Object[][] parmeter = DbUtil.getParameterValues(numPartitions);
for (int j = 0; j < numPartitions; j++) {
DistributedJdbcInputSplit split = new DistributedJdbcInputSplit(j,numPartitions);
- List sourceCopy = deepCopyList(sourceList);
+ ArrayList sourceCopy = deepCopyList(sourceList);
for (int i = 0; i < sourceCopy.size(); i++) {
sourceCopy.get(i).setSplitByKey(true);
sourceCopy.get(i).setParameterValues(parmeter[j]);
@@ -221,7 +225,9 @@ public InputSplit[] createInputSplitsInternal(int minPart) throws IOException {
if (partNum == 0){
for (int i = 0; i < sourceList.size(); i++) {
DistributedJdbcInputSplit split = new DistributedJdbcInputSplit(i,numPartitions);
- split.setSourceList(Arrays.asList(sourceList.get(i)));
+ ArrayList arrayList = new ArrayList<>();
+ arrayList.add(sourceList.get(i));
+ split.setSourceList(arrayList);
inputSplits[i] = split;
}
} else {
@@ -250,7 +256,7 @@ public boolean reachedEnd() throws IOException {
return readNextRecord();
}
- public List deepCopyList(List src) throws IOException{
+ public ArrayList deepCopyList(ArrayList src) throws IOException{
try {
ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
ObjectOutputStream out = new ObjectOutputStream(byteOut);
@@ -258,7 +264,7 @@ public List deepCopyList(List src) throws IOException{
ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray());
ObjectInputStream in = new ObjectInputStream(byteIn);
- List dest = (List) in.readObject();
+ ArrayList dest = (ArrayList) in.readObject();
return dest;
} catch (Exception e){
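deepCopyList above uses Java serialization to hand each input split its own independent copy of the source list, which is presumably why the field type is narrowed to the concrete, serializable ArrayList. A minimal generic sketch of the technique (assumes the element type implements Serializable):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;

public class DeepCopySketch {
    // Serialize the list to bytes and read it back: every element is cloned, nothing is shared.
    @SuppressWarnings("unchecked")
    static <T extends Serializable> ArrayList<T> deepCopy(ArrayList<T> src) throws IOException {
        try (ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
             ObjectOutputStream out = new ObjectOutputStream(byteOut)) {
            out.writeObject(src);
            out.flush();
            try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(byteOut.toByteArray()))) {
                return (ArrayList<T>) in.readObject();
            } catch (ClassNotFoundException e) {
                throw new IOException("deep copy failed", e);
            }
        }
    }
}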
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java
index 28f2c2c5f1..0c8b21c07f 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java
@@ -24,6 +24,7 @@
import com.dtstack.flinkx.reader.MetaColumn;
import org.apache.commons.lang.StringUtils;
+import java.util.ArrayList;
import java.util.List;
/**
@@ -65,7 +66,7 @@ public void setSplitKey(String splitKey){
format.splitKey = splitKey;
}
- public void setSourceList(List sourceList){
+ public void setSourceList(ArrayList sourceList){
format.sourceList = sourceList;
}
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputSplit.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputSplit.java
index 724f9532d5..43df7f5c88 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputSplit.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputSplit.java
@@ -32,7 +32,7 @@
*/
public class DistributedJdbcInputSplit extends GenericInputSplit {
- private List sourceList;
+ private ArrayList sourceList;
public DistributedJdbcInputSplit(int partitionNumber, int totalNumberOfPartitions) {
super(partitionNumber, totalNumberOfPartitions);
@@ -54,11 +54,11 @@ public void addSource(DataSource source){
this.sourceList.add(source);
}
- public List getSourceList() {
+ public ArrayList getSourceList() {
return sourceList;
}
- public void setSourceList(List sourceList) {
+ public void setSourceList(ArrayList sourceList) {
this.sourceList = sourceList;
}
}
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java
index 706600d712..d71fa41d16 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java
@@ -27,12 +27,8 @@
import com.dtstack.flinkx.rdb.type.TypeConverterInterface;
import com.dtstack.flinkx.rdb.util.DbUtil;
import com.dtstack.flinkx.reader.MetaColumn;
-import com.dtstack.flinkx.util.*;
import com.dtstack.flinkx.restore.FormatState;
-import com.dtstack.flinkx.util.ClassUtil;
-import com.dtstack.flinkx.util.DateUtil;
-import com.dtstack.flinkx.util.StringUtil;
-import com.dtstack.flinkx.util.UrlUtil;
+import com.dtstack.flinkx.util.*;
import com.google.gson.Gson;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.accumulators.LongMaximum;
@@ -48,7 +44,9 @@
import java.io.IOException;
import java.sql.*;
import java.util.Date;
-import java.util.*;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
@@ -161,7 +159,9 @@ public void openInternal(InputSplit inputSplit) throws IOException {
initMetric(inputSplit);
String startLocation = incrementConfig.getStartLocation();
if (incrementConfig.isPolling()) {
- endLocationAccumulator.add(Long.parseLong(startLocation));
+ if (StringUtils.isNotEmpty(startLocation)) {
+ endLocationAccumulator.add(Long.parseLong(startLocation));
+ }
isTimestamp = "timestamp".equalsIgnoreCase(incrementConfig.getColumnType());
} else if ((incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc())) {
getMaxValue(inputSplit);
@@ -252,6 +252,12 @@ public Row nextRecordInternal(Row row) throws IOException {
boolean isUpdateLocation = incrementConfig.isPolling() || (incrementConfig.isIncrement() && !incrementConfig.isUseMaxFunc());
if (isUpdateLocation) {
Object incrementVal = resultSet.getObject(incrementConfig.getColumnName());
+ if(incrementVal != null) {
+ if((incrementVal instanceof java.util.Date
+ || incrementVal.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) {
+ incrementVal = resultSet.getTimestamp(incrementConfig.getColumnName());
+ }
+ }
String location;
if(incrementConfig.isPolling()){
location = String.valueOf(incrementVal);
@@ -371,7 +377,7 @@ public String getMaxValueFromApi(){
}
String url = monitorUrls;
- if (monitorUrls.startsWith("http")) {
+ if (monitorUrls.startsWith(ConstantValue.PROTOCOL_HTTP)) {
url = String.format("%s/jobs/%s/accumulators", monitorUrls, jobId);
}
@@ -707,10 +713,10 @@ private String getLocation(String columnType, Object columnVal) {
long time = ((Timestamp) columnVal).getTime() / 1000;
String nanosStr = String.valueOf(((Timestamp) columnVal).getNanos());
- if (nanosStr.length() == 9) {
+ if (nanosStr.length() == DbUtil.NANOS_PART_LENGTH) {
location = time + nanosStr;
} else {
- String fillZeroStr = StringUtils.repeat("0", 9 - nanosStr.length());
+ String fillZeroStr = StringUtils.repeat("0", DbUtil.NANOS_PART_LENGTH - nanosStr.length());
location = time + fillZeroStr + nanosStr;
}
} else {
@@ -808,7 +814,7 @@ protected void executeQuery(String startLocation) throws SQLException {
queryForPolling(startLocation);
}
} else {
- Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
+ statement = dbConn.createStatement(resultSetType, resultSetConcurrency);
statement.setFetchSize(fetchSize);
statement.setQueryTimeout(queryTimeOut);
resultSet = statement.executeQuery(querySql);
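DbUtil.NANOS_PART_LENGTH above names the 9-digit nanosecond field that getLocation appends to the epoch seconds when the incremental column is a timestamp. A small sketch of that padding:

import java.sql.Timestamp;
import org.apache.commons.lang3.StringUtils;

public class TimestampLocationSketch {
    private static final int NANOS_PART_LENGTH = 9;

    // Epoch seconds concatenated with a left-zero-padded 9-digit nanosecond part.
    static String toLocation(Timestamp ts) {
        long seconds = ts.getTime() / 1000;
        String nanos = String.valueOf(ts.getNanos());
        String padding = StringUtils.repeat("0", NANOS_PART_LENGTH - nanos.length());
        return seconds + padding + nanos;
    }

    public static void main(String[] args) {
        Timestamp ts = Timestamp.valueOf("2020-03-23 11:03:22.000123456");
        System.out.println(toLocation(ts)); // e.g. 1584932602000123456 (seconds part depends on the JVM time zone)
    }
}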
diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java
index cb48f98b59..ae788328f2 100644
--- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java
+++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java
@@ -18,6 +18,7 @@
package com.dtstack.flinkx.rdb.inputformat;
+import com.dtstack.flinkx.constants.ConstantValue;
import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder;
import com.dtstack.flinkx.rdb.DatabaseInterface;
import com.dtstack.flinkx.rdb.datareader.IncrementConfig;
@@ -128,6 +129,10 @@ protected void checkFormat() {
if (StringUtils.isEmpty(format.splitKey) && format.numPartitions > 1){
throw new IllegalArgumentException("Must specify the split column when the channel is greater than 1");
}
+
+ if (format.fetchSize > ConstantValue.MAX_BATCH_SIZE) {
+ throw new IllegalArgumentException("批量读取条数必须小于[200000]条");
+ }
}
}
diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java
index 2a94afb8ff..a07eb3473c 100644
--- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java
+++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java
@@ -32,7 +32,15 @@
import java.util.List;
import java.util.Map;
-import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.*;
+import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_BATCH_SIZE;
+import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_FULL_COLUMN;
+import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_INSERT_SQL_MODE;
+import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_PASSWORD;
+import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_POST_SQL;
+import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_PRE_SQL;
+import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_UPDATE_KEY;
+import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_USERNAME;
+import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_WRITE_MODE;
/**
* The Writer plugin for any database that can be connected via JDBC.
diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java
index bd2dc5afe1..47ab767ef3 100644
--- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java
+++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java
@@ -30,12 +30,19 @@
import com.google.gson.Gson;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.ObjectUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.sql.*;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -107,6 +114,7 @@ public class JdbcOutputFormat extends BaseRichOutputFormat {
"AND t.table_name = '%s'";
protected final static String CONN_CLOSE_ERROR_MSG = "No operations allowed";
+ protected static List STRING_TYPES = Arrays.asList("CHAR", "VARCHAR","TINYBLOB","TINYTEXT","BLOB","TEXT", "MEDIUMBLOB", "MEDIUMTEXT", "LONGBLOB", "LONGTEXT");
protected PreparedStatement prepareTemplates() throws SQLException {
if(CollectionUtils.isEmpty(fullColumn)) {
@@ -202,7 +210,13 @@ protected void writeSingleRecordInternal(Row row) throws WriteRecordException {
int index = 0;
try {
for (; index < row.getArity(); index++) {
- preparedStatement.setObject(index+1,getField(row,index));
+ Object object = row.getField(index);
+ if( object instanceof String && StringUtils.isBlank((String) object)){
+ if(!STRING_TYPES.contains(columnType.get(index))){
+ object = null;
+ }
+ }
+ preparedStatement.setObject(index+1, object);
}
preparedStatement.execute();
@@ -233,8 +247,14 @@ protected String recordConvertDetailErrorMessage(int pos, Row row) {
protected void writeMultipleRecordsInternal() throws Exception {
try {
for (Row row : rows) {
- for (int j = 0; j < row.getArity(); ++j) {
- preparedStatement.setObject(j + 1, getField(row, j));
+ for (int index = 0; index < row.getArity(); index++) {
+ Object object = row.getField(index);
+ if( object instanceof String && StringUtils.isBlank((String) object)){
+ if(!STRING_TYPES.contains(columnType.get(index))){
+ object = null;
+ }
+ }
+ preparedStatement.setObject(index+1, object);
}
preparedStatement.addBatch();
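The writer change above maps blank strings to SQL NULL for any column whose declared type is not in STRING_TYPES, so empty values no longer break numeric or date columns while genuinely textual columns keep the blanks. A compact sketch of the rule (simplified; the type list is abbreviated):

import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang.StringUtils;

public class BlankToNullSketch {
    static final List<String> STRING_TYPES = Arrays.asList(
            "CHAR", "VARCHAR", "TEXT", "BLOB", "LONGTEXT");

    // Blank strings stay as-is for string columns but become null everywhere else.
    static Object normalize(Object value, String columnType) {
        if (value instanceof String && StringUtils.isBlank((String) value)
                && !STRING_TYPES.contains(columnType)) {
            return null;
        }
        return value;
    }

    public static void main(String[] args) {
        System.out.println(normalize("  ", "INT"));     // null
        System.out.println(normalize("  ", "VARCHAR")); // prints two spaces
    }
}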
diff --git a/flinkx-redis/flinkx-redis-core/src/main/java/com/dtstack/flinkx/redis/JedisUtil.java b/flinkx-redis/flinkx-redis-core/src/main/java/com/dtstack/flinkx/redis/JedisUtil.java
index 31f40ecb64..9aed6f527b 100644
--- a/flinkx-redis/flinkx-redis-core/src/main/java/com/dtstack/flinkx/redis/JedisUtil.java
+++ b/flinkx-redis/flinkx-redis-core/src/main/java/com/dtstack/flinkx/redis/JedisUtil.java
@@ -19,13 +19,18 @@
package com.dtstack.flinkx.redis;
import com.dtstack.flinkx.util.TelnetUtil;
-import redis.clients.jedis.*;
+import redis.clients.jedis.Jedis;
+import redis.clients.jedis.JedisPool;
+import redis.clients.jedis.JedisPoolConfig;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import static com.dtstack.flinkx.redis.RedisConfigKeys.*;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_DB;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_HOST_PORT;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_PASSWORD;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_TIMEOUT;
/**
* Utilities for redis database connection
diff --git a/flinkx-redis/flinkx-redis-writer/pom.xml b/flinkx-redis/flinkx-redis-writer/pom.xml
index 4a92436c6b..891f05acf5 100644
--- a/flinkx-redis/flinkx-redis-writer/pom.xml
+++ b/flinkx-redis/flinkx-redis-writer/pom.xml
@@ -56,8 +56,12 @@
shade.rediswriter.io.netty
- com.google
- shade.rediswriter.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormat.java b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormat.java
index 7fe2fe9358..2fc85d1fd3 100644
--- a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormat.java
+++ b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormat.java
@@ -30,9 +30,15 @@
import java.io.IOException;
import java.text.SimpleDateFormat;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Properties;
-import static com.dtstack.flinkx.redis.RedisConfigKeys.*;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_DB;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_HOST_PORT;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_PASSWORD;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_TIMEOUT;
/**
* OutputFormat for writing data to redis database.
@@ -70,6 +76,8 @@ public class RedisOutputFormat extends BaseRichOutputFormat {
private static final int CRITICAL_TIME = 60 * 60 * 24 * 30;
+ private static final int KEY_VALUE_SIZE = 2;
+
@Override
public void configure(Configuration parameters) {
super.configure(parameters);
@@ -138,7 +146,7 @@ private void processTimeFormat(Row row){
}
private List getFieldAndValue(Row row){
- if(row.getArity() - keyIndexes.size() != 2){
+ if(row.getArity() - keyIndexes.size() != KEY_VALUE_SIZE){
throw new IllegalArgumentException("Each row record can have only one pair of attributes and values except key");
}
@@ -185,7 +193,7 @@ private String concatKey(Row row){
@Override
protected void writeMultipleRecordsInternal() throws Exception {
- // Still not supported
+ notSupportBatchWrite("RedisWriter");
}
@Override
diff --git a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormatBuilder.java b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormatBuilder.java
index e9480525ae..3b291ece1c 100644
--- a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormatBuilder.java
+++ b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormatBuilder.java
@@ -95,5 +95,7 @@ protected void checkFormat() {
if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){
throw new UnsupportedOperationException("This plugin not support restore from failed state");
}
+
+ notSupportBatchWrite("RedisWriter");
}
}
diff --git a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisWriter.java b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisWriter.java
index c64e2a3b29..fa33f2c739 100644
--- a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisWriter.java
+++ b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisWriter.java
@@ -31,7 +31,18 @@
import java.util.ArrayList;
import java.util.List;
-import static com.dtstack.flinkx.redis.RedisConfigKeys.*;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_BATCH_SIZE;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_DATE_FORMAT;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_DB;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_EXPIRE_TIME;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_HOST_PORT;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_KEY_FIELD_DELIMITER;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_KEY_INDEXES;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_MODE;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_PASSWORD;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_TIMEOUT;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_TYPE;
+import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_VALUE_FIELD_DELIMITER;
/**
* The writer plugin for redis database
diff --git a/flinkx-restapi/flinkx-restapi-core/pom.xml b/flinkx-restapi/flinkx-restapi-core/pom.xml
new file mode 100644
index 0000000000..742c77488d
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-core/pom.xml
@@ -0,0 +1,20 @@
+
+
+
+ flinkx-restapi
+ com.dtstack.flinkx
+ 1.6
+
+ 4.0.0
+
+ flinkx-restapi-core
+
+
+ org.apache.httpcomponents
+ httpclient
+ ${http.version}
+
+
+
\ No newline at end of file
diff --git a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpMethod.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpMethod.java
new file mode 100644
index 0000000000..338ba84b1c
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpMethod.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.flinkx.restapi.common;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/13
+ */
+public enum HttpMethod {
+ // Supported HTTP request methods
+ GET,
+ POST,
+ PUT,
+ PATCH,
+ DELETE,
+ COPY,
+ HEAD,
+ OPTIONS,
+ LINK,
+ UNLINK,
+ PURGE,
+ LOCK,
+ UNLOCK,
+ PROPFIND,
+ VIEW
+ ;
+
+}
diff --git a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpUtil.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpUtil.java
new file mode 100644
index 0000000000..b37770ecb6
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpUtil.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.common;
+
+import com.dtstack.flinkx.util.GsonUtil;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.methods.HttpRequestBase;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/16
+ */
+public class HttpUtil {
+ protected static final Logger LOG = LoggerFactory.getLogger(HttpUtil.class);
+ private static final int COUNT = 32;
+ private static final int TOTAL_COUNT = 1000;
+ private static final int TIME_OUT = 5000;
+ private static final int EXECUTION_COUNT = 5;
+
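+ /**
+ * Builds a CloseableHttpClient backed by a pooled connection manager,
+ * with custom retry handling and fixed connect/request/socket timeouts.
+ */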
+ public static CloseableHttpClient getHttpClient() {
+ // Configure the custom service-unavailable retry strategy
+ MyServiceUnavailableRetryStrategy strategy = new MyServiceUnavailableRetryStrategy
+ .Builder()
+ .executionCount(EXECUTION_COUNT)
+ .retryInterval(1000)
+ .build();
+ // Configure the custom request retry handler
+ MyHttpRequestRetryHandler retryHandler = new MyHttpRequestRetryHandler
+ .Builder()
+ .executionCount(EXECUTION_COUNT)
+ .build();
+ // Configure connection, request and socket timeouts
+ RequestConfig requestConfig = RequestConfig.custom()
+ .setConnectTimeout(TIME_OUT)
+ .setConnectionRequestTimeout(TIME_OUT)
+ .setSocketTimeout(TIME_OUT)
+ .build();
+ // Configure the pooled HTTP connection manager
+ PoolingHttpClientConnectionManager pcm = new PoolingHttpClientConnectionManager();
+ pcm.setDefaultMaxPerRoute(COUNT);
+ pcm.setMaxTotal(TOTAL_COUNT);
+
+ return HttpClientBuilder.create()
+ .setServiceUnavailableRetryStrategy(strategy)
+ .setRetryHandler(retryHandler)
+ .setDefaultRequestConfig(requestConfig)
+ .setConnectionManager(pcm)
+ .build();
+ }
+
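+ /**
+ * Creates a GET or POST request for the given url and attaches the supplied
+ * headers; for POST the request body map is serialized to JSON.
+ */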
+ public static HttpRequestBase getRequest(String method,
+ Map requestBody,
+ Map header,
+ String url) {
+ LOG.debug("current request url: {} current method:{} \n", url, method);
+ HttpRequestBase request = null;
+
+ if (HttpMethod.GET.name().equalsIgnoreCase(method)) {
+ request = new HttpGet(url);
+ } else if (HttpMethod.POST.name().equalsIgnoreCase(method)) {
+ HttpPost post = new HttpPost(url);
+ post.setEntity(getEntityData(requestBody));
+ request = post;
+ } else {
+ throw new RuntimeException("Unsupported method:" + method);
+ }
+
+ for (Map.Entry entry : header.entrySet()) {
+ request.addHeader(entry.getKey(), entry.getValue());
+ }
+ return request;
+ }
+
+ public static void closeClient(CloseableHttpClient httpClient) {
+ try {
+ httpClient.close();
+ } catch (IOException e) {
+ throw new RuntimeException("close client error");
+ }
+ }
+
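+ /**
+ * Serializes the body map to a UTF-8 encoded JSON StringEntity.
+ */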
+ public static StringEntity getEntityData(Map body) {
+ StringEntity stringEntity = new StringEntity(GsonUtil.GSON.toJson(body), StandardCharsets.UTF_8);
+ stringEntity.setContentEncoding(StandardCharsets.UTF_8.name());
+ return stringEntity;
+ }
+}
diff --git a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyHttpRequestRetryHandler.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyHttpRequestRetryHandler.java
new file mode 100644
index 0000000000..e0689cb658
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyHttpRequestRetryHandler.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.common;
+
+import org.apache.http.HttpEntityEnclosingRequest;
+import org.apache.http.HttpRequest;
+import org.apache.http.NoHttpResponseException;
+import org.apache.http.client.HttpRequestRetryHandler;
+import org.apache.http.client.protocol.HttpClientContext;
+import org.apache.http.protocol.HttpContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.net.ssl.SSLException;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.net.UnknownHostException;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/12
+ */
+public class MyHttpRequestRetryHandler implements HttpRequestRetryHandler {
+ protected static final Logger LOG = LoggerFactory.getLogger(MyHttpRequestRetryHandler.class);
+
+ private int executionMaxCount;
+
+ public MyHttpRequestRetryHandler(Builder builder) {
+ this.executionMaxCount = builder.executionMaxCount;
+ }
+
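+ /**
+ * Retries transient I/O failures (timeouts, unknown host, SSL and
+ * no-response errors) up to the configured maximum attempt count;
+ * other failures are retried only for idempotent requests.
+ */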
+ @Override
+ public boolean retryRequest(IOException exception, int executionCount, HttpContext context) {
+ LOG.info("第" + executionCount + "次重试");
+
+ if (executionCount >= this.executionMaxCount) {
+ // Do not retry if over max retry count
+ return false;
+ }
+ if (exception instanceof InterruptedIOException) {
+ // Timeout
+ return true;
+ }
+ if (exception instanceof UnknownHostException) {
+ // Unknown host
+ return true;
+ }
+ if (exception instanceof SSLException) {
+ // SSL handshake exception
+ return true;
+ }
+ if (exception instanceof NoHttpResponseException) {
+ // No response
+ return true;
+ }
+
+ HttpClientContext clientContext = HttpClientContext.adapt(context);
+ HttpRequest request = clientContext.getRequest();
+ boolean idempotent = !(request instanceof HttpEntityEnclosingRequest);
+ // Retry only if the request is considered idempotent
+ return idempotent;
+ }
+
+
+ public static final class Builder {
+ private int executionMaxCount;
+
+ public Builder() {
+ executionMaxCount = 5;
+ }
+
+ public Builder executionCount(int executionCount) {
+ this.executionMaxCount = executionCount;
+ return this;
+ }
+
+ public MyHttpRequestRetryHandler build() {
+ return new MyHttpRequestRetryHandler(this);
+ }
+ }
+}
diff --git a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyServiceUnavailableRetryStrategy.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyServiceUnavailableRetryStrategy.java
new file mode 100644
index 0000000000..c215926588
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyServiceUnavailableRetryStrategy.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.common;
+
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ServiceUnavailableRetryStrategy;
+import org.apache.http.protocol.HttpContext;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/12
+ * Custom HttpClient retry strategy: defaults to 5 retries with a 2s interval.
+ */
+public class MyServiceUnavailableRetryStrategy implements ServiceUnavailableRetryStrategy {
+ private int executionCount;
+ private long retryInterval;
+
+ public MyServiceUnavailableRetryStrategy(Builder builder) {
+ this.executionCount = builder.executionCount;
+ this.retryInterval = builder.retryInterval;
+ }
+
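+ /**
+ * Retries the request while the response status code is not 200 and the
+ * attempt count is below the configured maximum.
+ */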
+ @Override
+ public boolean retryRequest(HttpResponse httpResponse, int executionCount, HttpContext httpContext) {
+ int successCode = 200;
+ return httpResponse.getStatusLine().getStatusCode() != successCode
+ && executionCount < this.executionCount;
+ }
+
+ @Override
+ public long getRetryInterval() {
+ return this.retryInterval;
+ }
+
+ public static final class Builder {
+ private int executionCount;
+ private long retryInterval;
+
+ public Builder() {
+ executionCount = 5;
+ retryInterval = 2000;
+ }
+
+ public Builder executionCount(int executionCount) {
+ this.executionCount = executionCount;
+ return this;
+ }
+
+ public Builder retryInterval(long retryInterval) {
+ this.retryInterval = retryInterval;
+ return this;
+ }
+
+ public MyServiceUnavailableRetryStrategy build() {
+ return new MyServiceUnavailableRetryStrategy(this);
+ }
+ }
+}
diff --git a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/RestapiKeys.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/RestapiKeys.java
new file mode 100644
index 0000000000..8821e3777c
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/RestapiKeys.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.common;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/19
+ */
+public class RestapiKeys {
+ public static final String KEY_METHOD = "method";
+ public static final String KEY_HEADER = "header";
+ public static final String KEY_BODY = "body";
+ public static final String KEY_PARAMS = "params";
+ public static final String KEY_COLUMN = "column";
+ public static final String KEY_URL = "url";
+ public static final String KEY_BATCH_INTERVAL = "batchInterval";
+}
diff --git a/flinkx-restapi/flinkx-restapi-reader/pom.xml b/flinkx-restapi/flinkx-restapi-reader/pom.xml
new file mode 100644
index 0000000000..8e27c23493
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-reader/pom.xml
@@ -0,0 +1,99 @@
+
+
+
+ flinkx-restapi
+ com.dtstack.flinkx
+ 1.6
+
+ 4.0.0
+
+ flinkx-restapi-reader
+
+
+
+ com.dtstack.flinkx
+ flinkx-restapi-core
+ 1.6
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 3.1.0
+
+
+ package
+
+ shade
+
+
+ false
+
+
+ org.slf4j:slf4j-api
+ log4j:log4j
+ ch.qos.logback:*
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+ io.netty
+ shade.restapireader.io.netty
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
+
+
+
+
+
+
+ maven-antrun-plugin
+ 1.2
+
+
+ copy-resources
+
+ package
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormat.java b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormat.java
new file mode 100644
index 0000000000..59c6b535de
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormat.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.inputformat;
+
+import com.dtstack.flinkx.inputformat.BaseRichInputFormat;
+import com.dtstack.flinkx.restapi.common.HttpUtil;
+import com.dtstack.flinkx.util.GsonUtil;
+import org.apache.flink.core.io.GenericInputSplit;
+import org.apache.flink.core.io.InputSplit;
+import org.apache.flink.types.Row;
+import org.apache.http.HttpEntity;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpUriRequest;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.util.EntityUtils;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/12
+ */
+public class RestapiInputFormat extends BaseRichInputFormat {
+
+ protected String url;
+
+ protected String method;
+
+ protected transient CloseableHttpClient httpClient;
+
+ protected Map header;
+
+ protected Map entityDataToMap;
+
+ protected boolean getData;
+
+ @Override
+ public void openInputFormat() throws IOException {
+ super.openInputFormat();
+ httpClient = HttpUtil.getHttpClient();
+ }
+
+ @Override
+ public void closeInputFormat() {
+ HttpUtil.closeClient(httpClient);
+ }
+
+
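+ /**
+ * Executes the configured HTTP request once and parses the JSON response
+ * body into a map that is later emitted as a single row.
+ */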
+ @Override
+ @SuppressWarnings("unchecked")
+ protected void openInternal(InputSplit inputSplit) throws IOException {
+ HttpUriRequest request = HttpUtil.getRequest(method, null, header, url);
+ try {
+ CloseableHttpResponse httpResponse = httpClient.execute(request);
+ HttpEntity entity = httpResponse.getEntity();
+ if (entity != null) {
+ String entityData = EntityUtils.toString(entity);
+ entityDataToMap = GsonUtil.GSON.fromJson(entityData, Map.class);
+ getData = true;
+ } else {
+ throw new RuntimeException("entity is null");
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("get entity error");
+ }
+ }
+
+ @Override
+ protected InputSplit[] createInputSplitsInternal(int minNumSplits) throws Exception {
+ InputSplit[] inputSplits = new InputSplit[minNumSplits];
+ for (int i = 0; i < minNumSplits; i++) {
+ inputSplits[i] = new GenericInputSplit(i, minNumSplits);
+ }
+ return inputSplits;
+ }
+
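+ /**
+ * Emits the parsed response map as a single-field row and marks the
+ * split as consumed so that reachedEnd() returns true.
+ */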
+ @Override
+ protected Row nextRecordInternal(Row row) throws IOException {
+ row = new Row(1);
+ row.setField(0, entityDataToMap);
+ getData = false;
+ return row;
+ }
+
+ @Override
+ protected void closeInternal() throws IOException {
+ }
+
+ @Override
+ public boolean reachedEnd() throws IOException {
+ return !getData;
+ }
+}
diff --git a/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormatBuilder.java b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormatBuilder.java
new file mode 100644
index 0000000000..ac636b4038
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormatBuilder.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.inputformat;
+
+import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder;
+
+import java.util.Map;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/12
+ */
+public class RestapiInputFormatBuilder extends BaseRichInputFormatBuilder {
+ protected RestapiInputFormat format;
+
+ public RestapiInputFormatBuilder() {
+ super.format = format = new RestapiInputFormat();
+ }
+
+ public void setUrl(String url) { this.format.url = url; }
+ public void setHeader(Map header) { this.format.header = header; }
+ public void setMethod(String method) { this.format.method = method; }
+
+ @Override
+ protected void checkFormat() {
+ if (format.url == null || format.url.isEmpty()) {
+ throw new IllegalArgumentException("the url parameter is required");
+ }
+ if (format.method == null || format.method.isEmpty()) {
+ throw new IllegalArgumentException("the method parameter is required");
+ }
+ }
+}
diff --git a/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/reader/RestapiReader.java b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/reader/RestapiReader.java
new file mode 100644
index 0000000000..162de36054
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/reader/RestapiReader.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.reader;
+
+import com.dtstack.flinkx.config.DataTransferConfig;
+import com.dtstack.flinkx.config.ReaderConfig;
+import com.dtstack.flinkx.reader.BaseDataReader;
+import com.dtstack.flinkx.restapi.inputformat.RestapiInputFormatBuilder;
+import com.google.common.collect.Maps;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.types.Row;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/12
+ */
+public class RestapiReader extends BaseDataReader {
+
+ private String url;
+
+ private String method;
+
+ private Map header = Maps.newHashMap();
+
+ private ArrayList> temp;
+
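+ /**
+ * Reads url, method and header entries from the reader parameters; the
+ * header is configured as a list of maps that are merged into one map here.
+ */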
+ @SuppressWarnings("unchecked")
+ public RestapiReader(DataTransferConfig config, StreamExecutionEnvironment env) {
+ super(config, env);
+ ReaderConfig readerConfig = config.getJob().getContent().get(0).getReader();
+
+ url = readerConfig.getParameter().getStringVal("url");
+ method = readerConfig.getParameter().getStringVal("method");
+ temp = (ArrayList>) readerConfig.getParameter().getVal("header");
+ if (temp != null) {
+ for (Map map : temp) {
+ header.putAll(map);
+ }
+ }
+ }
+
+ @Override
+ public DataStream readData() {
+ RestapiInputFormatBuilder builder = new RestapiInputFormatBuilder();
+
+ builder.setHeader(header);
+ builder.setMethod(method);
+ builder.setUrl(url);
+
+ return createInput(builder.finish());
+ }
+}
diff --git a/flinkx-restapi/flinkx-restapi-writer/pom.xml b/flinkx-restapi/flinkx-restapi-writer/pom.xml
new file mode 100644
index 0000000000..00a801c2b1
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-writer/pom.xml
@@ -0,0 +1,99 @@
+
+
+
+ flinkx-restapi
+ com.dtstack.flinkx
+ 1.6
+
+ 4.0.0
+
+ flinkx-restapi-writer
+
+
+
+ com.dtstack.flinkx
+ flinkx-restapi-core
+ 1.6
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 3.1.0
+
+
+ package
+
+ shade
+
+
+ false
+
+
+ org.slf4j:slf4j-api
+ log4j:log4j
+ ch.qos.logback:*
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+ io.netty
+ shade.restapiwriter.io.netty
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
+
+
+
+
+
+
+ maven-antrun-plugin
+ 1.2
+
+
+ copy-resources
+
+ package
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormat.java b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormat.java
new file mode 100644
index 0000000000..ce96f57a74
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormat.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.outputformat;
+
+import com.dtstack.flinkx.exception.WriteRecordException;
+import com.dtstack.flinkx.outputformat.BaseRichOutputFormat;
+import com.dtstack.flinkx.restapi.common.HttpUtil;
+import com.dtstack.flinkx.util.GsonUtil;
+import com.google.common.collect.Maps;
+import org.apache.flink.types.Row;
+import org.apache.http.HttpStatus;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpRequestBase;
+import org.apache.http.impl.client.CloseableHttpClient;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/12
+ * Currently only raw rows and rows mapped to configured column names are handled; other cases are not yet supported.
+ */
+public class RestapiOutputFormat extends BaseRichOutputFormat {
+
+ protected String url;
+
+ protected String method;
+
+ protected ArrayList column;
+
+ protected Map params;
+
+ protected Map body;
+
+ protected Map header;
+
+ @Override
+ protected void openInternal(int taskNumber, int numTasks) throws IOException {
+ // Nothing to do
+ }
+
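+ /**
+ * Wraps a single row (plus any configured params) into the request body
+ * and sends it with a freshly built http client, which is always closed.
+ */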
+ @Override
+ protected void writeSingleRecordInternal(Row row) throws WriteRecordException {
+ LOG.info("start write single record");
+ CloseableHttpClient httpClient = HttpUtil.getHttpClient();
+ int index = 0;
+ Map requestBody = Maps.newHashMap();
+ Object dataRow;
+ try {
+ dataRow = getDataFromRow(row, column);
+ if (!params.isEmpty()) {
+ Iterator iterator = params.entrySet().iterator();
+ while (iterator.hasNext()) {
+ Map.Entry entry = (Map.Entry) iterator.next();
+ body.put((String) entry.getKey(), entry.getValue());
+ }
+ }
+ body.put("data", dataRow);
+ requestBody.put("json", body);
+ LOG.debug("当前发送的数据为:{}", GsonUtil.GSON.toJson(requestBody));
+ sendRequest(httpClient, requestBody, method, header, url);
+ } catch (Exception e) {
+ requestErrorMessage(e, index, row);
+ } finally {
+ // Always close the client, whether or not the request succeeded
+ HttpUtil.closeClient(httpClient);
+ }
+ }
+
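+ /**
+ * Collects the buffered rows into one "data" list, merges the configured
+ * params into the body and sends them in a single request.
+ */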
+ @Override
+ protected void writeMultipleRecordsInternal() throws Exception {
+ LOG.info("start write multiple records");
+ try {
+ CloseableHttpClient httpClient = HttpUtil.getHttpClient();
+ List dataRow = new ArrayList<>();
+ Map requestBody = Maps.newHashMap();
+ for (Row row : rows) {
+ dataRow.add(getDataFromRow(row, column));
+ }
+ if (!params.isEmpty()) {
+ Iterator iterator = params.entrySet().iterator();
+ while (iterator.hasNext()) {
+ Map.Entry entry = (Map.Entry) iterator.next();
+ body.put((String) entry.getKey(), entry.getValue());
+ }
+ }
+ body.put("data", dataRow);
+ requestBody.put("json", body);
+ LOG.debug("当前发送的数据为:{}", GsonUtil.GSON.toJson(requestBody));
+ sendRequest(httpClient, requestBody, method, header, url);
+ } catch (Exception e) {
+ LOG.warn("write record error !", e);
+ }
+ }
+
+ private void requestErrorMessage(Exception e, int index, Row row) {
+ if (index < row.getArity()) {
+ recordConvertDetailErrorMessage(index, row);
+ LOG.warn("添加脏数据:" + row.getField(index));
+ }
+ }
+
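+ /**
+ * Maps row fields to the configured column names and returns them as a
+ * JSON string; without columns the first field is returned as-is.
+ */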
+ private Object getDataFromRow(Row row, List column) throws IOException {
+ Map columnData = Maps.newHashMap();
+ int index = 0;
+ if (!column.isEmpty()) {
+ // If columns are configured, map each field value to its column name
+ for (; index < row.getArity(); index++) {
+ columnData.put(column.get(index), row.getField(index));
+ }
+ return GsonUtil.GSON.toJson(columnData);
+ } else {
+ return row.getField(index);
+ }
+ }
+
+
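+ /**
+ * Builds the request via HttpUtil and executes it, logging a warning if
+ * the final status code is not 200.
+ */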
+ private void sendRequest(CloseableHttpClient httpClient,
+ Map requestBody,
+ String method,
+ Map header,
+ String url) throws IOException {
+ LOG.debug("当前发送的数据为:{}", GsonUtil.GSON.toJson(requestBody));
+ HttpRequestBase request = HttpUtil.getRequest(method, requestBody, header, url);
+ CloseableHttpResponse httpResponse = httpClient.execute(request);
+ // Status code is still not 200 after all retries
+ if (httpResponse.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
+ LOG.warn("request still failed after retries, status code: {}", httpResponse.getStatusLine().getStatusCode());
+ }
+ }
+}
diff --git a/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormatBuilder.java b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormatBuilder.java
new file mode 100644
index 0000000000..3ab4751ea0
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormatBuilder.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.outputformat;
+
+import com.dtstack.flinkx.outputformat.BaseRichOutputFormatBuilder;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/12
+ */
+public class RestapiOutputFormatBuilder extends BaseRichOutputFormatBuilder {
+
+ private RestapiOutputFormat format;
+
+ public RestapiOutputFormatBuilder() {
+ super.format = format = new RestapiOutputFormat();
+ }
+
+ public void setUrl(String url) {
+ this.format.url = url;
+ }
+
+ public void setHeader(Map header) {
+ this.format.header = header;
+ }
+
+ public void setMethod(String method) {
+ this.format.method = method;
+ }
+
+ public void setBody(Map body) {
+ this.format.body = body;
+ }
+
+ public void setColumn(ArrayList column) {
+ format.column = column;
+ }
+
+ public void setParams(Map params){
+ format.params = params;
+ }
+
+
+ @Override
+ protected void checkFormat() {
+ if (format.url == null || format.url.isEmpty()) {
+ throw new IllegalArgumentException("the url parameter is required");
+ }
+ if (format.method == null || format.method.isEmpty()) {
+ throw new IllegalArgumentException("the method parameter is required");
+ }
+ }
+}
diff --git a/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/writer/RestapiWriter.java b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/writer/RestapiWriter.java
new file mode 100644
index 0000000000..f050abb1b9
--- /dev/null
+++ b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/writer/RestapiWriter.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flinkx.restapi.writer;
+
+import com.dtstack.flinkx.config.DataTransferConfig;
+import com.dtstack.flinkx.config.WriterConfig;
+import com.dtstack.flinkx.restapi.common.RestapiKeys;
+import com.dtstack.flinkx.restapi.outputformat.RestapiOutputFormatBuilder;
+import com.dtstack.flinkx.writer.BaseDataWriter;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.types.Row;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @author : tiezhu
+ * @date : 2020/3/12
+ */
+public class RestapiWriter extends BaseDataWriter {
+
+ protected String url;
+
+ protected String method;
+
+ protected Map header = Maps.newHashMap();
+
+ protected Map body = Maps.newHashMap();
+
+ protected ArrayList column = Lists.newArrayList();
+
+ protected Map params = Maps.newHashMap();
+
+ protected int batchInterval;
+
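+ /**
+ * Reads url, method, batchInterval, column, header, body and params from
+ * the writer parameters; header and body are lists of maps that are merged.
+ */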
+ @SuppressWarnings("unchecked")
+ public RestapiWriter(DataTransferConfig config) {
+ super(config);
+ Object tempObj;
+
+ WriterConfig writerConfig = config.getJob().getContent().get(0).getWriter();
+
+ url = writerConfig.getParameter().getStringVal(RestapiKeys.KEY_URL);
+ method = writerConfig.getParameter().getStringVal(RestapiKeys.KEY_METHOD);
+ batchInterval = writerConfig.getParameter().getIntVal(RestapiKeys.KEY_BATCH_INTERVAL, 1);
+ tempObj = writerConfig.getParameter().getVal(RestapiKeys.KEY_COLUMN);
+ if (tempObj != null) {
+ column.addAll((ArrayList) tempObj);
+ }
+
+ tempObj = writerConfig.getParameter().getVal(RestapiKeys.KEY_HEADER);
+ if (tempObj != null) {
+ for (Map map : (ArrayList>) tempObj) {
+ header.putAll(map);
+ }
+ }
+
+ tempObj = writerConfig.getParameter().getVal(RestapiKeys.KEY_BODY);
+ if (tempObj != null) {
+ for (Map map : (ArrayList>) tempObj) {
+ body.putAll(map);
+ }
+ }
+ tempObj = writerConfig.getParameter().getVal(RestapiKeys.KEY_PARAMS);
+ if (tempObj != null) {
+ params = (HashMap) tempObj;
+ }
+ }
+
+ @Override
+ public DataStreamSink> writeData(DataStream dataSet) {
+ RestapiOutputFormatBuilder builder = new RestapiOutputFormatBuilder();
+
+ builder.setHeader(header);
+ builder.setMethod(method);
+ builder.setUrl(url);
+ builder.setBody(body);
+ builder.setColumn(column);
+ builder.setParams(params);
+ builder.setBatchInterval(batchInterval);
+
+ return createOutput(dataSet, builder.finish());
+ }
+}
diff --git a/flinkx-restapi/pom.xml b/flinkx-restapi/pom.xml
new file mode 100644
index 0000000000..876877c87a
--- /dev/null
+++ b/flinkx-restapi/pom.xml
@@ -0,0 +1,28 @@
+
+
+
+ flinkx-all
+ com.dtstack.flinkx
+ 1.6
+
+ 4.0.0
+
+ flinkx-restapi
+ pom
+
+ flinkx-restapi-core
+ flinkx-restapi-writer
+ flinkx-restapi-reader
+
+
+
+
+ com.dtstack.flinkx
+ flinkx-core
+ 1.6
+ provided
+
+
+
\ No newline at end of file
diff --git a/flinkx-saphana/flinkx-saphana-core/pom.xml b/flinkx-saphana/flinkx-saphana-core/pom.xml
index bed09b714f..5ac7f520b5 100644
--- a/flinkx-saphana/flinkx-saphana-core/pom.xml
+++ b/flinkx-saphana/flinkx-saphana-core/pom.xml
@@ -1,6 +1,6 @@
-flinkx-saphana
diff --git a/flinkx-saphana/flinkx-saphana-reader/pom.xml b/flinkx-saphana/flinkx-saphana-reader/pom.xml
index ba9e5c4375..8b170aecf7 100644
--- a/flinkx-saphana/flinkx-saphana-reader/pom.xml
+++ b/flinkx-saphana/flinkx-saphana-reader/pom.xml
@@ -1,6 +1,6 @@
-flinkx-saphana
diff --git a/flinkx-saphana/flinkx-saphana-writer/pom.xml b/flinkx-saphana/flinkx-saphana-writer/pom.xml
index 8c19fad040..233c1fc5ae 100644
--- a/flinkx-saphana/flinkx-saphana-writer/pom.xml
+++ b/flinkx-saphana/flinkx-saphana-writer/pom.xml
@@ -1,6 +1,6 @@
-flinkx-saphana
diff --git a/flinkx-saphana/pom.xml b/flinkx-saphana/pom.xml
index a9ecb0501e..e36168c903 100644
--- a/flinkx-saphana/pom.xml
+++ b/flinkx-saphana/pom.xml
@@ -1,6 +1,6 @@
-flinkx-all
diff --git a/flinkx-sqlserver/flinkx-sqlserver-reader/pom.xml b/flinkx-sqlserver/flinkx-sqlserver-reader/pom.xml
index 7300b9251f..1cd8f983b1 100644
--- a/flinkx-sqlserver/flinkx-sqlserver-reader/pom.xml
+++ b/flinkx-sqlserver/flinkx-sqlserver-reader/pom.xml
@@ -62,8 +62,12 @@
shade.sqlserverreader.io.netty
- com.google
- shade.sqlserverreader.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-sqlserver/flinkx-sqlserver-writer/pom.xml b/flinkx-sqlserver/flinkx-sqlserver-writer/pom.xml
index e4f6ed9b36..f9d45aeb84 100644
--- a/flinkx-sqlserver/flinkx-sqlserver-writer/pom.xml
+++ b/flinkx-sqlserver/flinkx-sqlserver-writer/pom.xml
@@ -63,8 +63,12 @@
shade.sqlserverwriter.io.netty
- com.google
- shade.sqlserverwriter.com.google
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
diff --git a/flinkx-stream/flinkx-stream-reader/pom.xml b/flinkx-stream/flinkx-stream-reader/pom.xml
index d2dfa853cc..3586e32383 100644
--- a/flinkx-stream/flinkx-stream-reader/pom.xml
+++ b/flinkx-stream/flinkx-stream-reader/pom.xml
@@ -49,6 +49,16 @@
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
diff --git a/flinkx-stream/flinkx-stream-writer/pom.xml b/flinkx-stream/flinkx-stream-writer/pom.xml
index f106065763..8466152676 100644
--- a/flinkx-stream/flinkx-stream-writer/pom.xml
+++ b/flinkx-stream/flinkx-stream-writer/pom.xml
@@ -45,6 +45,16 @@
+
+
+ com.google.common
+ shade.core.com.google.common
+
+
+ com.google.thirdparty
+ shade.core.com.google.thirdparty
+
+
diff --git a/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamOutputFormat.java b/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamOutputFormat.java
index da9a167b59..ac578d2c86 100644
--- a/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamOutputFormat.java
+++ b/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamOutputFormat.java
@@ -48,7 +48,12 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException {
@Override
protected void writeSingleRecordInternal(Row row) throws WriteRecordException {
if (print) {
- LOG.info("subTaskIndex[{}]:{}", taskNumber, row);
+ LOG.info("subTaskIndex[{}]:{}", taskNumber, rowToStringWithDelimiter(row, writeDelimiter));
+ }
+
+ if (restoreConfig.isRestore()) {
+ formatState.setState(row.getField(restoreConfig.getRestoreColumnIndex()));
+ LOG.info("print data subTaskIndex[{}]:{}", taskNumber, rowToStringWithDelimiter(row, writeDelimiter));
}
}
@@ -56,8 +61,19 @@ protected void writeSingleRecordInternal(Row row) throws WriteRecordException {
protected void writeMultipleRecordsInternal() throws Exception {
if (print) {
for (Row row : rows) {
- LOG.info(String.valueOf(row));
+ LOG.info(rowToStringWithDelimiter(row, writeDelimiter));
+ }
+ }
+ }
+
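+ /**
+ * Joins the row fields into a single string separated by the configured
+ * write delimiter.
+ */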
+ public String rowToStringWithDelimiter(Row row, String writeDelimiter) {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < row.getArity(); i++) {
+ if (i > 0) {
+ sb.append(writeDelimiter);
}
+ sb.append(StringUtils.arrayAwareToString(row.getField(i)));
}
+ return sb.toString();
}
}
diff --git a/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamWriter.java b/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamWriter.java
index 246a4eb8bc..05344b6494 100644
--- a/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamWriter.java
+++ b/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamWriter.java
@@ -46,7 +46,7 @@ public StreamWriter(DataTransferConfig config) {
super(config);
print = config.getJob().getContent().get(0).getWriter().getParameter().getBooleanVal("print",false);
writeDelimiter = config.getJob().getContent().get(0).getWriter().getParameter().getStringVal("writeDelimiter", "|");
- batchInterval = config.getJob().getContent().get(0).getWriter().getParameter().getIntVal("batchInterval", 20);
+ batchInterval = config.getJob().getContent().get(0).getWriter().getParameter().getIntVal("batchInterval", 1);
List column = config.getJob().getContent().get(0).getWriter().getParameter().getColumn();
metaColumns = MetaColumn.getMetaColumns(column);
diff --git a/flinkx-teradata/flinkx-teradata-core/src/main/java/com/dtstack/flinkx/teradata/util/DBUtil.java b/flinkx-teradata/flinkx-teradata-core/src/main/java/com/dtstack/flinkx/teradata/util/DBUtil.java
index 3b42c064c3..1c84adcfa4 100644
--- a/flinkx-teradata/flinkx-teradata-core/src/main/java/com/dtstack/flinkx/teradata/util/DBUtil.java
+++ b/flinkx-teradata/flinkx-teradata-core/src/main/java/com/dtstack/flinkx/teradata/util/DBUtil.java
@@ -1,15 +1,10 @@
package com.dtstack.flinkx.teradata.util;
-import com.dtstack.flinkx.rdb.DatabaseInterface;
-import com.dtstack.flinkx.rdb.util.DbUtil;
-import com.dtstack.flinkx.reader.MetaColumn;
import com.dtstack.flinkx.util.ClassUtil;
-import java.sql.*;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
/**
* @author wuhui
@@ -37,38 +32,4 @@ public static Connection getConnection(String url, String username, String passw
return dbConn;
}
-
- /**
- * 获取表列名类型列表
- * @param dbURL jdbc url
- * @param username 数据库账号
- * @param password 数据库密码
- * @param databaseInterface DatabaseInterface
- * @param table 表名
- * @param sql sql
- * @return
- */
- public static List analyzeTable(String dbURL, String username, String password, DatabaseInterface databaseInterface,
- String table, String sql) {
- List descColumnTypeList = new ArrayList<>();
- Connection dbConn = null;
- Statement stmt = null;
- ResultSet rs = null;
- try {
- dbConn = getConnection(dbURL, username, password);
- stmt = dbConn.createStatement();
- rs = stmt.executeQuery(databaseInterface.getSqlQuerySqlFields(sql));
- ResultSetMetaData rd = rs.getMetaData();
-
- for (int i = 1; i <= rd.getColumnCount(); i++) {
- descColumnTypeList.add(rd.getColumnTypeName(i));
- }
- } catch (SQLException e) {
- throw new RuntimeException(e);
- } finally {
- DbUtil.closeDbResources(rs, stmt, dbConn, false);
- }
-
- return descColumnTypeList;
- }
}
diff --git a/flinkx-test/pom.xml b/flinkx-test/pom.xml
index 0d36c0ef17..b37ab81b88 100644
--- a/flinkx-test/pom.xml
+++ b/flinkx-test/pom.xml
@@ -16,351 +16,432 @@
+
ch.qos.logbacklogback-classic1.1.7
+
com.google.guavaguava19.0
+
com.google.code.gsongson2.7
+
hadoop-mapreduce-client-coreorg.apache.hadoop${hadoop.version}
+
hadoop-commonorg.apache.hadoop${hadoop.version}
+
hadoop-hdfsorg.apache.hadoop${hadoop.version}
+
org.apache.flinkflink-metrics-core
- ${flink.version}
+ 1.8.1
+
org.apache.flinkflink-metrics-prometheus_2.11${flink.version}
+
io.prometheussimpleclient${prometheus.version}
+
io.prometheussimpleclient_httpserver${prometheus.version}
+
io.prometheussimpleclient_pushgateway${prometheus.version}
+
com.dtstack.flinkxflinkx-core
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-stream-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-stream-writer
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-carbondata-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-carbondata-writer
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-db2-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-db2-writer
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-es-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-es-writer
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-ftp-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-ftp-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-hbase-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-hbase-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-hdfs-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-hdfs-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-mongodb-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-mongodb-writer
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-mysql-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-mysql-dreader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-mysql-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-odps-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-odps-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-oracle-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-oracle-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-postgresql-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-postgresql-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-sqlserver-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-sqlserver-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-redis-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-rdb-core
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-rdb-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-rdb-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-gbase-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-gbase-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-binlog-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-hive-writer
- ${flinkx.version}
+ 1.6
+
org.apache.hivehive-serde2.1.0
+
com.dtstack.flinkxflinkx-kafka11-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-kafka11-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-kafka10-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-kafka10-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-kafka09-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-kafka09-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-kudu-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-kudu-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-kafka-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-kafka-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-clickhouse-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-clickhouse-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-polardb-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-polardb-writer
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-phoenix-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-phoenix-writer
- ${flinkx.version}
+ 1.6
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${hadoop.version}
+ test-jar
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop.version}
+ test-jar
+
+
+
+ org.apache.derby
+ derby
+ 10.14.2.0
+ test
+ com.dtstack.flinkxflinkx-emqx-reader
- ${flinkx.version}
+ 1.6com.dtstack.flinkxflinkx-emqx-writer
- ${flinkx.version}
+ 1.6
+
+ com.dtstack.flinkx
+ flinkx-restapi-writer
+ 1.6
+
+
com.dtstack.flinkxflinkx-dm-reader
- ${flinkx.version}
+ 1.6
+
com.dtstack.flinkxflinkx-dm-writer
- ${flinkx.version}
+ 1.6
+
+
+ com.dtstack.flinkx
+ flinkx-greenplum-reader
+ 1.6
+
+
+ com.dtstack.flinkx
+ flinkx-greenplum-writer
+ 1.6
diff --git a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java
index 33fc1dd035..27883b37a4 100644
--- a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java
+++ b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java
@@ -41,6 +41,8 @@
import com.dtstack.flinkx.ftp.writer.FtpWriter;
import com.dtstack.flinkx.gbase.reader.GbaseReader;
import com.dtstack.flinkx.gbase.writer.GbaseWriter;
+import com.dtstack.flinkx.greenplum.reader.GreenplumReader;
+import com.dtstack.flinkx.greenplum.writer.GreenplumWriter;
import com.dtstack.flinkx.hbase.reader.HbaseReader;
import com.dtstack.flinkx.hbase.writer.HbaseWriter;
import com.dtstack.flinkx.hdfs.reader.HdfsReader;
@@ -73,6 +75,7 @@
import com.dtstack.flinkx.postgresql.writer.PostgresqlWriter;
import com.dtstack.flinkx.reader.BaseDataReader;
import com.dtstack.flinkx.redis.writer.RedisWriter;
+import com.dtstack.flinkx.restapi.writer.RestapiWriter;
import com.dtstack.flinkx.sqlserver.reader.SqlserverReader;
import com.dtstack.flinkx.sqlserver.writer.SqlserverWriter;
import com.dtstack.flinkx.stream.reader.StreamReader;
@@ -84,7 +87,9 @@
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
+import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;
+import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
@@ -112,21 +117,18 @@ public class LocalTest {
public static Logger LOG = LoggerFactory.getLogger(LocalTest.class);
public static Configuration conf = new Configuration();
- public static void main(String[] args) throws Exception {
- setLogLevel(Level.INFO.toString());
-
+ public static void main(String[] args) throws Exception{
+ setLogLevel(Level.DEBUG.toString());
Properties confProperties = new Properties();
// confProperties.put("flink.checkpoint.interval", "10000");
// confProperties.put("flink.checkpoint.stateBackend", "file:///tmp/flinkx_checkpoint");
-//
- conf.setString("akka.ask.timeout", "180 s");
- conf.setString("web.timeout", "100000");
+
// conf.setString("metrics.reporter.promgateway.class","org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporter");
-// conf.setString("metrics.reporter.promgateway.host","172.16.8.178");
+// conf.setString("metrics.reporter.promgateway.host","127.0.0.1");
// conf.setString("metrics.reporter.promgateway.port","9091");
-// conf.setString("metrics.reporter.promgateway.jobName","kanata");
+// conf.setString("metrics.reporter.promgateway.jobName","108job");
// conf.setString("metrics.reporter.promgateway.randomJobNameSuffix","true");
-// conf.setString("metrics.reporter.promgateway.deleteOnShutdown","false");
+// conf.setString("metrics.reporter.promgateway.deleteOnShutdown","true");
String jobPath = "D:\\dtstack\\flinkx-all\\flinkx-examples\\examples\\clickhouse_stream.json";
String savePointPath = "";
@@ -134,17 +136,29 @@ public static void main(String[] args) throws Exception {
ResultPrintUtil.printResult(result);
}
- public static JobExecutionResult runJob(File jobFile, Properties confProperties, String savePointPath) throws Exception {
+ public static JobExecutionResult runJob(File jobFile, Properties confProperties, String savepointPath) throws Exception{
String jobContent = readJob(jobFile);
- return runJob(jobContent, confProperties, savePointPath);
+ return runJob(jobContent, confProperties, savepointPath);
}
- public static JobExecutionResult runJob(String job, Properties confProperties, String savePointPath) throws Exception {
+ public static JobExecutionResult runJob(String job, Properties confProperties, String savepointPath) throws Exception{
DataTransferConfig config = DataTransferConfig.parse(job);
+
+ conf.setString("akka.ask.timeout", "180 s");
+ conf.setString("web.timeout", String.valueOf(100000));
+
MyLocalStreamEnvironment env = new MyLocalStreamEnvironment(conf);
+
openCheckpointConf(env, confProperties);
+
env.setParallelism(config.getJob().getSetting().getSpeed().getChannel());
- env.setRestartStrategy(RestartStrategies.noRestart());
+
+ if (needRestart(config)) {
+ env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
+ 10,
+ Time.of(10, TimeUnit.SECONDS)
+ ));
+ }
BaseDataReader reader = buildDataReader(config, env);
DataStream dataStream = reader.readData();
@@ -163,209 +177,110 @@ public static JobExecutionResult runJob(String job, Properties confProperties, S
dataStreamSink.setParallelism(speedConfig.getWriterChannel());
}
- if (StringUtils.isNotEmpty(savePointPath)) {
- env.setSettings(SavepointRestoreSettings.forPath(savePointPath));
+ if(StringUtils.isNotEmpty(savepointPath)){
+ env.setSettings(SavepointRestoreSettings.forPath(savepointPath));
}
return env.execute();
}
+ private static boolean needRestart(DataTransferConfig config){
+ return config.getJob().getSetting().getRestoreConfig().isRestore();
+ }
+
private static String readJob(File file) {
- try(FileInputStream in = new FileInputStream(file);) {
+ try(FileInputStream in = new FileInputStream(file)) {
byte[] fileContent = new byte[(int) file.length()];
in.read(fileContent);
return new String(fileContent, StandardCharsets.UTF_8);
- } catch (Exception e) {
+ } catch (Exception e){
throw new RuntimeException(e);
}
}
- private static BaseDataReader buildDataReader(DataTransferConfig config, StreamExecutionEnvironment env) {
+ private static BaseDataReader buildDataReader(DataTransferConfig config, StreamExecutionEnvironment env){
String readerName = config.getJob().getContent().get(0).getReader().getName();
- BaseDataReader reader;
- switch (readerName) {
- case PluginNameConstants.STREAM_READER:
- reader = new StreamReader(config, env);
- break;
- case PluginNameConstants.CARBONDATA_READER:
- reader = new CarbondataReader(config, env);
- break;
- case PluginNameConstants.ORACLE_READER:
- reader = new OracleReader(config, env);
- break;
- case PluginNameConstants.POSTGRESQL_READER:
- reader = new PostgresqlReader(config, env);
- break;
- case PluginNameConstants.SQLSERVER_READER:
- reader = new SqlserverReader(config, env);
- break;
- case PluginNameConstants.MYSQLD_READER:
- reader = new MysqldReader(config, env);
- break;
- case PluginNameConstants.MYSQL_READER:
- reader = new MysqlReader(config, env);
- break;
- case PluginNameConstants.DB2_READER:
- reader = new Db2Reader(config, env);
- break;
- case PluginNameConstants.GBASE_READER:
- reader = new GbaseReader(config, env);
- break;
- case PluginNameConstants.ES_READER:
- reader = new EsReader(config, env);
- break;
- case PluginNameConstants.FTP_READER:
- reader = new FtpReader(config, env);
- break;
- case PluginNameConstants.HBASE_READER:
- reader = new HbaseReader(config, env);
- break;
- case PluginNameConstants.HDFS_READER:
- reader = new HdfsReader(config, env);
- break;
- case PluginNameConstants.MONGODB_READER:
- reader = new MongodbReader(config, env);
- break;
- case PluginNameConstants.ODPS_READER:
- reader = new OdpsReader(config, env);
- break;
- case PluginNameConstants.BINLOG_READER:
- reader = new BinlogReader(config, env);
- break;
- case PluginNameConstants.KAFKA09_READER:
- reader = new Kafka09Reader(config, env);
- break;
- case PluginNameConstants.KAFKA10_READER:
- reader = new Kafka10Reader(config, env);
- break;
- case PluginNameConstants.KAFKA11_READER:
- reader = new Kafka11Reader(config, env);
- break;
- case PluginNameConstants.KAFKA_READER:
- reader = new KafkaReader(config, env);
- break;
- case PluginNameConstants.KUDU_READER:
- reader = new KuduReader(config, env);
- break;
- case PluginNameConstants.CLICKHOUSE_READER:
- reader = new ClickhouseReader(config, env);
- break;
- case PluginNameConstants.POLARDB_READER:
- reader = new PolardbReader(config, env);
- break;
- case PluginNameConstants.PHOENIX_READER:
- reader = new PhoenixReader(config, env);
- break;
- case PluginNameConstants.EMQX_READER:
- reader = new EmqxReader(config, env);
- break;
- case PluginNameConstants.DM_READER:
- reader = new DmReader(config, env);
- break;
- default:
- throw new IllegalArgumentException("Can not find reader by name:" + readerName);
+ BaseDataReader reader;
+ switch (readerName) {
+ case PluginNameConstants.STREAM_READER: reader = new StreamReader(config, env); break;
+ case PluginNameConstants.CARBONDATA_READER: reader = new CarbondataReader(config, env); break;
+ case PluginNameConstants.ORACLE_READER: reader = new OracleReader(config, env); break;
+ case PluginNameConstants.POSTGRESQL_READER: reader = new PostgresqlReader(config, env); break;
+ case PluginNameConstants.SQLSERVER_READER: reader = new SqlserverReader(config, env); break;
+ case PluginNameConstants.MYSQLD_READER: reader = new MysqldReader(config, env); break;
+ case PluginNameConstants.MYSQL_READER: reader = new MysqlReader(config, env); break;
+ case PluginNameConstants.DB2_READER: reader = new Db2Reader(config, env); break;
+ case PluginNameConstants.GBASE_READER: reader = new GbaseReader(config, env); break;
+ case PluginNameConstants.ES_READER: reader = new EsReader(config, env); break;
+ case PluginNameConstants.FTP_READER: reader = new FtpReader(config, env); break;
+ case PluginNameConstants.HBASE_READER: reader = new HbaseReader(config, env); break;
+ case PluginNameConstants.HDFS_READER: reader = new HdfsReader(config, env); break;
+ case PluginNameConstants.MONGODB_READER: reader = new MongodbReader(config, env); break;
+ case PluginNameConstants.ODPS_READER: reader = new OdpsReader(config, env); break;
+ case PluginNameConstants.BINLOG_READER: reader = new BinlogReader(config, env); break;
+ case PluginNameConstants.KAFKA09_READER: reader = new Kafka09Reader(config, env); break;
+ case PluginNameConstants.KAFKA10_READER: reader = new Kafka10Reader(config, env); break;
+ case PluginNameConstants.KAFKA11_READER: reader = new Kafka11Reader(config, env); break;
+ case PluginNameConstants.KAFKA_READER: reader = new KafkaReader(config, env); break;
+ case PluginNameConstants.KUDU_READER: reader = new KuduReader(config, env); break;
+ case PluginNameConstants.CLICKHOUSE_READER: reader = new ClickhouseReader(config, env); break;
+ case PluginNameConstants.POLARDB_READER: reader = new PolardbReader(config, env); break;
+ case PluginNameConstants.PHOENIX_READER: reader = new PhoenixReader(config, env); break;
+ case PluginNameConstants.EMQX_READER: reader = new EmqxReader(config, env); break;
+ case PluginNameConstants.DM_READER: reader = new DmReader(config, env); break;
+ case PluginNameConstants.GREENPLUM_READER: reader = new GreenplumReader(config, env); break;
+ default: throw new IllegalArgumentException("Can not find reader by name:" + readerName);
}
return reader;
}
- private static BaseDataWriter buildDataWriter(DataTransferConfig config) {
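+ // map the writer name from the job config (e.g. "greenplumwriter") to its writer plugin implementation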
+ private static BaseDataWriter buildDataWriter(DataTransferConfig config) {
String writerName = config.getJob().getContent().get(0).getWriter().getName();
BaseDataWriter writer;
- switch (writerName) {
- case PluginNameConstants.STREAM_WRITER:
- writer = new StreamWriter(config);
- break;
- case PluginNameConstants.CARBONDATA_WRITER:
- writer = new CarbondataWriter(config);
- break;
- case PluginNameConstants.MYSQL_WRITER:
- writer = new MysqlWriter(config);
- break;
- case PluginNameConstants.SQLSERVER_WRITER:
- writer = new SqlserverWriter(config);
- break;
- case PluginNameConstants.ORACLE_WRITER:
- writer = new OracleWriter(config);
- break;
- case PluginNameConstants.POSTGRESQL_WRITER:
- writer = new PostgresqlWriter(config);
- break;
- case PluginNameConstants.DB2_WRITER:
- writer = new Db2Writer(config);
- break;
- case PluginNameConstants.GBASE_WRITER:
- writer = new GbaseWriter(config);
- break;
- case PluginNameConstants.ES_WRITER:
- writer = new EsWriter(config);
- break;
- case PluginNameConstants.FTP_WRITER:
- writer = new FtpWriter(config);
- break;
- case PluginNameConstants.HBASE_WRITER:
- writer = new HbaseWriter(config);
- break;
- case PluginNameConstants.HDFS_WRITER:
- writer = new HdfsWriter(config);
- break;
- case PluginNameConstants.MONGODB_WRITER:
- writer = new MongodbWriter(config);
- break;
- case PluginNameConstants.ODPS_WRITER:
- writer = new OdpsWriter(config);
- break;
- case PluginNameConstants.REDIS_WRITER:
- writer = new RedisWriter(config);
- break;
- case PluginNameConstants.HIVE_WRITER:
- writer = new HiveWriter(config);
- break;
- case PluginNameConstants.KAFKA09_WRITER:
- writer = new Kafka09Writer(config);
- break;
- case PluginNameConstants.KAFKA10_WRITER:
- writer = new Kafka10Writer(config);
- break;
- case PluginNameConstants.KAFKA11_WRITER:
- writer = new Kafka11Writer(config);
- break;
- case PluginNameConstants.KUDU_WRITER:
- writer = new KuduWriter(config);
- break;
- case PluginNameConstants.CLICKHOUSE_WRITER:
- writer = new ClickhouseWriter(config);
- break;
- case PluginNameConstants.POLARDB_WRITER:
- writer = new PolardbWriter(config);
- break;
- case PluginNameConstants.KAFKA_WRITER:
- writer = new KafkaWriter(config);
- break;
- case PluginNameConstants.PHOENIX_WRITER:
- writer = new PhoenixWriter(config);
- break;
- case PluginNameConstants.EMQX_WRITER:
- writer = new EmqxWriter(config);
- break;
- case PluginNameConstants.DM_WRITER:
- writer = new DmWriter(config);
- break;
- default:
- throw new IllegalArgumentException("Can not find writer by name:" + writerName);
+ switch (writerName) {
+ case PluginNameConstants.STREAM_WRITER: writer = new StreamWriter(config); break;
+ case PluginNameConstants.CARBONDATA_WRITER: writer = new CarbondataWriter(config); break;
+ case PluginNameConstants.MYSQL_WRITER: writer = new MysqlWriter(config); break;
+ case PluginNameConstants.SQLSERVER_WRITER: writer = new SqlserverWriter(config); break;
+ case PluginNameConstants.ORACLE_WRITER: writer = new OracleWriter(config); break;
+ case PluginNameConstants.POSTGRESQL_WRITER: writer = new PostgresqlWriter(config); break;
+ case PluginNameConstants.DB2_WRITER: writer = new Db2Writer(config); break;
+ case PluginNameConstants.GBASE_WRITER: writer = new GbaseWriter(config); break;
+ case PluginNameConstants.ES_WRITER: writer = new EsWriter(config); break;
+ case PluginNameConstants.FTP_WRITER: writer = new FtpWriter(config); break;
+ case PluginNameConstants.HBASE_WRITER: writer = new HbaseWriter(config); break;
+ case PluginNameConstants.HDFS_WRITER: writer = new HdfsWriter(config); break;
+ case PluginNameConstants.MONGODB_WRITER: writer = new MongodbWriter(config); break;
+ case PluginNameConstants.ODPS_WRITER: writer = new OdpsWriter(config); break;
+ case PluginNameConstants.REDIS_WRITER: writer = new RedisWriter(config); break;
+ case PluginNameConstants.HIVE_WRITER: writer = new HiveWriter(config); break;
+ case PluginNameConstants.KAFKA09_WRITER: writer = new Kafka09Writer(config); break;
+ case PluginNameConstants.KAFKA10_WRITER: writer = new Kafka10Writer(config); break;
+ case PluginNameConstants.KAFKA11_WRITER: writer = new Kafka11Writer(config); break;
+ case PluginNameConstants.KUDU_WRITER: writer = new KuduWriter(config); break;
+ case PluginNameConstants.CLICKHOUSE_WRITER: writer = new ClickhouseWriter(config); break;
+ case PluginNameConstants.POLARDB_WRITER: writer = new PolardbWriter(config); break;
+ case PluginNameConstants.KAFKA_WRITER: writer = new KafkaWriter(config); break;
+ case PluginNameConstants.PHOENIX_WRITER: writer = new PhoenixWriter(config); break;
+ case PluginNameConstants.EMQX_WRITER: writer = new EmqxWriter(config); break;
+ case PluginNameConstants.RESTAPI_WRITER: writer = new RestapiWriter(config); break;
+ case PluginNameConstants.DM_WRITER: writer = new DmWriter(config); break;
+ case PluginNameConstants.GREENPLUM_WRITER: writer = new GreenplumWriter(config); break;
+ default: throw new IllegalArgumentException("Can not find writer by name:" + writerName);
}
return writer;
}
- private static void openCheckpointConf(StreamExecutionEnvironment env, Properties properties) {
- if (properties == null) {
+ private static void openCheckpointConf(StreamExecutionEnvironment env, Properties properties) {
+ if (properties == null) {
return;
}
- if (properties.getProperty(ConfigConstant.FLINK_CHECKPOINT_INTERVAL_KEY) == null) {
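+ // checkpointing is only enabled when a checkpoint interval is configured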
+ if (properties.getProperty(ConfigConstant.FLINK_CHECKPOINT_INTERVAL_KEY) == null) {
return;
- } else {
+ } else {
long interval = Long.parseLong(properties.getProperty(ConfigConstant.FLINK_CHECKPOINT_INTERVAL_KEY).trim());
// start a checkpoint every ${interval} ms
@@ -375,7 +290,7 @@ private static void openCheckpointConf(StreamExecutionEnvironment env, Propertie
}
String checkpointTimeoutStr = properties.getProperty(ConfigConstant.FLINK_CHECKPOINT_TIMEOUT_KEY);
- if (checkpointTimeoutStr != null) {
+ if (checkpointTimeoutStr != null) {
long checkpointTimeout = Long.parseLong(checkpointTimeoutStr);
// checkpoints must complete within this timeout, or they are discarded
env.getCheckpointConfig().setCheckpointTimeout(checkpointTimeout);
@@ -387,6 +302,8 @@ private static void openCheckpointConf(StreamExecutionEnvironment env, Propertie
env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
env.getCheckpointConfig().enableExternalizedCheckpoints(
CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
+
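+ // persist checkpoint state with a filesystem state backend under a fixed local path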
+ env.setStateBackend(new FsStateBackend(new Path("file:///tmp/flinkx_checkpoint")));
env.setRestartStrategy(RestartStrategies.failureRateRestart(
FAILURE_RATE,
Time.of(FAILURE_INTERVAL, TimeUnit.MINUTES),
diff --git a/flinkx-test/src/main/java/com/dtstack/flinkx/test/PluginNameConstants.java b/flinkx-test/src/main/java/com/dtstack/flinkx/test/PluginNameConstants.java
index f0f047e6b5..21d07e7c9e 100644
--- a/flinkx-test/src/main/java/com/dtstack/flinkx/test/PluginNameConstants.java
+++ b/flinkx-test/src/main/java/com/dtstack/flinkx/test/PluginNameConstants.java
@@ -53,6 +53,10 @@ public class PluginNameConstants {
public static final String SQLSERVER_CDC_READER = "sqlservercdcreader";
public static final String METADATAHIVE2_READER = "metadatahive2reader";
public static final String DM_READER = "dmreader";
+ public static final String METADATATIDB_READER = "metadatatidbreader";
+ public static final String METADATAORACLE_READER = "metadataoraclereader";
+ public static final String METADATAMYSQL_READER = "metadatamysqlreader";
+ public static final String GREENPLUM_READER = "greenplumreader";
public static final String STREAM_WRITER = "streamwriter";
public static final String CARBONDATA_WRITER = "carbondatawriter";
@@ -81,4 +85,5 @@ public class PluginNameConstants {
public static final String EMQX_WRITER = "emqxwriter";
public static final String RESTAPI_WRITER = "restapiwriter";
public static final String DM_WRITER = "dmwriter";
+ public static final String GREENPLUM_WRITER = "greenplumwriter";
}
diff --git a/pom.xml b/pom.xml
index 8524112890..821397ba25 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,7 +29,7 @@
<module>flinkx-clickhouse</module>
<module>flinkx-saphana</module>
<module>flinkx-teradata</module>
- <module>flinkx-cassandra</module>
+ <module>flinkx-greenplum</module>
<module>flinkx-hdfs</module>
<module>flinkx-hive</module>
@@ -40,22 +40,22 @@
<module>flinkx-phoenix</module>
<module>flinkx-carbondata</module>
<module>flinkx-kudu</module>
+ <module>flinkx-cassandra</module>
<module>flinkx-redis</module>
<module>flinkx-mongodb</module>
<module>flinkx-binlog</module>
+ <module>flinkx-pgwal</module>
<module>flinkx-kb</module>
<module>flinkx-kafka09</module>
<module>flinkx-kafka10</module>
<module>flinkx-kafka11</module>
<module>flinkx-kafka</module>
-
<module>flinkx-emqx</module>
- <module>flinkx-pgwal</module>
<module>flinkx-pulsar</module>
-
+ <module>flinkx-restapi</module>
@@ -66,7 +66,7 @@
2.7.34.5.3${basedir}/dev
- release_1.8.6
+ release_1.8.7
@@ -125,9 +125,16 @@
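+ <!-- unit tests rely on JUnit 4 and Mockito -->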
- <groupId>org.testng</groupId>
- <artifactId>testng</artifactId>
- <version>6.14.3</version>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.12</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <version>3.0.0</version>
<scope>test</scope>
@@ -143,9 +150,24 @@
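+ <!-- JaCoCo: instrument the tests (prepare-agent) and generate a coverage report -->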
+ <plugin>
+ <groupId>org.jacoco</groupId>
+ <artifactId>jacoco-maven-plugin</artifactId>
+ <version>0.7.8</version>
+ <executions>
+ <execution>
+ <goals>
+ <goal>prepare-agent</goal>
+ <goal>report</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
+ <version>3.2.1</version>
<id>attach-sources</id>
@@ -168,4 +190,20 @@
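+ <!-- JaCoCo coverage report configuration -->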
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.jacoco</groupId>
+ <artifactId>jacoco-maven-plugin</artifactId>
+ <reportSets>
+ <reportSet>
+ <reports>
+ <report>report</report>
+ </reports>
+ </reportSet>
+ </reportSets>
+ </plugin>
+ </plugins>
+ </reporting>
+
\ No newline at end of file