diff --git a/.gitignore b/.gitignore index d8cb487ce5..1e7d443821 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Created by .ignore support plugin (hsz.mobi) .idea/ plugins/ +syncplugins/ *.iml target/ lib/ @@ -8,4 +9,5 @@ jobs/ nohup.out flinkconf/ hadoopconf/ -/default_task_id_output \ No newline at end of file +/default_task_id_output +/syncplugins \ No newline at end of file diff --git a/README.md b/README.md index c2a3a77d5e..c391b42f5c 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ The following databases are currently supported: | | Teradata | [doc](docs/offline/reader/teradatareader.md) | [doc](docs/offline/writer/teradatawriter.md) | | | Phoenix | [doc](docs/offline/reader/phoenixreader.md) | [doc](docs/offline/writer/phoenixwriter.md) | | | 达梦 | [doc](docs/offline/reader/dmreader.md) | [doc](docs/offline/writer/dmwriter.md) | +| | Greenplum | [doc](docs/offline/reader/greenplumreader.md) | [doc](docs/offline/writer/greenplumwriter.md) | | | Cassandra | [doc](docs/offline/reader/cassandrareader.md) | [doc](docs/offline/writer/cassandrawriter.md) | | | ODPS | [doc](docs/offline/reader/odpsreader.md) | [doc](docs/offline/writer/odpswriter.md) | | | HBase | [doc](docs/offline/reader/hbasereader.md) | [doc](docs/offline/writer/hbasewriter.md) | @@ -65,11 +66,12 @@ The following databases are currently supported: | | Hive | | [doc](docs/offline/writer/hivewriter.md) | | Stream Synchronization | Kafka | [doc](docs/realTime/reader/kafkareader.md) | [doc](docs/realTime/writer/kafkawriter.md) | | | EMQX | [doc](docs/realTime/reader/emqxreader.md) | [doc](docs/realTime/writer/emqxwriter.md) | +| | RestApi | [doc](docs/realTime/reader/restapireader.md) | [doc](docs/realTime/writer/restapiwriter.md) | | | MySQL Binlog | [doc](docs/realTime/reader/binlogreader.md) | | | | MongoDB Oplog | [doc](docs/realTime/reader/mongodboplogreader.md)| | | | PostgreSQL WAL | [doc](docs/realTime/reader/pgwalreader.md) | | -| | Oracle Logminer| Coming Soon| | -| | SqlServer CDC | Coming Soon | | +| | Oracle Logminer| Coming Soon | | +| | SqlServer CDC | Coming Soon | | # Quick Start diff --git a/README_CH.md b/README_CH.md index 28ac7c27b2..8bb8c11dd0 100644 --- a/README_CH.md +++ b/README_CH.md @@ -51,6 +51,7 @@ FlinkX目前支持下面这些数据库: | | Teradata | [doc](docs/offline/reader/teradatareader.md) | [doc](docs/offline/writer/teradatawriter.md) | | | Phoenix | [doc](docs/offline/reader/phoenixreader.md) | [doc](docs/offline/writer/phoenixwriter.md) | | | 达梦 | [doc](docs/offline/reader/dmreader.md) | [doc](docs/offline/writer/dmwriter.md) | +| | Greenplum | [doc](docs/offline/reader/greenplumreader.md) | [doc](docs/offline/writer/greenplumwriter.md) | | | Cassandra | [doc](docs/offline/reader/cassandrareader.md) | [doc](docs/offline/writer/cassandrawriter.md) | | | ODPS | [doc](docs/offline/reader/odpsreader.md) | [doc](docs/offline/writer/odpswriter.md) | | | HBase | [doc](docs/offline/reader/hbasereader.md) | [doc](docs/offline/writer/hbasewriter.md) | @@ -65,11 +66,12 @@ FlinkX目前支持下面这些数据库: | | Hive | | [doc](docs/offline/writer/hivewriter.md) | | Stream Synchronization | Kafka | [doc](docs/realTime/reader/kafkareader.md) | [doc](docs/realTime/writer/kafkawriter.md) | | | EMQX | [doc](docs/realTime/reader/emqxreader.md) | [doc](docs/realTime/writer/emqxwriter.md) | +| | RestApi | [doc](docs/realTime/reader/restapireader.md) | [doc](docs/realTime/writer/restapiwriter.md) | | | MySQL Binlog | [doc](docs/realTime/reader/binlogreader.md) | | | | MongoDB Oplog | 
[doc](docs/realTime/reader/mongodboplogreader.md)| | | | PostgreSQL WAL | [doc](docs/realTime/reader/pgwalreader.md) | | -| | Oracle Logminer| Coming Soon | | -| | SqlServer CDC | Coming Soon | | +| | Oracle Logminer| Coming Soon | | +| | SqlServer CDC | Coming Soon | | # 快速开始 diff --git a/docs/offline/reader/greenplumreader.md b/docs/offline/reader/greenplumreader.md new file mode 100644 index 0000000000..a8d89386e7 --- /dev/null +++ b/docs/offline/reader/greenplumreader.md @@ -0,0 +1,340 @@ +# Greenplum Reader + + +## 一、插件名称 +名称:**greenplumreader**
+ +## 二、支持的数据源版本 +**Greenplum 5及以上**
+ + +## 三、参数说明 + +- **jdbcUrl** + - 描述:针对关系型数据库的jdbc连接字符串
jdbcUrl参考文档:[greenplum官方文档](https://gpdb.docs.pivotal.io/590/datadirect/datadirect_jdbc.html) + - 必选:是 + - 默认值:无 + + + +- **username** + - 描述:数据源的用户名 + - 必选:是 + - 默认值:无 + + + +- **password** + - 描述:数据源指定用户名的密码 + - 必选:是 + - 默认值:无 + + + +- **where** + - 描述:筛选条件,reader插件根据指定的column、table、where条件拼接SQL,并根据这个SQL进行数据抽取。在实际业务场景中,往往会选择当天的数据进行同步,可以将where条件指定为gmt_create > time。 + - 注意:不可以将where条件指定为limit 10,limit不是SQL的合法where子句。 + - 必选:否 + - 默认值:无 + + + +- **splitPk** + - 描述:当speed配置中的channel大于1时指定此参数,Reader插件根据并发数和此参数指定的字段拼接sql,使每个并发读取不同的数据,提升读取速率。 + - 注意: + - 推荐splitPk使用表主键,因为表主键通常情况下比较均匀,因此切分出来的分片也不容易出现数据热点。 + - 目前splitPk仅支持整形数据切分,`不支持浮点、字符串、日期等其他类型`。如果用户指定其他非支持类型,FlinkX将报错! + - 如果channel大于1但是没有配置此参数,任务将置为失败。 + - 必选:否 + - 默认值:无 + + + +- **fetchSize** + - 描述:读取时每批次读取的数据条数。 + - 注意:此参数的值不可设置过大,否则会读取超时,导致任务失败。 + - 必选:否 + - 默认值:1000 + + + +- **queryTimeOut** + - 描述:查询超时时间,单位秒。 + - 注意:当数据量很大,或者从视图查询,或者自定义sql查询时,可通过此参数指定超时时间。 + - 必选:否 + - 默认值:1000 + + + +- **customSql** + - 描述:自定义的查询语句,如果只指定字段不能满足需求时,可通过此参数指定查询的sql,可以是任意复杂的查询语句。 + - 注意: + - 只能是查询语句,否则会导致任务失败; + - 查询语句返回的字段需要和column列表里的字段对应; + - 当指定了此参数时,connection里指定的table无效; + - 当指定此参数时,column必须指定具体字段信息,不能以*号代替; + - 必选:否 + - 默认值:无 + + + +- **column** + - 描述:需要读取的字段。 + - 格式:支持3种格式
1.读取全部字段,如果字段数量很多,可以使用下面的写法: +```bash +"column":["*"] +``` +2.指定字段名称: +``` +"column":["id","name"] +``` +3.指定具体信息: +```json +"column": [{ + "name": "col", + "type": "datetime", + "format": "yyyy-MM-dd hh:mm:ss", + "value": "value" +}] +``` + + - 属性说明: + - name:字段名称 + - type:字段类型,可以和数据库里的字段类型不一样,程序会做一次类型转换 + - format:如果字段是时间字符串,可以指定时间的格式,将字段类型转为日期格式返回 + - value:如果数据库里不存在指定的字段,则会把value的值作为常量列返回,如果指定的字段存在,当指定字段的值为null时,会以此value值作为默认值返回 + - 必选:是 + - 默认值:无 + + + +- **polling** + - 描述:是否开启间隔轮询,开启后会根据`pollingInterval`轮询间隔时间周期性的从数据库拉取数据。开启间隔轮询还需配置参数`pollingInterval`,`increColumn`,可以选择配置参数`startLocation`。若不配置参数`startLocation`,任务启动时将会从数据库中查询增量字段最大值作为轮询的开始位置。 + - 必选:否 + - 默认值:false + + + +- **pollingInterval** + - 描述:轮询间隔时间,从数据库中拉取数据的间隔时间,默认为5000毫秒。 + - 必选:否 + - 默认值:5000 + + + +- **requestAccumulatorInterval** + - 描述:发送查询累加器请求的间隔时间。 + - 必选:否 + - 默认值:2 + +** + +## 四、配置示例 + +#### 1、基础配置 +```json +{ + "job": { + "content": [{ + "reader": { + "parameter" : { + "column" : [ {"name" : "id", "type": "int"}], + "username" : "gpadmin", + "password" : "gpadmin", + "connection" : [ { + "jdbcUrl" : [ "jdbc:pivotal:greenplum://localhost:5432;DatabaseName=exampledb" ], + "table" : [ "performance" ] + } ], + "where": "", + "customSql": "", + "requestAccumulatorInterval": 2 + }, + "name" : "greenplumreader" + }, + "writer": { + "name": "streamwriter", + "parameter": { + "print": true + } + } + }], + "setting": { + "speed": { + "channel": 1, + "bytes": 0 + }, + "errorLimit": { + "record": 100 + } + } + } +} +``` + +#### 2、多通道 +```json +{ + "job": { + "content": [{ + "reader": { + "parameter" : { + "column" : [ {"name" : "id", "type": "int"}], + "username" : "gpadmin", + "password" : "gpadmin", + "connection" : [ { + "jdbcUrl" : [ "jdbc:pivotal:greenplum://localhost:5432;DatabaseName=exampledb" ], + "table" : [ "performance" ] + } ], + "where": "", + "customSql": "", + "requestAccumulatorInterval": 2 + }, + "name" : "greenplumreader" + }, + "writer": { + "name": "streamwriter", + "parameter": { + "print": true + } + } + }], + "setting": { + "speed": { + "channel": 3, + "bytes": 0 + }, + "errorLimit": { + "record": 100 + } + } + } +} +``` + +#### 3、 指定customsql +```json +{ + "job": { + "content": [{ + "reader": { + "parameter" : { + "column" : [ {"name" : "id", "type": "int"}], + "username" : "gpadmin", + "password" : "gpadmin", + "connection" : [ { + "jdbcUrl" : [ "jdbc:pivotal:greenplum://localhost:5432;DatabaseName=exampledb" ], + "table" : [ "performance" ] + } ], + "where": "", + "customSql": "select id from performance", + "requestAccumulatorInterval": 2 + }, + "name" : "greenplumreader" + }, + "writer": { + "name": "streamwriter", + "parameter": { + "print": true + } + } + }], + "setting": { + "speed": { + "channel": 1, + "bytes": 0 + }, + "errorLimit": { + "record": 100 + } + } + } +} +``` + +#### 4、增量同步指定startLocation +```json +{ + "job": { + "content": [{ + "reader": { + "parameter" : { + "column" : [ {"name" : "id", "type": "int"}], + "username" : "gpadmin", + "password" : "gpadmin", + "connection" : [ { + "jdbcUrl" : [ "jdbc:pivotal:greenplum://localhost:5432;DatabaseName=exampledb" ], + "table" : [ "performance" ] + } ], + "increColumn": "id", + "startLocation": "20", + "where": "", + "customSql": "", + "requestAccumulatorInterval": 2 + }, + "name" : "greenplumreader" + }, + "writer": { + "name": "streamwriter", + "parameter": { + "print": true + } + } + }], + "setting": { + "speed": { + "channel": 1, + "bytes": 0 + }, + "errorLimit": { + "record": 100 + } + } + } +} +``` + +#### 5、间隔轮询 +```json +{ + 
"job": { + "content": [{ + "reader": { + "parameter" : { + "column" : [ {"name" : "id", "type": "int"}], + "username" : "gpadmin", + "password" : "gpadmin", + "connection" : [ { + "jdbcUrl" : [ "jdbc:pivotal:greenplum://localhost:5432;DatabaseName=exampledb" ], + "table" : [ "performance" ] + } ], + "polling": true, + "pollingInterval": 3000, + "customSql": "", + "increColumn": "id", + "startLocation": "20", + "where": "", + "requestAccumulatorInterval": 2 + }, + "name" : "greenplumreader" + }, + "writer": { + "name": "streamwriter", + "parameter": { + "print": true + } + } + }], + "setting": { + "speed": { + "channel": 1, + "bytes": 0 + }, + "errorLimit": { + "record": 100 + } + } + } +} +``` + +
说明:table中配置的表名不能包含'.'(例如不要写成public.test这种带模式名前缀的形式),否则会报如下错误:
`java.sql.SQLException: SQLException: [Pivotal][Greenplum JDBC Driver][Greenplum]cross-database references are not implemented: "public.public.test". ` diff --git a/docs/offline/writer/greenplumwriter.md b/docs/offline/writer/greenplumwriter.md new file mode 100644 index 0000000000..57a069ce88 --- /dev/null +++ b/docs/offline/writer/greenplumwriter.md @@ -0,0 +1,139 @@ +# Greenplum Writer + + +## 一、插件名称 +名称:**greenplumwriter**
+ +## 二、支持的数据源版本 +**Greenplum 5及以上**
** + +## 三、参数说明 + +- **jdbcUrl** + - 描述:针对关系型数据库的jdbc连接字符串 + - 必选:是 + - 默认值:无 + + + +- **username** + - 描述:数据源的用户名 + - 必选:是 + - 默认值:无 + + + +- **password** + - 描述:数据源指定用户名的密码 + - 必选:是 + - 默认值:无 + + + +- **column** + - 描述:目的表需要写入数据的字段,字段之间用英文逗号分隔。例如: "column": ["id","name","age"] + - 必选:是 + - 默认值:否 + - 默认值:无 + + + +- **preSql** + - 描述:写入数据到目的表前,会先执行这里的一组标准语句 + - 必选:否 + - 默认值:无 + + + +- **postSql** + - 描述:写入数据到目的表后,会执行这里的一组标准语句 + - 必选:否 + - 默认值:无 + + + +- **table** + - 描述:目的表的表名称。目前只支持配置单个表,后续会支持多表 + - 必选:是 + - 默认值:无 + + + +- **writeMode** + - 描述:仅支持`insert`操作,可以搭配insertSqlMode使用 + - 必选:是 + - 默认值:无, + + + +- **insertSqlMode** + - 描述:控制写入数据到目标表采用  `COPY table_name [ ( column_name [, ...] ) ] FROM STDIN DELIMITER 'delimiter_character'`语句,提高数据的插入效率 + - 注意: + - 为了避免`insert`过慢带来的问题,此参数被固定为`copy` + - 当指定此参数时,writeMode的值必须为 `insert`,否则设置无效 + - 必选:否 + - 默认值:无 + + + +- **batchSize** + - 描述:一次性批量提交的记录数大小,该值可以极大减少FlinkX与数据库的网络交互次数,并提升整体吞吐量。但是该值设置过大可能会造成FlinkX运行进程OOM情况 + - 必选:否 + - 默认值:1024 + +** + +## 四、配置示例 + +#### 1、insert with copy mode +```json +{ + "job": { + "content": [{ + "reader": { + "parameter": { + "column": [ + { + "name": "id", + "type": "int", + "value": 1 + } + ], + "sliceRecordCount": ["100"] + }, + "name" : "streamreader" + }, + "writer": { + "name": "greenplumwriter", + "parameter": { + "connection": [{ + "jdbcUrl": "jdbc:pivotal:greenplum://localhost:5432;DatabaseName=exampledb", + "table": ["tbl_pay_log_copy"] + }], + "username": "gpadmin", + "password": "gpadmin", + "column": [ + { + "name": "id", + "type": "int" + }], + "writeMode": "insert", + "insertSqlMode": "copy", + "batchSize": 100, + "preSql": ["TRUNCATE tbl_pay_log_copy"], + "postSql": [] + } + } + }], + "setting": { + "speed": { + "channel": 1, + "bytes": 0 + }, + "errorLimit": { + "record": 100 + } + } + } +} +``` diff --git a/docs/offline/writer/pulsarwriter.md b/docs/offline/writer/pulsarwriter.md deleted file mode 100644 index 413e1e0f30..0000000000 --- a/docs/offline/writer/pulsarwriter.md +++ /dev/null @@ -1,76 +0,0 @@ -# Pulsar写入插件(**writer) - -## 1. 配置样例 - -```json -{ - "job": { - "content": [ - { - "reader": { - - }, - "writer": { - "parameter": { - "producerSettings" : { - "producerName":"test-producer" - }, - "topic" : "pulsar_test", - "pulsarServiceUrl" : "pulsar://127.0.0.1:6650" - - }, - "name": "pulsarwriter" - } - } - ], - "setting": { - "errorLimit": { - "record": 1 - }, - "speed": { - "bytes": 1048576, - "channel": 1 - } - } - } -} -``` - -## 2. 参数说明 - -* **name** - - * 描述:插件名,pulsarwriter。 - - * 必选:是 - - * 默认值:无 - -* **topic** - - * 描述:topic。 - - * 必选:是 - - * 默认值:无 - - -* **pulsarServiceUrl** - - * 描述:pulsar地址列表 - - * 必选:是 - - * 默认值:无 - - - -* **producerSettings** - - * 描述:pulsar生产者配置 - - * 必选:是 - - * 默认值:无 - -参考: https://pulsar.apache.org/docs/en/client-libraries-java/#configure-producer \ No newline at end of file diff --git a/docs/quickstart.md b/docs/quickstart.md index fe581442a0..71779f930d 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -18,7 +18,7 @@ cd flink-1.8.5 ## 编译插件 ```bash -mvn clean package -DskipTests +mvn clean package -Dmaven.test.skip=true ``` ## 运行任务 @@ -232,6 +232,7 @@ bin/flinkx -mode yarnPer \ | **pluginLoadMode** | yarn session模式插件加载方式 | 1.**classpath**:提交任务时不上传插件包,需要在yarn-node节点pluginRoot目录下部署插件包,但任务启动速度较快
2.**shipfile**:提交任务时上传pluginRoot目录下的插件包,yarn-node节点不需要部署插件包,任务启动速度取决于插件包的大小及网络环境 | 否 | shipfile | | **confProp** | checkpoint配置 | **flink.checkpoint.interval**:快照生产频率
**flink.checkpoint.stateBackend**:快照存储路径 | 否 | 无 | | **s** | checkpoint快照路径 | | 否 | 无 | +| **p** | 自定义入参,用于替换脚本中的占位符,如脚本中存在占位符${pt1},${pt2},则该参数可配置为pt1=20200101,pt2=20200102| | 否 | 无 | ## 常见问题 diff --git a/docs/realTime/reader/restapireader.md b/docs/realTime/reader/restapireader.md new file mode 100644 index 0000000000..f354c823db --- /dev/null +++ b/docs/realTime/reader/restapireader.md @@ -0,0 +1,75 @@ +# Restapi Reader + + +## 一、插件名称 +名称:restapireader
+ + +## 二、参数说明 + +- **url** + - 描述:连接的url + - 必选:是 + - 默认值:无 + + + +- **method** + - 描述:request的类型,`post`、`get` + - 必选:是 + - 默认值:无 + + + +- **header** + - 描述:需要添加的报头信息 + - 必选:否 + - 默认值:无 + + + + +## 三、配置示例 +```json +{ + "job": { + "content": [ + { + "reader": { + "parameter": { + "url": "http://kudu3/server/index.php?g=Web&c=Mock&o=mock&projectID=58&uri=/api/tiezhu/test/get", + "body": "", + "method": "get", + "params": "" + }, + "name": "restapireader" + }, + "writer": { + "parameter": { + "print": true + }, + "name": "streamwriter" + } + } + ], + "setting": { + "restore": { + "isRestore": false, + "isStream": true + }, + "errorLimit": {}, + "speed": { + "bytes": 0, + "channel": 1 + }, + "log": { + "isLogger": false, + "level": "trace", + "path": "", + "pattern": "" + } + } + } +} +``` + + diff --git a/docs/realTime/writer/restapiwriter.md b/docs/realTime/writer/restapiwriter.md new file mode 100644 index 0000000000..6a515b9a1e --- /dev/null +++ b/docs/realTime/writer/restapiwriter.md @@ -0,0 +1,115 @@ +# Restapi Writer + + +## 一、插件名称 +**名称:restapiwriter**
+ +## 二、参数说明 + +- **url** + - 描述:连接的url + - 必选:是 + - 默认值:无 + + + +- **method** + - 描述:request的类型,`post`、`get` + - 必选:是 + - 默认值:无 + + + +- **header** + - 描述:需要添加的报头信息 + - 必选:否 + - 默认值:无 + + + +- **body** + - 描述:发送的数据中包括params + - 必选:否 + - 默认值:无 + + + +- **params** + - 描述:发送的数据中包括params + - 必选:否 + - 默认值:无 + + + +- **column** + - 描述:如果column不为空,那么将数据和字段名一一对应。如果column为空,则返回每个数据的第一个字段。 + - 必选:否 + - 默认值:无 + + + + +## 三、使用示例 +```json +{ + "job": { + "content": [ + { + "reader": { + "parameter": { + "column": [ + { + "name": "id", + "type": "id" + }, + { + "name": "data", + "type": "string" + } + ], + "sliceRecordCount": [ + "100" + ] + }, + "name": "streamreader" + }, + "writer": { + "parameter": { + "url": "http://kudu3/server/index.php?g=Web&c=Mock&o=mock&projectID=58&uri=/api/tiezhu/test/get", + "header": [], + "body": [], + "method": "post", + "params": {}, + "column": ["id","data"] + }, + "name": "restapiwriter" + } + } + ], + "setting": { + "restore": { + "maxRowNumForCheckpoint": 0, + "isRestore": false, + "isStream": true, + "restoreColumnName": "", + "restoreColumnIndex": 0 + }, + "errorLimit": { + "record": 100 + }, + "speed": { + "bytes": 0, + "channel": 1 + }, + "log": { + "isLogger": false, + "level": "debug", + "path": "", + "pattern": "" + } + } + } +} +``` + + diff --git a/flinkx-binlog/flinkx-binlog-core/pom.xml b/flinkx-binlog/flinkx-binlog-core/pom.xml index d16755f37d..ba3a2485dd 100644 --- a/flinkx-binlog/flinkx-binlog-core/pom.xml +++ b/flinkx-binlog/flinkx-binlog-core/pom.xml @@ -15,12 +15,6 @@ - - com.google.guava - guava - 19.0 - - com.alibaba.otter canal.parse diff --git a/flinkx-binlog/flinkx-binlog-reader/pom.xml b/flinkx-binlog/flinkx-binlog-reader/pom.xml index ad9b9893df..302234b486 100644 --- a/flinkx-binlog/flinkx-binlog-reader/pom.xml +++ b/flinkx-binlog/flinkx-binlog-reader/pom.xml @@ -69,6 +69,10 @@ slf4j-api org.slf4j + + guava + com.google.guava + @@ -100,7 +104,11 @@ com.google.common - shade.binlog.com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-binlog/flinkx-binlog-reader/src/main/java/com/dtstack/flinkx/binlog/reader/BinlogConfig.java b/flinkx-binlog/flinkx-binlog-reader/src/main/java/com/dtstack/flinkx/binlog/reader/BinlogConfig.java index 69d6d51add..0fcbdade5b 100644 --- a/flinkx-binlog/flinkx-binlog-reader/src/main/java/com/dtstack/flinkx/binlog/reader/BinlogConfig.java +++ b/flinkx-binlog/flinkx-binlog-reader/src/main/java/com/dtstack/flinkx/binlog/reader/BinlogConfig.java @@ -32,7 +32,7 @@ public class BinlogConfig implements Serializable { public long period = 1000L; - public int bufferSize = 1024; + public int bufferSize = 256; public boolean pavingData = true; diff --git a/flinkx-carbondata/flinkx-carbondata-reader/pom.xml b/flinkx-carbondata/flinkx-carbondata-reader/pom.xml index 111fc621c3..7b38ba5c8b 100644 --- a/flinkx-carbondata/flinkx-carbondata-reader/pom.xml +++ b/flinkx-carbondata/flinkx-carbondata-reader/pom.xml @@ -61,8 +61,12 @@ shade.carbondatareader.io.netty - com.google - shade.carbondatareader.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbonExpressUtil.java b/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbonExpressUtil.java index f655d14934..77dd6dedba 100644 --- 
a/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbonExpressUtil.java +++ b/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbonExpressUtil.java @@ -19,7 +19,6 @@ import org.apache.carbondata.core.metadata.datatype.DataType; -import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.expression.ColumnExpression; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.expression.LiteralExpression; @@ -30,7 +29,6 @@ import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression; import org.apache.commons.lang3.StringUtils; -import java.util.ArrayList; import java.util.List; diff --git a/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbonFlinkInputSplit.java b/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbonFlinkInputSplit.java index 5f440b5f33..91547fdfed 100644 --- a/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbonFlinkInputSplit.java +++ b/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbonFlinkInputSplit.java @@ -28,7 +28,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; /** @@ -46,15 +45,24 @@ public class CarbonFlinkInputSplit implements InputSplit { public CarbonFlinkInputSplit(List carbonInputSplits, int splitNumber) throws IOException { this.splitNumber = splitNumber; rawSplits = new ArrayList<>(); - rawSplits.addAll(carbonInputSplits.stream().map(this::carbonSplitToRawSplit).collect(Collectors.toList())); + List list = new ArrayList<>(); + for (CarbonInputSplit carbonInputSplit : carbonInputSplits) { + byte[] bytes = carbonSplitToRawSplit(carbonInputSplit); + list.add(bytes); + } + rawSplits.addAll(list); } public List getCarbonInputSplits() throws IOException { - List carbonInputSplits = rawSplits.stream().map(this::rawSplitToCarbonSplit).collect(Collectors.toList()); + List carbonInputSplits = new ArrayList<>(); + for (byte[] rawSplit : rawSplits) { + CarbonInputSplit carbonInputSplit = rawSplitToCarbonSplit(rawSplit); + carbonInputSplits.add(carbonInputSplit); + } return carbonInputSplits; } - private byte[] carbonSplitToRawSplit(CarbonInputSplit carbonInputSplit) { + private byte[] carbonSplitToRawSplit(CarbonInputSplit carbonInputSplit) throws IOException{ ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream dos = new DataOutputStream(baos); try { @@ -62,17 +70,14 @@ private byte[] carbonSplitToRawSplit(CarbonInputSplit carbonInputSplit) { } catch (IOException e) { throw new RuntimeException(e); } finally { - try { - baos.close(); - dos.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } + baos.close(); + dos.close(); } + return baos.toByteArray(); } - private CarbonInputSplit rawSplitToCarbonSplit(byte[] rawSplit) { + private CarbonInputSplit rawSplitToCarbonSplit(byte[] rawSplit) throws IOException{ ByteArrayInputStream bais = new ByteArrayInputStream(rawSplit); DataInputStream dis = new DataInputStream(bais); CarbonInputSplit carbonInputSplit = new CarbonInputSplit(); @@ -81,13 +86,10 @@ private CarbonInputSplit rawSplitToCarbonSplit(byte[] rawSplit) { } catch (IOException e) { throw new RuntimeException(e); } finally { - try { - bais.close(); - dis.close(); - } 
catch (IOException e) { - throw new RuntimeException(e); - } + bais.close(); + dis.close(); } + return carbonInputSplit; } diff --git a/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbondataReader.java b/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbondataReader.java index 6b009c38f8..4942980eeb 100644 --- a/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbondataReader.java +++ b/flinkx-carbondata/flinkx-carbondata-reader/src/main/java/com/dtstack/flinkx/carbondata/reader/CarbondataReader.java @@ -25,6 +25,9 @@ import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -37,6 +40,8 @@ */ public class CarbondataReader extends BaseDataReader { + private static Logger LOG = LoggerFactory.getLogger(CarbondataReader.class); + protected String table; protected String database; @@ -78,7 +83,7 @@ public CarbondataReader(DataTransferConfig config, StreamExecutionEnvironment en columnValue.add((String) sm.get("value")); columnName.add((String) sm.get("name")); } - System.out.println("init column finished"); + LOG.info("init column finished"); } else if (!ConstantValue.STAR_SYMBOL.equals(columns.get(0)) || columns.size() != 1) { throw new IllegalArgumentException("column argument error"); } diff --git a/flinkx-carbondata/flinkx-carbondata-writer/pom.xml b/flinkx-carbondata/flinkx-carbondata-writer/pom.xml index 9b414eafd3..1f45de883a 100644 --- a/flinkx-carbondata/flinkx-carbondata-writer/pom.xml +++ b/flinkx-carbondata/flinkx-carbondata-writer/pom.xml @@ -63,8 +63,12 @@ shade.carbondatawriter.io.netty - com.google - shade.carbondatawriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/CarbonOutputFormat.java b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/CarbonOutputFormat.java index cb78b18f62..0dece097c9 100644 --- a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/CarbonOutputFormat.java +++ b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/CarbonOutputFormat.java @@ -24,7 +24,6 @@ import com.dtstack.flinkx.carbondata.writer.recordwriter.RecordWriterFactory; import com.dtstack.flinkx.exception.WriteRecordException; import com.dtstack.flinkx.outputformat.BaseRichOutputFormat; -import com.dtstack.flinkx.util.DateUtil; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.schema.table.CarbonTable; import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; @@ -215,8 +214,7 @@ protected void writeSingleRecordInternal(Row row) throws WriteRecordException { @Override protected void writeMultipleRecordsInternal() throws Exception { - // CAN NOT HAPPEN - throw new IllegalArgumentException("It can not happen."); + notSupportBatchWrite("CarbondataWriter"); } @Override diff --git a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/CarbondataOutputFormatBuilder.java 
b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/CarbondataOutputFormatBuilder.java index b621038fd4..f2ef5d821c 100644 --- a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/CarbondataOutputFormatBuilder.java +++ b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/CarbondataOutputFormatBuilder.java @@ -89,5 +89,7 @@ protected void checkFormat() { if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){ throw new UnsupportedOperationException("This plugin not support restore from failed state"); } + + notSupportBatchWrite("CarbondataWriter"); } } diff --git a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/dict/DateTimeUtils.java b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/dict/DateTimeUtils.java index 743acbcb23..6999225271 100644 --- a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/dict/DateTimeUtils.java +++ b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/dict/DateTimeUtils.java @@ -58,6 +58,24 @@ public class DateTimeUtils { public static final int TO_YEAR_ZERO = TO_2001 + 7304850; + public static final int TIMESTAMP_STRING_LENGTH = 19; + + public static final String TIMESTAMP_ZERO_SUFFIX = ".0"; + + public static final int DAY_IN_YEAR_29 = 29; + public static final int DAY_IN_YEAR_31 = 31; + public static final int DAY_IN_YEAR_59 = 59; + public static final int DAY_IN_YEAR_60 = 60; + public static final int DAY_IN_YEAR_90 = 90; + public static final int DAY_IN_YEAR_120 = 120; + public static final int DAY_IN_YEAR_151 = 151; + public static final int DAY_IN_YEAR_181 = 181; + public static final int DAY_IN_YEAR_212 = 212; + public static final int DAY_IN_YEAR_243 = 243; + public static final int DAY_IN_YEAR_273 = 273; + public static final int DAY_IN_YEAR_304 = 304; + public static final int DAY_IN_YEAR_334 = 334; + public static final ThreadLocal THREAD_LOCAL_LOCAL_TIMEZONE = new ThreadLocal() { @Override public TimeZone initialValue() { @@ -91,8 +109,9 @@ public static String timestampToString(long us) { String timestampString = ts.toString(); String formatted = THREAD_LOCAL_TIMESTAMP_FORMAT.get().format(ts); - if(timestampString.length() > 19 && !".0".equals(timestampString.substring(19))) { - formatted += timestampString.substring(19); + if(timestampString.length() > TIMESTAMP_STRING_LENGTH + && !TIMESTAMP_ZERO_SUFFIX.equals(timestampString.substring(TIMESTAMP_STRING_LENGTH))) { + formatted += timestampString.substring(TIMESTAMP_STRING_LENGTH); } return formatted; } @@ -231,33 +250,33 @@ public static int getMonth(int date) { int year = tuple2.getField(0); int dayInYear = tuple2.getField(1); if (isLeapYear(year)) { - if (dayInYear == 60) { + if (dayInYear == DAY_IN_YEAR_60) { return 2; - } else if (dayInYear > 60) { + } else if (dayInYear > DAY_IN_YEAR_60) { dayInYear = dayInYear - 1; } } - if (dayInYear <= 31) { + if (dayInYear <= DAY_IN_YEAR_31) { return 1; - } else if (dayInYear <= 59) { + } else if (dayInYear <= DAY_IN_YEAR_59) { return 2; - } else if (dayInYear <= 90) { + } else if (dayInYear <= DAY_IN_YEAR_90) { return 3; - } else if (dayInYear <= 120) { + } else if (dayInYear <= DAY_IN_YEAR_120) { return 4; - } else if (dayInYear <= 151) { + } else if (dayInYear <= DAY_IN_YEAR_151) { return 5; - } else if (dayInYear <= 181) { + } 
else if (dayInYear <= DAY_IN_YEAR_181) { return 6; - } else if (dayInYear <= 212) { + } else if (dayInYear <= DAY_IN_YEAR_212) { return 7; - } else if (dayInYear <= 243) { + } else if (dayInYear <= DAY_IN_YEAR_243) { return 8; - } else if (dayInYear <= 273) { + } else if (dayInYear <= DAY_IN_YEAR_273) { return 9; - } else if (dayInYear <= 304) { + } else if (dayInYear <= DAY_IN_YEAR_304) { return 10; - } else if (dayInYear <= 334) { + } else if (dayInYear <= DAY_IN_YEAR_334) { return 11; } else { return 12; @@ -274,39 +293,38 @@ public static int getDayOfMonth(int date) { int year = tuple2.getField(0); int dayInYear = tuple2.getField(1); if (isLeapYear(year)) { - if (dayInYear == 60) { - return 29; - } else if (dayInYear > 60) { + if (dayInYear == DAY_IN_YEAR_60) { + return DAY_IN_YEAR_29; + } else if (dayInYear > DAY_IN_YEAR_60) { dayInYear = dayInYear - 1; } } - if (dayInYear <= 31) { + if (dayInYear <= DAY_IN_YEAR_31) { return dayInYear; - } else if (dayInYear <= 59) { - return dayInYear - 31; - } else if (dayInYear <= 90) { - return dayInYear - 59; - } else if (dayInYear <= 120) { - return dayInYear - 90; - } else if (dayInYear <= 151) { - return dayInYear - 120; - } else if (dayInYear <= 181) { - return dayInYear - 151; - } else if (dayInYear <= 212) { - return dayInYear - 181; - } else if (dayInYear <= 243) { - return dayInYear - 212; - } else if (dayInYear <= 273) { - return dayInYear - 243; - } else if (dayInYear <= 304) { - return dayInYear - 273; - } else if (dayInYear <= 334) { - return dayInYear - 304; + } else if (dayInYear <= DAY_IN_YEAR_59) { + return dayInYear - DAY_IN_YEAR_31; + } else if (dayInYear <= DAY_IN_YEAR_90) { + return dayInYear - DAY_IN_YEAR_59; + } else if (dayInYear <= DAY_IN_YEAR_120) { + return dayInYear - DAY_IN_YEAR_90; + } else if (dayInYear <= DAY_IN_YEAR_151) { + return dayInYear - DAY_IN_YEAR_120; + } else if (dayInYear <= DAY_IN_YEAR_181) { + return dayInYear - DAY_IN_YEAR_151; + } else if (dayInYear <= DAY_IN_YEAR_212) { + return dayInYear - DAY_IN_YEAR_181; + } else if (dayInYear <= DAY_IN_YEAR_243) { + return dayInYear - DAY_IN_YEAR_212; + } else if (dayInYear <= DAY_IN_YEAR_273) { + return dayInYear - DAY_IN_YEAR_243; + } else if (dayInYear <= DAY_IN_YEAR_304) { + return dayInYear - DAY_IN_YEAR_273; + } else if (dayInYear <= DAY_IN_YEAR_334) { + return dayInYear - DAY_IN_YEAR_304; } else { - return dayInYear - 334; + return dayInYear - DAY_IN_YEAR_334; } - } diff --git a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/dict/ExternalCatalogUtils.java b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/dict/ExternalCatalogUtils.java index 1b83ff80fe..dbff692bbb 100644 --- a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/dict/ExternalCatalogUtils.java +++ b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/dict/ExternalCatalogUtils.java @@ -68,11 +68,4 @@ public static String escapePathName(String path) { private static boolean needsEscaping(char c) { return c >= 0 && c < charToEscape.size() && charToEscape.get(c); } - - public static void main(String[] args) { - String s = "2018-01-01 08:08:08"; - String d = escapePathName(s); - System.out.println(d); - } - } diff --git a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/AbstractRecordWriter.java 
b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/AbstractRecordWriter.java index 188601b450..63e01e580c 100644 --- a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/AbstractRecordWriter.java +++ b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/AbstractRecordWriter.java @@ -39,11 +39,22 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapreduce.*; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.TaskID; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import java.io.IOException; -import java.util.*; +import java.util.List; +import java.util.ArrayList; +import java.util.Random; +import java.util.UUID; +import java.util.Map; +import java.util.Collections; +import java.util.HashMap; /** * Abstract record writer wrapper diff --git a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/CarbonPartitionRecordWriter.java b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/CarbonPartitionRecordWriter.java index 3a3236ca76..b2a6b20d30 100644 --- a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/CarbonPartitionRecordWriter.java +++ b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/CarbonPartitionRecordWriter.java @@ -21,10 +21,7 @@ import com.dtstack.flinkx.carbondata.writer.dict.CarbonTypeConverter; -import com.dtstack.flinkx.util.DateUtil; -import com.dtstack.flinkx.util.StringUtil; import org.apache.carbondata.core.metadata.datatype.DataType; -import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.schema.PartitionInfo; import org.apache.carbondata.core.metadata.schema.partition.PartitionType; import org.apache.carbondata.core.metadata.schema.table.CarbonTable; diff --git a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/HivePartitionRecordWriter.java b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/HivePartitionRecordWriter.java index 38dc8e42e8..a74982de1b 100644 --- a/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/HivePartitionRecordWriter.java +++ b/flinkx-carbondata/flinkx-carbondata-writer/src/main/java/com/dtstack/flinkx/carbondata/writer/recordwriter/HivePartitionRecordWriter.java @@ -34,11 +34,21 @@ import org.apache.carbondata.processing.util.CarbonLoaderUtil; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.*; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.TaskID; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.TaskType; import 
org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import java.io.IOException; -import java.util.*; +import java.util.List; +import java.util.Map; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Random; +import java.util.UUID; import java.util.stream.Collectors; diff --git a/flinkx-emqx/flinkx-emqx-core/src/main/java/com/dtstack/flinkx/emqx/decoder/IDecode.java b/flinkx-carbondata/flinkx-carbondata-writer/src/test/java/com/dtstack/flinkx/carbondata/writer/dict/DateTimeUtilsTest.java similarity index 73% rename from flinkx-emqx/flinkx-emqx-core/src/main/java/com/dtstack/flinkx/emqx/decoder/IDecode.java rename to flinkx-carbondata/flinkx-carbondata-writer/src/test/java/com/dtstack/flinkx/carbondata/writer/dict/DateTimeUtilsTest.java index ce63e6fd15..11a8f7136e 100644 --- a/flinkx-emqx/flinkx-emqx-core/src/main/java/com/dtstack/flinkx/emqx/decoder/IDecode.java +++ b/flinkx-carbondata/flinkx-carbondata-writer/src/test/java/com/dtstack/flinkx/carbondata/writer/dict/DateTimeUtilsTest.java @@ -6,32 +6,29 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - *

+ * * http://www.apache.org/licenses/LICENSE-2.0 - *

+ * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ -package com.dtstack.flinkx.emqx.decoder; -import java.util.Map; + +package com.dtstack.flinkx.carbondata.writer.dict; + +import org.junit.Test; /** - * Date: 2020/02/12 - * Company: www.dtstack.com - * - * @author tudou + * @author jiangbo + * @date 2020/3/27 */ -public interface IDecode { - - /** - * 消息解码 - * @param message - * @return - */ - Map decode(String message); +public class DateTimeUtilsTest { + @Test + public void testGetMonth() { + DateTimeUtils.getMonth(10); + } } diff --git a/flinkx-cassandra/flinkx-cassandra-core/pom.xml b/flinkx-cassandra/flinkx-cassandra-core/pom.xml index 6dc4304d67..86b7f12abf 100644 --- a/flinkx-cassandra/flinkx-cassandra-core/pom.xml +++ b/flinkx-cassandra/flinkx-cassandra-core/pom.xml @@ -1,6 +1,6 @@ - flinkx-cassandra diff --git a/flinkx-cassandra/flinkx-cassandra-core/src/main/java/com/dtstack/flinkx/cassandra/CassandraConfigKeys.java b/flinkx-cassandra/flinkx-cassandra-core/src/main/java/com/dtstack/flinkx/cassandra/CassandraConfigKeys.java index 072a2b0096..a5a67df83d 100644 --- a/flinkx-cassandra/flinkx-cassandra-core/src/main/java/com/dtstack/flinkx/cassandra/CassandraConfigKeys.java +++ b/flinkx-cassandra/flinkx-cassandra-core/src/main/java/com/dtstack/flinkx/cassandra/CassandraConfigKeys.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.dtstack.flinkx.cassandra; /** diff --git a/flinkx-cassandra/flinkx-cassandra-core/src/main/java/com/dtstack/flinkx/cassandra/CassandraUtil.java b/flinkx-cassandra/flinkx-cassandra-core/src/main/java/com/dtstack/flinkx/cassandra/CassandraUtil.java index fa89243336..bc0e631faa 100644 --- a/flinkx-cassandra/flinkx-cassandra-core/src/main/java/com/dtstack/flinkx/cassandra/CassandraUtil.java +++ b/flinkx-cassandra/flinkx-cassandra-core/src/main/java/com/dtstack/flinkx/cassandra/CassandraUtil.java @@ -32,7 +32,6 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.net.InetAddress; -import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.sql.Time; import java.util.Date; @@ -222,7 +221,7 @@ private static Optional objectToBytes(T obj){ * @param pos 位置 * @param sqlType cql类型 * @param value 值 - * @throws RuntimeException 对于不支持的数据类型,抛出异常 + * @throws Exception 对于不支持的数据类型,抛出异常 */ public static void bindColumn(BoundStatement ps, int pos, DataType sqlType, Object value) throws Exception { if (value != null) { diff --git a/flinkx-cassandra/flinkx-cassandra-core/src/test/java/com/dtstack/flinkx/cassandra/TestCassandraUtil.java b/flinkx-cassandra/flinkx-cassandra-core/src/test/java/com/dtstack/flinkx/cassandra/TestCassandraUtil.java index 179e420193..6d5dde5398 100644 --- a/flinkx-cassandra/flinkx-cassandra-core/src/test/java/com/dtstack/flinkx/cassandra/TestCassandraUtil.java +++ b/flinkx-cassandra/flinkx-cassandra-core/src/test/java/com/dtstack/flinkx/cassandra/TestCassandraUtil.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.cassandra; import com.datastax.driver.core.*; diff --git a/flinkx-cassandra/flinkx-cassandra-reader/pom.xml b/flinkx-cassandra/flinkx-cassandra-reader/pom.xml index feca24f0f9..804af7c5d6 100644 --- a/flinkx-cassandra/flinkx-cassandra-reader/pom.xml +++ b/flinkx-cassandra/flinkx-cassandra-reader/pom.xml @@ -1,6 +1,6 @@ - flinkx-cassandra diff --git a/flinkx-cassandra/flinkx-cassandra-reader/src/main/java/com/dtstack/flinkx/cassandra/reader/CassandraConstants.java b/flinkx-cassandra/flinkx-cassandra-reader/src/main/java/com/dtstack/flinkx/cassandra/reader/CassandraConstants.java index cd8da1f553..df6dd94a03 100644 --- a/flinkx-cassandra/flinkx-cassandra-reader/src/main/java/com/dtstack/flinkx/cassandra/reader/CassandraConstants.java +++ b/flinkx-cassandra/flinkx-cassandra-reader/src/main/java/com/dtstack/flinkx/cassandra/reader/CassandraConstants.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.cassandra.reader; /** diff --git a/flinkx-cassandra/flinkx-cassandra-reader/src/main/java/com/dtstack/flinkx/cassandra/reader/CassandraInputFormat.java b/flinkx-cassandra/flinkx-cassandra-reader/src/main/java/com/dtstack/flinkx/cassandra/reader/CassandraInputFormat.java index f5fdbe4ae2..52f1311861 100644 --- a/flinkx-cassandra/flinkx-cassandra-reader/src/main/java/com/dtstack/flinkx/cassandra/reader/CassandraInputFormat.java +++ b/flinkx-cassandra/flinkx-cassandra-reader/src/main/java/com/dtstack/flinkx/cassandra/reader/CassandraInputFormat.java @@ -20,7 +20,6 @@ import com.datastax.driver.core.*; import com.dtstack.flinkx.cassandra.CassandraUtil; -import com.dtstack.flinkx.constants.ConstantValue; import com.dtstack.flinkx.inputformat.BaseRichInputFormat; import com.dtstack.flinkx.reader.MetaColumn; import com.google.common.base.Preconditions; @@ -97,7 +96,6 @@ public Row nextRecordInternal(Row row) { Object value = CassandraUtil.getData(cqlRow, definitions.get(i).getType(), definitions.get(i).getName()); row.setField(i, value); } - LOG.info(row.toString()); return row; } @@ -108,7 +106,7 @@ protected void closeInternal() { } @Override - public InputSplit[] createInputSplitsInternal(int minNumSplits) { + protected InputSplit[] createInputSplitsInternal(int minNumSplits) { ArrayList splits = new ArrayList<>(); try { @@ -130,12 +128,12 @@ public InputSplit[] createInputSplitsInternal(int minNumSplits) { private InputSplit[] splitJob(int minNumSplits, ArrayList splits) { if(minNumSplits <= 1) { splits.add(new CassandraInputSplit()); - return splits.toArray(new CassandraInputSplit[splits.size()]); + return splits.toArray(new CassandraInputSplit[0]); } if(whereString != null && whereString.toLowerCase().contains(CassandraConstants.TOKEN)) { splits.add(new CassandraInputSplit()); - return splits.toArray(new CassandraInputSplit[splits.size()]); + return splits.toArray(new CassandraInputSplit[0]); } Session session = CassandraUtil.getSession(cassandraConfig, ""); String partitioner = session.getCluster().getMetadata().getPartitioner(); @@ -152,8 +150,7 @@ private InputSplit[] splitJob(int minNumSplits, ArrayList s } splits.add(new CassandraInputSplit(l.toString(), r.toString())); } - } - else if(partitioner.endsWith(CassandraConstants.MURMUR3_PARTITIONER)) { + }else if(partitioner.endsWith(CassandraConstants.MURMUR3_PARTITIONER)) { BigDecimal minToken = BigDecimal.valueOf(Long.MIN_VALUE); BigDecimal maxToken = BigDecimal.valueOf(Long.MAX_VALUE); BigDecimal step = maxToken.subtract(minToken) @@ -166,11 +163,10 @@ else if(partitioner.endsWith(CassandraConstants.MURMUR3_PARTITIONER)) { } splits.add(new CassandraInputSplit(String.valueOf(l), String.valueOf(r))); } - } - else { + }else { splits.add(new CassandraInputSplit()); } - return splits.toArray(new CassandraInputSplit[splits.size()]); + return splits.toArray(new CassandraInputSplit[0]); } /** @@ -181,7 +177,7 @@ else 
if(partitioner.endsWith(CassandraConstants.MURMUR3_PARTITIONER)) { private String getQueryString(CassandraInputSplit inputSplit) { StringBuilder columns = new StringBuilder(); if (columnMeta == null) { - columns.append(ConstantValue.STAR_SYMBOL); + columns.append("*"); } else { for(MetaColumn column : columnMeta) { if(columns.length() > 0 ) { diff --git a/flinkx-cassandra/flinkx-cassandra-writer/pom.xml b/flinkx-cassandra/flinkx-cassandra-writer/pom.xml index 565ef3ca4f..73593ae1c9 100644 --- a/flinkx-cassandra/flinkx-cassandra-writer/pom.xml +++ b/flinkx-cassandra/flinkx-cassandra-writer/pom.xml @@ -1,6 +1,6 @@ - flinkx-cassandra diff --git a/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraOutputFormat.java b/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraOutputFormat.java index f16eca03ab..a280dbf059 100644 --- a/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraOutputFormat.java +++ b/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraOutputFormat.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.dtstack.flinkx.cassandra.writer; import com.datastax.driver.core.*; @@ -102,7 +119,6 @@ protected void writeSingleRecordInternal(Row row) throws WriteRecordException { throw new WriteRecordException("类型转换失败", e.getCause(), i, row); } } - LOG.info("insertSql: {}" + boundStatement); session.execute(boundStatement); } @@ -110,12 +126,14 @@ protected void writeSingleRecordInternal(Row row) throws WriteRecordException { protected void writeMultipleRecordsInternal() throws Exception { if (batchSize > 1) { BoundStatement boundStatement = pstmt.bind(); - for (Row row : rows) { - for (int i = 0; i < columnMeta.size(); i++) { - Object value = row.getField(i); - CassandraUtil.bindColumn(boundStatement, i, columnTypes.get(i), value); + for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) { + for (int columnIndex = 0; columnIndex < columnMeta.size(); columnIndex++) { + Object value = rows.get(rowIndex).getField(columnIndex); + CassandraUtil.bindColumn(boundStatement, columnIndex, columnTypes.get(columnIndex), value); + } + if ((rowIndex % 1000) == 0) { + LOG.info("insertSql: {}", boundStatement); } - LOG.info("insertSql: {}" + boundStatement); if(asyncWrite) { unConfirmedWrite.add(session.executeAsync(boundStatement)); if (unConfirmedWrite.size() >= batchSize) { @@ -152,7 +170,7 @@ protected void writeMultipleRecordsInternal() throws Exception { } @Override - public void closeInternal() throws IOException { + public void closeInternal() { CassandraUtil.close(session); } } diff --git a/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraOutputFormatBuilder.java b/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraOutputFormatBuilder.java index 4446f1633f..56a170811e 100644 --- a/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraOutputFormatBuilder.java +++ b/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraOutputFormatBuilder.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.dtstack.flinkx.cassandra.writer; import com.dtstack.flinkx.outputformat.BaseRichOutputFormatBuilder; diff --git a/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraWriter.java b/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraWriter.java index 2e2f0bbc9c..1e89082846 100644 --- a/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraWriter.java +++ b/flinkx-cassandra/flinkx-cassandra-writer/src/main/java/com/dtstack/flinkx/cassandra/writer/CassandraWriter.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.cassandra.writer; import com.dtstack.flinkx.config.DataTransferConfig; @@ -13,8 +30,6 @@ import java.util.Map; import static com.dtstack.flinkx.cassandra.CassandraConfigKeys.*; -import static com.dtstack.flinkx.cassandra.CassandraConfigKeys.KEY_CONSITANCY_LEVEL; -import static com.dtstack.flinkx.cassandra.CassandraConfigKeys.KEY_MAX_PENDING_CONNECTION; /** * diff --git a/flinkx-cassandra/pom.xml b/flinkx-cassandra/pom.xml index d788a4bea1..917b1dc9f8 100644 --- a/flinkx-cassandra/pom.xml +++ b/flinkx-cassandra/pom.xml @@ -1,6 +1,6 @@ - flinkx-all diff --git a/flinkx-clickhouse/flinkx-clickhouse-core/src/main/java/com/dtstack/flinkx/clickhouse/core/ClickhouseUtil.java b/flinkx-clickhouse/flinkx-clickhouse-core/src/main/java/com/dtstack/flinkx/clickhouse/core/ClickhouseUtil.java index b33160d10d..c6103d9dfa 100644 --- a/flinkx-clickhouse/flinkx-clickhouse-core/src/main/java/com/dtstack/flinkx/clickhouse/core/ClickhouseUtil.java +++ b/flinkx-clickhouse/flinkx-clickhouse-core/src/main/java/com/dtstack/flinkx/clickhouse/core/ClickhouseUtil.java @@ -23,6 +23,7 @@ import java.sql.Connection; import java.sql.SQLException; +import java.sql.Statement; import java.util.Properties; /** @@ -43,8 +44,10 @@ public static Connection getConnection(String url, String username, String passw for (int i = 0; i < MAX_RETRY_TIMES && failed; ++i) { try { conn = new BalancedClickhouseDataSource(url, properties).getConnection(); - conn.createStatement().execute("select 111"); - failed = false; + try (Statement statement = conn.createStatement()) { + statement.execute("select 111"); + failed = false; + } } catch (Exception e) { if (conn != null) { conn.close(); diff --git a/flinkx-clickhouse/flinkx-clickhouse-reader/pom.xml b/flinkx-clickhouse/flinkx-clickhouse-reader/pom.xml index 01d798e35d..4c5ef8cf4c 100644 --- a/flinkx-clickhouse/flinkx-clickhouse-reader/pom.xml +++ b/flinkx-clickhouse/flinkx-clickhouse-reader/pom.xml @@ -56,6 +56,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + 
shade.core.com.google.thirdparty + + diff --git a/flinkx-clickhouse/flinkx-clickhouse-writer/pom.xml b/flinkx-clickhouse/flinkx-clickhouse-writer/pom.xml index 94e72635a6..8c3d65ad12 100644 --- a/flinkx-clickhouse/flinkx-clickhouse-writer/pom.xml +++ b/flinkx-clickhouse/flinkx-clickhouse-writer/pom.xml @@ -56,6 +56,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-core/pom.xml b/flinkx-core/pom.xml index 223aae3824..a0bd729335 100644 --- a/flinkx-core/pom.xml +++ b/flinkx-core/pom.xml @@ -110,33 +110,6 @@ ${flink.version} - - junit - junit - 4.11 - test - - - - org.powermock - powermock-api-mockito - 1.7.0 - test - - - - org.powermock - powermock-module-junit4 - 1.7.0 - test - - - - org.hamcrest - hamcrest-all - 1.3 - test - org.apache.httpcomponents httpcore @@ -200,6 +173,10 @@ org.slf4j:* org.apache.httpcomponents:* io.prometheus:* + org.apache.avro:* + org.codehaus.jackson:* + com.fasterxml.jackson.core:* + commons-*:* @@ -207,6 +184,10 @@ com.google.common shade.core.com.google.common + + com.google.thirdparty + shade.core.com.google.thirdparty + org.apache.http shade.core.org.apache.http diff --git a/flinkx-core/src/main/java/com/dtstack/flink/api/java/MyLocalStreamEnvironment.java b/flinkx-core/src/main/java/com/dtstack/flink/api/java/MyLocalStreamEnvironment.java index 56fb01de1c..c3dcf4ca9e 100644 --- a/flinkx-core/src/main/java/com/dtstack/flink/api/java/MyLocalStreamEnvironment.java +++ b/flinkx-core/src/main/java/com/dtstack/flink/api/java/MyLocalStreamEnvironment.java @@ -22,7 +22,6 @@ import org.apache.flink.api.common.InvalidProgramException; import org.apache.flink.api.common.JobExecutionResult; import org.apache.flink.api.java.ExecutionEnvironment; -import org.apache.flink.configuration.ConfigConstants; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.RestOptions; import org.apache.flink.configuration.TaskManagerOptions; diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java index bcac73e949..b36613bdb7 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/Main.java @@ -30,7 +30,7 @@ import com.dtstack.flinkx.writer.DataWriterFactory; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.io.Charsets; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.JobExecutionResult; import org.apache.flink.api.common.restartstrategy.RestartStrategies; import org.apache.flink.api.common.time.Time; diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/authenticate/KerberosUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/authenticate/KerberosUtil.java index c89f0b2274..8fde360336 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/authenticate/KerberosUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/authenticate/KerberosUtil.java @@ -46,6 +46,7 @@ public class KerberosUtil { private static final String SP = "/"; private static final String KEY_SFTP_CONF = "sftpConf"; + private static final String KEY_PRINCIPAL = "principal"; private static final String KEY_REMOTE_DIR = "remoteDir"; private static final String KEY_USE_LOCAL_FILE = "useLocalFile"; public static final String KEY_PRINCIPAL_FILE = "principalFile"; @@ -89,6 +90,15 @@ public static UserGroupInformation 
loginAndReturnUgi(Configuration conf, String return UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab); } + public static String getPrincipal(Map configMap, String keytabPath) { + String principal = MapUtils.getString(configMap, KEY_PRINCIPAL); + if (StringUtils.isEmpty(principal)) { + principal = findPrincipalFromKeytab(keytabPath); + } + + return principal; + } + private static void reloadKrb5Conf(Configuration conf){ String krb5File = conf.get(KEY_JAVA_SECURITY_KRB5_CONF); LOG.info("set krb5 file:{}", krb5File); @@ -188,7 +198,7 @@ private static String loadFromSftp(Map config, String fileName){ } } - public static String findPrincipalFromKeytab(String keytabFile) { + private static String findPrincipalFromKeytab(String keytabFile) { KeyTab keyTab = KeyTab.getInstance(keytabFile); for (KeyTabEntry entry : keyTab.getEntries()) { String principal = entry.getService().getName(); diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/authenticate/SftpHandler.java b/flinkx-core/src/main/java/com/dtstack/flinkx/authenticate/SftpHandler.java index 90c0e127fb..dc1a70a1b4 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/authenticate/SftpHandler.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/authenticate/SftpHandler.java @@ -20,13 +20,18 @@ package com.dtstack.flinkx.authenticate; import com.dtstack.flinkx.util.RetryUtil; -import com.jcraft.jsch.*; +import com.jcraft.jsch.ChannelSftp; +import com.jcraft.jsch.JSch; +import com.jcraft.jsch.Session; +import com.jcraft.jsch.SftpException; import org.apache.commons.collections.MapUtils; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; import java.util.Map; import java.util.Properties; import java.util.concurrent.Callable; @@ -131,21 +136,11 @@ private void downloadFile(String ftpPath, String localPath){ throw new RuntimeException("File not exist on sftp:" + ftpPath); } - OutputStream os = null; - try { - os = new FileOutputStream(new File(localPath)); + try (OutputStream os = new FileOutputStream(new File(localPath))){ channelSftp.get(ftpPath, os); + os.flush(); } catch (Exception e){ throw new RuntimeException("download file from sftp error", e); - } finally { - if(os != null){ - try { - os.flush(); - os.close(); - } catch (IOException e) { - LOG.warn("", e); - } - } } } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderManager.java b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderManager.java index 9cb8f0220a..a6276ff42a 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderManager.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/ClassLoaderManager.java @@ -25,7 +25,13 @@ import java.net.URL; import java.net.URLClassLoader; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Arrays; import java.util.concurrent.ConcurrentHashMap; /** diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/PluginUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/PluginUtil.java index fd51f7e65c..efee645720 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/PluginUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/classloader/PluginUtil.java @@ -24,9 +24,7 @@ import 
java.io.File; import java.net.MalformedURLException; import java.net.URL; -import java.util.ArrayList; import java.util.HashSet; -import java.util.List; import java.util.Set; /** diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/config/AbstractConfig.java b/flinkx-core/src/main/java/com/dtstack/flinkx/config/AbstractConfig.java index df46175089..087c5302bd 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/config/AbstractConfig.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/config/AbstractConfig.java @@ -76,9 +76,9 @@ public Object getVal(String key) { Object obj = internalMap.get(key); if (obj instanceof LinkedTreeMap) { LinkedTreeMap treeMap = (LinkedTreeMap) obj; - Map newMap = new HashMap<>(treeMap.size()); - newMap.putAll(treeMap); - return newMap; + Map map = new HashMap<>(Math.max((int) (treeMap.size()/.75f) + 1, 16)); + map.putAll(treeMap); + return map; } return obj; } @@ -109,10 +109,10 @@ public int getIntVal(String key, int defaultValue) { return defaultValue; } if(ret instanceof Integer) { - return ((Integer)ret).intValue(); + return (Integer) ret; } if(ret instanceof String) { - return Integer.valueOf((String)ret).intValue(); + return Integer.parseInt((String) ret); } if(ret instanceof Long) { return ((Long)ret).intValue(); @@ -144,7 +144,7 @@ public long getLongVal(String key, long defaultValue) { return ((Integer)ret).longValue(); } if(ret instanceof String) { - return Long.valueOf((String)ret); + return Long.parseLong((String)ret); } if(ret instanceof Float) { return ((Float)ret).longValue(); @@ -176,7 +176,7 @@ public double getDoubleVal(String key, double defaultValue) { return ((Integer) ret).doubleValue(); } if (ret instanceof String) { - return Double.valueOf((String) ret); + return Double.parseDouble((String) ret); } if (ret instanceof Float) { return ((Float) ret).doubleValue(); diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/config/DataTransferConfig.java b/flinkx-core/src/main/java/com/dtstack/flinkx/config/DataTransferConfig.java index 5e58d85e5a..f3e732da0b 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/config/DataTransferConfig.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/config/DataTransferConfig.java @@ -90,7 +90,7 @@ private static void checkConfig(DataTransferConfig config) { Preconditions.checkNotNull(readerParameter, "Must specify parameter for reader"); - // 检查我writer配置 + // 检查writer配置 WriterConfig writerConfig = content.getWriter(); Preconditions.checkNotNull(writerConfig, "Must specify a writer element"); Preconditions.checkNotNull(writerConfig.getName(), "Must specify the writer name"); diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/config/RestartConfig.java b/flinkx-core/src/main/java/com/dtstack/flinkx/config/RestartConfig.java index 35068840ab..f83a394399 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/config/RestartConfig.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/config/RestartConfig.java @@ -49,7 +49,7 @@ public RestartConfig(Map map) { } public static RestartConfig defaultConfig(){ - Map map = new HashMap<>(1); + Map map = new HashMap<>(16); return new RestartConfig(map); } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/config/RestoreConfig.java b/flinkx-core/src/main/java/com/dtstack/flinkx/config/RestoreConfig.java index c85c1bb376..ef886bf7e7 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/config/RestoreConfig.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/config/RestoreConfig.java @@ -18,7 +18,7 @@ package 
com.dtstack.flinkx.config; -import java.util.HashMap; +import java.util.Collections; import java.util.Map; /** @@ -40,8 +40,7 @@ public RestoreConfig(Map map) { } public static RestoreConfig defaultConfig(){ - Map map = new HashMap<>(1); - map.put(KEY_IS_RESTORE, false); + Map map = Collections.singletonMap(KEY_IS_RESTORE, false); return new RestoreConfig(map); } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/config/TestConfig.java b/flinkx-core/src/main/java/com/dtstack/flinkx/config/TestConfig.java index 2a15601654..dbcd774858 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/config/TestConfig.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/config/TestConfig.java @@ -37,7 +37,7 @@ public TestConfig(Map map) { } public static TestConfig defaultConfig(){ - Map map = new HashMap<>(1); + Map map = new HashMap<>(16); return new TestConfig(map); } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/constants/ConstantValue.java b/flinkx-core/src/main/java/com/dtstack/flinkx/constants/ConstantValue.java index 46de3749fb..d94589fbc6 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/constants/ConstantValue.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/constants/ConstantValue.java @@ -51,4 +51,15 @@ public class ConstantValue { public static final String SYSTEM_PROPERTIES_KEY_FILE_ENCODING = "file.encoding"; public static final String OS_WINDOWS = "windows"; + + public static final String TIME_SECOND_SUFFIX = "sss"; + public static final String TIME_MILLISECOND_SUFFIX = "SSS"; + + public static final String FILE_SUFFIX_XML = ".xml"; + + public static final int MAX_BATCH_SIZE = 200000; + + public static final long STORE_SIZE_G = 1024L * 1024 * 1024; + + public static final long STORE_SIZE_M = 1024L * 1024; } diff --git a/flinkx-emqx/flinkx-emqx-core/src/main/java/com/dtstack/flinkx/emqx/decoder/PlainDecoder.java b/flinkx-core/src/main/java/com/dtstack/flinkx/decoder/DecodeEnum.java similarity index 69% rename from flinkx-emqx/flinkx-emqx-core/src/main/java/com/dtstack/flinkx/emqx/decoder/PlainDecoder.java rename to flinkx-core/src/main/java/com/dtstack/flinkx/decoder/DecodeEnum.java index 63d92b8463..ce360c6bc0 100644 --- a/flinkx-emqx/flinkx-emqx-core/src/main/java/com/dtstack/flinkx/emqx/decoder/PlainDecoder.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/decoder/DecodeEnum.java @@ -6,31 +6,41 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - *
+ * * http://www.apache.org/licenses/LICENSE-2.0 - *
+ * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ -package com.dtstack.flinkx.emqx.decoder; -import java.util.Collections; -import java.util.Map; +package com.dtstack.flinkx.decoder; /** - * Date: 2020/02/12 - * Company: www.dtstack.com - * - * @author tudou + * @author jiangbo + * @date 2020/3/23 */ -public class PlainDecoder implements IDecode { +public enum DecodeEnum { + + /** + * json format + */ + JSON("json"), - @Override - public Map decode(final String message) { - return Collections.singletonMap("message", message); + /** + * text format + */ + PLAIN("plain"); + + private String name; + + DecodeEnum(String name) { + this.name = name; } + public String getName() { + return name; + } } diff --git a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/decoder/IDecode.java b/flinkx-core/src/main/java/com/dtstack/flinkx/decoder/IDecode.java similarity index 96% rename from flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/decoder/IDecode.java rename to flinkx-core/src/main/java/com/dtstack/flinkx/decoder/IDecode.java index 0d117d3956..5b195bd668 100644 --- a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/decoder/IDecode.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/decoder/IDecode.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.dtstack.flinkx.kafkabase.decoder; +package com.dtstack.flinkx.decoder; import java.util.Map; diff --git a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/decoder/JsonDecoder.java b/flinkx-core/src/main/java/com/dtstack/flinkx/decoder/JsonDecoder.java similarity index 97% rename from flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/decoder/JsonDecoder.java rename to flinkx-core/src/main/java/com/dtstack/flinkx/decoder/JsonDecoder.java index 8d6fad179e..10a954e601 100644 --- a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/decoder/JsonDecoder.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/decoder/JsonDecoder.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.dtstack.flinkx.kafkabase.decoder; +package com.dtstack.flinkx.decoder; import org.codehaus.jackson.map.ObjectMapper; import org.slf4j.Logger; @@ -31,12 +31,10 @@ * @author tudou */ public class JsonDecoder implements IDecode { + private static final String KEY_MESSAGE = "message"; private static Logger LOG = LoggerFactory.getLogger(JsonDecoder.class); - private static ObjectMapper objectMapper = new ObjectMapper(); - private static final String KEY_MESSAGE = "message"; - @Override @SuppressWarnings("unchecked") public Map decode(final String message) { diff --git a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/decoder/PlainDecoder.java b/flinkx-core/src/main/java/com/dtstack/flinkx/decoder/PlainDecoder.java similarity index 96% rename from flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/decoder/PlainDecoder.java rename to flinkx-core/src/main/java/com/dtstack/flinkx/decoder/PlainDecoder.java index 5a2a027369..63771cfd6e 100644 --- a/flinkx-kb/flinkx-kb-core/src/main/java/com/dtstack/flinkx/kafkabase/decoder/PlainDecoder.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/decoder/PlainDecoder.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.dtstack.flinkx.kafkabase.decoder; +package com.dtstack.flinkx.decoder; import java.util.Collections; import java.util.Map; diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EDatabaseType.java b/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EDatabaseType.java index 475f6923d1..8de25bd051 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EDatabaseType.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/enums/EDatabaseType.java @@ -33,6 +33,7 @@ public enum EDatabaseType { SQLServer, Oracle, PostgreSQL, + Greenplum, DB2, MongoDB, Redis, diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/BaseRichInputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/BaseRichInputFormat.java index 1ce9844175..2c55ee3a19 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/BaseRichInputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/inputformat/BaseRichInputFormat.java @@ -28,7 +28,6 @@ import com.dtstack.flinkx.metrics.CustomPrometheusReporter; import com.dtstack.flinkx.reader.ByteRateLimiter; import com.dtstack.flinkx.restore.FormatState; -import org.apache.commons.lang.StringUtils; import com.dtstack.flinkx.util.ExceptionUtil; import org.apache.flink.api.common.accumulators.LongCounter; import org.apache.flink.api.common.io.DefaultInputSplitAssigner; @@ -376,14 +375,13 @@ public RestoreConfig getRestoreConfig() { return restoreConfig; } - public void setLogConfig(LogConfig logConfig) { - this.logConfig = logConfig; - } - public void setRestoreConfig(RestoreConfig restoreConfig) { this.restoreConfig = restoreConfig; } + public void setLogConfig(LogConfig logConfig) { + this.logConfig = logConfig; + } public void setTestConfig(TestConfig testConfig) { this.testConfig = testConfig; diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/metrics/AccumulatorCollector.java b/flinkx-core/src/main/java/com/dtstack/flinkx/metrics/AccumulatorCollector.java index c701707ade..a1f67a983f 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/metrics/AccumulatorCollector.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/metrics/AccumulatorCollector.java @@ -25,7 +25,7 @@ import com.google.common.collect.Lists; import 
com.google.gson.Gson; import com.google.gson.internal.LinkedTreeMap; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.accumulators.LongCounter; import org.apache.flink.api.common.functions.RuntimeContext; import org.apache.http.impl.client.CloseableHttpClient; @@ -35,8 +35,13 @@ import org.slf4j.LoggerFactory; import java.io.InputStream; -import java.util.*; -import java.util.concurrent.*; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; /** * Regularly get statistics from the flink API @@ -247,7 +252,7 @@ private void checkErrorTimes() { } } - class ValueAccumulator{ + static class ValueAccumulator{ private long global; private LongCounter local; diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/metrics/CustomPrometheusReporter.java b/flinkx-core/src/main/java/com/dtstack/flinkx/metrics/CustomPrometheusReporter.java index 3735cc7c64..dfe35d84fe 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/metrics/CustomPrometheusReporter.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/metrics/CustomPrometheusReporter.java @@ -27,7 +27,13 @@ import org.apache.flink.api.common.accumulators.Accumulator; import org.apache.flink.api.common.functions.RuntimeContext; import org.apache.flink.configuration.Configuration; -import org.apache.flink.metrics.*; +import org.apache.flink.metrics.CharacterFilter; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.Gauge; +import org.apache.flink.metrics.Histogram; +import org.apache.flink.metrics.Meter; +import org.apache.flink.metrics.Metric; +import org.apache.flink.metrics.MetricGroup; import org.apache.flink.runtime.execution.Environment; import org.apache.flink.runtime.metrics.groups.AbstractMetricGroup; import org.apache.flink.runtime.metrics.groups.FrontMetricGroup; @@ -39,7 +45,14 @@ import java.io.IOException; import java.lang.reflect.Field; -import java.util.*; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; import java.util.regex.Pattern; /** @@ -121,7 +134,6 @@ public void open() { if (StringUtils.isNullOrWhitespaceOnly(host) || port < 1) { return; -// throw new IllegalArgumentException("Invalid host/port configuration. 
Host: " + host + " Port: " + port); } if (randomSuffix) { @@ -190,6 +202,10 @@ private void notifyOfAddedMetric(final Metric metric, final String metricName, f count = collectorWithCount.getValue(); } else { collector = createCollector(metric, dimensionKeys, dimensionValues, scopedMetricName, helpString); + if (null == collector) { + return; + } + try { collector.register(defaultRegistry); } catch (Exception e) { diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/options/OptionParser.java b/flinkx-core/src/main/java/com/dtstack/flinkx/options/OptionParser.java index f5991f1f9c..c46d451f48 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/options/OptionParser.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/options/OptionParser.java @@ -100,19 +100,15 @@ public List getProgramExeArgList() throws Exception { continue; }else if(OPTION_JOB.equalsIgnoreCase(key)){ File file = new File(value.toString()); - FileInputStream in = new FileInputStream(file); - byte[] filecontent = new byte[(int) file.length()]; - in.read(filecontent); - value = new String(filecontent, Charsets.UTF_8.name()); + try (FileInputStream in = new FileInputStream(file)) { + byte[] filecontent = new byte[(int) file.length()]; + in.read(filecontent); + value = new String(filecontent, Charsets.UTF_8.name()); + } } args.add("-" + key); args.add(value.toString()); } return args; } - - private void printUsage() { - System.out.print(options.toString()); - } - } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/options/Options.java b/flinkx-core/src/main/java/com/dtstack/flinkx/options/Options.java index 0123d3f955..26a1189ecd 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/options/Options.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/options/Options.java @@ -64,9 +64,9 @@ public class Options { @OptionRequired(description = "env properties") private String confProp = "{}"; - /** - * savepoint - */ + @OptionRequired(description = "json modify") + private String p = ""; + @OptionRequired(description = "savepoint path") private String s; @@ -195,4 +195,12 @@ public String getPluginLoadMode() { public void setPluginLoadMode(String pluginLoadMode) { this.pluginLoadMode = pluginLoadMode; } + + public String getP() { + return p; + } + + public void setP(String p) { + this.p = p; + } } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseFileOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseFileOutputFormat.java index 6d92035700..f9735c0ab3 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseFileOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseFileOutputFormat.java @@ -294,11 +294,12 @@ protected void afterCloseInternal() { moveAllTemporaryDataFileToDirectory(); LOG.info("The task ran successfully,clear temporary data files"); + closeSource(); clearTemporaryDataFiles(); } + }else{ + closeSource(); } - - closeSource(); } catch(Exception ex) { throw new RuntimeException(ex); } @@ -328,11 +329,6 @@ public void tryCleanupOnError() throws Exception { } } - @Override - protected void writeMultipleRecordsInternal() throws Exception { - // CAN NOT HAPPEN - } - @Override protected boolean needWaitAfterCloseInternal() { return true; diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseRichOutputFormat.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseRichOutputFormat.java index 1b42017236..39ce43bc7b 100644 --- 
a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseRichOutputFormat.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseRichOutputFormat.java @@ -34,7 +34,7 @@ import com.dtstack.flinkx.writer.ErrorLimiter; import com.google.gson.JsonObject; import com.google.gson.JsonParser; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.accumulators.LongCounter; import org.apache.flink.api.common.io.CleanupWhenUnsuccessful; import org.apache.flink.configuration.Configuration; @@ -51,7 +51,9 @@ import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.writer.WriteErrorTypes.*; +import static com.dtstack.flinkx.writer.WriteErrorTypes.ERR_FORMAT_TRANSFORM; +import static com.dtstack.flinkx.writer.WriteErrorTypes.ERR_NULL_POINTER; +import static com.dtstack.flinkx.writer.WriteErrorTypes.ERR_PRIMARY_CONFLICT; /** * Abstract Specification for all the OutputFormat defined in flinkx plugins @@ -418,6 +420,10 @@ protected void writeMultipleRecords() throws Exception { */ protected abstract void writeMultipleRecordsInternal() throws Exception; + protected void notSupportBatchWrite(String writerName) { + throw new UnsupportedOperationException(writerName + "不支持批量写入"); + } + protected void writeRecordInternal() { try { writeMultipleRecords(); diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseRichOutputFormatBuilder.java b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseRichOutputFormatBuilder.java index 0bdcca624a..38306365f2 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseRichOutputFormatBuilder.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/outputformat/BaseRichOutputFormatBuilder.java @@ -19,6 +19,7 @@ package com.dtstack.flinkx.outputformat; import com.dtstack.flinkx.config.RestoreConfig; +import com.dtstack.flinkx.constants.ConstantValue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.List; @@ -71,6 +72,12 @@ public void setInitAccumulatorAndDirty(boolean initAccumulatorAndDirty) { this.format.initAccumulatorAndDirty = initAccumulatorAndDirty; } + protected void notSupportBatchWrite(String writerName) { + if (this.format.getBatchInterval() > 1) { + throw new IllegalArgumentException(writerName + "不支持批量写入"); + } + } + /** * Check the value of parameters */ @@ -78,7 +85,17 @@ public void setInitAccumulatorAndDirty(boolean initAccumulatorAndDirty) { public BaseRichOutputFormat finish() { checkFormat(); + + /** + * 200000条限制的原因: + * 按照目前的使用情况以及部署配置,假设写入字段数量平均为50个,一个单slot的TaskManager内存为1G, + * 在不考虑各插件批量写入对内存特殊要求并且只考虑插件缓存这么多条数据的情况下,batchInterval为400000条时出现fullGC, + * 为了避免fullGC以及OOM,并且保证batchInterval有足够的配置空间,取最大值的一半200000。 + */ + if (this.format.getBatchInterval() > ConstantValue.MAX_BATCH_SIZE) { + throw new IllegalArgumentException("批量写入条数必须小于[200000]条"); + } + return format; } - } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/reader/ByteRateLimiter.java b/flinkx-core/src/main/java/com/dtstack/flinkx/reader/ByteRateLimiter.java index f66d45b7f9..13c21f8129 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/reader/ByteRateLimiter.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/reader/ByteRateLimiter.java @@ -25,7 +25,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.concurrent.*; +import java.math.BigDecimal; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import 
java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; /** * This class is user for speed control @@ -80,11 +84,12 @@ private void updateRate(){ long thisRecords = accumulatorCollector.getLocalAccumulatorValue(Metrics.NUM_READS); long totalRecords = accumulatorCollector.getAccumulatorValue(Metrics.NUM_READS); - double thisWriteRatio = (totalRecords == 0 ? 0 : thisRecords / (double)totalRecords); + BigDecimal thisWriteRatio = BigDecimal.valueOf(totalRecords == 0 ? 0 : thisRecords / (double) totalRecords); - if (totalRecords > MIN_RECORD_NUMBER_UPDATE_RATE && totalBytes != 0 && thisWriteRatio != 0) { - double bpr = totalBytes / totalRecords; - double permitsPerSecond = expectedBytePerSecond / bpr * thisWriteRatio; + if (totalRecords > MIN_RECORD_NUMBER_UPDATE_RATE && totalBytes != 0 + && thisWriteRatio.compareTo(new BigDecimal(0)) == 0) { + double bpr = totalBytes / (double)totalRecords; + double permitsPerSecond = expectedBytePerSecond / bpr * thisWriteRatio.doubleValue(); rateLimiter.setRate(permitsPerSecond); } } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/reader/MetaColumn.java b/flinkx-core/src/main/java/com/dtstack/flinkx/reader/MetaColumn.java index d328115334..a7436f894f 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/reader/MetaColumn.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/reader/MetaColumn.java @@ -20,7 +20,6 @@ import com.dtstack.flinkx.constants.ConstantValue; import com.dtstack.flinkx.util.DateUtil; -import com.dtstack.flinkx.util.StringUtil; import org.apache.commons.lang.StringUtils; import java.io.Serializable; @@ -48,6 +47,8 @@ public class MetaColumn implements Serializable { private String splitter; + private Boolean isPart; + public String getSplitter() { return splitter; } @@ -124,6 +125,7 @@ public static List getMetaColumns(List columns, boolean generateInde mc.setType(sm.get("type") != null ? String.valueOf(sm.get("type")) : null); mc.setValue(sm.get("value") != null ? String.valueOf(sm.get("value")) : null); mc.setSplitter(sm.get("splitter") != null ? String.valueOf(sm.get("splitter")) : null); + mc.setPart(sm.get("isPart") != null ? 
(Boolean) sm.get("isPart") : false); if(sm.get("format") != null && String.valueOf(sm.get("format")).trim().length() > 0){ mc.setTimeFormat(DateUtil.buildDateFormatter(String.valueOf(sm.get("format")))); @@ -151,6 +153,14 @@ public static List getMetaColumns(List columns, boolean generateInde return metaColumns; } + public Boolean getPart() { + return isPart; + } + + public void setPart(Boolean part) { + isPart = part; + } + public static List getMetaColumns(List columns){ return getMetaColumns(columns, true); } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java index bba853e451..b073524412 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ClassUtil.java @@ -33,7 +33,7 @@ public class ClassUtil { private static final Logger LOG = LoggerFactory.getLogger(ClassUtil.class); - public final static String LOCK_STR = "jdbc_lock_str"; + public final static Object LOCK_STR = new Object(); public static void forName(String clazz, ClassLoader classLoader) { synchronized (LOCK_STR){ diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/Clock.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/Clock.java index 62f3a7e98c..c003af9457 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/Clock.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/Clock.java @@ -46,9 +46,14 @@ static Clock system() { return SYSTEM; } + /** + * Get current time + * @return time + */ default Instant currentTime() { return Instant.ofEpochMilli(currentTimeInMillis()); } + /** * Get the current time in nanoseconds. * @return the current time in nanoseconds. diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/DateUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/DateUtil.java index 8ce417196d..a9b2ce6e53 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/DateUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/DateUtil.java @@ -21,7 +21,11 @@ import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.*; +import java.util.Calendar; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; /** * Date Utilities @@ -93,7 +97,12 @@ public static java.sql.Date columnToDate(Object column,SimpleDateFormat customTi if (((String) column).length() == 0){ return null; } - return new java.sql.Date(stringToDate((String)column,customTimeFormat).getTime()); + + Date date = stringToDate((String)column, customTimeFormat); + if (null == date) { + return null; + } + return new java.sql.Date(date.getTime()); } else if (column instanceof Integer) { Integer rawData = (Integer) column; return new java.sql.Date(getMillSecond(rawData.toString())); @@ -120,7 +129,12 @@ public static java.sql.Timestamp columnToTimestamp(Object column,SimpleDateForma if (((String) column).length() == 0){ return null; } - return new java.sql.Timestamp(stringToDate((String)column,customTimeFormat).getTime()); + + Date date = stringToDate((String)column,customTimeFormat); + if (null == date) { + return null; + } + return new java.sql.Timestamp(date.getTime()); } else if (column instanceof Integer) { Integer rawData = (Integer) column; return new java.sql.Timestamp(getMillSecond(rawData.toString())); diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java 
b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java index cddcf1ba29..d770364db7 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ExceptionUtil.java @@ -33,30 +33,19 @@ public class ExceptionUtil { private static Logger logger = LoggerFactory.getLogger(ExceptionUtil.class); public static String getErrorMessage(Throwable e) { - StringWriter stringWriter = null; - PrintWriter writer = null; - try{ - stringWriter= new StringWriter(); - writer = new PrintWriter(stringWriter); - e.printStackTrace(writer); - writer.flush(); - stringWriter.flush(); - StringBuffer buffer= stringWriter.getBuffer(); - return buffer.toString(); - }catch(Throwable ee){ - logger.error("",ee); + if (null == e) { + return null; + } - }finally { - if(writer!=null){ - writer.close(); - } - if(stringWriter!=null){ - try{ - stringWriter.close(); - }catch (Throwable ee){ - logger.error("",ee); - } - } + try (StringWriter stringWriter = new StringWriter(); + PrintWriter writer = new PrintWriter(stringWriter)) { + e.printStackTrace(writer); + writer.flush(); + stringWriter.flush(); + StringBuffer buffer = stringWriter.getBuffer(); + return buffer.toString(); + } catch (Throwable ee) { + logger.error("", ee); } return null; } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/FileSystemUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/FileSystemUtil.java index c545ef3d94..0e7214fb58 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/FileSystemUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/FileSystemUtil.java @@ -29,6 +29,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; import java.security.PrivilegedAction; import java.util.HashMap; import java.util.Map; @@ -76,7 +77,7 @@ public static void setHadoopUserName(Configuration conf){ } } - private static boolean isOpenKerberos(Map hadoopConfig){ + public static boolean isOpenKerberos(Map hadoopConfig){ if(!MapUtils.getBoolean(hadoopConfig, KEY_HADOOP_SECURITY_AUTHORIZATION, false)){ return false; } @@ -85,13 +86,7 @@ private static boolean isOpenKerberos(Map hadoopConfig){ } private static FileSystem getFsWithKerberos(Map hadoopConfig, String defaultFs) throws Exception{ - String keytabFileName = KerberosUtil.getPrincipalFileName(hadoopConfig); - - keytabFileName = KerberosUtil.loadFile(hadoopConfig, keytabFileName); - String principal = KerberosUtil.findPrincipalFromKeytab(keytabFileName); - KerberosUtil.loadKrb5Conf(hadoopConfig); - - UserGroupInformation ugi = KerberosUtil.loginAndReturnUgi(getConfiguration(hadoopConfig, defaultFs), principal, keytabFileName); + UserGroupInformation ugi = getUGI(hadoopConfig, defaultFs); UserGroupInformation.setLoginUser(ugi); return ugi.doAs(new PrivilegedAction() { @@ -106,6 +101,19 @@ public FileSystem run(){ }); } + public static UserGroupInformation getUGI(Map hadoopConfig, String defaultFs) throws IOException { + String keytabFileName = KerberosUtil.getPrincipalFileName(hadoopConfig); + + keytabFileName = KerberosUtil.loadFile(hadoopConfig, keytabFileName); + String principal = KerberosUtil.getPrincipal(hadoopConfig, keytabFileName); + KerberosUtil.loadKrb5Conf(hadoopConfig); + + UserGroupInformation ugi = KerberosUtil.loginAndReturnUgi(getConfiguration(hadoopConfig, defaultFs), principal, keytabFileName); + UserGroupInformation.setLoginUser(ugi); + + return ugi; + } + public static Configuration getConfiguration(Map confMap, String 
defaultFs) { confMap = fillConfig(confMap, defaultFs); @@ -134,7 +142,7 @@ public static JobConf getJobConf(Map confMap, String defaultFs){ private static Map fillConfig(Map confMap, String defaultFs) { if (confMap == null) { - confMap = new HashMap<>(8); + confMap = new HashMap<>(); } if (isHaMode(confMap)) { diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/GsonUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/GsonUtil.java new file mode 100644 index 0000000000..b94765dc4a --- /dev/null +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/GsonUtil.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.util; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.TypeAdapter; +import com.google.gson.TypeAdapterFactory; +import com.google.gson.internal.LinkedTreeMap; +import com.google.gson.internal.bind.ObjectTypeAdapter; +import com.google.gson.reflect.TypeToken; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonToken; +import com.google.gson.stream.JsonWriter; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.Type; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +/** + * Date: 2020/06/12 + * Company: www.dtstack.com + * + * Gson工具类,用于对json的序列化及反序列化,及解决int类型在map中被转换成double类型问题 + * + * @author tudou + */ +public class GsonUtil { + + public static Gson GSON = getGson(); + public static Type gsonMapTypeToken = new TypeToken>(){}.getType(); + + @SuppressWarnings("unchecked") + private static Gson getGson() { + GSON = new GsonBuilder().create(); + try { + Field factories = Gson.class.getDeclaredField("factories"); + factories.setAccessible(true); + Object o = factories.get(GSON); + Class[] declaredClasses = Collections.class.getDeclaredClasses(); + for (Class c : declaredClasses) { + if ("java.util.Collections$UnmodifiableList".equals(c.getName())) { + Field listField = c.getDeclaredField("list"); + listField.setAccessible(true); + List list = (List) listField.get(o); + int i = list.indexOf(ObjectTypeAdapter.FACTORY); + list.set(i, new TypeAdapterFactory() { + @Override + public TypeAdapter create(Gson gson, TypeToken type) { + if (type.getRawType() == Object.class) { + return new TypeAdapter() { + @Override + public Object read(JsonReader in) throws IOException { + JsonToken token = in.peek(); + //判断字符串的实际类型 + switch (token) { + case BEGIN_ARRAY: + List list = new ArrayList<>(); + in.beginArray(); + while (in.hasNext()) { + list.add(read(in)); + } + in.endArray(); + return list; + + case BEGIN_OBJECT: + Map map = new LinkedTreeMap<>(); + in.beginObject(); + while (in.hasNext()) { + map.put(in.nextName(), 
read(in)); + } + in.endObject(); + return map; + case STRING: + return in.nextString(); + case NUMBER: + String s = in.nextString(); + if (s.contains(".")) { + return Double.valueOf(s); + } else { + try { + return Integer.valueOf(s); + } catch (Exception e) { + return Long.valueOf(s); + } + } + case BOOLEAN: + return in.nextBoolean(); + case NULL: + in.nextNull(); + return null; + default: + throw new IllegalStateException(); + } + } + + @Override + public void write(JsonWriter out, Object value) throws IOException { + if (value == null) { + out.nullValue(); + return; + } + //noinspection unchecked + TypeAdapter typeAdapter = gson.getAdapter((Class) value.getClass()); + if (typeAdapter instanceof ObjectTypeAdapter) { + out.beginObject(); + out.endObject(); + return; + } + typeAdapter.write(out, value); + } + }; + } + return null; + } + }); + break; + } + } + } catch (Exception e) { + e.printStackTrace(); + } + return GSON; + } +} diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/JsonModifyUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/JsonModifyUtil.java new file mode 100644 index 0000000000..2e912373a7 --- /dev/null +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/JsonModifyUtil.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flinkx.util; + +import org.apache.commons.lang3.StringUtils; + +import java.util.HashMap; + +/** + * @author tiezhu + */ +public class JsonModifyUtil { + + public static String JsonValueReplace(String json, HashMap parameter){ + for(String item: parameter.keySet()){ + if(json.contains("${"+item+"}")){ + json = json.replace("${"+item+"}", parameter.get(item)); + } + } + return json; + } + + /** + * 将命令行中的修改命令转化为HashMap保存 + */ + public static HashMap CommandTransform(String command) { + HashMap parameter = new HashMap<>(); + String[] split = StringUtils.split(command, ","); + for (String item : split) { + String[] temp = item.split("="); + parameter.put(temp[0], temp[1]); + } + return parameter; + } +} diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ResultPrintUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ResultPrintUtil.java index 78a328fdac..b4656af3ac 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/ResultPrintUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/ResultPrintUtil.java @@ -22,6 +22,8 @@ import com.google.common.collect.Lists; import org.apache.commons.lang.StringUtils; import org.apache.flink.api.common.JobExecutionResult; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.List; @@ -31,6 +33,8 @@ */ public class ResultPrintUtil { + private static Logger LOG = LoggerFactory.getLogger(ResultPrintUtil.class); + public static void printResult(JobExecutionResult result){ List names = Lists.newArrayList(); List values = Lists.newArrayList(); @@ -56,8 +60,8 @@ public static void printResult(JobExecutionResult result){ } } - System.out.println("---------------------------------"); - System.out.println(builder.toString()); - System.out.println("---------------------------------"); + LOG.info("---------------------------------"); + LOG.info(builder.toString()); + LOG.info("---------------------------------"); } } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/RetryUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/RetryUtil.java index 000d7a7ba2..5f8329a2e4 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/RetryUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/RetryUtil.java @@ -33,7 +33,7 @@ public final class RetryUtil { private static final Logger LOG = LoggerFactory.getLogger(RetryUtil.class); - private static final long MAX_SLEEP_MILLISECOND = 256 * 1000; + private static final long MAX_SLEEP_MILLISECOND = 256 * 1000L; /** * 重试次数工具方法. diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/util/StringUtil.java b/flinkx-core/src/main/java/com/dtstack/flinkx/util/StringUtil.java index b276d856cd..5ae0ca309c 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/util/StringUtil.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/util/StringUtil.java @@ -38,6 +38,8 @@ */ public class StringUtil { + public static final int STEP_SIZE = 2; + /** * Handle the escaped escape charactor. 
* @@ -192,14 +194,14 @@ public static String col2string(Object column, String type) { } - public static String row2string(Row row, List columnTypes, String delimiter, List columnNames) throws WriteRecordException { + public static String row2string(Row row, List columnTypes, String delimiter) throws WriteRecordException { // convert row to string - int cnt = row.getArity(); - StringBuilder sb = new StringBuilder(); + int size = row.getArity(); + StringBuilder sb = new StringBuilder(128); int i = 0; try { - for (; i < cnt; ++i) { + for (; i < size; ++i) { if (i != 0) { sb.append(delimiter); } @@ -212,9 +214,9 @@ public static String row2string(Row row, List columnTypes, String delimi sb.append(col2string(column, columnTypes.get(i))); } - } catch(Exception ex) { + } catch(Exception e) { String msg = "StringUtil.row2string error: when converting field[" + i + "] in Row(" + row + ")"; - throw new WriteRecordException(msg, ex, i, row); + throw new WriteRecordException(msg, e, i, row); } return sb.toString(); @@ -228,7 +230,7 @@ public static byte[] hexStringToByteArray(String hexString) { int length = hexString.length(); byte[] bytes = new byte[length / 2]; - for (int i = 0; i < length; i += 2) { + for (int i = 0; i < length; i += STEP_SIZE) { bytes[i / 2] = (byte) ((Character.digit(hexString.charAt(i), 16) << 4) + Character.digit(hexString.charAt(i+1), 16)); } diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java index dd4ff9308a..3cc4719ea9 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/DirtyDataManager.java @@ -34,9 +34,16 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; -import java.util.*; +import java.util.ArrayList; +import java.util.Date; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.UUID; -import static com.dtstack.flinkx.writer.WriteErrorTypes.*; +import static com.dtstack.flinkx.writer.WriteErrorTypes.ERR_FORMAT_TRANSFORM; +import static com.dtstack.flinkx.writer.WriteErrorTypes.ERR_NULL_POINTER; +import static com.dtstack.flinkx.writer.WriteErrorTypes.ERR_PRIMARY_CONFLICT; /** * The class handles dirty data management diff --git a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/ErrorLimiter.java b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/ErrorLimiter.java index 1d2f7d9ae8..189f198a15 100644 --- a/flinkx-core/src/main/java/com/dtstack/flinkx/writer/ErrorLimiter.java +++ b/flinkx-core/src/main/java/com/dtstack/flinkx/writer/ErrorLimiter.java @@ -29,7 +29,8 @@ * Company: www.dtstack.com * @author huyifan.zju@163.com */ -public class ErrorLimiter { +public class +ErrorLimiter { private final Integer maxErrors; private final Double maxErrorRatio; diff --git a/flinkx-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java b/flinkx-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java index e331b3e672..b8ca7cdbc7 100644 --- a/flinkx-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java +++ b/flinkx-core/src/main/java/org/apache/flink/streaming/api/environment/StreamExecutionEnvironment.java @@ -57,7 +57,19 @@ import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.datastream.DataStreamSource; import 
org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; -import org.apache.flink.streaming.api.functions.source.*; +import org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction; +import org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator; +import org.apache.flink.streaming.api.functions.source.FileMonitoringFunction; +import org.apache.flink.streaming.api.functions.source.FileProcessingMode; +import org.apache.flink.streaming.api.functions.source.FileReadFunction; +import org.apache.flink.streaming.api.functions.source.FromElementsFunction; +import org.apache.flink.streaming.api.functions.source.FromIteratorFunction; +import org.apache.flink.streaming.api.functions.source.FromSplittableIteratorFunction; +import org.apache.flink.streaming.api.functions.source.InputFormatSourceFunction; +import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction; +import org.apache.flink.streaming.api.functions.source.SocketTextStreamFunction; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.functions.source.StatefulSequenceSource; import org.apache.flink.streaming.api.graph.StreamGraph; import org.apache.flink.streaming.api.graph.StreamGraphGenerator; import org.apache.flink.streaming.api.operators.StoppableStreamSource; diff --git a/flinkx-core/src/test/java/com/dtstack/flinkx/util/ColumnTypeUtilTest.java b/flinkx-core/src/test/java/com/dtstack/flinkx/util/ColumnTypeUtilTest.java index 7b68e5fcc0..0210f3c05c 100644 --- a/flinkx-core/src/test/java/com/dtstack/flinkx/util/ColumnTypeUtilTest.java +++ b/flinkx-core/src/test/java/com/dtstack/flinkx/util/ColumnTypeUtilTest.java @@ -1,7 +1,7 @@ package com.dtstack.flinkx.util; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.Assert; +import org.junit.Test; /** * @author jiangbo diff --git a/flinkx-core/src/test/java/com/dtstack/flinkx/util/DateUtilTest.java b/flinkx-core/src/test/java/com/dtstack/flinkx/util/DateUtilTest.java new file mode 100644 index 0000000000..6607984c4e --- /dev/null +++ b/flinkx-core/src/test/java/com/dtstack/flinkx/util/DateUtilTest.java @@ -0,0 +1,112 @@ +package com.dtstack.flinkx.util; + +import org.junit.Assert; +import org.junit.Test; +import java.sql.Date; +import java.sql.Timestamp; +import java.text.SimpleDateFormat; + + +/** + * @author jiangbo + * @date 2020/3/18 + */ +public class DateUtilTest { + + @Test + public void testColumnToDate() { + Date result = DateUtil.columnToDate(null, null); + Assert.assertNull(result); + + result = DateUtil.columnToDate("", null); + Assert.assertNull(result); + + result = DateUtil.columnToDate("2020-03-18 10:56:00", null); + Assert.assertEquals(result, new Date(1584500160000L)); + + result = DateUtil.columnToDate(1584500160, null); + Assert.assertEquals(result, new Date(1584500160000L)); + + result = DateUtil.columnToDate(1584500160000L, null); + Assert.assertEquals(result, new Date(1584500160000L)); + + result = DateUtil.columnToDate(new Date(1584500160000L), null); + Assert.assertEquals(result, new Date(1584500160000L)); + + result = DateUtil.columnToDate(new Timestamp(1584500160000L), null); + Assert.assertEquals(result, new Date(1584500160000L)); + + result = DateUtil.columnToDate(new java.util.Date(1584500160000L), null); + Assert.assertEquals(result, new Date(1584500160000L)); + + try { + DateUtil.columnToDate(true, null); + } catch (Exception e) { + Assert.assertTrue(e instanceof IllegalArgumentException); + } + 
} + + @Test + public void testColumnToTimestamp() { + Timestamp result = DateUtil.columnToTimestamp(null, null); + Assert.assertNull(result); + + result = DateUtil.columnToTimestamp("", null); + Assert.assertNull(result); + + result = DateUtil.columnToTimestamp("2020-03-18 10:56:00", null); + Assert.assertEquals(result, new Timestamp(1584500160000L)); + + result = DateUtil.columnToTimestamp(1584500160, null); + Assert.assertEquals(result, new Timestamp(1584500160000L)); + + result = DateUtil.columnToTimestamp(1584500160000L, null); + Assert.assertEquals(result, new Timestamp(1584500160000L)); + + result = DateUtil.columnToTimestamp(new Date(1584500160000L), null); + Assert.assertEquals(result, new Timestamp(1584500160000L)); + + result = DateUtil.columnToTimestamp(new Timestamp(1584500160000L), null); + Assert.assertEquals(result, new Timestamp(1584500160000L)); + + result = DateUtil.columnToTimestamp(new java.util.Date(1584500160000L), null); + Assert.assertEquals(result, new Timestamp(1584500160000L)); + + try { + DateUtil.columnToTimestamp(true, null); + } catch (Exception e) { + Assert.assertTrue(e instanceof IllegalArgumentException); + } + } + + @Test + public void testGetMillSecond() { + long result = DateUtil.getMillSecond("1584500160000"); + Assert.assertEquals(result, 1584500160000L); + + result = DateUtil.getMillSecond("1584500160000000"); + Assert.assertEquals(result, 1584500160000L); + + result = DateUtil.getMillSecond("1584500160000000000"); + Assert.assertEquals(result, 1584500160000L); + + long expect = 57600000; // 1970-01-02 00:00:00:000 + result = DateUtil.getMillSecond("1"); + Assert.assertEquals(result, expect); + } + + @Test + public void testStringToDate() { + java.util.Date result = DateUtil.stringToDate("", null); + Assert.assertNull(result); + + result = DateUtil.stringToDate("2020/03/18 13:26:00", new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")); + Assert.assertEquals(result, new java.util.Date(1584509160000L)); + + try { + DateUtil.stringToDate("xxxx", null); + } catch (Exception e) { + Assert.assertTrue(e.getMessage().contains("can't parse date")); + } + } +} \ No newline at end of file diff --git a/flinkx-core/src/test/java/com/dtstack/flinkx/util/ExceptionUtilTest.java b/flinkx-core/src/test/java/com/dtstack/flinkx/util/ExceptionUtilTest.java new file mode 100644 index 0000000000..5d988bd64d --- /dev/null +++ b/flinkx-core/src/test/java/com/dtstack/flinkx/util/ExceptionUtilTest.java @@ -0,0 +1,23 @@ +package com.dtstack.flinkx.util; + +import org.junit.Assert; +import org.junit.Test; + +/** + * @author jiangbo + * @date 2020/3/18 + */ +public class ExceptionUtilTest { + + @Test + public void testGetErrorMessage() { + String result = ExceptionUtil.getErrorMessage(null); + Assert.assertNull(result); + + try { + ExceptionUtil.getErrorMessage(new IllegalArgumentException("error test")); + } catch (Exception e) { + Assert.fail(e.getMessage()); + } + } +} \ No newline at end of file diff --git a/flinkx-core/src/test/java/com/dtstack/flinkx/util/MapUtilTest.java b/flinkx-core/src/test/java/com/dtstack/flinkx/util/MapUtilTest.java index c7339dd3fa..1391e2a357 100644 --- a/flinkx-core/src/test/java/com/dtstack/flinkx/util/MapUtilTest.java +++ b/flinkx-core/src/test/java/com/dtstack/flinkx/util/MapUtilTest.java @@ -1,8 +1,8 @@ package com.dtstack.flinkx.util; import com.google.gson.internal.LinkedTreeMap; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.Assert; +import org.junit.Test; import java.util.HashMap; import java.util.Map; diff 
--git a/flinkx-core/src/test/java/com/dtstack/flinkx/util/Md5UtilTest.java b/flinkx-core/src/test/java/com/dtstack/flinkx/util/Md5UtilTest.java index b38fc07328..b781529383 100644 --- a/flinkx-core/src/test/java/com/dtstack/flinkx/util/Md5UtilTest.java +++ b/flinkx-core/src/test/java/com/dtstack/flinkx/util/Md5UtilTest.java @@ -1,7 +1,7 @@ package com.dtstack.flinkx.util; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.Assert; +import org.junit.Test; /** * @author jiangbo diff --git a/flinkx-core/src/test/java/com/dtstack/flinkx/util/RowUtilTest.java b/flinkx-core/src/test/java/com/dtstack/flinkx/util/RowUtilTest.java index b373be8f19..7a68496791 100644 --- a/flinkx-core/src/test/java/com/dtstack/flinkx/util/RowUtilTest.java +++ b/flinkx-core/src/test/java/com/dtstack/flinkx/util/RowUtilTest.java @@ -1,8 +1,8 @@ package com.dtstack.flinkx.util; import org.apache.flink.types.Row; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.Assert; +import org.junit.Test; /** * @author jiangbo diff --git a/flinkx-core/src/test/java/com/dtstack/flinkx/util/SnowflakeIdWorkerTest.java b/flinkx-core/src/test/java/com/dtstack/flinkx/util/SnowflakeIdWorkerTest.java new file mode 100644 index 0000000000..69da0176bd --- /dev/null +++ b/flinkx-core/src/test/java/com/dtstack/flinkx/util/SnowflakeIdWorkerTest.java @@ -0,0 +1,28 @@ +package com.dtstack.flinkx.util; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashSet; +import java.util.Set; + +/** + * @author jiangbo + * @date 2020/3/18 + */ +public class SnowflakeIdWorkerTest { + + SnowflakeIdWorker snowflakeIdWorker = new SnowflakeIdWorker(1L, 1L); + + @Test + public void testNextId() { + Set idSet = new HashSet<>(); + int i = 0; + while (i++ < 100) { + long result = snowflakeIdWorker.nextId(); + idSet.add(result); + } + + Assert.assertEquals(idSet.size(), 100); + } +} \ No newline at end of file diff --git a/flinkx-core/src/test/java/com/dtstack/flinkx/util/StringUtilTest.java b/flinkx-core/src/test/java/com/dtstack/flinkx/util/StringUtilTest.java index b072f9168a..bcff236c57 100644 --- a/flinkx-core/src/test/java/com/dtstack/flinkx/util/StringUtilTest.java +++ b/flinkx-core/src/test/java/com/dtstack/flinkx/util/StringUtilTest.java @@ -1,9 +1,11 @@ package com.dtstack.flinkx.util; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.Assert; +import org.junit.Test; +import java.sql.Timestamp; import java.text.SimpleDateFormat; +import java.util.Date; /** * @author jiangbo @@ -66,4 +68,52 @@ public void testString2col() { result = StringUtil.string2col("xxx", "xxx", null); Assert.assertEquals(result, "xxx"); } + + @Test + public void testCol2string() { + String result = StringUtil.col2string(null, null); + Assert.assertEquals(result, ""); + + result = StringUtil.col2string("test", null); + Assert.assertEquals(result, "test"); + + result = StringUtil.col2string(Byte.valueOf("1"), "TINYINT"); + Assert.assertEquals(result, "1"); + + result = StringUtil.col2string(Short.valueOf("1"), "SMALLINT"); + Assert.assertEquals(result, "1"); + + result = StringUtil.col2string(123, "INTEGER"); + Assert.assertEquals(result, "123"); + + result = StringUtil.col2string(123L, "LONG"); + Assert.assertEquals(result, "123"); + + result = StringUtil.col2string(new Timestamp(1584510286187L), "LONG"); + Assert.assertEquals(result, "1584510286187"); + + result = StringUtil.col2string(123.123, "FLOAT"); + Assert.assertEquals(result, "123.123"); + + result = 
StringUtil.col2string(123.123, "DOUBLE"); + Assert.assertEquals(result, "123.123"); + + result = StringUtil.col2string(123.123, "DECIMAL"); + Assert.assertEquals(result, "123.123"); + + result = StringUtil.col2string("string", "STRING"); + Assert.assertEquals(result, "string"); + + result = StringUtil.col2string(new Timestamp(1584510286187L), "STRING"); + Assert.assertEquals(result, "2020-03-18 13:44:46"); + + result = StringUtil.col2string(true, "BOOLEAN"); + Assert.assertEquals(result, "true"); + + result = StringUtil.col2string(new Date(1584510286187L), "DATE"); + Assert.assertEquals(result, "2020-03-18"); + + result = StringUtil.col2string(new Date(1584510286187L), "DATETIME"); + Assert.assertEquals(result, "2020-03-18 13:44:46"); + } } \ No newline at end of file diff --git a/flinkx-core/src/test/java/com/dtstack/flinkx/util/ValueUtilTest.java b/flinkx-core/src/test/java/com/dtstack/flinkx/util/ValueUtilTest.java index d22a8853d7..75e254d9a5 100644 --- a/flinkx-core/src/test/java/com/dtstack/flinkx/util/ValueUtilTest.java +++ b/flinkx-core/src/test/java/com/dtstack/flinkx/util/ValueUtilTest.java @@ -1,7 +1,7 @@ package com.dtstack.flinkx.util; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.junit.Assert; +import org.junit.Test; /** * @author jiangbo @@ -9,7 +9,7 @@ */ public class ValueUtilTest { - @Test(expectedExceptions = RuntimeException.class) + @Test public void testGetInt() { Integer result = ValueUtil.getInt(null); Assert.assertNull(result); @@ -20,6 +20,10 @@ public void testGetInt() { result = ValueUtil.getInt(new Long(100)); Assert.assertEquals(result, new Integer(100)); - ValueUtil.getInt(new Object()); + try { + ValueUtil.getInt(new Object()); + } catch (Exception e) { + Assert.assertTrue(e.getMessage().contains("Unable to convert")); + } } } \ No newline at end of file diff --git a/flinkx-db2/flinkx-db2-reader/pom.xml b/flinkx-db2/flinkx-db2-reader/pom.xml index 6a6f40a2c7..483fc23905 100644 --- a/flinkx-db2/flinkx-db2-reader/pom.xml +++ b/flinkx-db2/flinkx-db2-reader/pom.xml @@ -63,8 +63,12 @@ shade.db2reader.io.netty - com.google - shade.db2reader.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-db2/flinkx-db2-writer/pom.xml b/flinkx-db2/flinkx-db2-writer/pom.xml index 2ff75d22e0..b3d103b159 100644 --- a/flinkx-db2/flinkx-db2-writer/pom.xml +++ b/flinkx-db2/flinkx-db2-writer/pom.xml @@ -63,8 +63,12 @@ shade.db2writer.io.netty - com.google - shade.db2writer.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-dm/flinkx-dm-core/src/main/java/com/dtstack/flinkx/dm/DmDatabaseMeta.java b/flinkx-dm/flinkx-dm-core/src/main/java/com/dtstack/flinkx/dm/DmDatabaseMeta.java index 3c4d2883dc..da6829b005 100644 --- a/flinkx-dm/flinkx-dm-core/src/main/java/com/dtstack/flinkx/dm/DmDatabaseMeta.java +++ b/flinkx-dm/flinkx-dm-core/src/main/java/com/dtstack/flinkx/dm/DmDatabaseMeta.java @@ -47,7 +47,7 @@ protected String makeValues(List column) { public String quoteTable(String table) { table = table.replace("\"",""); String[] part = table.split("\\."); - if(part.length == 2) { + if(part.length == DB_TABLE_PART_SIZE) { table = getStartQuote() + part[0] + getEndQuote() + "." 
+ getStartQuote() + part[1] + getEndQuote(); } else { table = getStartQuote() + table + getEndQuote(); diff --git a/flinkx-dm/flinkx-dm-reader/pom.xml b/flinkx-dm/flinkx-dm-reader/pom.xml index 15ce92929b..c2feddd9c8 100644 --- a/flinkx-dm/flinkx-dm-reader/pom.xml +++ b/flinkx-dm/flinkx-dm-reader/pom.xml @@ -63,8 +63,12 @@ shade.dmreader.io.netty - com.google - shade.dmreader.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-dm/flinkx-dm-reader/src/main/java/com/dtstack/flinkx/dm/format/DmInputFormat.java b/flinkx-dm/flinkx-dm-reader/src/main/java/com/dtstack/flinkx/dm/format/DmInputFormat.java index 8a114752f1..67e70d499d 100644 --- a/flinkx-dm/flinkx-dm-reader/src/main/java/com/dtstack/flinkx/dm/format/DmInputFormat.java +++ b/flinkx-dm/flinkx-dm-reader/src/main/java/com/dtstack/flinkx/dm/format/DmInputFormat.java @@ -25,8 +25,6 @@ import java.io.IOException; import java.sql.Timestamp; -import static com.dtstack.flinkx.rdb.util.DbUtil.clobToString; - /** * Date: 2020/03/18 * Company: www.dtstack.com @@ -49,7 +47,7 @@ public Row nextRecordInternal(Row row) throws IOException { || obj.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) { obj = resultSet.getTimestamp(pos + 1); } - obj = clobToString(obj); + obj = DbUtil.clobToString(obj); } row.setField(pos, obj); diff --git a/flinkx-dm/flinkx-dm-writer/pom.xml b/flinkx-dm/flinkx-dm-writer/pom.xml index abe284fc04..768be50eae 100644 --- a/flinkx-dm/flinkx-dm-writer/pom.xml +++ b/flinkx-dm/flinkx-dm-writer/pom.xml @@ -63,8 +63,12 @@ shade.dmwriter.io.netty - com.google - shade.dmwriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-dm/flinkx-dm-writer/src/main/java/com/dtstack/flinkx/dm/format/DmOutputFormat.java b/flinkx-dm/flinkx-dm-writer/src/main/java/com/dtstack/flinkx/dm/format/DmOutputFormat.java index c3912a218c..9612294ec8 100644 --- a/flinkx-dm/flinkx-dm-writer/src/main/java/com/dtstack/flinkx/dm/format/DmOutputFormat.java +++ b/flinkx-dm/flinkx-dm-writer/src/main/java/com/dtstack/flinkx/dm/format/DmOutputFormat.java @@ -18,11 +18,16 @@ package com.dtstack.flinkx.dm.format; import com.dtstack.flinkx.enums.ColumnType; +import com.dtstack.flinkx.rdb.BaseDatabaseMeta; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; import com.dtstack.flinkx.util.DateUtil; import org.apache.flink.types.Row; -import java.sql.*; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; @@ -61,7 +66,7 @@ protected List probeFullColumns(String table, Connection dbConn) throws String schema =null; String[] parts = table.split("\\."); - if(parts.length == 2) { + if(parts.length == BaseDatabaseMeta.DB_TABLE_PART_SIZE) { schema = parts[0].toUpperCase(); table = parts[1]; } @@ -76,25 +81,27 @@ protected List probeFullColumns(String table, Connection dbConn) throws @Override protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException { - Map> map = new HashMap<>(); - PreparedStatement ps = dbConn.prepareStatement(String.format(GET_INDEX_SQL,table)); - ResultSet rs = ps.executeQuery(); + Map> map = new HashMap<>(16); - while(rs.next()) { - String indexName = rs.getString("INDEX_NAME"); - if(!map.containsKey(indexName)) { - 
map.put(indexName,new ArrayList<>()); + try (PreparedStatement ps = dbConn.prepareStatement(String.format(GET_INDEX_SQL,table)); + ResultSet rs = ps.executeQuery()) { + while(rs.next()) { + String indexName = rs.getString("INDEX_NAME"); + if(!map.containsKey(indexName)) { + map.put(indexName,new ArrayList<>()); + } + map.get(indexName).add(rs.getString("COLUMN_NAME")); } - map.get(indexName).add(rs.getString("COLUMN_NAME")); - } - Map> retMap = new HashMap<>(); - for(Map.Entry> entry: map.entrySet()) { - String k = entry.getKey(); - List v = entry.getValue(); - if(v!=null && v.size() != 0 && v.get(0) != null) { - retMap.put(k, v); + + Map> retMap = new HashMap<>(16); + for(Map.Entry> entry: map.entrySet()) { + String k = entry.getKey(); + List v = entry.getValue(); + if(v!=null && v.size() != 0 && v.get(0) != null) { + retMap.put(k, v); + } } + return retMap; } - return retMap; } } diff --git a/flinkx-emqx/flinkx-emqx-core/src/main/java/com/dtstack/flinkx/emqx/decoder/JsonDecoder.java b/flinkx-emqx/flinkx-emqx-core/src/main/java/com/dtstack/flinkx/emqx/decoder/JsonDecoder.java deleted file mode 100644 index ecb26a2f3e..0000000000 --- a/flinkx-emqx/flinkx-emqx-core/src/main/java/com/dtstack/flinkx/emqx/decoder/JsonDecoder.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0 - *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.dtstack.flinkx.emqx.decoder; - -import org.codehaus.jackson.map.ObjectMapper; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Collections; -import java.util.Map; - -/** - * Date: 2020/02/12 - * Company: www.dtstack.com - * - * @author tudou - */ -public class JsonDecoder implements IDecode { - private static Logger LOG = LoggerFactory.getLogger(JsonDecoder.class); - - private static ObjectMapper objectMapper = new ObjectMapper(); - - private static final String KEY_MESSAGE = "message"; - - @Override - @SuppressWarnings("unchecked") - public Map decode(final String message) { - try { - Map event = objectMapper.readValue(message, Map.class); - if (!event.containsKey(KEY_MESSAGE)) { - event.put(KEY_MESSAGE, message); - } - return event; - } catch (Exception e) { - LOG.error(e.getMessage()); - return Collections.singletonMap(KEY_MESSAGE, message); - } - - } - -} diff --git a/flinkx-emqx/flinkx-emqx-reader/pom.xml b/flinkx-emqx/flinkx-emqx-reader/pom.xml index 0e106143fd..205e8a9c5c 100644 --- a/flinkx-emqx/flinkx-emqx-reader/pom.xml +++ b/flinkx-emqx/flinkx-emqx-reader/pom.xml @@ -53,6 +53,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/format/EmqxInputFormat.java b/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/format/EmqxInputFormat.java index 5534601f51..7e92921dcc 100644 --- a/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/format/EmqxInputFormat.java +++ b/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/format/EmqxInputFormat.java @@ -17,16 +17,21 @@ */ package com.dtstack.flinkx.emqx.format; -import com.dtstack.flinkx.emqx.decoder.IDecode; -import com.dtstack.flinkx.emqx.decoder.JsonDecoder; -import com.dtstack.flinkx.emqx.decoder.PlainDecoder; +import com.dtstack.flinkx.decoder.IDecode; +import com.dtstack.flinkx.decoder.JsonDecoder; +import com.dtstack.flinkx.decoder.PlainDecoder; import com.dtstack.flinkx.inputformat.BaseRichInputFormat; import com.dtstack.flinkx.util.ExceptionUtil; import org.apache.commons.lang3.StringUtils; import org.apache.flink.core.io.GenericInputSplit; import org.apache.flink.core.io.InputSplit; import org.apache.flink.types.Row; -import org.eclipse.paho.client.mqttv3.*; +import org.eclipse.paho.client.mqttv3.IMqttDeliveryToken; +import org.eclipse.paho.client.mqttv3.MqttCallback; +import org.eclipse.paho.client.mqttv3.MqttClient; +import org.eclipse.paho.client.mqttv3.MqttConnectOptions; +import org.eclipse.paho.client.mqttv3.MqttException; +import org.eclipse.paho.client.mqttv3.MqttMessage; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/reader/EmqxReader.java b/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/reader/EmqxReader.java index 83d70d80d1..32121eec6a 100644 --- a/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/reader/EmqxReader.java +++ b/flinkx-emqx/flinkx-emqx-reader/src/main/java/com/dtstack/flinkx/emqx/reader/EmqxReader.java 
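> Note: the hunks above drop the plugin-local `com.dtstack.flinkx.emqx.decoder` package in favour of the shared `com.dtstack.flinkx.decoder` classes. As a reference for the pattern the removed plugin-local `JsonDecoder` implemented (and which the shared decoder is assumed to keep), here is a minimal sketch that mirrors the deleted source; the class name `JsonDecodeSketch` is illustrative only.

```java
import org.codehaus.jackson.map.ObjectMapper;

import java.util.Collections;
import java.util.Map;

/** Sketch of the decode-with-fallback pattern from the removed plugin-local JsonDecoder. */
public class JsonDecodeSketch {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
    private static final String KEY_MESSAGE = "message";

    @SuppressWarnings("unchecked")
    public Map<String, Object> decode(String message) {
        try {
            // Parse the payload as a JSON object and keep the raw text under "message" as well.
            Map<String, Object> event = OBJECT_MAPPER.readValue(message, Map.class);
            if (!event.containsKey(KEY_MESSAGE)) {
                event.put(KEY_MESSAGE, message);
            }
            return event;
        } catch (Exception e) {
            // Non-JSON payloads degrade to a single-entry map instead of failing the task.
            return Collections.singletonMap(KEY_MESSAGE, message);
        }
    }
}
```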
@@ -8,7 +8,13 @@ import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.types.Row; -import static com.dtstack.flinkx.emqx.EmqxConfigKeys.*; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_BROKER; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_CODEC; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_IS_CLEAN_SESSION; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_PASSWORD; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_QOS; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_TOPIC; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_USERNAME; /** * Date: 2020/02/12 diff --git a/flinkx-emqx/flinkx-emqx-writer/pom.xml b/flinkx-emqx/flinkx-emqx-writer/pom.xml index a6117433f5..fcf861883a 100644 --- a/flinkx-emqx/flinkx-emqx-writer/pom.xml +++ b/flinkx-emqx/flinkx-emqx-writer/pom.xml @@ -53,6 +53,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/format/EmqxOutputFormat.java b/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/format/EmqxOutputFormat.java index 79dec0f172..f1e87baba3 100644 --- a/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/format/EmqxOutputFormat.java +++ b/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/format/EmqxOutputFormat.java @@ -17,7 +17,7 @@ */ package com.dtstack.flinkx.emqx.format; -import com.dtstack.flinkx.emqx.decoder.JsonDecoder; +import com.dtstack.flinkx.decoder.JsonDecoder; import com.dtstack.flinkx.exception.WriteRecordException; import com.dtstack.flinkx.outputformat.BaseRichOutputFormat; import com.dtstack.flinkx.util.ExceptionUtil; diff --git a/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/writer/EmqxWriter.java b/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/writer/EmqxWriter.java index 0ba2a1a12d..52a311b76e 100644 --- a/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/writer/EmqxWriter.java +++ b/flinkx-emqx/flinkx-emqx-writer/src/main/java/com/dtstack/flinkx/emqx/writer/EmqxWriter.java @@ -25,7 +25,12 @@ import org.apache.flink.streaming.api.datastream.DataStreamSink; import org.apache.flink.types.Row; -import static com.dtstack.flinkx.emqx.EmqxConfigKeys.*; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_BROKER; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_IS_CLEAN_SESSION; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_PASSWORD; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_QOS; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_TOPIC; +import static com.dtstack.flinkx.emqx.EmqxConfigKeys.KEY_USERNAME; /** * Date: 2020/02/12 diff --git a/flinkx-es/flinkx-es-reader/pom.xml b/flinkx-es/flinkx-es-reader/pom.xml index 227dc25d10..8ccef94149 100644 --- a/flinkx-es/flinkx-es-reader/pom.xml +++ b/flinkx-es/flinkx-es-reader/pom.xml @@ -53,6 +53,20 @@ + + + com.fasterxml.jackson.databind + shade.es.com.fasterxml.jackson.databind + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormat.java b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormat.java index 282e2ac5f8..1c54a5e115 100644 --- 
a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormat.java +++ b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormat.java @@ -25,7 +25,11 @@ import org.apache.flink.core.io.GenericInputSplit; import org.apache.flink.core.io.InputSplit; import org.apache.flink.types.Row; -import org.elasticsearch.action.search.*; +import org.elasticsearch.action.search.ClearScrollRequest; +import org.elasticsearch.action.search.ClearScrollResponse; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.SearchScrollRequest; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.QueryBuilders; diff --git a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormatBuilder.java b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormatBuilder.java index 33f9e85d21..57584faeb4 100644 --- a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormatBuilder.java +++ b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsInputFormatBuilder.java @@ -18,6 +18,7 @@ package com.dtstack.flinkx.es.reader; +import com.dtstack.flinkx.constants.ConstantValue; import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder; import java.util.List; import java.util.Map; @@ -103,5 +104,9 @@ protected void checkFormat() { if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){ throw new UnsupportedOperationException("This plugin not support restore from failed state"); } + + if (format.batchSize > ConstantValue.MAX_BATCH_SIZE) { + throw new IllegalArgumentException("批量读取数量不能大于[200000]条"); + } } } diff --git a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsReader.java b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsReader.java index da95c5b0ee..e57fa881a4 100644 --- a/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsReader.java +++ b/flinkx-es/flinkx-es-reader/src/main/java/com/dtstack/flinkx/es/reader/EsReader.java @@ -28,6 +28,8 @@ import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.HashMap; @@ -42,6 +44,8 @@ */ public class EsReader extends BaseDataReader { + private static Logger LOG = LoggerFactory.getLogger(EsReader.class); + private String address; private String username; private String password; @@ -87,7 +91,8 @@ public EsReader(DataTransferConfig config, StreamExecutionEnvironment env) { columnValue.add((String) sm.get("value")); columnName.add((String) sm.get("name")); } - System.out.println("init column finished"); + + LOG.info("init column finished"); } else if (!ConstantValue.STAR_SYMBOL.equals(columns.get(0)) || columns.size() != 1) { throw new IllegalArgumentException("column argument error"); } diff --git a/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReadDemo.java b/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReadDemo.java deleted file mode 100644 index 06e304c46a..0000000000 --- a/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReadDemo.java +++ /dev/null @@ -1,90 
+0,0 @@ -package com.dtstack.flinkx.es.reader.test; - - -import com.google.gson.Gson; -import org.apache.http.HttpHost; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.aggregations.AggregationBuilder; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.Aggregations; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import static org.elasticsearch.index.query.QueryBuilders.termQuery; - -public class EsReadDemo { - - public static void searchAll() throws IOException { - RestHighLevelClient client = new RestHighLevelClient( - RestClient.builder( - new HttpHost("rdos1", 9200, "http"), - new HttpHost("rdos2", 9200, "http"))); - SearchRequest searchRequest = new SearchRequest(); -// SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); -// searchSourceBuilder.query(QueryBuilders.matchAllQuery()); -// searchSourceBuilder().from(0); -// searchSourceBuilder().size(100); - - SearchResponse searchResponse = client.search(searchRequest); - SearchHits searchHits = searchResponse.getHits(); - - for(SearchHit searchHit : searchHits) { - Map source = searchHit.getSourceAsMap(); - System.out.println(source); - } - - - } - - public static void searchPart() throws IOException { - RestHighLevelClient client = new RestHighLevelClient( - RestClient.builder( - new HttpHost("rdos1", 9200, "http"), - new HttpHost("rdos2", 9200, "http"))); - SearchRequest searchRequest = new SearchRequest(); - SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - - sourceBuilder.from(0); - sourceBuilder.size(100); - -// QueryBuilder qb2 = QueryBuilders.wrapperQuery("{\"match_all\": {}}"); - Map map = new HashMap<>(); - Map match = new HashMap<>(); - map.put("match",match); - match.put("col2", "hallo"); - Gson gson = new Gson(); - - //QueryBuilder qb2 = QueryBuilders.wrapperQuery("{\"match\": {\"col2\":\"hallo\"}}"); - QueryBuilder qb2 = QueryBuilders.wrapperQuery(gson.toJson(map)); - sourceBuilder.query(qb2); - searchRequest.source(sourceBuilder); - - SearchResponse searchResponse = client.search(searchRequest); - System.out.println(searchResponse); - SearchHits searchHits = searchResponse.getHits(); - for(SearchHit searchHit : searchHits.getHits()) { - System.out.println(searchHit.docId() + ":" + searchHit.getSourceAsMap()); - } - long total = searchHits.getTotalHits(); - System.out.println("total: " + total); - - client.close(); - } - - - public static void main(String[] args) throws IOException { - //searchAll(); - searchPart(); - //searchAll(); - } - -} diff --git a/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReaderTest.java b/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReaderTest.java deleted file mode 100644 index bf98581615..0000000000 --- a/flinkx-es/flinkx-es-reader/src/test/java/com/dtstack/flinkx/es/reader/test/EsReaderTest.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.dtstack.flinkx.es.reader.test; - -import com.google.gson.Gson; - -import java.util.HashMap; -import java.util.Map; - -/** - * 
Created by softfly on 18/2/8. - */ -public class EsReaderTest { - public static void main(String[] args) { - Gson gson = new Gson(); - Map map = new HashMap<>(); - map.put("xxx", 111); - map.put("yyyy", "fff"); - String json = gson.toJson(map); - System.out.println(json); - } -} diff --git a/flinkx-es/flinkx-es-writer/pom.xml b/flinkx-es/flinkx-es-writer/pom.xml index 1cdfab73fb..8e3e15a822 100644 --- a/flinkx-es/flinkx-es-writer/pom.xml +++ b/flinkx-es/flinkx-es-writer/pom.xml @@ -54,6 +54,20 @@ + + + com.fasterxml.jackson.databind + shade.es.com.fasterxml.jackson.databind + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-es/flinkx-es-writer/src/test/java/com/dtstack/flinkx/es/writer/test/EsDemo.java b/flinkx-es/flinkx-es-writer/src/test/java/com/dtstack/flinkx/es/writer/test/EsDemo.java deleted file mode 100644 index 503c09fa7e..0000000000 --- a/flinkx-es/flinkx-es-writer/src/test/java/com/dtstack/flinkx/es/writer/test/EsDemo.java +++ /dev/null @@ -1,146 +0,0 @@ -package com.dtstack.flinkx.es.writer.test; - -import org.apache.http.HttpHost; -import org.elasticsearch.action.bulk.BulkRequest; -import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.index.IndexResponse; -import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.action.update.UpdateRequest; -import org.elasticsearch.action.update.UpdateResponse; -import org.elasticsearch.client.RestClient; -import org.elasticsearch.client.RestHighLevelClient; -import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.search.builder.SearchSourceBuilder; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -public class EsDemo { - - public static void test1() throws Exception { - RestHighLevelClient client = new RestHighLevelClient( - RestClient.builder( - new HttpHost("rdos1", 9200, "http"), - new HttpHost("rdos2", 9200, "http"))); - - IndexRequest request = new IndexRequest( - "nani222", - "doc222", - "id2"); - - String jsonString = "{" + - "\"user\":\"user2\"," + - "\"postDate\":\"2014-01-30\"," + - "\"message\":\"trying out Elasticsearch\"" + - "}"; - - - request.source(jsonString, XContentType.JSON); - IndexResponse response = client.index(request); - System.out.println(response.getResult()); - client.close(); - } - - public static void test3() throws Exception { - RestHighLevelClient client = new RestHighLevelClient( - RestClient.builder( - new HttpHost("study", 9200, "http"), - new HttpHost("study", 9201, "http"))); - - IndexRequest request = new IndexRequest( - "nani", - "doc"); - -// String jsonString = "{" + -// "\"user\":\"xxxx\"," + -// "\"postDate\":\"2013-01-30\"," + -// "\"message\":\"trying out Elasticsearch\"" + -// "}"; - Map jsonMap = new HashMap<>(); - jsonMap.put("xxx", "asfdasdf"); - jsonMap.put("zzz", "asdfsadf"); - request.source(jsonMap); - IndexResponse response = client.index(request); - System.out.println(response.getResult()); - client.close(); - } - - public static void test2() throws Exception { - RestHighLevelClient client = new RestHighLevelClient( - RestClient.builder( - new HttpHost("localhost", 9200, "http"), - new HttpHost("localhost", 9201, "http"))); - - UpdateRequest request = new UpdateRequest( - 
"nani250", - "doc", - "2"); - - String jsonString = "{" + - "\"user\":\"kimchy\"," + - "\"postDate\":\"2013-01-30\"," + - "\"message\":\"trying out Elasticsearch\"" + - "}"; - - request.doc(jsonString, XContentType.JSON); - UpdateResponse response = client.update(request); - System.out.println(response.getResult()); - client.close(); - } - - public static void test4() throws IOException { - - RestHighLevelClient client = new RestHighLevelClient( - RestClient.builder( - new HttpHost("study", 9200, "http"), - new HttpHost("study", 9201, "http"))); - SearchRequest searchRequest = new SearchRequest(); - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(QueryBuilders.matchAllQuery()); - - SearchResponse searchResponse = client.search(searchRequest); - System.out.println(searchResponse.getTotalShards()); - } - - public static void test5() throws Exception { - RestHighLevelClient client = new RestHighLevelClient( - RestClient.builder( - new HttpHost("study", 9200, "http"), - new HttpHost("study", 9201, "http"))); - BulkRequest bulkRequest = new BulkRequest(); - - IndexRequest request = new IndexRequest("nani", "doc1"); - Map jsonMap = new HashMap<>(); - jsonMap.put("xxx", "8888"); - jsonMap.put("yyy", "9999"); - - bulkRequest.add(request.source(jsonMap)); - // bulkRequest.setRefreshPolicy(null); - // WriteRequest.RefreshPolicy; - - BulkResponse bulkResponse = client.bulk(bulkRequest); - - System.out.println(bulkResponse); - } - - public static void test6() throws IOException { - RestHighLevelClient client = new RestHighLevelClient( - RestClient.builder( - new HttpHost("study", 9200, "http"), - new HttpHost("study", 9201, "http"))); - SearchRequest searchRequest = new SearchRequest(); - SearchResponse resp = client.search(searchRequest); - resp.getAggregations(); - } - - public static void main(String[] args) throws Exception { - test1(); - } - - -} diff --git a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpConfig.java b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpConfig.java index fc58ca41e7..afc45b081a 100644 --- a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpConfig.java +++ b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpConfig.java @@ -59,7 +59,7 @@ public class FtpConfig implements Serializable { public int timeout = FtpConfigConstants.DEFAULT_TIMEOUT; - public long maxFileSize = 1024 * 1024 * 1024; + public long maxFileSize = 1024 * 1024 * 1024L; public String getUsername() { return username; diff --git a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandler.java b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandler.java index d201625068..4b99e46257 100644 --- a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandler.java +++ b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandler.java @@ -89,23 +89,14 @@ public void loginFtpServer(FtpConfig ftpConfig) { } @Override - public void logoutFtpServer() { + public void logoutFtpServer() throws IOException{ if (ftpClient.isConnected()) { try { ftpClient.logout(); - } catch (IOException e) { - LOG.error(DISCONNECT_FAIL_MESSAGE); - throw new RuntimeException(e); - }finally { + } finally { if(ftpClient.isConnected()){ - try { - ftpClient.disconnect(); - } catch (IOException e) { - LOG.error(DISCONNECT_FAIL_MESSAGE); - throw new RuntimeException(e); - } + ftpClient.disconnect(); } - } } } diff --git 
a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandlerFactory.java b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandlerFactory.java index cc83e0343a..8c775f062d 100644 --- a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandlerFactory.java +++ b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/FtpHandlerFactory.java @@ -19,6 +19,8 @@ package com.dtstack.flinkx.ftp; +import org.apache.commons.lang3.StringUtils; + /** * @author jiangbo * @date 2019/11/21 @@ -28,8 +30,8 @@ public class FtpHandlerFactory { public static IFtpHandler createFtpHandler(String protocolStr){ IFtpHandler ftpHandler; - EProtocol protocol = EProtocol.getByName(protocolStr); - if(EProtocol.SFTP.equals(protocol)) { + Protocol protocol = Protocol.getByName(protocolStr); + if(Protocol.SFTP.equals(protocol)) { ftpHandler = new SftpHandler(); } else { ftpHandler = new FtpHandler(); @@ -37,4 +39,16 @@ public static IFtpHandler createFtpHandler(String protocolStr){ return ftpHandler; } + + enum Protocol{ + FTP, SFTP; + + public static Protocol getByName(String name) { + if (StringUtils.isEmpty(name)) { + return SFTP; + } + + return Protocol.valueOf(name.toUpperCase()); + } + } } diff --git a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/IFtpHandler.java b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/IFtpHandler.java index c8da4124b1..16faf8e6d8 100644 --- a/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/IFtpHandler.java +++ b/flinkx-ftp/flinkx-ftp-core/src/main/java/com/dtstack/flinkx/ftp/IFtpHandler.java @@ -19,6 +19,7 @@ package com.dtstack.flinkx.ftp; +import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.List; @@ -40,8 +41,9 @@ public interface IFtpHandler { /** * 登出服务器 + * @throws IOException logout error */ - void logoutFtpServer(); + void logoutFtpServer() throws IOException; /** * 判断给定的目录是否存在 diff --git a/flinkx-ftp/flinkx-ftp-reader/pom.xml b/flinkx-ftp/flinkx-ftp-reader/pom.xml index 1771055c60..42ae5c1123 100644 --- a/flinkx-ftp/flinkx-ftp-reader/pom.xml +++ b/flinkx-ftp/flinkx-ftp-reader/pom.xml @@ -71,6 +71,16 @@ under the License. 
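> Note: the `FtpHandlerFactory` hunk above replaces the `EProtocol` lookup with a package-private `Protocol` enum that falls back to SFTP for an empty protocol string, and `logoutFtpServer()` now declares `IOException` instead of wrapping it in a `RuntimeException`. A minimal caller sketch under those assumptions follows; `checkPath` is a hypothetical method, while the handler and config calls are the ones shown in this patch.

```java
import com.dtstack.flinkx.ftp.FtpConfig;
import com.dtstack.flinkx.ftp.FtpHandlerFactory;
import com.dtstack.flinkx.ftp.IFtpHandler;

import java.io.IOException;

/** Hypothetical caller showing the factory lookup plus the new checked logout. */
public class FtpHandlerUsageSketch {

    public void checkPath(FtpConfig ftpConfig) throws IOException {
        // Empty or null protocol falls back to SFTP; "ftp" selects the plain FtpHandler.
        IFtpHandler handler = FtpHandlerFactory.createFtpHandler(ftpConfig.getProtocol());
        handler.loginFtpServer(ftpConfig);
        try {
            boolean exists = handler.isDirExist(ftpConfig.getPath());
            System.out.println("remote path exists: " + exists);
        } finally {
            // logoutFtpServer() now surfaces IOException to the caller.
            handler.logoutFtpServer();
        }
    }
}
```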
+ + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpInputFormat.java b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpInputFormat.java index 6f0f4471d3..3b3b62013c 100644 --- a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpInputFormat.java +++ b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpInputFormat.java @@ -19,7 +19,12 @@ package com.dtstack.flinkx.ftp.reader; import com.dtstack.flinkx.constants.ConstantValue; -import com.dtstack.flinkx.ftp.*; +import com.dtstack.flinkx.ftp.EProtocol; +import com.dtstack.flinkx.ftp.FtpConfig; +import com.dtstack.flinkx.ftp.FtpHandler; +import com.dtstack.flinkx.ftp.FtpHandlerFactory; +import com.dtstack.flinkx.ftp.IFtpHandler; +import com.dtstack.flinkx.ftp.SftpHandler; import com.dtstack.flinkx.inputformat.BaseRichInputFormat; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.StringUtil; @@ -54,13 +59,7 @@ public class FtpInputFormat extends BaseRichInputFormat { public void openInputFormat() throws IOException { super.openInputFormat(); - if(EProtocol.SFTP.name().equalsIgnoreCase(ftpConfig.getProtocol())) { - ftpHandler = new SftpHandler(); - } else if(EProtocol.FTP.name().equalsIgnoreCase(ftpConfig.getProtocol())){ - ftpHandler = new FtpHandler(); - } else { - throw new RuntimeException("协议名称错误:" + ftpConfig.getProtocol()); - } + ftpHandler = FtpHandlerFactory.createFtpHandler(ftpConfig.getProtocol()); ftpHandler.loginFtpServer(ftpConfig); } @@ -157,5 +156,4 @@ public void closeInternal() throws IOException { ftpHandler.logoutFtpServer(); } } - } diff --git a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpReader.java b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpReader.java index 411389206d..c889b249cf 100644 --- a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpReader.java +++ b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpReader.java @@ -21,6 +21,7 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.config.ReaderConfig; import com.dtstack.flinkx.ftp.FtpConfig; +import com.dtstack.flinkx.ftp.FtpConfigConstants; import com.dtstack.flinkx.reader.BaseDataReader; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.StringUtil; @@ -30,8 +31,6 @@ import java.util.List; -import static com.dtstack.flinkx.ftp.FtpConfigConstants.*; - /** * The reader plugin of Ftp * @@ -57,7 +56,7 @@ public FtpReader(DataTransferConfig config, StreamExecutionEnvironment env) { ftpConfig.setDefaultPort(); } - if(!DEFAULT_FIELD_DELIMITER.equals(ftpConfig.getFieldDelimiter())){ + if(!FtpConfigConstants.DEFAULT_FIELD_DELIMITER.equals(ftpConfig.getFieldDelimiter())){ String fieldDelimiter = StringUtil.convertRegularExpr(ftpConfig.getFieldDelimiter()); ftpConfig.setFieldDelimiter(fieldDelimiter); } diff --git a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpSeqBufferedReader.java b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpSeqBufferedReader.java index a19e566fa6..34755a4109 100644 --- a/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpSeqBufferedReader.java +++ 
b/flinkx-ftp/flinkx-ftp-reader/src/main/java/com/dtstack/flinkx/ftp/reader/FtpSeqBufferedReader.java @@ -20,8 +20,13 @@ import com.dtstack.flinkx.ftp.IFtpHandler; import com.dtstack.flinkx.ftp.FtpHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.util.Iterator; /** @@ -32,6 +37,8 @@ */ public class FtpSeqBufferedReader { + private static Logger LOG = LoggerFactory.getLogger(FtpSeqBufferedReader.class); + private IFtpHandler ftpHandler; private Iterator iter; @@ -76,7 +83,8 @@ private void nextStream() throws IOException{ br = new BufferedReader(new InputStreamReader(in, charsetName)); for (int i = 0; i < fromLine; i++) { - br.readLine(); + String skipLine = br.readLine(); + LOG.info("Skip line:{}", skipLine); } } else { br = null; diff --git a/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/FtpServerDemo.java b/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/FtpServerDemo.java deleted file mode 100644 index ed9177c3b7..0000000000 --- a/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/FtpServerDemo.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.dtstack.flinkx.ftp.reader; - -import org.apache.commons.net.ftp.FTPClient; -import org.apache.commons.net.ftp.FTPFile; - -import java.io.IOException; -import java.io.InputStream; - -/** - * Created by softfly on 17/11/22. - */ -public class FtpServerDemo { - - public static void main(String[] args) throws IOException { - FTPClient ftp = new FTPClient(); - String username = "test"; - String password = "qbI#5pNd"; - ftp.connect("node02", 21); - ftp.login(username, password); - FTPFile[] ftpFiles = ftp.listFiles(); - for(FTPFile ftpFile : ftpFiles) { - System.out.println(ftpFile.getName()); - } - - - String[] xxx = ftp.listNames(); - - InputStream is1 = ftp.retrieveFileStream("hyf/ttt"); - ftp.getReply(); - InputStream is2 = ftp.retrieveFileStream("xxx"); - ftp.remoteRetrieve("/hyf/ttt"); - ftp.getReply(); - - ftp.changeWorkingDirectory("/hyf"); - System.out.println(ftp.printWorkingDirectory()); - - ftp.logout(); - - } - -} diff --git a/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/SftpServerDemo.java b/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/SftpServerDemo.java deleted file mode 100644 index 6defecb105..0000000000 --- a/flinkx-ftp/flinkx-ftp-reader/src/test/java/com/dtstack/flinkx/ftp/reader/SftpServerDemo.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.dtstack.flinkx.ftp.reader; - -import com.jcraft.jsch.*; - -import java.util.Properties; -import java.util.Vector; - -/** - * Created by softfly on 17/11/23. 
- */ -public class SftpServerDemo { - - public static void main(String[] args) throws JSchException, SftpException { - JSch jsch = new JSch(); - - Session session = jsch.getSession("mysftp", "node02"); - session.setPassword("oh1986mygod"); - session.setPort(22); - //session.setTimeout(10); - Properties config = new Properties(); - config.put("StrictHostKeyChecking", "no"); - - session.setConfig(config); - session.connect(); - - ChannelSftp channelSftp = (ChannelSftp) session.openChannel("sftp"); // 打开SFTP通道 - channelSftp.connect(); // 建立SFTP通道的连接 - - Vector vector = channelSftp.ls("/"); - - for(int i = 0; i < vector.size(); ++i) { - ChannelSftp.LsEntry le = (ChannelSftp.LsEntry) vector.get(i); - System.out.println(le.getFilename() ); - System.out.println(le.getLongname()); - } - - - //session.disconnect(); - - } - -} diff --git a/flinkx-ftp/flinkx-ftp-writer/pom.xml b/flinkx-ftp/flinkx-ftp-writer/pom.xml index 61d8ffaede..5c5b2bb6f7 100644 --- a/flinkx-ftp/flinkx-ftp-writer/pom.xml +++ b/flinkx-ftp/flinkx-ftp-writer/pom.xml @@ -72,6 +72,16 @@ under the License. + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java index aeddb31bcb..4d66e893b1 100644 --- a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java +++ b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormat.java @@ -19,22 +19,27 @@ package com.dtstack.flinkx.ftp.writer; import com.dtstack.flinkx.exception.WriteRecordException; -import com.dtstack.flinkx.ftp.*; +import com.dtstack.flinkx.ftp.FtpConfig; +import com.dtstack.flinkx.ftp.FtpHandlerFactory; +import com.dtstack.flinkx.ftp.IFtpHandler; import com.dtstack.flinkx.outputformat.BaseFileOutputFormat; +import com.dtstack.flinkx.util.ExceptionUtil; +import com.dtstack.flinkx.util.GsonUtil; import com.dtstack.flinkx.util.StringUtil; import com.dtstack.flinkx.util.SysUtil; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import org.apache.flink.types.Row; +import java.io.BufferedWriter; import java.io.IOException; -import java.io.OutputStream; -import java.util.Arrays; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; +import java.util.Collections; import java.util.List; +import java.util.concurrent.TimeUnit; import java.util.function.Predicate; -import static com.dtstack.flinkx.ftp.FtpConfigConstants.SFTP_PROTOCOL; - /** * The OutputFormat Implementation which reads data from ftp servers. 
* @@ -54,21 +59,18 @@ public class FtpOutputFormat extends BaseFileOutputFormat { private transient IFtpHandler ftpHandler; - private transient OutputStream os; + private static final int FILE_NAME_PART_SIZE = 3; private static final String DOT = "."; private static final String FILE_SUFFIX = ".csv"; private static final String OVERWRITE_MODE = "overwrite"; + private transient BufferedWriter writer; @Override protected void openSource() throws IOException { - if(SFTP_PROTOCOL.equalsIgnoreCase(ftpConfig.getProtocol())) { - ftpHandler = new SftpHandler(); - } else { - ftpHandler = new FtpHandler(); - } + ftpHandler = FtpHandlerFactory.createFtpHandler(ftpConfig.getProtocol()); ftpHandler.loginFtpServer(ftpConfig); } @@ -106,7 +108,7 @@ public boolean test(String file) { } String[] splits = fileName.split("\\."); - if (splits.length == 3) { + if (splits.length == FILE_NAME_PART_SIZE) { return Integer.parseInt(splits[2]) <= fileIndex; } @@ -125,11 +127,16 @@ public boolean test(String file) { protected void nextBlock(){ super.nextBlock(); - if (os != null){ + if (writer != null){ return; } - - os = ftpHandler.getOutputStream(tmpPath + SP + currentBlockFileName); + String path = tmpPath + SP + currentBlockFileName; + try { + writer = new BufferedWriter(new OutputStreamWriter(ftpHandler.getOutputStream(path), ftpConfig.getEncoding())); + } catch (UnsupportedEncodingException e) { + LOG.error("exception when create BufferedWriter, path = {}, e = {}", path, ExceptionUtil.getErrorMessage(e)); + throw new RuntimeException(e); + } blockIndex++; } @@ -156,29 +163,42 @@ public void moveTemporaryDataBlockFileToDirectory(){ @Override public void writeSingleRecordToFile(Row row) throws WriteRecordException { - if(os == null){ - nextBlock(); - } - - String line = StringUtil.row2string(row, columnTypes, ftpConfig.getFieldDelimiter(), columnNames); try { - byte[] bytes = line.getBytes(ftpConfig.getEncoding()); - this.os.write(bytes); - this.os.write(NEWLINE); - this.os.flush(); + if(writer == null){ + nextBlock(); + } + + String line = StringUtil.row2string(row, columnTypes, ftpConfig.getFieldDelimiter()); + this.writer.write(line); + this.writer.write(NEWLINE); if(restoreConfig.isRestore()){ lastRow = row; rowsOfCurrentBlock++; } - } catch(Exception ex) { - throw new WriteRecordException(ex.getMessage(), ex); + } catch(Exception e) { + LOG.error("error happened when write single record to file, row = {}, columnTypes = {}, e = {}", row, GsonUtil.GSON.toJson(columnTypes), ExceptionUtil.getErrorMessage(e)); + throw new WriteRecordException(e.getMessage(), e); } } @Override - protected void createFinishedTag() throws IOException { - LOG.info("Subtask [{}] finished, create dir {}", taskNumber, finishedPath); + protected void createFinishedTag() { + LOG.info("SubTask [{}] finished, create dir {}", taskNumber, finishedPath); + String path = outputFilePath + SP + FINISHED_SUBDIR; + if(taskNumber == 0){ + ftpHandler.mkDirRecursive(path); + } + final int maxRetryTime = 15; + int i = 0; + try { + while(!(ftpHandler.isDirExist(path) || i > maxRetryTime)){ + i++; + TimeUnit.MILLISECONDS.sleep(10); + } + }catch (Exception e){ + LOG.error("exception when createFinishedTag, path = {}, e = {}", path, ExceptionUtil.getErrorMessage(e)); + } ftpHandler.mkDirRecursive(finishedPath); } @@ -238,7 +258,7 @@ protected void waitForAllTasksToFinish(){ if (i == maxRetryTime) { ftpHandler.deleteAllFilesInDir(finishedPath, null); - throw new RuntimeException("timeout when gathering finish tags for each subtasks"); + throw new 
RuntimeException("timeout when gathering finish tags for each subTasks"); } } @@ -246,7 +266,7 @@ protected void waitForAllTasksToFinish(){ protected void coverageData(){ boolean cleanPath = restoreConfig.isRestore() && OVERWRITE_MODE.equalsIgnoreCase(ftpConfig.getWriteMode()) && !SP.equals(ftpConfig.getPath()); if(cleanPath){ - ftpHandler.deleteAllFilesInDir(ftpConfig.getPath(), Arrays.asList(tmpPath)); + ftpHandler.deleteAllFilesInDir(ftpConfig.getPath(), Collections.singletonList(tmpPath)); } } @@ -286,15 +306,15 @@ protected void moveAllTemporaryDataFileToDirectory() throws IOException { @Override protected void closeSource() throws IOException { - if (os != null){ - os.flush(); - os.close(); - os = null; + if (writer != null){ + writer.flush(); + writer.close(); + writer = null; } } @Override - protected void clearTemporaryDataFiles() throws IOException { + protected void clearTemporaryDataFiles() { ftpHandler.deleteAllFilesInDir(tmpPath, null); LOG.info("Delete .data dir:{}", tmpPath); @@ -307,6 +327,12 @@ public void flushDataInternal() throws IOException { closeSource(); } + @Override + public void closeInternal() throws IOException { + closeSource(); + super.closeInternal(); + } + @Override public float getDeviation() { return 1.0F; @@ -316,4 +342,9 @@ public float getDeviation() { protected String getExtension() { return ".csv"; } + + @Override + protected void writeMultipleRecordsInternal() throws Exception { + notSupportBatchWrite("FtpWriter"); + } } diff --git a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormatBuilder.java b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormatBuilder.java index dee19b9e1c..dc7bcae4f9 100644 --- a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormatBuilder.java +++ b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpOutputFormatBuilder.java @@ -19,9 +19,7 @@ package com.dtstack.flinkx.ftp.writer; import com.dtstack.flinkx.ftp.FtpConfig; -import com.dtstack.flinkx.ftp.FtpConfigConstants; import com.dtstack.flinkx.outputformat.FileOutputFormatBuilder; -import org.apache.commons.lang.StringUtils; import java.util.List; /** @@ -53,7 +51,7 @@ public void setFtpConfig(FtpConfig ftpConfig){ @Override protected void checkFormat() { - + notSupportBatchWrite("FtpWriter"); } } diff --git a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpWriter.java b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpWriter.java index edfb666f4a..9588e3a3c0 100644 --- a/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpWriter.java +++ b/flinkx-ftp/flinkx-ftp-writer/src/main/java/com/dtstack/flinkx/ftp/writer/FtpWriter.java @@ -30,7 +30,7 @@ import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.ftp.FtpConfigConstants.*; +import static com.dtstack.flinkx.ftp.FtpConfigConstants.DEFAULT_FIELD_DELIMITER; /** * The Writer Plugin of Ftp diff --git a/flinkx-gbase/flinkx-gbase-reader/pom.xml b/flinkx-gbase/flinkx-gbase-reader/pom.xml index 3082049696..9a7903f7fc 100644 --- a/flinkx-gbase/flinkx-gbase-reader/pom.xml +++ b/flinkx-gbase/flinkx-gbase-reader/pom.xml @@ -62,8 +62,12 @@ shade.gbasereader.io.netty - com.google - shade.gbasereader.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git 
a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java index 7361843030..eb1b86a8a4 100644 --- a/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java +++ b/flinkx-gbase/flinkx-gbase-reader/src/main/java/com/dtstack/flinkx/gbase/format/GbaseInputFormat.java @@ -18,6 +18,7 @@ package com.dtstack.flinkx.gbase.format; import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import com.dtstack.flinkx.rdb.inputformat.JdbcInputSplit; import com.dtstack.flinkx.rdb.util.DbUtil; import com.dtstack.flinkx.util.ClassUtil; import org.apache.commons.lang3.StringUtils; @@ -47,7 +48,9 @@ public void openInternal(InputSplit inputSplit) throws IOException { String startLocation = incrementConfig.getStartLocation(); if (incrementConfig.isPolling()) { - endLocationAccumulator.add(Long.parseLong(startLocation)); + if (StringUtils.isNotEmpty(startLocation)) { + endLocationAccumulator.add(Long.parseLong(startLocation)); + } isTimestamp = "timestamp".equalsIgnoreCase(incrementConfig.getColumnType()); } else if ((incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc())) { getMaxValue(inputSplit); @@ -61,6 +64,10 @@ public void openInternal(InputSplit inputSplit) throws IOException { fetchSize = Integer.MIN_VALUE; querySql = buildQuerySql(inputSplit); + JdbcInputSplit jdbcInputSplit = (JdbcInputSplit) inputSplit; + if (null != jdbcInputSplit.getStartLocation()) { + startLocation = jdbcInputSplit.getStartLocation(); + } executeQuery(startLocation); columnCount = resultSet.getMetaData().getColumnCount(); diff --git a/flinkx-gbase/flinkx-gbase-writer/pom.xml b/flinkx-gbase/flinkx-gbase-writer/pom.xml index da6f21b4a2..d28e0af276 100644 --- a/flinkx-gbase/flinkx-gbase-writer/pom.xml +++ b/flinkx-gbase/flinkx-gbase-writer/pom.xml @@ -62,8 +62,12 @@ shade.gbasewriter.io.netty - com.google - shade.gbasewriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-greenplum/flinkx-greenplum-core/pom.xml b/flinkx-greenplum/flinkx-greenplum-core/pom.xml new file mode 100644 index 0000000000..3a79da783d --- /dev/null +++ b/flinkx-greenplum/flinkx-greenplum-core/pom.xml @@ -0,0 +1,39 @@ + + + + flinkx-greenplum + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-greenplum-core + + + com.dtstack.flinkx + flinkx-postgresql-core + 1.6 + + + com.pivotal + greenplum-jdbc + 5.1.4 + + + org.postgresql + postgresql + 42.2.2 + + + + + + HandChina RDC + HandChina RDC + http://nexus.saas.hand-china.com/content/repositories/rdc/ + + + + \ No newline at end of file diff --git a/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMeta.java b/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMeta.java new file mode 100644 index 0000000000..95dcb1b887 --- /dev/null +++ b/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMeta.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flinkx.greenplum; + +import com.dtstack.flinkx.enums.EDatabaseType; +import com.dtstack.flinkx.postgresql.PostgresqlDatabaseMeta; + +import java.util.List; +import java.util.Map; + +/** + * The class of Greenplum database prototype + * + * @Company: www.dtstack.com + * @author kunni@dtstack.com + */ + +public class GreenplumDatabaseMeta extends PostgresqlDatabaseMeta { + + @Override + public String getUpsertStatement(List column, String table, Map> updateKey) { + throw new UnsupportedOperationException("Greenplum not support update mode"); + } + + @Override + public EDatabaseType getDatabaseType() { + return EDatabaseType.Greenplum; + } + + @Override + public String getDriverClass() { + return "com.pivotal.jdbc.GreenplumDriver"; + } +} diff --git a/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMetaInsert.java b/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMetaInsert.java new file mode 100644 index 0000000000..cb811c6ef0 --- /dev/null +++ b/flinkx-greenplum/flinkx-greenplum-core/src/main/java/com/dtstack/flinkx/greenplum/GreenplumDatabaseMetaInsert.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flinkx.greenplum; + +/** + * when use insert mode, use org.postgresql.Driver + * + * @Company: www.dtstack.com + * @author kunni@dtstack.com + */ + +public class GreenplumDatabaseMetaInsert extends GreenplumDatabaseMeta{ + @Override + public String getDriverClass() { + return "org.postgresql.Driver"; + } +} diff --git a/flinkx-greenplum/flinkx-greenplum-reader/pom.xml b/flinkx-greenplum/flinkx-greenplum-reader/pom.xml new file mode 100644 index 0000000000..0fb695ce58 --- /dev/null +++ b/flinkx-greenplum/flinkx-greenplum-reader/pom.xml @@ -0,0 +1,112 @@ + + + + flinkx-greenplum + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-greenplum-reader + + + + com.dtstack.flinkx + flinkx-greenplum-core + 1.6 + + + com.dtstack.flinkx + flinkx-postgresql-reader + 1.6 + + + com.dtstack.flinkx + flinkx-rdb-reader + 1.6 + provided + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.1.0 + + + package + + shade + + + false + + + org.slf4j:slf4j-api + log4j:log4j + ch.qos.logback:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + io.netty + shade.greenplumreader.io.netty + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumInputFormat.java b/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumInputFormat.java new file mode 100644 index 0000000000..c1fba4eb0b --- /dev/null +++ b/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumInputFormat.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flinkx.greenplum.format; + +import com.dtstack.flinkx.postgresql.format.PostgresqlInputFormat; + +/**Simple inheritance of PostgresqlInputFormat + * + * @Company: www.dtstack.com + * @author kunni@dtstack.com + */ + +public class GreenplumInputFormat extends PostgresqlInputFormat { +} diff --git a/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/reader/GreenplumReader.java b/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/reader/GreenplumReader.java new file mode 100644 index 0000000000..09f547469f --- /dev/null +++ b/flinkx-greenplum/flinkx-greenplum-reader/src/main/java/com/dtstack/flinkx/greenplum/reader/GreenplumReader.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flinkx.greenplum.reader; + +import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.greenplum.format.GreenplumInputFormat; +import com.dtstack.flinkx.inputformat.BaseRichInputFormat; +import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter; +import com.dtstack.flinkx.postgresql.reader.PostgresqlQuerySqlBuilder; +import com.dtstack.flinkx.rdb.datareader.JdbcDataReader; +import com.dtstack.flinkx.rdb.datareader.QuerySqlBuilder; +import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormatBuilder; +import com.dtstack.flinkx.greenplum.GreenplumDatabaseMeta; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.types.Row; + +/** + * The reader plugin for Greenplum database + * + * @Company: www.dtstack.com + * @author kunni@dtstack.com + */ + +public class GreenplumReader extends JdbcDataReader { + public GreenplumReader(DataTransferConfig config, StreamExecutionEnvironment env) { + super(config, env); + setDatabaseInterface(new GreenplumDatabaseMeta()); + setTypeConverterInterface(new PostgresqlTypeConverter()); + } + + @Override + protected JdbcInputFormatBuilder getBuilder() { + return new JdbcInputFormatBuilder(new GreenplumInputFormat()); + } + + @Override + public DataStream readData() { + JdbcInputFormatBuilder builder = new JdbcInputFormatBuilder(new GreenplumInputFormat()); + builder.setDriverName(databaseInterface.getDriverClass()); + builder.setDbUrl(dbUrl); + builder.setUsername(username); + builder.setPassword(password); + builder.setBytes(bytes); + builder.setMonitorUrls(monitorUrls); + builder.setTable(table); + builder.setDatabaseInterface(databaseInterface); + builder.setTypeConverter(typeConverter); + builder.setMetaColumn(metaColumns); + builder.setFetchSize(fetchSize == 0 ? databaseInterface.getFetchSize() : fetchSize); + builder.setQueryTimeOut(queryTimeOut == 0 ? 
databaseInterface.getQueryTimeout() : queryTimeOut); + builder.setIncrementConfig(incrementConfig); + builder.setSplitKey(splitKey); + builder.setNumPartitions(numPartitions); + builder.setCustomSql(customSql); + builder.setRestoreConfig(restoreConfig); + builder.setHadoopConfig(hadoopConfig); + builder.setTestConfig(testConfig); + + QuerySqlBuilder sqlBuilder = new PostgresqlQuerySqlBuilder(this); + builder.setQuery(sqlBuilder.buildSql()); + + BaseRichInputFormat format = builder.finish(); + return createInput(format, (databaseInterface.getDatabaseType() + "reader").toLowerCase()); + } + +} diff --git a/flinkx-greenplum/flinkx-greenplum-writer/pom.xml b/flinkx-greenplum/flinkx-greenplum-writer/pom.xml new file mode 100644 index 0000000000..db6738d9e5 --- /dev/null +++ b/flinkx-greenplum/flinkx-greenplum-writer/pom.xml @@ -0,0 +1,110 @@ + + + + flinkx-greenplum + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-greenplum-writer + + + com.dtstack.flinkx + flinkx-rdb-writer + 1.6 + provided + + + com.dtstack.flinkx + flinkx-greenplum-core + 1.6 + + + com.dtstack.flinkx + flinkx-postgresql-writer + 1.6 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.1.0 + + + package + + shade + + + false + + + org.slf4j:slf4j-api + log4j:log4j + ch.qos.logback:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + io.netty + shade.greenplumwriter.io.netty + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumOutputFormat.java b/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumOutputFormat.java new file mode 100644 index 0000000000..a1d02138d2 --- /dev/null +++ b/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/format/GreenplumOutputFormat.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flinkx.greenplum.format; + +import com.dtstack.flinkx.postgresql.format.PostgresqlOutputFormat; + +/**Simple inheritance of PostgresqlOutputFormat + * + * Date: 2020/6/4 + * Company: www.dtstack.com + * @author kunni@dtstack.com + */ + +public class GreenplumOutputFormat extends PostgresqlOutputFormat { +} diff --git a/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/writer/GreenplumWriter.java b/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/writer/GreenplumWriter.java new file mode 100644 index 0000000000..8d3267968b --- /dev/null +++ b/flinkx-greenplum/flinkx-greenplum-writer/src/main/java/com/dtstack/flinkx/greenplum/writer/GreenplumWriter.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flinkx.greenplum.writer; + +import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.greenplum.format.GreenplumOutputFormat; +import com.dtstack.flinkx.postgresql.PostgresqlTypeConverter; +import com.dtstack.flinkx.rdb.datawriter.JdbcDataWriter; +import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormatBuilder; +import com.dtstack.flinkx.greenplum.GreenplumDatabaseMetaInsert; +import org.apache.commons.lang.StringUtils; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.datastream.DataStreamSink; +import org.apache.flink.types.Row; + +/** + * The writer plugin for Greenplum database + * + * @Company: www.dtstack.com + * @author kunni@dtstack.com + */ + +public class GreenplumWriter extends JdbcDataWriter { + + public static final String INSERT_SQL_MODE_TYPE = "copy"; + public static final String DATABASE_NAME = ";DatabaseName="; + public static final String JDBC_POSTGRESQL_PREFIX = "jdbc:postgresql"; + public static final String JDBC_GREENPLUM_PREFIX = "jdbc:pivotal:greenplum"; + + public GreenplumWriter(DataTransferConfig config) { + super(config); + //统一固定为copy模式 + insertSqlMode = INSERT_SQL_MODE_TYPE; + dbUrl = changeToPostgresqlUrl(); + setDatabaseInterface(new GreenplumDatabaseMetaInsert()); + setTypeConverterInterface(new PostgresqlTypeConverter()); + } + + String changeToPostgresqlUrl(){ + dbUrl = StringUtils.replaceOnce(dbUrl, JDBC_GREENPLUM_PREFIX, JDBC_POSTGRESQL_PREFIX); + dbUrl = StringUtils.replaceOnce(dbUrl, DATABASE_NAME, "/"); + return dbUrl; + } + + @Override + public DataStreamSink writeData(DataStream dataSet) { + GreenplumOutputFormat greenplumOutputFormat = new GreenplumOutputFormat(); + JdbcOutputFormatBuilder builder = new JdbcOutputFormatBuilder(greenplumOutputFormat); + builder.setDriverName(databaseInterface.getDriverClass()); + builder.setDbUrl(dbUrl); + builder.setUsername(username); + 
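+ // Note: dbUrl was already rewritten in the constructor by changeToPostgresqlUrl(), e.g.
+ // jdbc:pivotal:greenplum://192.168.1.1:5432;DatabaseName=demo -> jdbc:postgresql://192.168.1.1:5432/demo
+ // (host and database here are illustrative), so the org.postgresql.Driver reported by
+ // GreenplumDatabaseMetaInsert can parse it in the forced copy insert mode.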
builder.setPassword(password); + builder.setBatchInterval(batchSize); + builder.setMonitorUrls(monitorUrls); + builder.setPreSql(preSql); + builder.setPostSql(postSql); + builder.setErrors(errors); + builder.setErrorRatio(errorRatio); + builder.setDirtyPath(dirtyPath); + builder.setDirtyHadoopConfig(dirtyHadoopConfig); + builder.setSrcCols(srcCols); + builder.setDatabaseInterface(databaseInterface); + builder.setMode(mode); + builder.setTable(table); + builder.setColumn(column); + builder.setFullColumn(fullColumn); + builder.setUpdateKey(updateKey); + builder.setTypeConverter(typeConverter); + builder.setRestoreConfig(restoreConfig); + builder.setInsertSqlMode(insertSqlMode); + + DataStreamSink dataStreamSink = createOutput(dataSet, builder.finish()); + String sinkName = (databaseInterface.getDatabaseType() + "writer").toLowerCase(); + dataStreamSink.name(sinkName); + return dataStreamSink; + } +} diff --git a/flinkx-greenplum/pom.xml b/flinkx-greenplum/pom.xml new file mode 100644 index 0000000000..5c3fb85de9 --- /dev/null +++ b/flinkx-greenplum/pom.xml @@ -0,0 +1,36 @@ + + + + flinkx-all + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-greenplum + pom + + flinkx-greenplum-reader + flinkx-greenplum-core + flinkx-greenplum-writer + + + + + com.dtstack.flinkx + flinkx-core + 1.6 + provided + + + + com.dtstack.flinkx + flinkx-rdb-core + 1.6 + provided + + + + \ No newline at end of file diff --git a/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseConfigConstants.java b/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseConfigConstants.java index d83d83b3ee..d7f0d61ba6 100644 --- a/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseConfigConstants.java +++ b/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseConfigConstants.java @@ -44,7 +44,7 @@ public class HbaseConfigConstants { public static final String DEFAULT_NULL_MODE = "skip"; - public static final long DEFAULT_WRITE_BUFFER_SIZE = 8 * 1024 * 1024; + public static final long DEFAULT_WRITE_BUFFER_SIZE = 8 * 1024 * 1024L; public static final boolean DEFAULT_WAL_FLAG = false; diff --git a/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseHelper.java b/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseHelper.java index 0bce2c0c69..3c88f828f5 100644 --- a/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseHelper.java +++ b/flinkx-hbase/flinkx-hbase-core/src/main/java/com/dtstack/flinkx/hbase/HbaseHelper.java @@ -27,15 +27,17 @@ import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.*; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.BufferedMutator; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.RegionLocator; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.security.PrivilegedAction; -import java.util.Arrays; -import java.util.List; import java.util.Map; /** @@ -51,21 +53,9 @@ public class HbaseHelper { private final static String AUTHENTICATION_TYPE = "Kerberos"; private final static String KEY_HBASE_SECURITY_AUTHENTICATION = "hbase.security.authentication"; private final 
static String KEY_HBASE_SECURITY_AUTHORIZATION = "hbase.security.authorization"; - private final static String KEY_HBASE_MASTER_KERBEROS_PRINCIPAL = "hbase.master.kerberos.principal"; - private final static String KEY_HBASE_MASTER_KEYTAB_FILE = "hbase.master.keytab.file"; - private final static String KEY_HBASE_REGIONSERVER_KEYTAB_FILE = "hbase.regionserver.keytab.file"; - private final static String KEY_HBASE_REGIONSERVER_KERBEROS_PRINCIPAL = "hbase.regionserver.kerberos.principal"; - - private static List KEYS_KERBEROS_REQUIRED = Arrays.asList( - KEY_HBASE_SECURITY_AUTHENTICATION, - KEY_HBASE_MASTER_KERBEROS_PRINCIPAL, - KEY_HBASE_MASTER_KEYTAB_FILE, - KEY_HBASE_REGIONSERVER_KEYTAB_FILE, - KEY_HBASE_REGIONSERVER_KERBEROS_PRINCIPAL - ); public static org.apache.hadoop.hbase.client.Connection getHbaseConnection(Map hbaseConfigMap) { - Validate.isTrue(hbaseConfigMap != null && hbaseConfigMap.size() !=0, "hbaseConfig不能为空Map结构!"); + Validate.isTrue(MapUtils.isEmpty(hbaseConfigMap), "hbaseConfig不能为空Map结构!"); if(openKerberos(hbaseConfigMap)){ return getConnectionWithKerberos(hbaseConfigMap); @@ -81,43 +71,43 @@ public static org.apache.hadoop.hbase.client.Connection getHbaseConnection(Map hbaseConfigMap){ - for (String key : KEYS_KERBEROS_REQUIRED) { - if(StringUtils.isEmpty(MapUtils.getString(hbaseConfigMap, key))){ - throw new IllegalArgumentException(String.format("Must provide [%s] when authentication is Kerberos", key)); - } + try { + UserGroupInformation ugi = getUgi(hbaseConfigMap); + return ugi.doAs(new PrivilegedAction() { + @Override + public Connection run() { + try { + Configuration hConfiguration = getConfig(hbaseConfigMap); + return ConnectionFactory.createConnection(hConfiguration); + } catch (IOException e) { + LOG.error("Get connection fail with config:{}", hbaseConfigMap); + throw new RuntimeException(e); + } + } + }); + } catch (Exception e){ + throw new RuntimeException("Login kerberos error", e); } + } + public static UserGroupInformation getUgi(Map hbaseConfigMap) throws IOException{ String keytabFileName = KerberosUtil.getPrincipalFileName(hbaseConfigMap); keytabFileName = KerberosUtil.loadFile(hbaseConfigMap, keytabFileName); - String principal = KerberosUtil.findPrincipalFromKeytab(keytabFileName); + String principal = KerberosUtil.getPrincipal(hbaseConfigMap, keytabFileName); KerberosUtil.loadKrb5Conf(hbaseConfigMap); Configuration conf = FileSystemUtil.getConfiguration(hbaseConfigMap, null); - UserGroupInformation ugi; - try { - ugi = KerberosUtil.loginAndReturnUgi(conf, principal, keytabFileName); - } catch (Exception e){ - throw new RuntimeException("Login kerberos error", e); - } - - return ugi.doAs(new PrivilegedAction() { - @Override - public Connection run() { - try { - Configuration hConfiguration = getConfig(hbaseConfigMap); - return ConnectionFactory.createConnection(hConfiguration); - } catch (IOException e) { - LOG.error("Get connection fail with config:{}", hbaseConfigMap); - throw new RuntimeException(e); - } - } - }); + return KerberosUtil.loginAndReturnUgi(conf, principal, keytabFileName); } public static Configuration getConfig(Map hbaseConfigMap){ Configuration hConfiguration = HBaseConfiguration.create(); + if (MapUtils.isEmpty(hbaseConfigMap)) { + return hConfiguration; + } + for (Map.Entry entry : hbaseConfigMap.entrySet()) { if(entry.getValue() != null && !(entry.getValue() instanceof Map)){ hConfiguration.set(entry.getKey(), entry.getValue().toString()); diff --git 
a/flinkx-hbase/flinkx-hbase-core/src/test/java/com/dtstack/flinkx/hbase/test/HbaseHelperTest.java b/flinkx-hbase/flinkx-hbase-core/src/test/java/com/dtstack/flinkx/hbase/test/HbaseHelperTest.java deleted file mode 100644 index c1a67b5750..0000000000 --- a/flinkx-hbase/flinkx-hbase-core/src/test/java/com/dtstack/flinkx/hbase/test/HbaseHelperTest.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package com.dtstack.flinkx.hbase.test; - -import com.dtstack.flinkx.hbase.HbaseHelper; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.*; - -import java.util.HashMap; -import java.util.Map; - -/** - * @author jiangbo - * @date 2019/8/29 - */ -public class HbaseHelperTest { - - public static void main(String[] args) throws Exception{ - Map sftpConf = new HashMap<>(); - sftpConf.put("host", "172.16.10.79"); - sftpConf.put("port", "22"); - sftpConf.put("username", "root"); - sftpConf.put("password", "abc123"); - - Map hbaseConfig = new HashMap<>(); - hbaseConfig.put("hbase.security.authorization", "true"); - hbaseConfig.put("hbase.security.authentication", "kerberos"); - hbaseConfig.put("hbase.master.kerberos.principal", "hbase/cdh01@HADOOP.COM"); - hbaseConfig.put("hbase.master.keytab.file", "D:\\cdh_cluster\\cdh2\\hbase.keytab"); - hbaseConfig.put("hbase.regionserver.kerberos.principal", "hbase/cdh01@HADOOP.COM"); - hbaseConfig.put("hbase.regionserver.keytab.file", "D:\\cdh_cluster\\cdh2\\hbase.keytab"); - hbaseConfig.put("java.security.krb5.conf", "D:\\cdh_cluster\\cdh2\\krb5.conf"); - hbaseConfig.put("useLocalFile", "true"); -// hbaseConfig.put("sftpConf", sftpConf); -// hbaseConfig.put("remoteDir", "/home/sftp/keytab/jiangbo"); - -// hbaseConfig.put("hbase.zookeeper.quorum", "cdh01:2181,cdh02:2181,cdh03:2181"); - hbaseConfig.put("hbase.zookeeper.quorum", "172.16.10.201:2181"); - hbaseConfig.put("hbase.rpc.timeout", "60000"); - hbaseConfig.put("ipc.socket.timeout", "20000"); - hbaseConfig.put("hbase.client.retries.number", "3"); - hbaseConfig.put("hbase.client.pause", "100"); - hbaseConfig.put("zookeeper.recovery.retry", "3"); - - Connection connection = HbaseHelper.getHbaseConnection(hbaseConfig); - Table table = connection.getTable(TableName.valueOf("tb1")); - - ResultScanner rs = table.getScanner(new Scan()); - Result result = rs.next(); - if(result != null){ - System.out.println(result.getRow()); - } - - HbaseHelper.getRegionLocator(connection, "tb1"); - - connection.close(); - } -} diff --git a/flinkx-hbase/flinkx-hbase-reader/pom.xml b/flinkx-hbase/flinkx-hbase-reader/pom.xml index f77a9613ca..5cdc499262 100644 --- a/flinkx-hbase/flinkx-hbase-reader/pom.xml +++ b/flinkx-hbase/flinkx-hbase-reader/pom.xml @@ -65,6 +65,16 @@ + + + com.google.common + 
shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseInputFormat.java b/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseInputFormat.java index 495a89c4e6..f17383ca87 100644 --- a/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseInputFormat.java +++ b/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseInputFormat.java @@ -29,16 +29,24 @@ import org.apache.flink.types.Row; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.*; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.RegionLocator; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; import java.io.IOException; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; +import java.security.PrivilegedAction; import java.util.ArrayList; import java.util.List; import java.util.Map; +import com.google.common.collect.Maps; +import org.apache.hadoop.security.UserGroupInformation; /** @@ -87,7 +95,17 @@ public void openInputFormat() throws IOException { @Override public InputSplit[] createInputSplitsInternal(int minNumSplits) throws IOException { try (Connection connection = HbaseHelper.getHbaseConnection(hbaseConfig)) { - return split(connection, tableName, startRowkey, endRowkey, isBinaryRowkey); + if(HbaseHelper.openKerberos(hbaseConfig)) { + UserGroupInformation ugi = HbaseHelper.getUgi(hbaseConfig); + return ugi.doAs(new PrivilegedAction() { + @Override + public HbaseInputSplit[] run() { + return split(connection, tableName, startRowkey, endRowkey, isBinaryRowkey); + } + }); + } else { + return split(connection, tableName, startRowkey, endRowkey, isBinaryRowkey); + } } } @@ -132,9 +150,10 @@ private List doSplit(byte[] startRowkeyByte, // 当前的region为最后一个region // 如果最后一个region的start Key大于用户指定的userEndKey,则最后一个region,应该不包含在内 // 注意如果用户指定userEndKey为"",则此判断应该不成立。userEndKey为""表示取得最大的region - if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0 + boolean isSkip = Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0 && (endRowkeyByte.length != 0 && (Bytes.compareTo( - regionStartKey, endRowkeyByte) > 0))) { + regionStartKey, endRowkeyByte) > 0)); + if (isSkip) { continue; } diff --git a/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseReader.java b/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseReader.java index 04c5150990..b1d4186132 100644 --- a/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseReader.java +++ b/flinkx-hbase/flinkx-hbase-reader/src/main/java/com/dtstack/flinkx/hbase/reader/HbaseReader.java @@ -26,6 +26,8 @@ import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; @@ -39,6 +41,8 @@ */ public class HbaseReader extends BaseDataReader { + private static Logger LOG = 
LoggerFactory.getLogger(HbaseReader.class); + protected List columnName; protected List columnType; protected List columnValue; @@ -82,7 +86,8 @@ public HbaseReader(DataTransferConfig config, StreamExecutionEnvironment env) { columnValue.add((String) sm.get("value")); columnFormat.add((String) sm.get("format")); } - System.out.println("init column finished"); + + LOG.info("init column finished"); } else{ throw new IllegalArgumentException("column argument error"); } diff --git a/flinkx-hbase/flinkx-hbase-reader/src/test/java/com/dtstack/flinkx/hbase/reader/HbaseSplitDemo.java b/flinkx-hbase/flinkx-hbase-reader/src/test/java/com/dtstack/flinkx/hbase/reader/HbaseSplitDemo.java deleted file mode 100644 index 47f153621b..0000000000 --- a/flinkx-hbase/flinkx-hbase-reader/src/test/java/com/dtstack/flinkx/hbase/reader/HbaseSplitDemo.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.dtstack.flinkx.hbase.reader; - -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.Connection; -import org.apache.hadoop.hbase.client.ConnectionFactory; -import org.apache.hadoop.hbase.client.RegionLocator; -import java.io.IOException; - -/** - * Created by softfly on 17/7/25. - */ -public class HbaseSplitDemo { - - private static void split() { - - } - - public static void main(String[] args) throws IOException { - - org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create(); - conf.set("hbase.zookeeper.property.clientPort", "2181"); - conf.set("hbase.zookeeper.quorum", "172.16.1.151" ); - conf.set("zookeeper.znode.parent", "/hbase2"); - - Connection conn = ConnectionFactory.createConnection(conf); - //Table table = conn.getTable(TableName.valueOf("tb2")); - - RegionLocator regionLocator = conn.getRegionLocator(TableName.valueOf("tb2")); - regionLocator.getStartEndKeys(); - - } - -} diff --git a/flinkx-hbase/flinkx-hbase-writer/pom.xml b/flinkx-hbase/flinkx-hbase-writer/pom.xml index d406061b54..d5dd35eba4 100644 --- a/flinkx-hbase/flinkx-hbase-writer/pom.xml +++ b/flinkx-hbase/flinkx-hbase-writer/pom.xml @@ -54,6 +54,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormat.java b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormat.java index a3e75a9caa..d4db300d08 100644 --- a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormat.java +++ b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormat.java @@ -18,6 +18,7 @@ package com.dtstack.flinkx.hbase.writer; +import com.dtstack.flinkx.constants.ConstantValue; import com.dtstack.flinkx.enums.ColumnType; import com.dtstack.flinkx.exception.WriteRecordException; import com.dtstack.flinkx.hbase.HbaseHelper; @@ -27,6 +28,7 @@ import com.dtstack.flinkx.util.DateUtil; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.Validate; import org.apache.flink.configuration.Configuration; import org.apache.flink.types.Row; @@ -34,10 +36,12 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.security.UserGroupInformation; import java.io.IOException; import java.nio.charset.Charset; 
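The Kerberos support added across these HBase classes follows a single pattern: HbaseHelper.getUgi() loads the keytab and krb5 configuration from hbaseConfig and logs in, and every call that needs the ticket is then wrapped in ugi.doAs(...), as the reader's split creation above and the writer's openInternal just below do. A condensed sketch of the same pattern, reusing the helpers added in this patch (the contents of the config map are whatever kerberos entries the job supplies):

import java.io.IOException;
import java.security.PrivilegedAction;
import java.util.Map;

import com.dtstack.flinkx.hbase.HbaseHelper;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.security.UserGroupInformation;

public class KerberosConnectSketch {
    public static Connection connect(final Map<String, Object> hbaseConfig) throws IOException {
        // Log in once using the keytab/krb5.conf referenced by hbaseConfig ...
        UserGroupInformation ugi = HbaseHelper.getUgi(hbaseConfig);
        // ... then perform the HBase call under that identity.
        return ugi.doAs(new PrivilegedAction<Connection>() {
            @Override
            public Connection run() {
                try {
                    return ConnectionFactory.createConnection(HbaseHelper.getConfig(hbaseConfig));
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        });
    }
}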
import java.nio.charset.StandardCharsets; +import java.security.PrivilegedAction; import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -93,8 +97,40 @@ public class HbaseOutputFormat extends BaseRichOutputFormat { private transient ThreadLocal timeMillisecondFormatThreadLocal; + private boolean openKerberos = false; + @Override public void configure(Configuration parameters) { + } + + @Override + public void openInternal(int taskNumber, int numTasks) throws IOException { + openKerberos = HbaseHelper.openKerberos(hbaseConfig); + if (openKerberos) { + sleepRandomTime(); + + UserGroupInformation ugi = HbaseHelper.getUgi(hbaseConfig); + ugi.doAs(new PrivilegedAction() { + @Override + public Object run() { + openConnection(); + return null; + } + }); + } else { + openConnection(); + } + } + + private void sleepRandomTime() { + try { + Thread.sleep(5000L + (long)(10000 * Math.random())); + } catch (Exception exception) { + LOG.warn("", exception); + } + } + + public void openConnection() { LOG.info("HbaseOutputFormat configure start"); nameMaps = Maps.newConcurrentMap(); nameByteMaps = Maps.newConcurrentMap(); @@ -103,9 +139,9 @@ public void configure(Configuration parameters) { Validate.isTrue(hbaseConfig != null && hbaseConfig.size() !=0, "hbaseConfig不能为空Map结构!"); try { - connection = HbaseHelper.getHbaseConnection(hbaseConfig); - org.apache.hadoop.conf.Configuration hConfiguration = HbaseHelper.getConfig(hbaseConfig); + connection = ConnectionFactory.createConnection(hConfiguration); + bufferedMutator = connection.getBufferedMutator( new BufferedMutatorParams(TableName.valueOf(tableName)) .pool(HTable.getDefaultExecutor(hConfiguration)) @@ -129,11 +165,6 @@ public void configure(Configuration parameters) { LOG.info("HbaseOutputFormat configure end"); } - @Override - public void openInternal(int taskNumber, int numTasks) throws IOException { - - } - @Override public void writeSingleRecordInternal(Row record) throws WriteRecordException { int i = 0; @@ -159,17 +190,19 @@ public void writeSingleRecordInternal(Row record) throws WriteRecordException { String name = columnNames.get(i); String[] cfAndQualifier = nameMaps.get(name); byte[][] cfAndQualifierBytes = nameByteMaps.get(name); - if(cfAndQualifier == null || cfAndQualifierBytes==null){ - String promptInfo = "Hbasewriter 中,column 的列配置格式应该是:列族:列名. 您配置的列错误:" + name; + if(cfAndQualifier == null || cfAndQualifierBytes == null){ cfAndQualifier = name.split(":"); - Validate.isTrue(cfAndQualifier != null && cfAndQualifier.length == 2 - && org.apache.commons.lang3.StringUtils.isNotBlank(cfAndQualifier[0]) - && org.apache.commons.lang3.StringUtils.isNotBlank(cfAndQualifier[1]), promptInfo); - nameMaps.put(name,cfAndQualifier); - cfAndQualifierBytes = new byte[2][]; - cfAndQualifierBytes[0] = Bytes.toBytes(cfAndQualifier[0]); - cfAndQualifierBytes[1] = Bytes.toBytes(cfAndQualifier[1]); - nameByteMaps.put(name,cfAndQualifierBytes); + if(cfAndQualifier.length == 2 + && StringUtils.isNotBlank(cfAndQualifier[0]) + && StringUtils.isNotBlank(cfAndQualifier[1])){ + nameMaps.put(name,cfAndQualifier); + cfAndQualifierBytes = new byte[2][]; + cfAndQualifierBytes[0] = Bytes.toBytes(cfAndQualifier[0]); + cfAndQualifierBytes[1] = Bytes.toBytes(cfAndQualifier[1]); + nameByteMaps.put(name,cfAndQualifierBytes); + } else { + throw new IllegalArgumentException("Hbasewriter 中,column 的列配置格式应该是:列族:列名. 
您配置的列错误:" + name); + } } ColumnType columnType = ColumnType.getType(type); @@ -180,8 +213,6 @@ public void writeSingleRecordInternal(Row record) throws WriteRecordException { cfAndQualifierBytes[0], cfAndQualifierBytes[1], columnBytes); - }else{ - continue; } } @@ -195,20 +226,21 @@ public void writeSingleRecordInternal(Row record) throws WriteRecordException { } private SimpleDateFormat getSimpleDateFormat(String sign){ - SimpleDateFormat format = null; - if("sss".equalsIgnoreCase(sign)){ + SimpleDateFormat format; + if(ConstantValue.TIME_SECOND_SUFFIX.equals(sign)){ format = timeSecondFormatThreadLocal.get(); if(format == null){ format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); timeSecondFormatThreadLocal.set(format); } - }else if("SSS".equalsIgnoreCase(sign)){ + } else { format = timeMillisecondFormatThreadLocal.get(); if(format == null){ format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS"); timeMillisecondFormatThreadLocal.set(format); } } + return format; } @@ -219,7 +251,7 @@ protected String recordConvertDetailErrorMessage(int pos, Row row) { @Override protected void writeMultipleRecordsInternal() throws Exception { - throw new IllegalArgumentException(); + notSupportBatchWrite("HbaseWriter"); } private byte[] getRowkey(Row record) throws Exception{ @@ -249,8 +281,8 @@ public long getVersion(Row record){ if(record.getField(index) == null){ throw new IllegalArgumentException("null verison column!"); } - SimpleDateFormat dfSeconds = getSimpleDateFormat("sss"); - SimpleDateFormat dfMs = getSimpleDateFormat("SSS"); + SimpleDateFormat dfSeconds = getSimpleDateFormat(ConstantValue.TIME_SECOND_SUFFIX); + SimpleDateFormat dfMs = getSimpleDateFormat(ConstantValue.TIME_MILLISECOND_SUFFIX); Object column = record.getField(index); if(column instanceof Long){ Long longValue = (Long) column; @@ -370,7 +402,7 @@ private byte[] intToBytes(Object column) { if(column instanceof Integer) { intValue = (Integer) column; } else if(column instanceof Long) { - intValue = Integer.valueOf(((Long)column).intValue()); + intValue = ((Long) column).intValue(); } else if(column instanceof Double) { intValue = ((Double) column).intValue(); } else if(column instanceof Float) { @@ -378,7 +410,7 @@ private byte[] intToBytes(Object column) { } else if(column instanceof Short) { intValue = ((Short) column).intValue(); } else if(column instanceof Boolean) { - intValue = ((Boolean) column).booleanValue() ? 1 : 0; + intValue = (Boolean) column ? 1 : 0; } else if(column instanceof String) { intValue = Integer.valueOf((String) column); } else { @@ -401,7 +433,7 @@ private byte[] longToBytes(Object column) { } else if(column instanceof Short) { longValue = ((Short) column).longValue(); } else if(column instanceof Boolean) { - longValue = ((Boolean) column).booleanValue() ? 1L : 0L; + longValue = (Boolean) column ? 1L : 0L; } else if(column instanceof String) { longValue = Long.valueOf((String) column); }else if (column instanceof Timestamp){ @@ -426,7 +458,7 @@ private byte[] doubleToBytes(Object column) { } else if(column instanceof Short) { doubleValue = ((Short) column).doubleValue(); } else if(column instanceof Boolean) { - doubleValue = ((Boolean) column).booleanValue() ? 1.0 : 0.0; + doubleValue = (Boolean) column ? 
1.0 : 0.0; } else if(column instanceof String) { doubleValue = Double.valueOf((String) column); } else { @@ -449,7 +481,7 @@ private byte[] floatToBytes(Object column) { } else if(column instanceof Short) { floatValue = ((Short) column).floatValue(); } else if(column instanceof Boolean) { - floatValue = ((Boolean) column).booleanValue() ? 1.0f : 0.0f; + floatValue = (Boolean) column ? 1.0f : 0.0f; } else if(column instanceof String) { floatValue = Float.valueOf((String) column); } else { @@ -472,7 +504,7 @@ private byte[] shortToBytes(Object column) { } else if(column instanceof Short) { shortValue = (Short) column; } else if(column instanceof Boolean) { - shortValue = ((Boolean) column).booleanValue() ? (short) 1 : (short) 0 ; + shortValue = (Boolean) column ? (short) 1 : (short) 0 ; } else if(column instanceof String) { shortValue = Short.valueOf((String) column); } else { @@ -484,15 +516,15 @@ private byte[] shortToBytes(Object column) { private byte[] boolToBytes(Object column) { Boolean booleanValue = null; if(column instanceof Integer) { - booleanValue = (Integer)column == 0 ? false : true; + booleanValue = (Integer) column != 0; } else if(column instanceof Long) { - booleanValue = (Long) column == 0L ? false : true; + booleanValue = (Long) column != 0L; } else if(column instanceof Double) { - booleanValue = (Double) column == 0.0 ? false : true; + booleanValue = new Double(0.0).compareTo((Double) column) != 0; } else if(column instanceof Float) { - booleanValue = (Float) column == 0.0f ? false : true; + booleanValue = new Float(0.0f).compareTo((Float) column) != 0; } else if(column instanceof Short) { - booleanValue = (Short) column == 0 ? false : true; + booleanValue = (Short) column != 0; } else if(column instanceof Boolean) { booleanValue = (Boolean) column; } else if(column instanceof String) { diff --git a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormatBuilder.java b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormatBuilder.java index 4da4c10754..ec96e04c17 100644 --- a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormatBuilder.java +++ b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseOutputFormatBuilder.java @@ -110,5 +110,7 @@ protected void checkFormat() { if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){ throw new UnsupportedOperationException("This plugin not support restore from failed state"); } + + notSupportBatchWrite("HbaseWriter"); } } diff --git a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseWriter.java b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseWriter.java index 42091f62d3..3cb5bce6ce 100644 --- a/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseWriter.java +++ b/flinkx-hbase/flinkx-hbase-writer/src/main/java/com/dtstack/flinkx/hbase/writer/HbaseWriter.java @@ -31,8 +31,23 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.hbase.HbaseConfigConstants.*; -import static com.dtstack.flinkx.hbase.HbaseConfigKeys.*; + +import static com.dtstack.flinkx.hbase.HbaseConfigConstants.DEFAULT_WAL_FLAG; +import static com.dtstack.flinkx.hbase.HbaseConfigConstants.DEFAULT_WRITE_BUFFER_SIZE; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_COLUMN_NAME; +import static 
com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_COLUMN_TYPE; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_ENCODING; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_HBASE_CONFIG; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_NULL_MODE; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_ROW_KEY_COLUMN; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_ROW_KEY_COLUMN_INDEX; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_ROW_KEY_COLUMN_VALUE; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_TABLE; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_VERSION_COLUMN; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_VERSION_COLUMN_INDEX; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_VERSION_COLUMN_VALUE; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_WAL_FLAG; +import static com.dtstack.flinkx.hbase.HbaseConfigKeys.KEY_WRITE_BUFFER_SIZE; /** * The Writer plugin of HBase diff --git a/flinkx-hbase/flinkx-hbase-writer/src/test/java/com.dtstack.flinkx.hbase.writer/RowKeyFunctionTest.java b/flinkx-hbase/flinkx-hbase-writer/src/test/java/com.dtstack.flinkx.hbase.writer/RowKeyFunctionTest.java index dbf12dfa2e..75143994f5 100644 --- a/flinkx-hbase/flinkx-hbase-writer/src/test/java/com.dtstack.flinkx.hbase.writer/RowKeyFunctionTest.java +++ b/flinkx-hbase/flinkx-hbase-writer/src/test/java/com.dtstack.flinkx.hbase.writer/RowKeyFunctionTest.java @@ -18,7 +18,10 @@ package com.dtstack.flinkx.hbase.writer; -import com.dtstack.flinkx.hbase.writer.function.*; +import com.dtstack.flinkx.hbase.writer.function.FunctionParser; +import com.dtstack.flinkx.hbase.writer.function.FunctionTree; +import com.dtstack.flinkx.hbase.writer.function.Md5Function; +import com.dtstack.flinkx.hbase.writer.function.StringFunction; import org.junit.Assert; import org.junit.Test; diff --git a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/ECompressType.java b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/ECompressType.java index 913c0fd41c..2f0c7ec101 100644 --- a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/ECompressType.java +++ b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/ECompressType.java @@ -19,7 +19,6 @@ package com.dtstack.flinkx.hdfs; import org.apache.commons.lang.StringUtils; -import org.apache.parquet.hadoop.metadata.CompressionCodecName; /** * @author jiangbo diff --git a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsConfigKeys.java b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsConfigKeys.java index fc6ced4a25..a4b44360e3 100644 --- a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsConfigKeys.java +++ b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsConfigKeys.java @@ -60,4 +60,6 @@ public class HdfsConfigKeys { public static final String KEY_FLUSH_INTERVAL = "flushInterval"; + public static final String KEY_ENABLE_DICTIONARY = "enableDictionary"; + } diff --git a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsUtil.java b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsUtil.java index b678812f68..0acaa924b8 100644 --- a/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsUtil.java +++ b/flinkx-hdfs/flinkx-hdfs-core/src/main/java/com/dtstack/flinkx/hdfs/HdfsUtil.java @@ -19,15 +19,20 @@ package com.dtstack.flinkx.hdfs; import 
com.dtstack.flinkx.enums.ColumnType; -import com.dtstack.flinkx.util.DateUtil; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.io.*; +import org.apache.hadoop.io.ByteWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.parquet.io.api.Binary; -import java.sql.Date; -import java.text.SimpleDateFormat; /** * Utilities for HdfsReader and HdfsWriter @@ -39,62 +44,13 @@ public class HdfsUtil { public static final String NULL_VALUE = "\\N"; - public static Object string2col(String str, String type, SimpleDateFormat customDateFormat) { - if (str == null || str.length() == 0){ - return null; - } - - if(type == null){ - return str; - } + private static final long NANO_SECONDS_PER_DAY = 86400_000_000_000L; - ColumnType columnType = ColumnType.fromString(type.toUpperCase()); - Object ret; - switch(columnType) { - case TINYINT: - ret = Byte.valueOf(str.trim()); - break; - case SMALLINT: - ret = Short.valueOf(str.trim()); - break; - case INT: - ret = Integer.valueOf(str.trim()); - break; - case BIGINT: - ret = Long.valueOf(str.trim()); - break; - case FLOAT: - ret = Float.valueOf(str.trim()); - break; - case DOUBLE: - case DECIMAL: - ret = Double.valueOf(str.trim()); - break; - case STRING: - case VARCHAR: - case CHAR: - if(customDateFormat != null){ - ret = DateUtil.columnToDate(str,customDateFormat); - ret = DateUtil.timestampToString((Date)ret); - } else { - ret = str; - } - break; - case BOOLEAN: - ret = Boolean.valueOf(str.trim().toLowerCase()); - break; - case DATE: - ret = DateUtil.columnToDate(str,customDateFormat); - break; - case TIMESTAMP: - ret = DateUtil.columnToTimestamp(str,customDateFormat); - break; - default: - throw new IllegalArgumentException("Unsupported field type:" + type); - } + private static final long JULIAN_EPOCH_OFFSET_DAYS = 2440588; - return ret; - } + private static final double SCALE_TWO = 2.0; + private static final double SCALE_TEN = 10.0; + private static final int BIT_SIZE = 8; public static Object getWritableValue(Object writable) { Class clz = writable.getClass(); @@ -166,4 +122,78 @@ public static ObjectInspector columnTypeToObjectInspetor(ColumnType columnType) return objectInspector; } + + public static Binary decimalToBinary(final HiveDecimal hiveDecimal, int prec, int scale) { + byte[] decimalBytes = hiveDecimal.setScale(scale).unscaledValue().toByteArray(); + + // Estimated number of bytes needed. + int precToBytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; + if (precToBytes == decimalBytes.length) { + // No padding needed. + return Binary.fromReusedByteArray(decimalBytes); + } + + byte[] tgt = new byte[precToBytes]; + if (hiveDecimal.signum() == -1) { + // For negative number, initializing bits to 1 + for (int i = 0; i < precToBytes; i++) { + tgt[i] |= 0xFF; + } + } + + // Padding leading zeroes/ones. 
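+ // (The unscaled two's-complement bytes are copied into the tail of the fixed-length
+ // buffer below; the 0xFF fill above provides the sign extension for negative values,
+ // while the zero-initialized leading bytes do the same for non-negative ones.)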
+ System.arraycopy(decimalBytes, 0, tgt, precToBytes - decimalBytes.length, decimalBytes.length); + return Binary.fromReusedByteArray(tgt); + } + + public static int computeMinBytesForPrecision(int precision){ + int numBytes = 1; + while (Math.pow(SCALE_TWO, BIT_SIZE * numBytes - 1.0) < Math.pow(SCALE_TEN, precision)) { + numBytes += 1; + } + return numBytes; + } + + public static byte[] longToByteArray(long data){ + long nano = data * 1000_000; + + int julianDays = (int) ((nano / NANO_SECONDS_PER_DAY) + JULIAN_EPOCH_OFFSET_DAYS); + byte[] julianDaysBytes = getBytes(julianDays); + flip(julianDaysBytes); + + long lastDayNanos = nano % NANO_SECONDS_PER_DAY; + byte[] lastDayNanosBytes = getBytes(lastDayNanos); + flip(lastDayNanosBytes); + + byte[] dst = new byte[12]; + + System.arraycopy(lastDayNanosBytes, 0, dst, 0, 8); + System.arraycopy(julianDaysBytes, 0, dst, 8, 4); + + return dst; + } + + private static byte[] getBytes(long i) { + byte[] bytes=new byte[8]; + bytes[0]=(byte)((i >> 56) & 0xFF); + bytes[1]=(byte)((i >> 48) & 0xFF); + bytes[2]=(byte)((i >> 40) & 0xFF); + bytes[3]=(byte)((i >> 32) & 0xFF); + bytes[4]=(byte)((i >> 24) & 0xFF); + bytes[5]=(byte)((i >> 16) & 0xFF); + bytes[6]=(byte)((i >> 8) & 0xFF); + bytes[7]=(byte)(i & 0xFF); + return bytes; + } + + /** + * @param bytes + */ + private static void flip(byte[] bytes) { + for(int i=0,j=bytes.length-1;i + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/BaseHdfsInputFormat.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/BaseHdfsInputFormat.java index f6d802cf1b..e5e6a8ee37 100644 --- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/BaseHdfsInputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/BaseHdfsInputFormat.java @@ -21,10 +21,15 @@ import com.dtstack.flinkx.inputformat.BaseRichInputFormat; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.FileSystemUtil; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.security.UserGroupInformation; +import java.io.File; import java.io.IOException; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -36,6 +41,8 @@ */ public abstract class BaseHdfsInputFormat extends BaseRichInputFormat { + private static final String PARTITION_SPLIT_CHAR = "="; + protected Map hadoopConfig; protected List metaColumns; @@ -61,14 +68,25 @@ public abstract class BaseHdfsInputFormat extends BaseRichInputFormat { protected Object value; - protected boolean isFileEmpty = false; - protected String filterRegex; + protected transient UserGroupInformation ugi; + + protected boolean openKerberos; + + protected String currentPartition; + + protected transient FileSystem fs; + @Override public void openInputFormat() throws IOException { super.openInputFormat(); conf = buildConfig(); + + openKerberos = FileSystemUtil.isOpenKerberos(hadoopConfig); + if (openKerberos) { + ugi = FileSystemUtil.getUGI(hadoopConfig, defaultFs); + } } protected JobConf buildConfig() { @@ -81,7 +99,7 @@ protected JobConf buildConfig() { @Override public boolean reachedEnd() throws IOException { - return isFileEmpty || !recordReader.next(key, value); + return !recordReader.next(key, value); } 
@Override @@ -91,4 +109,27 @@ public void closeInternal() throws IOException { } } + /** + * 从hdfs路径中获取当前分区信息 + * @param path hdfs路径 + */ + public void findCurrentPartition(Path path){ + Map map = new HashMap<>(16); + String pathStr = path.getParent().toString(); + int index; + while((index = pathStr.lastIndexOf(PARTITION_SPLIT_CHAR)) > 0){ + int i = pathStr.lastIndexOf(File.separator); + String name = pathStr.substring(i + 1, index); + String value = pathStr.substring(index + 1); + map.put(name, value); + pathStr = pathStr.substring(0, i); + } + + for (MetaColumn column : metaColumns) { + if(column.getPart()){ + column.setValue(map.get(column.getName())); + } + } + } + } diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormat.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormat.java index a87d704310..6f48d4e404 100644 --- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormat.java @@ -22,21 +22,25 @@ import com.dtstack.flinkx.hdfs.HdfsUtil; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.FileSystemUtil; +import com.dtstack.flinkx.util.StringUtil; import org.apache.commons.lang.StringUtils; import org.apache.flink.core.io.InputSplit; import org.apache.flink.types.Row; -import org.apache.hadoop.fs.*; -import org.apache.hadoop.hive.ql.io.orc.*; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcSerde; +import org.apache.hadoop.hive.ql.io.orc.OrcSplit; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; +import org.apache.hadoop.security.UserGroupInformation; + +import java.io.*; +import java.security.PrivilegedAction; import java.util.*; +import java.util.concurrent.atomic.AtomicBoolean; /** * The subclass of HdfsInputFormat which handles orc files @@ -46,99 +50,102 @@ */ public class HdfsOrcInputFormat extends BaseHdfsInputFormat { - private transient OrcSerde orcSerde; - private transient String[] fullColNames; - private transient String[] fullColTypes; - private transient StructObjectInspector inspector; private transient List fields; private static final String COMPLEX_FIELD_TYPE_SYMBOL_REGEX = ".*(<|>|\\{|}|[|]).*"; + private AtomicBoolean isInit = new AtomicBoolean(false); + @Override - public void openInputFormat() throws IOException{ + public void openInputFormat() throws IOException { super.openInputFormat(); + inputFormat = new OrcInputFormat(); + } + + @Override + public void openInternal(InputSplit inputSplit) throws IOException { + HdfsOrcInputSplit hdfsOrcInputSplit = (HdfsOrcInputSplit) inputSplit; + OrcSplit orcSplit = hdfsOrcInputSplit.getOrcSplit(); - FileSystem fs; try { - fs = FileSystemUtil.getFileSystem(hadoopConfig, defaultFs); + if (!isInit.get()) { + init(orcSplit.getPath()); + isInit.set(true); + } } catch (Exception e) { - throw new RuntimeException(e); + throw new IOException("初始化[inspector]出错", e); } - orcSerde = new 
OrcSerde(); - inputFormat = new OrcInputFormat(); - org.apache.hadoop.hive.ql.io.orc.Reader reader = null; - try { - OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf); - readerOptions.filesystem(fs); - - Path path = new Path(inputPath); - String typeStruct = null; - - if(fs.isDirectory(path)) { - RemoteIterator iterator = fs.listFiles(path, true); - while(iterator.hasNext()) { - FileStatus fileStatus = iterator.next(); - if(fileStatus.isFile() && fileStatus.getLen() > 49) { - Path subPath = fileStatus.getPath(); - reader = OrcFile.createReader(subPath, readerOptions); - typeStruct = reader.getObjectInspector().getTypeName(); - if(StringUtils.isNotEmpty(typeStruct)) { - break; - } + if (openKerberos) { + ugi.doAs(new PrivilegedAction() { + @Override + public Object run() { + try { + openOrcReader(inputSplit); + } catch (Exception e) { + throw new RuntimeException(e); } - } - if(reader == null) { - //throw new RuntimeException("orcfile dir is empty!"); - LOG.error("orc file {} is empty!", inputPath); - isFileEmpty = true; - return; + return null; } + }); + } else { + openOrcReader(inputSplit); + } + } - } else { - reader = OrcFile.createReader(path, readerOptions); - typeStruct = reader.getObjectInspector().getTypeName(); - } + private void openOrcReader(InputSplit inputSplit) throws IOException{ + numReadCounter = getRuntimeContext().getLongCounter("numRead"); + HdfsOrcInputSplit hdfsOrcInputSplit = (HdfsOrcInputSplit) inputSplit; + OrcSplit orcSplit = hdfsOrcInputSplit.getOrcSplit(); + recordReader = inputFormat.getRecordReader(orcSplit, conf, Reporter.NULL); + key = recordReader.createKey(); + value = recordReader.createValue(); + fields = inspector.getAllStructFieldRefs(); + } - if (StringUtils.isEmpty(typeStruct)) { - throw new RuntimeException("can't retrieve type struct from " + path); - } + private void init(Path path) throws Exception { + OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf); + readerOptions.filesystem(fs); + org.apache.hadoop.hive.ql.io.orc.Reader reader = OrcFile.createReader(path, readerOptions); + String typeStruct = reader.getObjectInspector().getTypeName(); - int startIndex = typeStruct.indexOf("<") + 1; - int endIndex = typeStruct.lastIndexOf(">"); - typeStruct = typeStruct.substring(startIndex, endIndex); + if (StringUtils.isEmpty(typeStruct)) { + throw new RuntimeException("can't retrieve type struct from " + path); + } - if(typeStruct.matches(COMPLEX_FIELD_TYPE_SYMBOL_REGEX)){ - throw new RuntimeException("Field types such as array, map, and struct are not supported."); - } + int startIndex = typeStruct.indexOf("<") + 1; + int endIndex = typeStruct.lastIndexOf(">"); + typeStruct = typeStruct.substring(startIndex, endIndex); - List cols = parseColumnAndType(typeStruct); + if(typeStruct.matches(COMPLEX_FIELD_TYPE_SYMBOL_REGEX)){ + throw new RuntimeException("Field types such as array, map, and struct are not supported."); + } - fullColNames = new String[cols.size()]; - fullColTypes = new String[cols.size()]; + List cols = parseColumnAndType(typeStruct); - for(int i = 0; i < cols.size(); ++i) { - String[] temp = cols.get(i).split(":"); - fullColNames[i] = temp[0]; - fullColTypes[i] = temp[1]; - } + fullColNames = new String[cols.size()]; + String[] fullColTypes = new String[cols.size()]; - Properties p = new Properties(); - p.setProperty("columns", StringUtils.join(fullColNames, ",")); - p.setProperty("columns.types", StringUtils.join(fullColTypes, ":")); - orcSerde.initialize(conf, p); + for(int i = 0; i < cols.size(); ++i) { + 
String[] temp = cols.get(i).split(":"); + fullColNames[i] = temp[0]; + fullColTypes[i] = temp[1]; + } - this.inspector = (StructObjectInspector) orcSerde.getObjectInspector(); + Properties p = new Properties(); + p.setProperty("columns", StringUtils.join(fullColNames, ",")); + p.setProperty("columns.types", StringUtils.join(fullColTypes, ":")); - } catch (Throwable e) { - throw new RuntimeException(e); - } + OrcSerde orcSerde = new OrcSerde(); + orcSerde.initialize(conf, p); + + this.inspector = (StructObjectInspector) orcSerde.getObjectInspector(); } private List parseColumnAndType(String typeStruct){ @@ -175,12 +182,25 @@ private List parseColumnAndType(String typeStruct){ @Override public HdfsOrcInputSplit[] createInputSplitsInternal(int minNumSplits) throws IOException { - try { - FileSystemUtil.getFileSystem(hadoopConfig, defaultFs); - } catch (Exception e) { - throw new IOException(e); + if (FileSystemUtil.isOpenKerberos(hadoopConfig)) { + UserGroupInformation ugi = FileSystemUtil.getUGI(hadoopConfig, defaultFs); + LOG.info("user:{}, ", ugi.getShortUserName()); + return ugi.doAs(new PrivilegedAction() { + @Override + public HdfsOrcInputSplit[] run() { + try { + return createOrcSplit(minNumSplits); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }); + } else { + return createOrcSplit(minNumSplits); } + } + private HdfsOrcInputSplit[] createOrcSplit(int minNumSplits) throws IOException{ JobConf jobConf = FileSystemUtil.getJobConf(hadoopConfig, defaultFs); org.apache.hadoop.mapred.FileInputFormat.setInputPaths(jobConf, inputPath); org.apache.hadoop.mapred.FileInputFormat.setInputPathFilter(buildConfig(), HdfsPathFilter.class); @@ -204,24 +224,6 @@ public HdfsOrcInputSplit[] createInputSplitsInternal(int minNumSplits) throws IO return null; } - - @Override - public void openInternal(InputSplit inputSplit) throws IOException { - - if(isFileEmpty){ - return; - } - - numReadCounter = getRuntimeContext().getLongCounter("numRead"); - HdfsOrcInputSplit hdfsOrcInputSplit = (HdfsOrcInputSplit) inputSplit; - OrcSplit orcSplit = hdfsOrcInputSplit.getOrcSplit(); - recordReader = inputFormat.getRecordReader(orcSplit, conf, Reporter.NULL); - key = recordReader.createKey(); - value = recordReader.createValue(); - fields = inspector.getAllStructFieldRefs(); - } - - @Override public Row nextRecordInternal(Row row) throws IOException { if(metaColumns.size() == 1 && ConstantValue.STAR_SYMBOL.equals(metaColumns.get(0).getName())){ @@ -239,17 +241,17 @@ public Row nextRecordInternal(Row row) throws IOException { MetaColumn metaColumn = metaColumns.get(i); Object val = null; - if(metaColumn.getIndex() != -1){ + if(metaColumn.getValue() != null){ + val = metaColumn.getValue(); + }else if(metaColumn.getIndex() != -1){ val = inspector.getStructFieldData(value, fields.get(metaColumn.getIndex())); if (val == null && metaColumn.getValue() != null){ val = metaColumn.getValue(); } - } else if(metaColumn.getValue() != null){ - val = metaColumn.getValue(); } if(val instanceof String || val instanceof org.apache.hadoop.io.Text){ - val = HdfsUtil.string2col(String.valueOf(val),metaColumn.getType(),metaColumn.getTimeFormat()); + val = StringUtil.string2col(String.valueOf(val), metaColumn.getType(), metaColumn.getTimeFormat()); } else if(val != null){ val = HdfsUtil.getWritableValue(val); } @@ -291,5 +293,4 @@ public int getSplitNumber() { return splitNumber; } } - } diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsParquetInputFormat.java 
b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsParquetInputFormat.java index 16fb276495..565c0a49b2 100644 --- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsParquetInputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsParquetInputFormat.java @@ -20,9 +20,9 @@ import com.dtstack.flinkx.constants.ConstantValue; import com.dtstack.flinkx.enums.ColumnType; -import com.dtstack.flinkx.hdfs.HdfsUtil; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.FileSystemUtil; +import com.dtstack.flinkx.util.StringUtil; import com.google.common.collect.Lists; import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; @@ -43,6 +43,7 @@ import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.security.PrivilegedAction; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Iterator; @@ -75,6 +76,8 @@ public class HdfsParquetInputFormat extends BaseHdfsInputFormat { private static final long NANOS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1); + private static final int TIMESTAMP_BINARY_LENGTH = 12; + @Override protected void openInternal(InputSplit inputSplit) throws IOException { currentSplitFilePaths = ((HdfsParquetSplit)inputSplit).getPaths(); @@ -82,14 +85,33 @@ protected void openInternal(InputSplit inputSplit) throws IOException { private boolean nextLine() throws IOException{ if (currentFileReader == null && currentFileIndex <= currentSplitFilePaths.size()-1){ - nextFile(); + if (openKerberos) { + ugi.doAs(new PrivilegedAction() { + @Override + public Object run() { + try { + nextFile(); + return null; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }); + } else { + nextFile(); + } } if (currentFileReader == null){ return false; } - currentLine = currentFileReader.read(); + if (openKerberos) { + currentLine = nextLineWithKerberos(); + } else { + currentLine = currentFileReader.read(); + } + if (fullColNames == null && currentLine != null){ fullColNames = new ArrayList<>(); fullColTypes = new ArrayList<>(); @@ -117,11 +139,24 @@ private boolean nextLine() throws IOException{ return currentLine != null; } + private Group nextLineWithKerberos() { + return ugi.doAs(new PrivilegedAction() { + @Override + public Group run() { + try { + return currentFileReader.read(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }); + } + private void nextFile() throws IOException{ - String path = currentSplitFilePaths.get(currentFileIndex); - ParquetReader.Builder reader = ParquetReader.builder(new GroupReadSupport(), new Path(path)).withConf(conf); + Path path = new Path(currentSplitFilePaths.get(currentFileIndex)); + findCurrentPartition(path); + ParquetReader.Builder reader = ParquetReader.builder(new GroupReadSupport(), path).withConf(conf); currentFileReader = reader.build(); - currentFileIndex++; } @@ -139,22 +174,20 @@ protected Row nextRecordInternal(Row row) throws IOException { MetaColumn metaColumn = metaColumns.get(i); Object val = null; - if(metaColumn.getIndex() != -1){ - if (metaColumn.getIndex() < currentLine.getType().getFieldCount()) { - if(currentLine.getFieldRepetitionCount(metaColumn.getIndex()) > 0){ - val = getData(currentLine,metaColumn.getType(),metaColumn.getIndex()); - } + if (metaColumn.getValue() != null){ + val = metaColumn.getValue(); + }else if(metaColumn.getIndex() != -1){ + 
if(currentLine.getFieldRepetitionCount(metaColumn.getIndex()) > 0){ + val = getData(currentLine,metaColumn.getType(),metaColumn.getIndex()); + } - if (val == null && metaColumn.getValue() != null){ - val = metaColumn.getValue(); - } + if (val == null && metaColumn.getValue() != null){ + val = metaColumn.getValue(); } - } else if (metaColumn.getValue() != null){ - val = metaColumn.getValue(); } if(val instanceof String){ - val = HdfsUtil.string2col(String.valueOf(val),metaColumn.getType(),metaColumn.getTimeFormat()); + val = StringUtil.string2col(String.valueOf(val), metaColumn.getType(), metaColumn.getTimeFormat()); } row.setField(i,val); @@ -169,7 +202,7 @@ public boolean reachedEnd() throws IOException { return !nextLine(); } - private Object getData(Group currentLine,String type,int index){ + public Object getData(Group currentLine,String type,int index){ Object data = null; ColumnType columnType = ColumnType.fromString(type); @@ -302,13 +335,12 @@ private static List getAllPartitionPath(String tableLocation, FileSystem private String getTypeName(String method){ String typeName; switch (method){ + case "getBoolean": case "getInteger" : typeName = "int";break; case "getInt96" : typeName = "bigint";break; case "getFloat" : typeName = "float";break; case "getDouble" : typeName = "double";break; case "getBinary" : typeName = "binary";break; - case "getString" : typeName = "string";break; - case "getBoolean" : typeName = "int";break; default:typeName = "string"; } @@ -319,11 +351,11 @@ private String getTypeName(String method){ * @param timestampBinary * @return */ - private long getTimestampMillis(Binary timestampBinary) - { - if (timestampBinary.length() != 12) { + private long getTimestampMillis(Binary timestampBinary) { + if (timestampBinary.length() != TIMESTAMP_BINARY_LENGTH) { return 0; } + byte[] bytes = timestampBinary.getBytes(); long timeOfDayNanos = Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsReader.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsReader.java index 5e3cea2a94..d4291741f0 100644 --- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsReader.java +++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsReader.java @@ -43,9 +43,9 @@ public class HdfsReader extends BaseDataReader { protected String fileType; protected String path; protected String fieldDelimiter; - private List metaColumns; + protected List metaColumns; protected Map hadoopConfig; - private String filterRegex; + protected String filterRegex; public HdfsReader(DataTransferConfig config, StreamExecutionEnvironment env) { super(config, env); diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsTextInputFormat.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsTextInputFormat.java index 2ba2b1c363..f7d2dac2b7 100644 --- a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsTextInputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/com/dtstack/flinkx/hdfs/reader/HdfsTextInputFormat.java @@ -22,25 +22,25 @@ import com.dtstack.flinkx.hdfs.HdfsUtil; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.FileSystemUtil; -import jodd.util.StringUtil; +import com.dtstack.flinkx.util.StringUtil; import 
org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.core.io.InputSplit; import org.apache.flink.types.Row; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hadoop.security.UserGroupInformation; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; -import java.nio.charset.Charset; -import java.nio.charset.UnsupportedCharsetException; -import java.util.Map; +import java.security.PrivilegedAction; /** * The subclass of HdfsInputFormat which handles text files @@ -59,12 +59,25 @@ public void openInputFormat() throws IOException { @Override public InputSplit[] createInputSplitsInternal(int minNumSplits) throws IOException { - try { - FileSystemUtil.getFileSystem(hadoopConfig, defaultFs); - } catch (Exception e) { - throw new IOException(e); + if (FileSystemUtil.isOpenKerberos(hadoopConfig)) { + UserGroupInformation ugi = FileSystemUtil.getUGI(hadoopConfig, defaultFs); + LOG.info("user:{}, ", ugi.getShortUserName()); + return ugi.doAs(new PrivilegedAction() { + @Override + public InputSplit[] run() { + try { + return createTextSplit(minNumSplits); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }); + } else { + return createTextSplit(minNumSplits); } + } + private InputSplit[] createTextSplit(int minNumSplits) throws IOException{ JobConf jobConf = buildConfig(); org.apache.hadoop.mapred.FileInputFormat.setInputPathFilter(jobConf, HdfsPathFilter.class); @@ -88,8 +101,30 @@ public InputSplit[] createInputSplitsInternal(int minNumSplits) throws IOExcepti @Override public void openInternal(InputSplit inputSplit) throws IOException { + + if(openKerberos){ + ugi.doAs(new PrivilegedAction() { + @Override + public Object run() { + try { + openHdfsTextReader(inputSplit); + } catch (Exception e) { + throw new RuntimeException(e); + } + + return null; + } + }); + }else{ + openHdfsTextReader(inputSplit); + } + + } + + private void openHdfsTextReader(InputSplit inputSplit) throws IOException{ HdfsTextInputSplit hdfsTextInputSplit = (HdfsTextInputSplit) inputSplit; org.apache.hadoop.mapred.InputSplit fileSplit = hdfsTextInputSplit.getTextSplit(); + findCurrentPartition(((FileSplit) fileSplit).getPath()); recordReader = inputFormat.getRecordReader(fileSplit, conf, Reporter.NULL); key = new LongWritable(); value = new Text(); @@ -98,7 +133,7 @@ public void openInternal(InputSplit inputSplit) throws IOException { @Override public Row nextRecordInternal(Row row) throws IOException { String line = new String(((Text)value).getBytes(), 0, ((Text)value).getLength(), charsetName); - String[] fields = line.split(delimiter); + String[] fields = StringUtils.splitPreserveAllTokens(line, delimiter); if (metaColumns.size() == 1 && ConstantValue.STAR_SYMBOL.equals(metaColumns.get(0).getName())){ row = new Row(fields.length); @@ -121,7 +156,7 @@ public Row nextRecordInternal(Row row) throws IOException { } if(value != null){ - value = HdfsUtil.string2col(String.valueOf(value),metaColumn.getType(),metaColumn.getTimeFormat()); + value = StringUtil.string2col(String.valueOf(value), 
metaColumn.getType(),metaColumn.getTimeFormat()); } row.setField(i, value); @@ -135,69 +170,7 @@ public Row nextRecordInternal(Row row) throws IOException { public boolean reachedEnd() throws IOException { key = new LongWritable(); value = new Text(); - return isFileEmpty || !recordReader.next(key, value); - } - - - public static class HdfsTextInputFormatBuilder { - - private HdfsTextInputFormat format; - - private HdfsTextInputFormatBuilder() { - format = new HdfsTextInputFormat(); - } - - public HdfsTextInputFormatBuilder setHadoopConfig(Map hadoopConfig) { - format.hadoopConfig = hadoopConfig; - return this; - } - - public HdfsTextInputFormatBuilder setInputPaths(String inputPaths) { - format.inputPath = inputPaths; - return this; - } - - public HdfsTextInputFormatBuilder setBytes(long bytes) { - format.bytes = bytes; - return this; - } - - public HdfsTextInputFormatBuilder setMonitorUrls(String monitorUrls) { - format.monitorUrls = monitorUrls; - return this; - } - - public HdfsTextInputFormatBuilder setDelimiter(String delimiter) { - if(delimiter == null) { - delimiter = "\\001"; - } - format.delimiter = delimiter; - return this; - } - - public HdfsTextInputFormatBuilder setDefaultFs(String defaultFs) { - format.defaultFs = defaultFs; - return this; - } - - public HdfsTextInputFormatBuilder setcharsetName (String charsetName) { - if(StringUtil.isNotEmpty(charsetName)) { - if(!Charset.isSupported(charsetName)) { - throw new UnsupportedCharsetException("The charset " + charsetName + " is not supported."); - } - this.format.charsetName = charsetName; - } - - return this; - } - - public HdfsTextInputFormat finish() { - return format; - } - } - - public static HdfsTextInputFormatBuilder buildHdfsTextInputFormat() { - return new HdfsTextInputFormatBuilder(); + return !recordReader.next(key, value); } static class HdfsTextInputSplit implements InputSplit { @@ -229,5 +202,4 @@ public int getSplitNumber() { return splitNumber; } } - } \ No newline at end of file diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java new file mode 100644 index 0000000000..a01ea427df --- /dev/null +++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -0,0 +1,1221 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
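For illustration only (this snippet is not part of the patch): the switch above from line.split(delimiter) to StringUtils.splitPreserveAllTokens(line, delimiter) in HdfsTextInputFormat.nextRecordInternal matters for rows whose trailing columns are empty, because String.split treats the delimiter as a regex and silently drops trailing empty tokens, while splitPreserveAllTokens keeps every column position. A minimal standalone sketch:

    import java.util.Arrays;
    import org.apache.commons.lang3.StringUtils;

    public class SplitBehaviorDemo {
        public static void main(String[] args) {
            String line = "a,,b,,";  // the last two columns are empty
            // String.split drops the trailing empty tokens: [a, , b]
            System.out.println(Arrays.toString(line.split(",")));
            // splitPreserveAllTokens keeps all five columns: [a, , b, , ]
            System.out.println(Arrays.toString(StringUtils.splitPreserveAllTokens(line, ",")));
        }
    }

With the old behavior a row that ends in empty fields yields fewer tokens than metaColumns expects; preserving all tokens keeps the column indices stable.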
+ */ + +package org.apache.hadoop.hive.ql.io.orc; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; +import org.apache.hadoop.hive.ql.io.*; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.shims.HadoopShims; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.StringUtils; + +import java.io.IOException; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +/** + * A MapReduce/Hive input format for ORC files. + *

+ * This class implements both the classic InputFormat, which stores the rows + * directly, and AcidInputFormat, which stores a series of events with the + * following schema: + *

+ *   class AcidEvent<ROW> {
+ *     enum ACTION {INSERT, UPDATE, DELETE}
+ *     ACTION operation;
+ *     long originalTransaction;
+ *     int bucket;
+ *     long rowId;
+ *     long currentTransaction;
+ *     ROW row;
+ *   }
+ * 
+ * Each AcidEvent object corresponds to an update event. The + * originalTransaction, bucket, and rowId are the unique identifier for the row. + * The operation and currentTransaction are the operation and the transaction + * that added this event. Insert and update events include the entire row, while + * delete events have null for row. + */ +public class OrcInputFormat implements InputFormat, + InputFormatChecker, VectorizedInputFormatInterface, + AcidInputFormat, + CombineHiveInputFormat.AvoidSplitCombination { + + static final HadoopShims SHIMS = ShimLoader.getHadoopShims(); + static final String MIN_SPLIT_SIZE = + SHIMS.getHadoopConfNames().get("MAPREDMINSPLITSIZE"); + static final String MAX_SPLIT_SIZE = + SHIMS.getHadoopConfNames().get("MAPREDMAXSPLITSIZE"); + static final String SARG_PUSHDOWN = "sarg.pushdown"; + private static final Log LOG = LogFactory.getLog(OrcInputFormat.class); + private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024; + private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024; + + private static final PerfLogger perfLogger = PerfLogger.getPerfLogger(); + private static final String CLASS_NAME = ReaderImpl.class.getName(); + + /** + * When picking the hosts for a split that crosses block boundaries, + * any drop any host that has fewer than MIN_INCLUDED_LOCATION of the + * number of bytes available on the host with the most. + * If host1 has 10MB of the split, host2 has 20MB, and host3 has 18MB the + * split will contain host2 (100% of host2) and host3 (90% of host2). Host1 + * with 50% will be dropped. + */ + private static final double MIN_INCLUDED_LOCATION = 0.80; + + /** + * Get the root column for the row. In ACID format files, it is offset by + * the extra metadata columns. + * @param isOriginal is the file in the original format? + * @return the column number for the root of row. + */ + private static int getRootColumn(boolean isOriginal) { + return isOriginal ? 0 : (OrcRecordUpdater.ROW + 1); + } + + public static RecordReader createReaderFromFile(Reader file, + Configuration conf, + long offset, long length + ) throws IOException { + Reader.Options options = new Reader.Options().range(offset, length); + boolean isOriginal = + !file.hasMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME); + List types = file.getTypes(); + setIncludedColumns(options, types, conf, isOriginal); + setSearchArgument(options, types, conf, isOriginal); + return file.rowsOptions(options); + } + + /** + * Recurse down into a type subtree turning on all of the sub-columns. + * @param types the types of the file + * @param result the global view of columns that should be included + * @param typeId the root of tree to enable + * @param rootColumn the top column + */ + private static void includeColumnRecursive(List types, + boolean[] result, + int typeId, + int rootColumn) { + result[typeId - rootColumn] = true; + OrcProto.Type type = types.get(typeId); + int children = type.getSubtypesCount(); + for(int i=0; i < children; ++i) { + includeColumnRecursive(types, result, type.getSubtypes(i), rootColumn); + } + } + + /** + * Take the configuration and figure out which columns we need to include. + * @param options the options to update + * @param types the types for the file + * @param conf the configuration + * @param isOriginal is the file in the original format? 
+ */ + static void setIncludedColumns(Reader.Options options, + List types, + Configuration conf, + boolean isOriginal) { + int rootColumn = getRootColumn(isOriginal); + if (!ColumnProjectionUtils.isReadAllColumns(conf)) { + int numColumns = types.size() - rootColumn; + boolean[] result = new boolean[numColumns]; + result[0] = true; + OrcProto.Type root = types.get(rootColumn); + List included = ColumnProjectionUtils.getReadColumnIDs(conf); + for(int i=0; i < root.getSubtypesCount(); ++i) { + if (included.contains(i)) { + includeColumnRecursive(types, result, root.getSubtypes(i), + rootColumn); + } + } + options.include(result); + } else { + options.include(null); + } + } + + static void setSearchArgument(Reader.Options options, + List types, + Configuration conf, + boolean isOriginal) { + int rootColumn = getRootColumn(isOriginal); + String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR); + String sargPushdown = conf.get(SARG_PUSHDOWN); + String columnNamesString = + conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR); + if ((sargPushdown == null && serializedPushdown == null) + || columnNamesString == null) { + LOG.debug("No ORC pushdown predicate"); + options.searchArgument(null, null); + } else { + SearchArgument sarg; + if (serializedPushdown != null) { + sarg = SearchArgumentFactory.create + (Utilities.deserializeExpression(serializedPushdown)); + } else { + sarg = SearchArgumentFactory.create(sargPushdown); + } + LOG.info("ORC pushdown predicate: " + sarg); + String[] neededColumnNames = columnNamesString.split(","); + String[] columnNames = new String[types.size() - rootColumn]; + boolean[] includedColumns = options.getInclude(); + int i = 0; + for(int columnId: types.get(rootColumn).getSubtypesList()) { + if (includedColumns == null || includedColumns[columnId - rootColumn]) { + // this is guaranteed to be positive because types only have children + // ids greater than their own id. + columnNames[columnId - rootColumn] = neededColumnNames[i++]; + } + } + options.searchArgument(sarg, columnNames); + } + } + + /** + * Get the list of input {@link Path}s for the map-reduce job. + * + * @param conf The configuration of the job + * @return the list of input {@link Path}s for the map-reduce job. 
+ */ + static Path[] getInputPaths(Configuration conf) throws IOException { + String dirs = conf.get("mapred.input.dir"); + if (dirs == null) { + throw new IOException("Configuration mapred.input.dir is not defined."); + } + String [] list = StringUtils.split(dirs); + Path[] result = new Path[list.length]; + for (int i = 0; i < list.length; i++) { + result[i] = new Path(StringUtils.unEscapeString(list[i])); + } + return result; + } + + static List generateSplitsInfo(Configuration conf) + throws IOException { + // use threads to resolve directories into splits + Context context = new Context(conf); + for(Path dir: getInputPaths(conf)) { + FileSystem fs = dir.getFileSystem(conf); + context.schedule(new FileGenerator(context, fs, dir)); + } + context.waitForTasks(); + // deal with exceptions + if (!context.errors.isEmpty()) { + List errors = + new ArrayList(context.errors.size()); + for(Throwable th: context.errors) { + if (th instanceof IOException) { + errors.add((IOException) th); + } else { + throw new RuntimeException("serious problem", th); + } + } + throw new InvalidInputException(errors); + } + if (context.cacheStripeDetails) { + LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/" + + context.numFilesCounter.get()); + } + return context.splits; + } + + static Path findOriginalBucket(FileSystem fs, + Path directory, + int bucket) throws IOException { + for(FileStatus stat: fs.listStatus(directory)) { + String name = stat.getPath().getName(); + String numberPart = name.substring(0, name.indexOf('_')); + if (org.apache.commons.lang3.StringUtils.isNumeric(numberPart) && + Integer.parseInt(numberPart) == bucket) { + return stat.getPath(); + } + } + throw new IllegalArgumentException("Can't find bucket " + bucket + " in " + + directory); + } + + @Override + public boolean shouldSkipCombine(Path path, + Configuration conf) throws IOException { + return (conf.get(AcidUtils.CONF_ACID_KEY) != null) || AcidUtils.isAcid(path, conf); + } + + @Override + public boolean validateInput(FileSystem fs, HiveConf conf, + ArrayList files + ) throws IOException { + + if (Utilities.isVectorMode(conf)) { + return new VectorizedOrcInputFormat().validateInput(fs, conf, files); + } + + if (files.size() <= 0) { + return false; + } + for (FileStatus file : files) { + try { + OrcFile.createReader(file.getPath(), + OrcFile.readerOptions(conf).filesystem(fs)); + } catch (IOException e) { + return false; + } + } + return true; + } + + @Override + public InputSplit[] getSplits(JobConf job, + int numSplits) throws IOException { + perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ORC_GET_SPLITS); + List result = generateSplitsInfo(job); + perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ORC_GET_SPLITS); + return result.toArray(new InputSplit[result.size()]); + } + + @SuppressWarnings("unchecked") + private org.apache.hadoop.mapred.RecordReader + createVectorizedReader(InputSplit split, JobConf conf, Reporter reporter + ) throws IOException { + return (org.apache.hadoop.mapred.RecordReader) + new VectorizedOrcInputFormat().getRecordReader(split, conf, reporter); + } + + @Override + public org.apache.hadoop.mapred.RecordReader + getRecordReader(InputSplit inputSplit, JobConf conf, + Reporter reporter) throws IOException { + boolean vectorMode = Utilities.isVectorMode(conf); + + // if HiveCombineInputFormat gives us FileSplits instead of OrcSplits, + // we know it is not ACID. 
(see a check in CombineHiveInputFormat.getSplits() that assures this) + if (inputSplit.getClass() == FileSplit.class) { + if (vectorMode) { + return createVectorizedReader(inputSplit, conf, reporter); + } + return new OrcRecordReader(OrcFile.createReader( + ((FileSplit) inputSplit).getPath(), + OrcFile.readerOptions(conf)), conf, (FileSplit) inputSplit); + } + + OrcSplit split = (OrcSplit) inputSplit; + reporter.setStatus(inputSplit.toString()); + + Options options = new Options(conf).reporter(reporter); + final RowReader inner = getReader(inputSplit, options); + + + /*Even though there are no delta files, we still need to produce row ids so that an + * UPDATE or DELETE statement would work on a table which didn't have any previous updates*/ + if (split.isOriginal() && split.getDeltas().isEmpty()) { + if (vectorMode) { + return createVectorizedReader(inputSplit, conf, reporter); + } else { + return new NullKeyRecordReader(inner, conf); + } + } + + if (vectorMode) { + return (org.apache.hadoop.mapred.RecordReader) + new VectorizedOrcAcidRowReader(inner, conf, (FileSplit) inputSplit); + } + return new NullKeyRecordReader(inner, conf); + } + + @Override + public RowReader getReader(InputSplit inputSplit, + Options options) throws IOException { + final OrcSplit split = (OrcSplit) inputSplit; + final Path path = split.getPath(); + Path root; + if (split.hasBase()) { + if (split.isOriginal()) { + root = path.getParent(); + } else { + root = path.getParent().getParent(); + } + } else { + root = path; + } + final Path[] deltas = AcidUtils.deserializeDeltas(root, split.getDeltas()); + final Configuration conf = options.getConfiguration(); + final Reader reader; + final int bucket; + Reader.Options readOptions = new Reader.Options(); + readOptions.range(split.getStart(), split.getLength()); + if (split.hasBase()) { + bucket = AcidUtils.parseBaseBucketFilename(split.getPath(), conf) + .getBucket(); + reader = OrcFile.createReader(path, OrcFile.readerOptions(conf)); + final List types = reader.getTypes(); + setIncludedColumns(readOptions, types, conf, split.isOriginal()); + setSearchArgument(readOptions, types, conf, split.isOriginal()); + } else { + bucket = (int) split.getStart(); + reader = null; + } + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY, + Long.MAX_VALUE + ":"); + ValidTxnList validTxnList = new ValidReadTxnList(txnString); + final OrcRawRecordMerger records = + new OrcRawRecordMerger(conf, true, reader, split.isOriginal(), bucket, + validTxnList, readOptions, deltas); + return new RowReader() { + OrcStruct innerRecord = records.createValue(); + + @Override + public ObjectInspector getObjectInspector() { + return ((StructObjectInspector) records.getObjectInspector()) + .getAllStructFieldRefs().get(OrcRecordUpdater.ROW) + .getFieldObjectInspector(); + } + + @Override + public boolean next(RecordIdentifier recordIdentifier, + OrcStruct orcStruct) throws IOException { + boolean result; + // filter out the deleted records + do { + result = records.next(recordIdentifier, innerRecord); + } while (result && + OrcRecordUpdater.getOperation(innerRecord) == + OrcRecordUpdater.DELETE_OPERATION); + if (result) { + // swap the fields with the passed in orcStruct + orcStruct.linkFields(OrcRecordUpdater.getRow(innerRecord)); + } + return result; + } + + @Override + public RecordIdentifier createKey() { + return records.createKey(); + } + + @Override + public OrcStruct createValue() { + return new OrcStruct(records.getColumns()); + } + + @Override + public long getPos() throws IOException { + 
return records.getPos(); + } + + @Override + public void close() throws IOException { + records.close(); + } + + @Override + public float getProgress() throws IOException { + return records.getProgress(); + } + }; + } + + @Override + public RawReader getRawReader(Configuration conf, + boolean collapseEvents, + int bucket, + ValidTxnList validTxnList, + Path baseDirectory, + Path[] deltaDirectory + ) throws IOException { + Reader reader = null; + boolean isOriginal = false; + if (baseDirectory != null) { + Path bucketFile; + if (baseDirectory.getName().startsWith(AcidUtils.BASE_PREFIX)) { + bucketFile = AcidUtils.createBucketFile(baseDirectory, bucket); + } else { + isOriginal = true; + bucketFile = findOriginalBucket(baseDirectory.getFileSystem(conf), + baseDirectory, bucket); + } + reader = OrcFile.createReader(bucketFile, OrcFile.readerOptions(conf)); + } + return new OrcRawRecordMerger(conf, collapseEvents, reader, isOriginal, + bucket, validTxnList, new Reader.Options(), deltaDirectory); + } + + private static class OrcRecordReader + implements org.apache.hadoop.mapred.RecordReader, + StatsProvidingRecordReader { + private final RecordReader reader; + private final long offset; + private final long length; + private final int numColumns; + private final Reader file; + private final SerDeStats stats; + private float progress = 0.0f; + + + OrcRecordReader(Reader file, Configuration conf, + FileSplit split) throws IOException { + List types = file.getTypes(); + this.file = file; + numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount(); + this.offset = split.getStart(); + this.length = split.getLength(); + this.reader = createReaderFromFile(file, conf, offset, length); + this.stats = new SerDeStats(); + } + + @Override + public boolean next(NullWritable key, OrcStruct value) throws IOException { + if (reader.hasNext()) { + reader.next(value); + progress = reader.getProgress(); + return true; + } else { + return false; + } + } + + @Override + public NullWritable createKey() { + return NullWritable.get(); + } + + @Override + public OrcStruct createValue() { + return new OrcStruct(numColumns); + } + + @Override + public long getPos() throws IOException { + return offset + (long) (progress * length); + } + + @Override + public void close() throws IOException { + reader.close(); + } + + @Override + public float getProgress() throws IOException { + return progress; + } + + @Override + public SerDeStats getStats() { + stats.setRawDataSize(file.getRawDataSize()); + stats.setRowCount(file.getNumberOfRows()); + return stats; + } + } + + /** + * The global information about the split generation that we pass around to + * the different worker threads. + */ + static class Context { + private static Cache footerCache; + private final Configuration conf; + private final ExecutorService threadPool; + private final List splits = + new ArrayList(10000); + private final int numBuckets; + private final List errors = new ArrayList(); + private final long maxSize; + private final long minSize; + private final boolean footerInSplits; + private final boolean cacheStripeDetails; + private final AtomicInteger cacheHitCounter = new AtomicInteger(0); + private final AtomicInteger numFilesCounter = new AtomicInteger(0); + private Throwable fatalError = null; + private ValidTxnList transactionList; + + /** + * A count of the number of threads that may create more work for the + * thread pool. 
+ */ + private int schedulers = 0; + + Context(Configuration conf) { + this.conf = conf; + minSize = conf.getLong(MIN_SPLIT_SIZE, DEFAULT_MIN_SPLIT_SIZE); + maxSize = conf.getLong(MAX_SPLIT_SIZE, DEFAULT_MAX_SPLIT_SIZE); + footerInSplits = HiveConf.getBoolVar(conf, + ConfVars.HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS); + numBuckets = + Math.max(conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0), 0); + LOG.debug("Number of buckets specified by conf file is " + numBuckets); + int cacheStripeDetailsSize = HiveConf.getIntVar(conf, + ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE); + int numThreads = HiveConf.getIntVar(conf, + ConfVars.HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS); + + cacheStripeDetails = (cacheStripeDetailsSize > 0); + + threadPool = Executors.newFixedThreadPool(numThreads, + new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("ORC_GET_SPLITS #%d").build()); + + synchronized (Context.class) { + if (footerCache == null && cacheStripeDetails) { + footerCache = CacheBuilder.newBuilder().concurrencyLevel(numThreads) + .initialCapacity(cacheStripeDetailsSize).softValues().build(); + } + } + String value = conf.get(ValidTxnList.VALID_TXNS_KEY, + Long.MAX_VALUE + ":"); + transactionList = new ValidReadTxnList(value); + } + + int getSchedulers() { + return schedulers; + } + + /** + * Get the Nth split. + * @param index if index >= 0, count from the front, otherwise count from + * the back. + * @return the Nth file split + */ + OrcSplit getResult(int index) { + if (index >= 0) { + return splits.get(index); + } else { + return splits.get(splits.size() + index); + } + } + + List getErrors() { + return errors; + } + + /** + * Add a unit of work. + * @param runnable the object to run + */ + synchronized void schedule(Runnable runnable) { + if (fatalError == null) { + if (runnable instanceof FileGenerator || + runnable instanceof SplitGenerator) { + schedulers += 1; + } + threadPool.execute(runnable); + } else { + throw new RuntimeException("serious problem", fatalError); + } + } + + /** + * Mark a worker that may generate more work as done. + */ + synchronized void decrementSchedulers() { + schedulers -= 1; + if (schedulers == 0) { + notify(); + } + } + + synchronized void notifyOnNonIOException(Throwable th) { + fatalError = th; + notify(); + } + + /** + * Wait until all of the tasks are done. It waits until all of the + * threads that may create more work are done and then shuts down the + * thread pool and waits for the final threads to finish. + */ + synchronized void waitForTasks() { + try { + while (schedulers != 0) { + wait(); + if (fatalError != null) { + threadPool.shutdownNow(); + throw new RuntimeException("serious problem", fatalError); + } + } + threadPool.shutdown(); + threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); + } catch (InterruptedException ie) { + throw new IllegalStateException("interrupted", ie); + } + } + } + + /** + * Given a directory, get the list of files and blocks in those files. + * A thread is used for each directory. 
+ */ + static final class FileGenerator implements Runnable { + private final Context context; + private final FileSystem fs; + private final Path dir; + + FileGenerator(Context context, FileSystem fs, Path dir) { + this.context = context; + this.fs = fs; + this.dir = dir; + } + + private void scheduleSplits(FileStatus file, + boolean isOriginal, + boolean hasBase, + List deltas) throws IOException{ + FileInfo info = null; + if (context.cacheStripeDetails) { + info = verifyCachedFileInfo(file); + } + new SplitGenerator(context, fs, file, info, isOriginal, deltas, + hasBase).schedule(); + } + + /** + * For each path, get the list of files and blocks that they consist of. + */ + @Override + public void run() { + try { + AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir, + context.conf, context.transactionList); + List deltas = + AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories()); + Path base = dirInfo.getBaseDirectory(); + List original = dirInfo.getOriginalFiles(); + + boolean[] covered = new boolean[context.numBuckets]; + boolean isOriginal = base == null; + + // if we have a base to work from + if (base != null || !original.isEmpty()) { + + // find the base files (original or new style) + List children = original; + if (base != null) { + children = SHIMS.listLocatedStatus(fs, base, + AcidUtils.hiddenFileFilter); + } + + // for each child, schedule splits and mark off the bucket + for(FileStatus child: children) { + //update by tudou on 20200529, redmine = http://redmine.prod.dtstack.cn/issues/26286 + // make sure the file length on HDFS > 0(is not empty), otherwise an IndexOutOfBoundsException will be thrown in org.apache.hadoop.hive.ql.io.orc.ReaderImpl.extractMetaInfoFromFooter:362 + if(child.getLen() > 0){ + AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename + (child.getPath(), context.conf); + scheduleSplits(child, isOriginal, true, deltas); + int b = opts.getBucket(); + // If the bucket is in the valid range, mark it as covered. + // I wish Hive actually enforced bucketing all of the time. + if (b >= 0 && b < covered.length) { + covered[b] = true; + } + } + } + } + + // Generate a split for any buckets that weren't covered. + // This happens in the case where a bucket just has deltas and no + // base. + if (!deltas.isEmpty()) { + for (int b = 0; b < context.numBuckets; ++b) { + if (!covered[b]) { + synchronized (context.splits) { + context.splits.add(new OrcSplit(dir, b, 0, new String[0], null, + false, false, deltas)); + } + } + } + } + } catch (Throwable th) { + if (!(th instanceof IOException)) { + LOG.error("Unexpected Exception", th); + } + synchronized (context.errors) { + context.errors.add(th); + } + if (!(th instanceof IOException)) { + context.notifyOnNonIOException(th); + } + } finally { + context.decrementSchedulers(); + } + } + + private FileInfo verifyCachedFileInfo(FileStatus file) { + context.numFilesCounter.incrementAndGet(); + FileInfo fileInfo = Context.footerCache.getIfPresent(file.getPath()); + if (fileInfo != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Info cached for path: " + file.getPath()); + } + if (fileInfo.modificationTime == file.getModificationTime() && + fileInfo.size == file.getLen()) { + // Cached copy is valid + context.cacheHitCounter.incrementAndGet(); + return fileInfo; + } else { + // Invalidate + Context.footerCache.invalidate(file.getPath()); + if (LOG.isDebugEnabled()) { + LOG.debug("Meta-Info for : " + file.getPath() + + " changed. 
CachedModificationTime: " + + fileInfo.modificationTime + ", CurrentModificationTime: " + + file.getModificationTime() + + ", CachedLength: " + fileInfo.size + ", CurrentLength: " + + file.getLen()); + } + } + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Info not cached for path: " + file.getPath()); + } + } + return null; + } + } + + /** + * Split the stripes of a given file into input splits. + * A thread is used for each file. + */ + static final class SplitGenerator implements Runnable { + private final Context context; + private final FileSystem fs; + private final FileStatus file; + private final long blockSize; + private final TreeMap locations; + private final FileInfo fileInfo; + private final boolean isOriginal; + private final List deltas; + private final boolean hasBase; + private List stripes; + private ReaderImpl.FileMetaInfo fileMetaInfo; + private Metadata metadata; + private List types; + private OrcFile.WriterVersion writerVersion; + + SplitGenerator(Context context, FileSystem fs, + FileStatus file, FileInfo fileInfo, + boolean isOriginal, + List deltas, + boolean hasBase) throws IOException { + this.context = context; + this.fs = fs; + this.file = file; + this.blockSize = file.getBlockSize(); + this.fileInfo = fileInfo; + locations = SHIMS.getLocationsWithOffset(fs, file); + this.isOriginal = isOriginal; + this.deltas = deltas; + this.hasBase = hasBase; + } + + /** + * Compute the number of bytes that overlap between the two ranges. + * @param offset1 start of range1 + * @param length1 length of range1 + * @param offset2 start of range2 + * @param length2 length of range2 + * @return the number of bytes in the overlap range + */ + static long getOverlap(long offset1, long length1, + long offset2, long length2) { + long end1 = offset1 + length1; + long end2 = offset2 + length2; + if (end2 <= offset1 || end1 <= offset2) { + return 0; + } else { + return Math.min(end1, end2) - Math.max(offset1, offset2); + } + } + + Path getPath() { + return file.getPath(); + } + + void schedule() throws IOException { + if(locations.size() == 1 && file.getLen() < context.maxSize) { + String[] hosts = locations.firstEntry().getValue().getHosts(); + synchronized (context.splits) { + context.splits.add(new OrcSplit(file.getPath(), 0, file.getLen(), + hosts, fileMetaInfo, isOriginal, hasBase, deltas)); + } + } else { + // if it requires a compute task + context.schedule(this); + } + } + + @Override + public String toString() { + return "splitter(" + file.getPath() + ")"; + } + + /** + * Create an input split over the given range of bytes. The location of the + * split is based on where the majority of the byte are coming from. ORC + * files are unlikely to have splits that cross between blocks because they + * are written with large block sizes. 
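To make the MIN_INCLUDED_LOCATION rule concrete: createSplit sums, per host, the bytes of the split that are local to that host (using getOverlap) and keeps only hosts holding at least 80% of what the best host holds. A rough standalone sketch of that filtering, reusing the 10MB/20MB/18MB example from the class comment (host names and sizes are illustrative, not from the patch):

    import java.util.*;

    public class SplitHostFilterDemo {
        private static final double MIN_INCLUDED_LOCATION = 0.80;

        public static void main(String[] args) {
            // bytes of the split that are local to each host, as computed with getOverlap
            Map<String, Long> sizes = new LinkedHashMap<>();
            sizes.put("host1", 10L << 20);  // 10 MB, 50% of the best host, dropped
            sizes.put("host2", 20L << 20);  // 20 MB, the best host, kept
            sizes.put("host3", 18L << 20);  // 18 MB, 90% of the best host, kept
            long maxSize = Collections.max(sizes.values());
            long threshold = (long) (maxSize * MIN_INCLUDED_LOCATION);
            List<String> hosts = new ArrayList<>();
            for (Map.Entry<String, Long> e : sizes.entrySet()) {
                if (e.getValue() >= threshold) {
                    hosts.add(e.getKey());
                }
            }
            System.out.println(hosts);      // [host2, host3]
        }
    }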
+ * @param offset the start of the split + * @param length the length of the split + * @param fileMetaInfo file metadata from footer and postscript + * @throws IOException + */ + void createSplit(long offset, long length, + ReaderImpl.FileMetaInfo fileMetaInfo) throws IOException { + String[] hosts; + Map.Entry startEntry = locations.floorEntry(offset); + BlockLocation start = startEntry.getValue(); + if (offset + length <= start.getOffset() + start.getLength()) { + // handle the single block case + hosts = start.getHosts(); + } else { + Map.Entry endEntry = locations.floorEntry(offset + length); + BlockLocation end = endEntry.getValue(); + //get the submap + NavigableMap navigableMap = locations.subMap(startEntry.getKey(), + true, endEntry.getKey(), true); + // Calculate the number of bytes in the split that are local to each + // host. + Map sizes = new HashMap(); + long maxSize = 0; + for (BlockLocation block : navigableMap.values()) { + long overlap = getOverlap(offset, length, block.getOffset(), + block.getLength()); + if (overlap > 0) { + for(String host: block.getHosts()) { + LongWritable val = sizes.get(host); + if (val == null) { + val = new LongWritable(); + sizes.put(host, val); + } + val.set(val.get() + overlap); + maxSize = Math.max(maxSize, val.get()); + } + } else { + throw new IOException("File " + file.getPath().toString() + + " should have had overlap on block starting at " + block.getOffset()); + } + } + // filter the list of locations to those that have at least 80% of the + // max + long threshold = (long) (maxSize * MIN_INCLUDED_LOCATION); + List hostList = new ArrayList(); + // build the locations in a predictable order to simplify testing + for(BlockLocation block: navigableMap.values()) { + for(String host: block.getHosts()) { + if (sizes.containsKey(host)) { + if (sizes.get(host).get() >= threshold) { + hostList.add(host); + } + sizes.remove(host); + } + } + } + hosts = new String[hostList.size()]; + hostList.toArray(hosts); + } + synchronized (context.splits) { + context.splits.add(new OrcSplit(file.getPath(), offset, length, + hosts, fileMetaInfo, isOriginal, hasBase, deltas)); + } + } + + /** + * Divide the adjacent stripes in the file into input splits based on the + * block size and the configured minimum and maximum sizes. + */ + @Override + public void run() { + try { + populateAndCacheStripeDetails(); + + // figure out which stripes we need to read + boolean[] includeStripe = null; + // we can't eliminate stripes if there are deltas because the + // deltas may change the rows making them match the predicate. 
+ if (deltas.isEmpty()) { + Reader.Options options = new Reader.Options(); + setIncludedColumns(options, types, context.conf, isOriginal); + setSearchArgument(options, types, context.conf, isOriginal); + // only do split pruning if HIVE-8732 has been fixed in the writer + if (options.getSearchArgument() != null && + writerVersion != OrcFile.WriterVersion.ORIGINAL) { + SearchArgument sarg = options.getSearchArgument(); + List sargLeaves = sarg.getLeaves(); + List stripeStats = metadata.getStripeStatistics(); + int[] filterColumns = RecordReaderImpl.mapSargColumns(sargLeaves, + options.getColumnNames(), getRootColumn(isOriginal)); + + if (stripeStats != null) { + // eliminate stripes that doesn't satisfy the predicate condition + includeStripe = new boolean[stripes.size()]; + for(int i=0; i < stripes.size(); ++i) { + includeStripe[i] = (i >= stripeStats.size()) || + isStripeSatisfyPredicate(stripeStats.get(i), sarg, + filterColumns); + if (LOG.isDebugEnabled() && !includeStripe[i]) { + LOG.debug("Eliminating ORC stripe-" + i + " of file '" + + file.getPath() + "' as it did not satisfy " + + "predicate condition."); + } + } + } + } + } + + // if we didn't have predicate pushdown, read everything + if (includeStripe == null) { + includeStripe = new boolean[stripes.size()]; + Arrays.fill(includeStripe, true); + } + + long currentOffset = -1; + long currentLength = 0; + int idx = -1; + for(StripeInformation stripe: stripes) { + idx++; + + if (!includeStripe[idx]) { + // create split for the previous unfinished stripe + if (currentOffset != -1) { + createSplit(currentOffset, currentLength, fileMetaInfo); + currentOffset = -1; + } + continue; + } + + // if we are working on a stripe, over the min stripe size, and + // crossed a block boundary, cut the input split here. + if (currentOffset != -1 && currentLength > context.minSize && + (currentOffset / blockSize != stripe.getOffset() / blockSize)) { + createSplit(currentOffset, currentLength, fileMetaInfo); + currentOffset = -1; + } + // if we aren't building a split, start a new one. 
+ if (currentOffset == -1) { + currentOffset = stripe.getOffset(); + currentLength = stripe.getLength(); + } else { + currentLength = + (stripe.getOffset() + stripe.getLength()) - currentOffset; + } + if (currentLength >= context.maxSize) { + createSplit(currentOffset, currentLength, fileMetaInfo); + currentOffset = -1; + } + } + if (currentOffset != -1) { + createSplit(currentOffset, currentLength, fileMetaInfo); + } + } catch (Throwable th) { + if (!(th instanceof IOException)) { + LOG.error("Unexpected Exception", th); + } + synchronized (context.errors) { + context.errors.add(th); + } + if (!(th instanceof IOException)) { + context.notifyOnNonIOException(th); + } + } finally { + context.decrementSchedulers(); + } + } + + private void populateAndCacheStripeDetails() { + try { + Reader orcReader; + if (fileInfo != null) { + stripes = fileInfo.stripeInfos; + fileMetaInfo = fileInfo.fileMetaInfo; + metadata = fileInfo.metadata; + types = fileInfo.types; + writerVersion = fileInfo.writerVersion; + // For multiple runs, in case sendSplitsInFooter changes + if (fileMetaInfo == null && context.footerInSplits) { + orcReader = OrcFile.createReader(file.getPath(), + OrcFile.readerOptions(context.conf).filesystem(fs)); + fileInfo.fileMetaInfo = ((ReaderImpl) orcReader).getFileMetaInfo(); + fileInfo.metadata = orcReader.getMetadata(); + fileInfo.types = orcReader.getTypes(); + fileInfo.writerVersion = orcReader.getWriterVersion(); + } + } else { + orcReader = OrcFile.createReader(file.getPath(), + OrcFile.readerOptions(context.conf).filesystem(fs)); + stripes = orcReader.getStripes(); + metadata = orcReader.getMetadata(); + types = orcReader.getTypes(); + writerVersion = orcReader.getWriterVersion(); + fileMetaInfo = context.footerInSplits ? + ((ReaderImpl) orcReader).getFileMetaInfo() : null; + if (context.cacheStripeDetails) { + // Populate into cache. + Context.footerCache.put(file.getPath(), + new FileInfo(file.getModificationTime(), file.getLen(), stripes, + metadata, types, fileMetaInfo, writerVersion)); + } + } + } catch (Throwable th) { + if (!(th instanceof IOException)) { + LOG.error("Unexpected Exception", th); + } + synchronized (context.errors) { + context.errors.add(th); + } + if (!(th instanceof IOException)) { + context.notifyOnNonIOException(th); + } + } + } + + private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics, + SearchArgument sarg, + int[] filterColumns) { + List predLeaves = sarg.getLeaves(); + TruthValue[] truthValues = new TruthValue[predLeaves.size()]; + for (int pred = 0; pred < truthValues.length; pred++) { + if (filterColumns[pred] != -1) { + + // column statistics at index 0 contains only the number of rows + ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; + truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred)); + } else { + + // parition column case. + // partition filter will be evaluated by partition pruner so + // we will not evaluate partition filter here. + truthValues[pred] = TruthValue.YES_NO_NULL; + } + } + return sarg.evaluate(truthValues).isNeeded(); + } + } + + /** + * FileInfo. + * + * Stores information relevant to split generation for an ORC File. 
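The FileInfo entries described here are keyed by file path in the Guava footer cache, and verifyCachedFileInfo above only trusts an entry while the file's modification time and length are unchanged, invalidating it otherwise. A condensed sketch of that validation (CachedFooter and lookup are illustrative stand-ins; the cache calls mirror the ones used in this class):

    import com.google.common.cache.Cache;
    import com.google.common.cache.CacheBuilder;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;

    class FooterCacheCheckSketch {
        static class CachedFooter {   // stand-in for the cache-relevant FileInfo fields
            long modificationTime;
            long size;
        }

        static final Cache<Path, CachedFooter> FOOTER_CACHE =
                CacheBuilder.newBuilder().softValues().build();

        static CachedFooter lookup(FileStatus file) {
            CachedFooter cached = FOOTER_CACHE.getIfPresent(file.getPath());
            if (cached == null) {
                return null;                              // nothing cached for this path yet
            }
            if (cached.modificationTime == file.getModificationTime()
                    && cached.size == file.getLen()) {
                return cached;                            // file unchanged, cached footer still valid
            }
            FOOTER_CACHE.invalidate(file.getPath());      // file rewritten, drop the stale entry
            return null;
        }
    }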
+ * + */ + private static class FileInfo { + long modificationTime; + long size; + List stripeInfos; + ReaderImpl.FileMetaInfo fileMetaInfo; + Metadata metadata; + List types; + private OrcFile.WriterVersion writerVersion; + + + FileInfo(long modificationTime, long size, + List stripeInfos, + Metadata metadata, List types, + ReaderImpl.FileMetaInfo fileMetaInfo, + OrcFile.WriterVersion writerVersion) { + this.modificationTime = modificationTime; + this.size = size; + this.stripeInfos = stripeInfos; + this.fileMetaInfo = fileMetaInfo; + this.metadata = metadata; + this.types = types; + this.writerVersion = writerVersion; + } + } + + /** + * Return a RecordReader that is compatible with the Hive 0.12 reader + * with NullWritable for the key instead of RecordIdentifier. + */ + public static final class NullKeyRecordReader implements AcidRecordReader { + private final RecordIdentifier id; + private final RowReader inner; + + private NullKeyRecordReader(RowReader inner, Configuration conf) { + this.inner = inner; + id = inner.createKey(); + } + + public RecordIdentifier getRecordIdentifier() { + return id; + } + + @Override + public boolean next(NullWritable nullWritable, + OrcStruct orcStruct) throws IOException { + return inner.next(id, orcStruct); + } + + @Override + public NullWritable createKey() { + return NullWritable.get(); + } + + @Override + public OrcStruct createValue() { + return inner.createValue(); + } + + @Override + public long getPos() throws IOException { + return inner.getPos(); + } + + @Override + public void close() throws IOException { + inner.close(); + } + + @Override + public float getProgress() throws IOException { + return inner.getProgress(); + } + } + + +} diff --git a/flinkx-hdfs/flinkx-hdfs-reader/src/main/test/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormatTest.java b/flinkx-hdfs/flinkx-hdfs-reader/src/main/test/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormatTest.java new file mode 100644 index 0000000000..6df5ceb002 --- /dev/null +++ b/flinkx-hdfs/flinkx-hdfs-reader/src/main/test/com/dtstack/flinkx/hdfs/reader/HdfsOrcInputFormatTest.java @@ -0,0 +1,22 @@ +package com.dtstack.flinkx.hdfs.reader; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +/** + * @author jiangbo + * @date 2020/3/16 + */ +public class HdfsOrcInputFormatTest { + + @Test + public void testParseColumnAndType() { + HdfsOrcInputFormat hdfsOrcInputFormat = new HdfsOrcInputFormat(); + + String struct = "int,float(10,2),char(12)"; + List result = hdfsOrcInputFormat.parseColumnAndType(struct); + Assert.assertEquals(result.size(), 3); + } +} \ No newline at end of file diff --git a/flinkx-hdfs/flinkx-hdfs-writer/pom.xml b/flinkx-hdfs/flinkx-hdfs-writer/pom.xml index 89f059b99d..ff3e37a7d6 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/pom.xml +++ b/flinkx-hdfs/flinkx-hdfs-writer/pom.xml @@ -98,6 +98,16 @@ under the License. 
+ <relocations> + <relocation> + <pattern>com.google.common</pattern> + <shadedPattern>shade.core.com.google.common</shadedPattern> + </relocation> + <relocation> + <pattern>com.google.thirdparty</pattern> + <shadedPattern>shade.core.com.google.thirdparty</shadedPattern> + </relocation> + </relocations> diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/BaseHdfsOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/BaseHdfsOutputFormat.java index 38ff34f667..f611487af5 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/BaseHdfsOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/BaseHdfsOutputFormat.java @@ -43,6 +43,8 @@ */ public abstract class BaseHdfsOutputFormat extends BaseFileOutputFormat { + private static final int FILE_NAME_PART_SIZE = 3; + protected int rowGroupSize; protected FileSystem fs; @@ -66,14 +68,29 @@ public abstract class BaseHdfsOutputFormat extends BaseFileOutputFormat { protected Configuration conf; + protected boolean enableDictionary; + protected transient Map decimalColInfo; @Override protected void openInternal(int taskNumber, int numTasks) throws IOException { + // Sleep for a while here so that the reader and writer, or several tasks on the same taskmanager, do not authenticate with Kerberos at the same time + if (FileSystemUtil.isOpenKerberos(hadoopConfig)) { + sleepRandomTime(); + } + initColIndices(); super.openInternal(taskNumber, numTasks); } + private void sleepRandomTime() { + try { + Thread.sleep(5000L + (long)(10000 * Math.random())); + } catch (Exception exception) { + LOG.warn("", exception); + } + } + @Override protected void checkOutputDir() { try{ @@ -120,7 +137,7 @@ protected void waitForActionFinishedBeforeWrite() { n++; } } catch (Exception e){ - + LOG.warn("Call method waitForActionFinishedBeforeWrite error", e); } } @@ -142,7 +159,7 @@ public boolean accept(Path path) { } String[] splits = fileName.split("\\."); - if (splits.length == 3) { + if (splits.length == FILE_NAME_PART_SIZE) { return Integer.parseInt(splits[2]) > fileIndex; } @@ -308,4 +325,8 @@ protected void moveAllTemporaryDataFileToDirectory() throws IOException { } } + @Override + protected void writeMultipleRecordsInternal() throws Exception { + notSupportBatchWrite("HdfsWriter"); + } } diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOrcOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOrcOutputFormat.java index 90f84054d5..a112570a66 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOrcOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOrcOutputFormat.java @@ -34,7 +34,11 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.compress.*; +import org.apache.hadoop.io.compress.BZip2Codec; +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.io.compress.Lz4Codec; +import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordWriter; @@ -213,17 +217,7 @@ private void getData(List recordList, int index, Row row) throws WriteRe recordList.add(Integer.valueOf(rowData)); break; case BIGINT: - if (column instanceof Timestamp){ - column=((Timestamp) column).getTime(); - recordList.add(column); - break; - } - BigInteger data = new BigInteger(rowData); - if (data.compareTo(new 
BigInteger(String.valueOf(Long.MAX_VALUE))) > 0){ - recordList.add(data); - } else { - recordList.add(Long.valueOf(rowData)); - } + recordList.add(getBigint(column, rowData)); break; case FLOAT: recordList.add(Float.valueOf(rowData)); @@ -232,16 +226,7 @@ private void getData(List recordList, int index, Row row) throws WriteRe recordList.add(Double.valueOf(rowData)); break; case DECIMAL: - ColumnTypeUtil.DecimalInfo decimalInfo = decimalColInfo.get(fullColumnNames.get(index)); - HiveDecimal hiveDecimal = HiveDecimal.create(new BigDecimal(rowData)); - hiveDecimal = HiveDecimal.enforcePrecisionScale(hiveDecimal, decimalInfo.getPrecision(), decimalInfo.getScale()); - if(hiveDecimal == null){ - String msg = String.format("第[%s]个数据数据[%s]precision和scale和元数据不匹配:decimal(%s, %s)", index, decimalInfo.getPrecision(), decimalInfo.getScale(), rowData); - throw new WriteRecordException(msg, new IllegalArgumentException()); - } - - HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(hiveDecimal); - recordList.add(hiveDecimalWritable); + recordList.add(getDecimalWritable(index, rowData)); break; case STRING: case VARCHAR: @@ -270,6 +255,32 @@ private void getData(List recordList, int index, Row row) throws WriteRe } } + private Object getBigint(Object column, String rowData) { + if (column instanceof Timestamp){ + column = ((Timestamp) column).getTime(); + return column; + } + + BigInteger data = new BigInteger(rowData); + if (data.compareTo(new BigInteger(String.valueOf(Long.MAX_VALUE))) > 0){ + return data; + } else { + return Long.valueOf(rowData); + } + } + + private HiveDecimalWritable getDecimalWritable(int index, String rowData) throws WriteRecordException { + ColumnTypeUtil.DecimalInfo decimalInfo = decimalColInfo.get(fullColumnNames.get(index)); + HiveDecimal hiveDecimal = HiveDecimal.create(new BigDecimal(rowData)); + hiveDecimal = HiveDecimal.enforcePrecisionScale(hiveDecimal, decimalInfo.getPrecision(), decimalInfo.getScale()); + if(hiveDecimal == null){ + String msg = String.format("第[%s]个数据数据[%s]precision和scale和元数据不匹配:decimal(%s, %s)", index, decimalInfo.getPrecision(), decimalInfo.getScale(), rowData); + throw new WriteRecordException(msg, new IllegalArgumentException()); + } + + return new HiveDecimalWritable(hiveDecimal); + } + @Override protected String recordConvertDetailErrorMessage(int pos, Row row) { return "\nHdfsOrcOutputFormat [" + jobName + "] writeRecord error: when converting field[" + fullColumnNames.get(pos) + "] in Row(" + row + ")"; diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormatBuilder.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormatBuilder.java index ec30366e16..0d8b40b951 100644 --- a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormatBuilder.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsOutputFormatBuilder.java @@ -83,6 +83,10 @@ public void setDefaultFs(String defaultFs) { format.defaultFs = defaultFs; } + public void setEnableDictionary(boolean enableDictionary) { + format.enableDictionary = enableDictionary; + } + @Override protected void checkFormat() { super.checkFormat(); diff --git a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsParquetOutputFormat.java b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsParquetOutputFormat.java index 7c4cd0e1fa..2dcb955c74 100644 --- 
a/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsParquetOutputFormat.java +++ b/flinkx-hdfs/flinkx-hdfs-writer/src/main/java/com/dtstack/flinkx/hdfs/writer/HdfsParquetOutputFormat.java @@ -21,13 +21,13 @@ import com.dtstack.flinkx.enums.ColumnType; import com.dtstack.flinkx.exception.WriteRecordException; import com.dtstack.flinkx.hdfs.ECompressType; +import com.dtstack.flinkx.hdfs.HdfsUtil; import com.dtstack.flinkx.util.ColumnTypeUtil; import com.dtstack.flinkx.util.DateUtil; import org.apache.commons.lang.StringUtils; import org.apache.flink.types.Row; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.example.data.Group; @@ -38,12 +38,16 @@ import org.apache.parquet.hadoop.example.GroupWriteSupport; import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.io.api.Binary; -import org.apache.parquet.schema.*; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.Types; import java.io.IOException; import java.math.BigDecimal; import java.sql.Timestamp; -import java.util.*; +import java.util.Date; +import java.util.HashMap; /** * The subclass of HdfsOutputFormat writing parquet files @@ -59,22 +63,8 @@ public class HdfsParquetOutputFormat extends BaseHdfsOutputFormat { private MessageType schema; - private static Calendar cal = Calendar.getInstance(); - - private static final long NANO_SECONDS_PER_DAY = 86400_000_000_000L; - - private static final long JULIAN_EPOCH_OFFSET_DAYS = 2440588; - private static ColumnTypeUtil.DecimalInfo PARQUET_DEFAULT_DECIMAL_INFO = new ColumnTypeUtil.DecimalInfo(10, 0); - static { - try { - cal.setTime(DateUtil.getDateFormatter().parse("1970-01-01")); - } catch (Exception e){ - throw new RuntimeException("Init calendar fail:",e); - } - } - @Override protected void openSource() throws IOException{ super.openSource(); @@ -101,6 +91,7 @@ protected void nextBlock(){ .withCompressionCodec(getCompressType()) .withConf(conf) .withType(schema) + .withDictionaryEncoding(enableDictionary) .withRowGroupSize(rowGroupSize); writer = builder.build(); @@ -162,7 +153,7 @@ public void writeSingleRecordToFile(Row row) throws WriteRecordException { try { for (; i < fullColumnNames.size(); i++) { Object valObj = row.getField(colIndices[i]); - if(valObj == null){ + if(valObj == null || valObj.toString().length() == 0){ continue; } @@ -233,7 +224,7 @@ private void addDataToGroup(Group group, Object valObj, int i) throws Exception{ case "boolean" : group.add(colName,Boolean.parseBoolean(val));break; case "timestamp" : Timestamp ts = DateUtil.columnToTimestamp(valObj,null); - byte[] dst = longToByteArray(ts.getTime()); + byte[] dst = HdfsUtil.longToByteArray(ts.getTime()); group.add(colName, Binary.fromConstantByteArray(dst)); break; case "decimal" : @@ -246,7 +237,7 @@ private void addDataToGroup(Group group, Object valObj, int i) throws Exception{ throw new WriteRecordException(msg, new IllegalArgumentException()); } - group.add(colName,decimalToBinary(hiveDecimal, decimalInfo.getPrecision(), decimalInfo.getScale())); + group.add(colName, HdfsUtil.decimalToBinary(hiveDecimal, decimalInfo.getPrecision(), decimalInfo.getScale())); break; case "date" : Date date = 
DateUtil.columnToDate(valObj,null); @@ -256,29 +247,6 @@ private void addDataToGroup(Group group, Object valObj, int i) throws Exception{ } } - private Binary decimalToBinary(final HiveDecimal hiveDecimal, int prec,int scale) { - byte[] decimalBytes = hiveDecimal.setScale(scale).unscaledValue().toByteArray(); - - // Estimated number of bytes needed. - int precToBytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; - if (precToBytes == decimalBytes.length) { - // No padding needed. - return Binary.fromReusedByteArray(decimalBytes); - } - - byte[] tgt = new byte[precToBytes]; - if (hiveDecimal.signum() == -1) { - // For negative number, initializing bits to 1 - for (int i = 0; i < precToBytes; i++) { - tgt[i] |= 0xFF; - } - } - - // Padding leading zeroes/ones. - System.arraycopy(decimalBytes, 0, tgt, precToBytes - decimalBytes.length, decimalBytes.length); - return Binary.fromReusedByteArray(tgt); - } - @Override protected String recordConvertDetailErrorMessage(int pos, Row row) { return "\nHdfsParquetOutputFormat [" + jobName + "] writeRecord error: when converting field[" + fullColumnNames.get(pos) + "] in Row(" + row + ")"; @@ -292,6 +260,7 @@ protected void closeSource() throws IOException { } private MessageType buildSchema(){ + decimalColInfo = new HashMap<>(16); Types.MessageTypeBuilder typeBuilder = Types.buildMessage(); for (int i = 0; i < fullColumnNames.size(); i++) { String name = fullColumnNames.get(i); @@ -317,10 +286,10 @@ private MessageType buildSchema(){ .as(OriginalType.DECIMAL) .precision(decimalInfo.getPrecision()) .scale(decimalInfo.getScale()) - .length(computeMinBytesForPrecision(decimalInfo.getPrecision())) + .length(HdfsUtil.computeMinBytesForPrecision(decimalInfo.getPrecision())) .named(name); - decimalColInfo = Collections.singletonMap(name, decimalInfo); + decimalColInfo.put(name, decimalInfo); } else { typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BINARY).named(name); } @@ -330,55 +299,4 @@ private MessageType buildSchema(){ return typeBuilder.named("Pair"); } - - private int computeMinBytesForPrecision(int precision){ - int numBytes = 1; - while (Math.pow(2.0, 8 * numBytes - 1) < Math.pow(10.0, precision)) { - numBytes += 1; - } - return numBytes; - } - - private static byte[] longToByteArray(long data){ - long nano = data * 1000_000; - - int julianDays = (int) ((nano / NANO_SECONDS_PER_DAY) + JULIAN_EPOCH_OFFSET_DAYS); - byte[] julianDaysBytes = getBytes(julianDays); - flip(julianDaysBytes); - - long lastDayNanos = nano % NANO_SECONDS_PER_DAY; - byte[] lastDayNanosBytes = getBytes(lastDayNanos); - flip(lastDayNanosBytes); - - byte[] dst = new byte[12]; - - System.arraycopy(lastDayNanosBytes, 0, dst, 0, 8); - System.arraycopy(julianDaysBytes, 0, dst, 8, 4); - - return dst; - } - - private static byte[] getBytes(long i) { - byte[] bytes=new byte[8]; - bytes[0]=(byte)((i >> 56) & 0xFF); - bytes[1]=(byte)((i >> 48) & 0xFF); - bytes[2]=(byte)((i >> 40) & 0xFF); - bytes[3]=(byte)((i >> 32) & 0xFF); - bytes[4]=(byte)((i >> 24) & 0xFF); - bytes[5]=(byte)((i >> 16) & 0xFF); - bytes[6]=(byte)((i >> 8) & 0xFF); - bytes[7]=(byte)(i & 0xFF); - return bytes; - } - - /** - * @param bytes - */ - private static void flip(byte[] bytes) { - for(int i=0,j=bytes.length-1;i writeData(DataStream dataSet) { builder.setRestoreConfig(restoreConfig); builder.setMaxFileSize(maxFileSize); builder.setFlushBlockInterval(flushInterval); + builder.setEnableDictionary(enableDictionary); return createOutput(dataSet, builder.finish()); } diff --git 
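Reference note on the hunks above: the INT96 timestamp and decimal helpers deleted from HdfsParquetOutputFormat are consolidated into HdfsUtil — their new call sites (HdfsUtil.longToByteArray, HdfsUtil.decimalToBinary, HdfsUtil.computeMinBytesForPrecision) appear earlier in this file's diff, though the HdfsUtil side itself is not shown in this patch excerpt. As a condensed sketch of the timestamp encoding for readers of the patch: the class name below is illustrative only, and the body of flip(), which is truncated in this diff, is assumed to be a plain in-place byte reversal.

    // Sketch: how epoch milliseconds become Parquet's 12-byte INT96 timestamp
    // (8 bytes of little-endian nanoseconds-within-day, then 4 bytes of
    // little-endian Julian day number), mirroring the removed helper methods.
    public class ParquetInt96Sketch {

        private static final long NANO_SECONDS_PER_DAY = 86_400_000_000_000L;
        private static final long JULIAN_EPOCH_OFFSET_DAYS = 2_440_588L;

        public static byte[] longToByteArray(long epochMillis) {
            long nano = epochMillis * 1_000_000L;                  // millis -> nanos

            int julianDays = (int) ((nano / NANO_SECONDS_PER_DAY) + JULIAN_EPOCH_OFFSET_DAYS);
            byte[] julianDaysBytes = getBytes(julianDays);
            flip(julianDaysBytes);                                 // big-endian -> little-endian

            long lastDayNanos = nano % NANO_SECONDS_PER_DAY;
            byte[] lastDayNanosBytes = getBytes(lastDayNanos);
            flip(lastDayNanosBytes);

            byte[] dst = new byte[12];
            System.arraycopy(lastDayNanosBytes, 0, dst, 0, 8);     // nanos of day first
            System.arraycopy(julianDaysBytes, 0, dst, 8, 4);       // then the Julian day
            return dst;
        }

        // Big-endian byte representation of a long.
        private static byte[] getBytes(long i) {
            byte[] bytes = new byte[8];
            for (int k = 0; k < 8; k++) {
                bytes[k] = (byte) ((i >> (56 - 8 * k)) & 0xFF);
            }
            return bytes;
        }

        // Assumed implementation (the original body is cut off in this diff): in-place reversal.
        private static void flip(byte[] bytes) {
            for (int i = 0, j = bytes.length - 1; i < j; i++, j--) {
                byte tmp = bytes[i];
                bytes[i] = bytes[j];
                bytes[j] = tmp;
            }
        }
    }

The byte array produced this way is what addDataToGroup wraps with Binary.fromConstantByteArray for the "timestamp" case.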
a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/TimePartitionFormat.java b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/TimePartitionFormat.java index 0de6b79e84..33917f375d 100644 --- a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/TimePartitionFormat.java +++ b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/TimePartitionFormat.java @@ -29,9 +29,9 @@ public class TimePartitionFormat { - private static final long CONSTANT_TWO_DAY_TIME = 1000 * 60 * 60 * 24 * 2; - private static final long CONSTANT_TWO_HOUR_TIME = 1000 * 60 * 60 * 2; - private static final long CONSTANT_TWO_MINUTE_TIME = 1000 * 60 * 2; + private static final long CONSTANT_TWO_DAY_TIME = 1000 * 60 * 60 * 24 * 2L; + private static final long CONSTANT_TWO_HOUR_TIME = 1000 * 60 * 60 * 2L; + private static final long CONSTANT_TWO_MINUTE_TIME = 1000 * 60 * 2L; private static PartitionEnum partitionEnum; diff --git a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/AbstractHiveMetadataParser.java b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/AbstractHiveMetadataParser.java index 21cb769d04..10d49b992c 100644 --- a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/AbstractHiveMetadataParser.java +++ b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/AbstractHiveMetadataParser.java @@ -25,7 +25,9 @@ import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.hive.EStoreType.*; +import static com.dtstack.flinkx.hive.EStoreType.ORC; +import static com.dtstack.flinkx.hive.EStoreType.PARQUET; +import static com.dtstack.flinkx.hive.EStoreType.TEXT; /** * @author jiangbo diff --git a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveDbUtil.java b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveDbUtil.java index 2c36ff3c74..4cb1fc6d09 100644 --- a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveDbUtil.java +++ b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveDbUtil.java @@ -21,6 +21,7 @@ import com.dtstack.flinkx.authenticate.KerberosUtil; import com.dtstack.flinkx.util.ExceptionUtil; import com.dtstack.flinkx.util.FileSystemUtil; +import com.dtstack.flinkx.util.RetryUtil; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.commons.collections.MapUtils; @@ -55,6 +56,8 @@ public final class HiveDbUtil { public static final String SQLSTATE_CANNOT_ACQUIRE_CONNECT = "08004"; + public static final int JDBC_PART_SIZE = 2; + public static final String JDBC_REGEX = "[\\?|;|#]"; public static final String KEY_VAL_DELIMITER = "="; public static final String PARAM_DELIMITER = "&"; @@ -103,7 +106,7 @@ private static Connection getConnectionWithKerberos(ConnectionInfo connectionInf String keytabFileName = KerberosUtil.getPrincipalFileName(connectionInfo.getHiveConf()); keytabFileName = KerberosUtil.loadFile(connectionInfo.getHiveConf(), keytabFileName); - String principal = KerberosUtil.findPrincipalFromKeytab(keytabFileName); + String principal = KerberosUtil.getPrincipal(connectionInfo.getHiveConf(), keytabFileName); KerberosUtil.loadKrb5Conf(connectionInfo.getHiveConf()); Configuration conf = FileSystemUtil.getConfiguration(connectionInfo.getHiveConf(), null); @@ -126,7 +129,7 @@ public Connection run(){ private static boolean openKerberos(final String jdbcUrl){ String[] splits = 
jdbcUrl.split(JDBC_REGEX); - if (splits.length != 2) { + if (splits.length != JDBC_PART_SIZE) { return false; } @@ -232,10 +235,10 @@ private static Connection getHiveConnection(String url, Properties prop) throws url = String.format("jdbc:hive2://%s:%s/%s", host, port, param); Connection connection = DriverManager.getConnection(url, prop); if (StringUtils.isNotEmpty(db)) { - try { - connection.createStatement().execute("use " + db); + try (Statement statement = connection.createStatement()) { + statement.execute("use " + db); } catch (SQLException e) { - if (connection != null) { + if (null != connection) { connection.close(); } diff --git a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveUtil.java b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveUtil.java index 7e0c0193fa..75209ebdfa 100644 --- a/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveUtil.java +++ b/flinkx-hive/flinkx-hive-core/src/main/java/com/dtstack/flinkx/hive/util/HiveUtil.java @@ -28,7 +28,8 @@ import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.hive.EStoreType.*; +import static com.dtstack.flinkx.hive.EStoreType.ORC; +import static com.dtstack.flinkx.hive.EStoreType.TEXT; /** * @author toutian @@ -52,26 +53,27 @@ public class HiveUtil { private HiveDbUtil.ConnectionInfo connectionInfo; - enum HiveReleaseVersion{ - /** - * apache hive 1.x - */ - APACHE_1, - - /** - * apache hive 2.x - */ - APACHE_2, - - /** - * cdh hive 1.x - */ - CDH_1, - - /** - * cdh hive 2.x - */ - CDH_2 + public static String getHiveColumnType(String originType) { + originType = originType.trim(); + int indexOfBrackets = originType.indexOf(LEFT_BRACKETS); + if (indexOfBrackets > -1) { + String params = originType.substring(indexOfBrackets); + int index = params.indexOf(","); + int right = Integer.parseInt(params.substring(index+1, params.length()-1).trim()); + if(right == 0){ + int left = Integer.parseInt(params.substring(1, index).trim()); + if(left <= 4){ + return "SMALLINT"; + }else if(left <= 9){ + return "INT"; + }else if(left <= 18){ + return "BIGINT"; + } + } + return "DECIMAL" + params; + } else { + return convertType(originType); + } } public HiveUtil() { @@ -176,70 +178,6 @@ private AbstractHiveMetadataParser getMetadataParser(HiveReleaseVersion hiveVers } } - public HiveReleaseVersion getHiveVersion(Connection connection){ - HiveReleaseVersion version = HiveReleaseVersion.APACHE_2; - try { - ResultSet resultSet = connection.createStatement().executeQuery("select version()"); - if (resultSet.next()) { - String versionMsg = resultSet.getString(1); - if (versionMsg.contains("cdh")){ - // 结果示例:2.1.1-cdh6.3.1 re8d55f408b4f9aa2648bc9e34a8f802d53d6aab3 - if (versionMsg.startsWith("2")) { - version = HiveReleaseVersion.CDH_2; - } else if(versionMsg.startsWith("1")){ - version = HiveReleaseVersion.CDH_1; - } - } else { - // FIXME spark thrift server不支持 version()函数,所以使用默认的版本 - } - } - } catch (Exception ignore) { - } - - return version; - } - - public static String getCreateTableHql(TableInfo tableInfo) { - //不要使用create table if not exist,可能以后会在业务逻辑中判断表是否已经存在 - StringBuilder fieldsb = new StringBuilder("CREATE TABLE %s ("); - for (int i = 0; i < tableInfo.getColumns().size(); i++) { - fieldsb.append(String.format("`%s` %s", tableInfo.getColumns().get(i), tableInfo.getColumnTypes().get(i))); - if (i != tableInfo.getColumns().size() - 1) { - fieldsb.append(","); - } - } - fieldsb.append(") "); - if (!tableInfo.getPartitions().isEmpty()) { - 
fieldsb.append(" PARTITIONED BY ("); - for (String partitionField : tableInfo.getPartitions()) { - fieldsb.append(String.format("`%s` string", partitionField)); - } - fieldsb.append(") "); - } - if (TEXT.name().equalsIgnoreCase(tableInfo.getStore())) { - fieldsb.append(" ROW FORMAT DELIMITED FIELDS TERMINATED BY '"); - fieldsb.append(tableInfo.getDelimiter()); - fieldsb.append("' LINES TERMINATED BY '\\n' STORED AS TEXTFILE "); - } else if(ORC.name().equalsIgnoreCase(tableInfo.getStore())) { - fieldsb.append(" STORED AS ORC "); - }else{ - fieldsb.append(" STORED AS PARQUET "); - } - return fieldsb.toString(); - } - - public static String getHiveColumnType(String originType) { - originType = originType.trim(); - int indexOfBrackets = originType.indexOf(LEFT_BRACKETS); - if (indexOfBrackets > -1) { - String type = originType.substring(0, indexOfBrackets); - String params = originType.substring(indexOfBrackets); - return convertType(type) + params; - } else { - return convertType(originType); - } - } - private static String convertType(String type) { switch (type.toUpperCase()) { case "BIT": @@ -258,6 +196,8 @@ private static String convertType(String type) { case "INT8": type = "INT"; break; + case "NUMERIC": + case "NUMBER": case "BIGINT": type = "BIGINT"; break; @@ -272,8 +212,6 @@ private static String convertType(String type) { case "BINARY_DOUBLE": type = "DOUBLE"; break; - case "NUMERIC": - case "NUMBER": case "DECIMAL": type = "DECIMAL"; break; @@ -310,4 +248,102 @@ private static String convertType(String type) { } return type; } + + public static String getCreateTableHql(TableInfo tableInfo) { + //不要使用create table if not exist,可能以后会在业务逻辑中判断表是否已经存在 + StringBuilder fieldsb = new StringBuilder("CREATE TABLE %s ("); + for (int i = 0; i < tableInfo.getColumns().size(); i++) { + fieldsb.append(String.format("`%s` %s", tableInfo.getColumns().get(i), tableInfo.getColumnTypes().get(i))); + if (i != tableInfo.getColumns().size() - 1) { + fieldsb.append(","); + } + } + fieldsb.append(") "); + if (!tableInfo.getPartitions().isEmpty()) { + fieldsb.append(" PARTITIONED BY ("); + for (String partitionField : tableInfo.getPartitions()) { + fieldsb.append(String.format("`%s` string", partitionField)); + } + fieldsb.append(") "); + } + if (TEXT.name().equalsIgnoreCase(tableInfo.getStore())) { + fieldsb.append(" ROW FORMAT DELIMITED FIELDS TERMINATED BY '"); + fieldsb.append(tableInfo.getDelimiter()); + fieldsb.append("' LINES TERMINATED BY '\\n' STORED AS TEXTFILE "); + } else if(ORC.name().equalsIgnoreCase(tableInfo.getStore())) { + fieldsb.append(" STORED AS ORC "); + }else{ + fieldsb.append(" STORED AS PARQUET "); + } + return fieldsb.toString(); + } + + public HiveReleaseVersion getHiveVersion(Connection connection){ + HiveReleaseVersion version = HiveReleaseVersion.APACHE_2; + try (ResultSet resultSet = connection.createStatement().executeQuery("select version()")) { + if (resultSet.next()) { + String versionMsg = resultSet.getString(1); + if (versionMsg.contains(HiveReleaseVersion.CDH_1.getName())){ + // 结果示例:2.1.1-cdh6.3.1 re8d55f408b4f9aa2648bc9e34a8f802d53d6aab3 + if (versionMsg.startsWith(HiveReleaseVersion.CDH_2.getVersion())) { + version = HiveReleaseVersion.CDH_2; + } else if(versionMsg.startsWith(HiveReleaseVersion.CDH_1.getVersion())){ + version = HiveReleaseVersion.CDH_1; + } + } else { + //spark thrift server不支持 version()函数,所以使用默认的版本 + } + } + } catch (Exception ignore) { + } + + return version; + } + + enum HiveReleaseVersion{ + /** + * apache hive 1.x + */ + APACHE_1("apache", 
"1"), + + /** + * apache hive 2.x + */ + APACHE_2("apache", "2"), + + /** + * cdh hive 1.x + */ + CDH_1("cdh", "1"), + + /** + * cdh hive 2.x + */ + CDH_2("cdh", "2"); + + private String name; + + private String version; + + HiveReleaseVersion(String name, String version) { + this.name = name; + this.version = version; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getVersion() { + return version; + } + + public void setVersion(String version) { + this.version = version; + } + } } diff --git a/flinkx-hive/flinkx-hive-core/src/test/java/com/dtstack/flinkx/hive/test/DBUtilTest.java b/flinkx-hive/flinkx-hive-core/src/test/java/com/dtstack/flinkx/hive/test/DBUtilTest.java deleted file mode 100644 index 0b7c2e533d..0000000000 --- a/flinkx-hive/flinkx-hive-core/src/test/java/com/dtstack/flinkx/hive/test/DBUtilTest.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package com.dtstack.flinkx.hive.test; - -import com.dtstack.flinkx.hive.util.HiveDbUtil; - -import java.sql.Connection; -import java.sql.ResultSet; -import java.util.HashMap; -import java.util.Map; - -/** - * @author jiangbo - * @date 2019/8/29 - */ -public class DBUtilTest { - - public static void main(String[] args) throws Exception{ - Map sftpConf = new HashMap<>(); - sftpConf.put("host", "172.16.10.79"); - sftpConf.put("port", "22"); - sftpConf.put("username", "root"); - sftpConf.put("password", "abc123"); - - Map hiveConf = new HashMap<>(); - hiveConf.put("hive.server2.authentication.kerberos.principal", "hive/cdh02@HADOOP.COM"); - hiveConf.put("hive.server2.authentication.kerberos.keytab", "D:\\cdh_cluster\\hive.keytab"); - hiveConf.put("java.security.krb5.conf", "D:\\cdh_cluster\\krb5.conf"); - hiveConf.put("useLocalFile", "true"); - hiveConf.put("sftpConf", sftpConf); - hiveConf.put("remoteDir", "/home/sftp/keytab/jiangbo"); - - HiveDbUtil.ConnectionInfo connectionInfo = new HiveDbUtil.ConnectionInfo(); - connectionInfo.setJdbcUrl("jdbc:hive2://172.16.10.75:10000/default;principal=hive/cdh02@HADOOP.COM"); - connectionInfo.setUsername(""); - connectionInfo.setPassword(""); - connectionInfo.setHiveConf(hiveConf); - - Connection connection = HiveDbUtil.getConnection(connectionInfo); - ResultSet rs = connection.createStatement().executeQuery("show tables"); - while (rs.next()) { - System.out.println(rs.getObject(2)); - } - - connection.close(); - } -} diff --git a/flinkx-hive/flinkx-hive-writer/pom.xml b/flinkx-hive/flinkx-hive-writer/pom.xml index 25357e479f..e93421f32a 100644 --- a/flinkx-hive/flinkx-hive-writer/pom.xml +++ b/flinkx-hive/flinkx-hive-writer/pom.xml @@ -92,6 +92,16 @@ under the License. 
+ + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java index 217163c261..67731f0503 100644 --- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java +++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormat.java @@ -155,7 +155,7 @@ public FormatState getFormatState() { } private Map flushOutputFormat() { - Map formatStateMap = new HashMap<>(); + Map formatStateMap = new HashMap<>(outputFormats.size()); Iterator> entryIterator = outputFormats.entrySet().iterator(); while (entryIterator.hasNext()) { Map.Entry entry = entryIterator.next(); @@ -178,6 +178,7 @@ private Map flushOutputFormat() { @Override protected void writeMultipleRecordsInternal() throws Exception { + notSupportBatchWrite("HiveWriter"); } @Override @@ -187,6 +188,11 @@ public void writeRecord(Row row) throws IOException { Map event = null; if (row.getField(0) instanceof Map) { event = (Map) row.getField(0); + + if (null != event && event.containsKey("message")) { + event = MapUtils.getMap(event, "message"); + } + tablePath = PathConverterUtil.regaxByRules(event, tableBasePath, distributeTableMapping); fromLogData = true; } else { @@ -210,11 +216,11 @@ public void writeRecord(Row row) throws IOException { //row包含map嵌套的数据内容和channel, 而rowData是非常简单的纯数据,此处补上数据差额 if (fromLogData && bytesWriteCounter != null) { - bytesWriteCounter.add(row.toString().length() - rowData.toString().length()); + bytesWriteCounter.add((long)row.toString().length() - rowData.toString().length()); } } catch (Exception e) { // 写入产生的脏数据已经由hdfsOutputFormat处理了,这里不用再处理了,只打印日志 - if (numWriteCounter.getLocalValue() % 1000 == 0) { + if (numWriteCounter.getLocalValue() % LOG_PRINT_INTERNAL == 0) { LOG.warn("写入hdfs异常:", e); } } diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormatBuilder.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormatBuilder.java index f8c69886b1..529d8d743b 100644 --- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormatBuilder.java +++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveOutputFormatBuilder.java @@ -131,9 +131,12 @@ protected void checkFormat() { if (this.format.tableBasePath == null || this.format.tableBasePath.length() == 0) { throw new IllegalArgumentException("No tableBasePath supplied."); } + if (this.format.tableInfos.isEmpty()){ throw new IllegalArgumentException("No tableInfos supplied."); } + + notSupportBatchWrite("HiveWriter"); } } diff --git a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveWriter.java b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveWriter.java index 5168b850a6..a3483caaea 100644 --- a/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveWriter.java +++ b/flinkx-hive/flinkx-hive-writer/src/main/java/com/dtstack/flinkx/hive/writer/HiveWriter.java @@ -19,31 +19,34 @@ import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.config.WriterConfig; +import com.dtstack.flinkx.constants.ConstantValue; import com.dtstack.flinkx.hive.TableInfo; import 
com.dtstack.flinkx.hive.TimePartitionFormat; import com.dtstack.flinkx.hive.util.HiveUtil; import com.dtstack.flinkx.writer.BaseDataWriter; import com.dtstack.flinkx.writer.WriteMode; -import com.google.gson.Gson; +import com.google.gson.internal.LinkedTreeMap; +import com.google.gson.reflect.TypeToken; import org.apache.commons.collections.MapUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.datastream.DataStreamSink; import org.apache.flink.types.Row; import parquet.hadoop.ParquetWriter; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import static com.dtstack.flinkx.hdfs.HdfsConfigKeys.KEY_ROW_GROUP_SIZE; import static com.dtstack.flinkx.hive.HiveConfigKeys.*; +import static com.dtstack.flinkx.util.GsonUtil.GSON; /** * @author toutian */ public class HiveWriter extends BaseDataWriter { + private String readerName; + private String defaultFs; private String fileType; @@ -80,10 +83,9 @@ public class HiveWriter extends BaseDataWriter { private boolean autoCreateTable; - private Gson gson = new Gson(); - public HiveWriter(DataTransferConfig config) { super(config); + readerName = config.getJob().getContent().get(0).getReader().getName(); WriterConfig writerConfig = config.getJob().getContent().get(0).getWriter(); hadoopConfig = (Map) writerConfig.getParameter().getVal(KEY_HADOOP_CONFIG); defaultFs = writerConfig.getParameter().getStringVal(KEY_DEFAULT_FS); @@ -95,9 +97,9 @@ public HiveWriter(DataTransferConfig config) { partition = writerConfig.getParameter().getStringVal(KEY_PARTITION, "pt"); delimiter = writerConfig.getParameter().getStringVal(KEY_FIELD_DELIMITER, "\u0001"); charSet = writerConfig.getParameter().getStringVal(KEY_CHARSET_NAME); - maxFileSize = writerConfig.getParameter().getLongVal(KEY_MAX_FILE_SIZE, 1024 * 1024 * 1024); + maxFileSize = writerConfig.getParameter().getLongVal(KEY_MAX_FILE_SIZE, ConstantValue.STORE_SIZE_G); compress = writerConfig.getParameter().getStringVal(KEY_COMPRESS); - bufferSize = writerConfig.getParameter().getLongVal(KEY_BUFFER_SIZE, 128 * 1024 * 1024); + bufferSize = writerConfig.getParameter().getLongVal(KEY_BUFFER_SIZE, 128 * ConstantValue.STORE_SIZE_M); rowGroupSize = writerConfig.getParameter().getIntVal(KEY_ROW_GROUP_SIZE, ParquetWriter.DEFAULT_BLOCK_SIZE); mode = writerConfig.getParameter().getStringVal(KEY_WRITE_MODE, WriteMode.APPEND.name()); @@ -120,18 +122,18 @@ public HiveWriter(DataTransferConfig config) { } } + /** + * 分表的映射关系 + * distributeTableMapping 的数据结构为 + * tableInfos的数据结构为 + */ private void formatHiveDistributeInfo(String distributeTable) { - /** - * 分表的映射关系 - * distributeTableMapping 的数据结构为 - * tableInfos的数据结构为 - */ distributeTableMapping = new HashMap<>(32); if (StringUtils.isNotBlank(distributeTable)) { - Map distributeTableMap = gson.fromJson(distributeTable, Map.class); - for (Map.Entry entry : distributeTableMap.entrySet()) { + Map> distributeTableMap = GSON.fromJson(distributeTable, new TypeToken>>(){}.getType()); + for (Map.Entry> entry : distributeTableMap.entrySet()) { String groupName = entry.getKey(); - List groupTables = (List) entry.getValue(); + List groupTables = entry.getValue(); for (String tableName : groupTables) { distributeTableMapping.put(tableName, groupName); } @@ -142,10 +144,12 @@ private void formatHiveDistributeInfo(String distributeTable) { private void formatHiveTableInfo(String tablesColumn) { tableInfos = 
new HashMap<>(16); if (StringUtils.isNotEmpty(tablesColumn)) { - Map tableColumnMap = gson.fromJson(tablesColumn, Map.class); - for (Map.Entry entry : tableColumnMap.entrySet()) { + Map>> tableColumnMap = GSON.fromJson(tablesColumn, new TypeToken> >>(){}.getType()); + List> extraTableColumnList = getExtraTableColumn(); + for (Map.Entry>> entry : tableColumnMap.entrySet()) { String tableName = entry.getKey(); - List> tableColumns = (List>) entry.getValue(); + List> tableColumns = entry.getValue(); + tableColumns.addAll(extraTableColumnList); TableInfo tableInfo = new TableInfo(tableColumns.size()); tableInfo.addPartition(partition); tableInfo.setDelimiter(delimiter); @@ -162,6 +166,32 @@ private void formatHiveTableInfo(String tablesColumn) { } } + /** + * 增加hive表字段 + */ + @SuppressWarnings("unchecked") + private List> getExtraTableColumn(){ + if(StringUtils.equalsIgnoreCase(readerName, "oraclelogminerreader")){ + List> list = new ArrayList<>(2); + Map opTime = new LinkedTreeMap<>(); + opTime.put("type", "BIGINT"); + opTime.put("key", "opTime"); + opTime.put("comment", ""); + + Map scn = new LinkedTreeMap<>(); + scn.put("type", "BIGINT"); + scn.put("key", "scn"); + scn.put("comment", ""); + + list.add(opTime); + list.add(scn); + + return list; + }else{ + return Collections.EMPTY_LIST; + } + } + @Override public DataStreamSink writeData(DataStream dataSet) { HiveOutputFormatBuilder builder = new HiveOutputFormatBuilder(); diff --git a/flinkx-kafka/flinkx-kafka-reader/pom.xml b/flinkx-kafka/flinkx-kafka-reader/pom.xml index d6cd16c26f..d9a711a6ab 100644 --- a/flinkx-kafka/flinkx-kafka-reader/pom.xml +++ b/flinkx-kafka/flinkx-kafka-reader/pom.xml @@ -33,6 +33,16 @@ false + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-kafka/flinkx-kafka-reader/src/main/java/com/dtstack/flinkx/kafka/reader/KafkaClient.java b/flinkx-kafka/flinkx-kafka-reader/src/main/java/com/dtstack/flinkx/kafka/reader/KafkaClient.java index 61b483abd0..95016e4956 100644 --- a/flinkx-kafka/flinkx-kafka-reader/src/main/java/com/dtstack/flinkx/kafka/reader/KafkaClient.java +++ b/flinkx-kafka/flinkx-kafka-reader/src/main/java/com/dtstack/flinkx/kafka/reader/KafkaClient.java @@ -16,7 +16,7 @@ */ package com.dtstack.flinkx.kafka.reader; -import com.dtstack.flinkx.kafkabase.decoder.IDecode; +import com.dtstack.flinkx.decoder.IDecode; import com.dtstack.flinkx.kafkabase.reader.IClient; import com.dtstack.flinkx.kafkabase.reader.KafkaBaseInputFormat; import com.dtstack.flinkx.util.ExceptionUtil; diff --git a/flinkx-kafka/flinkx-kafka-reader/src/main/java/com/dtstack/flinkx/kafka/reader/KafkaInputFormat.java b/flinkx-kafka/flinkx-kafka-reader/src/main/java/com/dtstack/flinkx/kafka/reader/KafkaInputFormat.java index f209be623c..7f4c37f576 100644 --- a/flinkx-kafka/flinkx-kafka-reader/src/main/java/com/dtstack/flinkx/kafka/reader/KafkaInputFormat.java +++ b/flinkx-kafka/flinkx-kafka-reader/src/main/java/com/dtstack/flinkx/kafka/reader/KafkaInputFormat.java @@ -15,6 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + + package com.dtstack.flinkx.kafka.reader; import com.dtstack.flinkx.kafkabase.reader.KafkaBaseInputFormat; diff --git a/flinkx-kafka/flinkx-kafka-writer/pom.xml b/flinkx-kafka/flinkx-kafka-writer/pom.xml index b62d13da11..dad9361a4f 100644 --- a/flinkx-kafka/flinkx-kafka-writer/pom.xml +++ b/flinkx-kafka/flinkx-kafka-writer/pom.xml @@ -33,6 +33,16 @@ false + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-kafka09/flinkx-kafka09-reader/pom.xml b/flinkx-kafka09/flinkx-kafka09-reader/pom.xml index 980db83475..8c3e855d9c 100644 --- a/flinkx-kafka09/flinkx-kafka09-reader/pom.xml +++ b/flinkx-kafka09/flinkx-kafka09-reader/pom.xml @@ -33,6 +33,16 @@ false + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/reader/Kafka09Client.java b/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/reader/Kafka09Client.java index 8ad201b782..9633e6e4ad 100644 --- a/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/reader/Kafka09Client.java +++ b/flinkx-kafka09/flinkx-kafka09-reader/src/main/java/com/dtstack/flinkx/kafka09/reader/Kafka09Client.java @@ -16,7 +16,7 @@ */ package com.dtstack.flinkx.kafka09.reader; -import com.dtstack.flinkx.kafkabase.decoder.IDecode; +import com.dtstack.flinkx.decoder.IDecode; import com.dtstack.flinkx.kafkabase.reader.IClient; import com.dtstack.flinkx.kafkabase.reader.KafkaBaseInputFormat; import com.dtstack.flinkx.util.ExceptionUtil; diff --git a/flinkx-kafka09/flinkx-kafka09-writer/pom.xml b/flinkx-kafka09/flinkx-kafka09-writer/pom.xml index cea09d8368..42fb23d6b2 100644 --- a/flinkx-kafka09/flinkx-kafka09-writer/pom.xml +++ b/flinkx-kafka09/flinkx-kafka09-writer/pom.xml @@ -33,6 +33,16 @@ false + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-kafka10/flinkx-kafka10-reader/pom.xml b/flinkx-kafka10/flinkx-kafka10-reader/pom.xml index 8fbbdb883d..31467b287f 100644 --- a/flinkx-kafka10/flinkx-kafka10-reader/pom.xml +++ b/flinkx-kafka10/flinkx-kafka10-reader/pom.xml @@ -33,6 +33,16 @@ false + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-kafka10/flinkx-kafka10-reader/src/main/java/com/dtstack/flinkx/kafka10/reader/Kafka10Client.java b/flinkx-kafka10/flinkx-kafka10-reader/src/main/java/com/dtstack/flinkx/kafka10/reader/Kafka10Client.java index 69c5747cf0..f0d10d6f13 100644 --- a/flinkx-kafka10/flinkx-kafka10-reader/src/main/java/com/dtstack/flinkx/kafka10/reader/Kafka10Client.java +++ b/flinkx-kafka10/flinkx-kafka10-reader/src/main/java/com/dtstack/flinkx/kafka10/reader/Kafka10Client.java @@ -17,7 +17,7 @@ */ package com.dtstack.flinkx.kafka10.reader; -import com.dtstack.flinkx.kafkabase.decoder.IDecode; +import com.dtstack.flinkx.decoder.IDecode; import com.dtstack.flinkx.kafkabase.reader.IClient; import com.dtstack.flinkx.kafkabase.reader.KafkaBaseInputFormat; import com.dtstack.flinkx.util.ExceptionUtil; diff --git a/flinkx-kafka10/flinkx-kafka10-writer/pom.xml b/flinkx-kafka10/flinkx-kafka10-writer/pom.xml index f28e37fe50..4d86dbb7d0 100644 --- a/flinkx-kafka10/flinkx-kafka10-writer/pom.xml +++ b/flinkx-kafka10/flinkx-kafka10-writer/pom.xml @@ -39,6 +39,16 @@ false + + + 
com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-kafka11/flinkx-kafka11-reader/pom.xml b/flinkx-kafka11/flinkx-kafka11-reader/pom.xml index 4fa02eb7e5..ae62ee9515 100644 --- a/flinkx-kafka11/flinkx-kafka11-reader/pom.xml +++ b/flinkx-kafka11/flinkx-kafka11-reader/pom.xml @@ -33,6 +33,16 @@ false + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/reader/Kafka11Client.java b/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/reader/Kafka11Client.java index f27889ab25..2e3d2d569f 100644 --- a/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/reader/Kafka11Client.java +++ b/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/reader/Kafka11Client.java @@ -1,6 +1,6 @@ package com.dtstack.flinkx.kafka11.reader; -import com.dtstack.flinkx.kafkabase.decoder.IDecode; +import com.dtstack.flinkx.decoder.IDecode; import com.dtstack.flinkx.kafkabase.reader.IClient; import com.dtstack.flinkx.kafkabase.reader.KafkaBaseInputFormat; import com.dtstack.flinkx.util.ExceptionUtil; diff --git a/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/reader/Kafka11InputFormat.java b/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/reader/Kafka11InputFormat.java index 0424bb2801..5a7253860f 100644 --- a/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/reader/Kafka11InputFormat.java +++ b/flinkx-kafka11/flinkx-kafka11-reader/src/main/java/com/dtstack/flinkx/kafka11/reader/Kafka11InputFormat.java @@ -15,6 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + + package com.dtstack.flinkx.kafka11.reader; import com.dtstack.flinkx.kafkabase.reader.KafkaBaseInputFormat; diff --git a/flinkx-kafka11/flinkx-kafka11-writer/pom.xml b/flinkx-kafka11/flinkx-kafka11-writer/pom.xml index fe3808dd63..22d37d38b4 100644 --- a/flinkx-kafka11/flinkx-kafka11-writer/pom.xml +++ b/flinkx-kafka11/flinkx-kafka11-writer/pom.xml @@ -34,6 +34,16 @@ false + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseInputFormat.java b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseInputFormat.java index ec610798dc..38983693fb 100644 --- a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseInputFormat.java +++ b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseInputFormat.java @@ -18,10 +18,11 @@ package com.dtstack.flinkx.kafkabase.reader; import com.dtstack.flinkx.config.RestoreConfig; +import com.dtstack.flinkx.decoder.DecodeEnum; +import com.dtstack.flinkx.decoder.IDecode; +import com.dtstack.flinkx.decoder.JsonDecoder; +import com.dtstack.flinkx.decoder.PlainDecoder; import com.dtstack.flinkx.inputformat.BaseRichInputFormat; -import com.dtstack.flinkx.kafkabase.decoder.IDecode; -import com.dtstack.flinkx.kafkabase.decoder.JsonDecoder; -import com.dtstack.flinkx.kafkabase.decoder.PlainDecoder; import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.ExceptionUtil; import com.dtstack.flinkx.util.StringUtil; @@ -66,7 +67,7 @@ public void openInputFormat() throws IOException { super.openInputFormat(); queue = new SynchronousQueue<>(false); - if ("json".equals(codec)) { + if (DecodeEnum.JSON.getName().equalsIgnoreCase(codec)) { decode = new JsonDecoder(); } else { decode = new PlainDecoder(); diff --git a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseReader.java b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseReader.java index 7f3739089a..4e4f20cf02 100644 --- a/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseReader.java +++ b/flinkx-kb/flinkx-kb-reader/src/main/java/com/dtstack/flinkx/kafkabase/reader/KafkaBaseReader.java @@ -28,7 +28,11 @@ import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.*; +import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.KEY_BLANK_IGNORE; +import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.KEY_CODEC; +import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.KEY_CONSUMER_SETTINGS; +import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.KEY_GROUP_ID; +import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.KEY_TOPIC; /** * Date: 2019/11/21 diff --git a/flinkx-kb/flinkx-kb-writer/src/main/java/com/dtstack/flinkx/kafkabase/writer/KafkaBaseOutputFormat.java b/flinkx-kb/flinkx-kb-writer/src/main/java/com/dtstack/flinkx/kafkabase/writer/KafkaBaseOutputFormat.java index 482c1ca40a..6b338a13ad 100644 --- a/flinkx-kb/flinkx-kb-writer/src/main/java/com/dtstack/flinkx/kafkabase/writer/KafkaBaseOutputFormat.java +++ b/flinkx-kb/flinkx-kb-writer/src/main/java/com/dtstack/flinkx/kafkabase/writer/KafkaBaseOutputFormat.java @@ -18,8 +18,8 @@ package com.dtstack.flinkx.kafkabase.writer; import com.dtstack.flinkx.config.RestoreConfig; +import com.dtstack.flinkx.decoder.JsonDecoder; 
import com.dtstack.flinkx.exception.WriteRecordException; -import com.dtstack.flinkx.kafkabase.decoder.JsonDecoder; import com.dtstack.flinkx.outputformat.BaseRichOutputFormat; import com.dtstack.flinkx.util.ExceptionUtil; import org.apache.flink.configuration.Configuration; @@ -29,7 +29,11 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.*; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; /** * Date: 2019/11/21 @@ -100,7 +104,7 @@ public void closeInternal() throws IOException { @Override protected void writeMultipleRecordsInternal() throws Exception { - throw new UnsupportedOperationException(); + notSupportBatchWrite("KafkaWriter"); } @Override diff --git a/flinkx-kb/flinkx-kb-writer/src/main/java/com/dtstack/flinkx/kafkabase/writer/KafkaBaseWriter.java b/flinkx-kb/flinkx-kb-writer/src/main/java/com/dtstack/flinkx/kafkabase/writer/KafkaBaseWriter.java index d8823c01f0..3ae4c05c7f 100644 --- a/flinkx-kb/flinkx-kb-writer/src/main/java/com/dtstack/flinkx/kafkabase/writer/KafkaBaseWriter.java +++ b/flinkx-kb/flinkx-kb-writer/src/main/java/com/dtstack/flinkx/kafkabase/writer/KafkaBaseWriter.java @@ -27,7 +27,10 @@ import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.*; +import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.KEY_PRODUCER_SETTINGS; +import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.KEY_TABLE_FIELDS; +import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.KEY_TIMEZONE; +import static com.dtstack.flinkx.kafkabase.KafkaConfigKeys.KEY_TOPIC; /** * Date: 2019/11/21 diff --git a/flinkx-kudu/flinkx-kudu-core/pom.xml b/flinkx-kudu/flinkx-kudu-core/pom.xml index abafab1604..e419766ddc 100644 --- a/flinkx-kudu/flinkx-kudu-core/pom.xml +++ b/flinkx-kudu/flinkx-kudu-core/pom.xml @@ -24,11 +24,5 @@ 1.6 provided - - junit - junit - 4.12 - test - \ No newline at end of file diff --git a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java index 928580a166..4eb6ec865a 100644 --- a/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java +++ b/flinkx-kudu/flinkx-kudu-core/src/main/java/com/dtstack/flinkx/kudu/core/KuduUtil.java @@ -26,13 +26,22 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.kudu.ColumnSchema; import org.apache.kudu.Type; -import org.apache.kudu.client.*; +import org.apache.kudu.client.AsyncKuduClient; +import org.apache.kudu.client.AsyncKuduScanner; +import org.apache.kudu.client.KuduClient; +import org.apache.kudu.client.KuduPredicate; +import org.apache.kudu.client.KuduScanToken; +import org.apache.kudu.client.KuduTable; import java.io.IOException; import java.math.BigDecimal; import java.security.PrivilegedExceptionAction; import java.sql.Timestamp; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; diff --git a/flinkx-kudu/flinkx-kudu-reader/pom.xml b/flinkx-kudu/flinkx-kudu-reader/pom.xml index c8a99fea22..f9a1580d71 100644 --- a/flinkx-kudu/flinkx-kudu-reader/pom.xml +++ b/flinkx-kudu/flinkx-kudu-reader/pom.xml @@ -49,8 +49,12 @@ shade.kudureader.io.netty - com.google - shade.kudureader.com.google + com.google.common + 
shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java index f0ddce2857..6be5434a52 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormat.java @@ -26,7 +26,11 @@ import org.apache.flink.core.io.InputSplit; import org.apache.flink.types.Row; import org.apache.kudu.Type; -import org.apache.kudu.client.*; +import org.apache.kudu.client.KuduClient; +import org.apache.kudu.client.KuduScanToken; +import org.apache.kudu.client.KuduScanner; +import org.apache.kudu.client.RowResult; +import org.apache.kudu.client.RowResultIterator; import java.io.IOException; import java.util.List; diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java index 48d0e43fc5..4959731c76 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduInputFormatBuilder.java @@ -19,6 +19,7 @@ package com.dtstack.flinkx.kudu.reader; +import com.dtstack.flinkx.constants.ConstantValue; import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder; import com.dtstack.flinkx.kudu.core.KuduConfig; import com.dtstack.flinkx.reader.MetaColumn; @@ -50,5 +51,9 @@ protected void checkFormat() { if (format.columns == null || format.columns.size() == 0){ throw new IllegalArgumentException("columns can not be empty"); } + + if (format.kuduConfig.getBatchSizeBytes() > ConstantValue.STORE_SIZE_G) { + throw new IllegalArgumentException("批量读取字节数必须小于[1G]"); + } } } diff --git a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java index 7b09c9048d..604b920a6e 100644 --- a/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java +++ b/flinkx-kudu/flinkx-kudu-reader/src/main/java/com/dtstack/flinkx/kudu/reader/KuduReader.java @@ -32,7 +32,19 @@ import java.util.List; -import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.*; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_ADMIN_OPERATION_TIMEOUT; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_AUTHENTICATION; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_BATCH_SIZE_BYTES; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_BOSS_COUNT; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_FILTER; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_KEYTABFILE; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_MASTER_ADDRESSES; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_OPERATION_TIMEOUT; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_PRINCIPAL; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_QUERY_TIMEOUT; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_READ_MODE; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_TABLE; +import static 
com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_WORKER_COUNT; /** * @author jiangbo diff --git a/flinkx-kudu/flinkx-kudu-writer/pom.xml b/flinkx-kudu/flinkx-kudu-writer/pom.xml index 742fae63ab..256505e5ab 100644 --- a/flinkx-kudu/flinkx-kudu-writer/pom.xml +++ b/flinkx-kudu/flinkx-kudu-writer/pom.xml @@ -49,8 +49,12 @@ shade.kuduwriter.io.netty - com.google - shade.kuduwriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java index df7a521186..69f346dcba 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduOutputFormat.java @@ -28,7 +28,12 @@ import com.dtstack.flinkx.util.ExceptionUtil; import org.apache.commons.lang3.StringUtils; import org.apache.flink.types.Row; -import org.apache.kudu.client.*; +import org.apache.kudu.client.KuduClient; +import org.apache.kudu.client.KuduException; +import org.apache.kudu.client.KuduSession; +import org.apache.kudu.client.KuduTable; +import org.apache.kudu.client.Operation; +import org.apache.kudu.client.SessionConfiguration; import java.io.IOException; import java.util.List; diff --git a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java index 265e5610c2..2cb5532262 100644 --- a/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java +++ b/flinkx-kudu/flinkx-kudu-writer/src/main/java/com/dtstack/flinkx/kudu/writer/KuduWriter.java @@ -32,7 +32,16 @@ import java.util.List; -import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.*; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_ADMIN_OPERATION_TIMEOUT; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_AUTHENTICATION; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_BOSS_COUNT; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_FLUSH_MODE; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_KEYTABFILE; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_MASTER_ADDRESSES; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_OPERATION_TIMEOUT; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_PRINCIPAL; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_TABLE; +import static com.dtstack.flinkx.kudu.core.KuduConfigKeys.KEY_WORKER_COUNT; /** * @author jiangbo diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClassLoaderType.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClassLoaderType.java index 540db38056..d287c44a70 100644 --- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClassLoaderType.java +++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClassLoaderType.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.launcher; /** diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClusterClientFactory.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClusterClientFactory.java index 1b823c77eb..c5318c00eb 100644 --- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClusterClientFactory.java +++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/ClusterClientFactory.java @@ -40,8 +40,6 @@ import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.util.ConverterUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.File; import java.net.InetSocketAddress; @@ -97,7 +95,7 @@ public static ClusterClient createYarnClient(Options launcherOptions) { ApplicationId applicationId; if (StringUtils.isEmpty(launcherOptions.getAppId())) { - applicationId = getAppIdFromYarn(yarnClient); + applicationId = getAppIdFromYarn(yarnClient, launcherOptions); if(applicationId != null && StringUtils.isEmpty(applicationId.toString())) { throw new RuntimeException("No flink session found on yarn cluster."); } @@ -156,7 +154,7 @@ private static AbstractYarnClusterDescriptor getClusterDescriptor(Options launch return yarnClusterDescriptor; } - private static ApplicationId getAppIdFromYarn(YarnClient yarnClient) throws Exception{ + private static ApplicationId getAppIdFromYarn(YarnClient yarnClient, Options launcherOptions) throws Exception{ Set set = new HashSet<>(); set.add("Apache Flink"); EnumSet enumSet = EnumSet.noneOf(YarnApplicationState.class); @@ -175,6 +173,10 @@ private static ApplicationId getAppIdFromYarn(YarnClient yarnClient) throws Exce continue; } + if(!report.getQueue().equals(launcherOptions.getQueue())) { + continue; + } + int thisMemory = report.getApplicationResourceUsageReport().getNeededResources().getMemory(); int thisCores = report.getApplicationResourceUsageReport().getNeededResources().getVirtualCores(); diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java index 91a4bd1409..c5c19fc9f4 100644 --- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java +++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/Launcher.java @@ -25,6 +25,7 @@ import com.dtstack.flinkx.launcher.perjob.PerJobSubmitter; import com.dtstack.flinkx.options.OptionParser; import com.dtstack.flinkx.options.Options; +import com.dtstack.flinkx.util.JsonModifyUtil; import com.dtstack.flinkx.util.SysUtil; import org.apache.commons.lang.StringUtils; import org.apache.flink.client.program.ClusterClient; @@ -41,6 +42,7 @@ import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; /** @@ -94,8 +96,25 @@ public static void main(String[] args) throws Exception { String mode 
= launcherOptions.getMode(); List argList = optionParser.getProgramExeArgList(); + + // 将argList转化为HashMap,方便通过参数名称来获取参数值 + HashMap temp = new HashMap<>(16); + for (int i = 0; i < argList.size(); i += 2) { + temp.put(argList.get(i), argList.get(i + 1)); + } + // 对json中的值进行修改 + HashMap parameter = JsonModifyUtil.CommandTransform(temp.get("-p")); + temp.put("-job", JsonModifyUtil.JsonValueReplace(temp.get("-job"), parameter)); + + // 清空list,填充修改后的参数值 + argList.clear(); + for (int i = 0; i < temp.size(); i++) { + argList.add(temp.keySet().toArray()[i].toString()); + argList.add(temp.values().toArray()[i].toString()); + } + if(mode.equals(ClusterMode.local.name())) { - String[] localArgs = argList.toArray(new String[argList.size()]); + String[] localArgs = argList.toArray(new String[0]); com.dtstack.flinkx.Main.main(localArgs); } else { String pluginRoot = launcherOptions.getPluginRoot(); @@ -237,9 +256,8 @@ public boolean accept(File dir, String name) { } private static String readJob(String job) { - try { - File file = new File(job); - FileInputStream in = new FileInputStream(file); + File file = new File(job); + try (FileInputStream in = new FileInputStream(file)) { byte[] fileContent = new byte[(int) file.length()]; in.read(fileContent); in.close(); diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/YarnConfLoader.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/YarnConfLoader.java index 6707c8f138..b88eb078b8 100644 --- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/YarnConfLoader.java +++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/YarnConfLoader.java @@ -18,6 +18,7 @@ package com.dtstack.flinkx.launcher; +import com.dtstack.flinkx.constants.ConstantValue; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -42,7 +43,7 @@ public static YarnConfiguration getYarnConf(String yarnConfDir) { if(dir.exists() && dir.isDirectory()) { File[] xmlFileList = new File(yarnConfDir).listFiles((dir1, name) -> { - if(name.endsWith(".xml")){ + if(name.endsWith(ConstantValue.FILE_SUFFIX_XML)){ return true; } return false; diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/FlinkPerJobResourceUtil.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/FlinkPerJobResourceUtil.java index 1d64181e81..508098067d 100644 --- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/FlinkPerJobResourceUtil.java +++ b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/FlinkPerJobResourceUtil.java @@ -52,10 +52,10 @@ public static ClusterSpecification createClusterSpecification(Properties conProp int slotsPerTaskManager = 1; if(conProp != null){ - if(conProp.contains(JOBMANAGER_MEMORY_MB)){ + if(conProp.containsKey(JOBMANAGER_MEMORY_MB)){ jobmanagerMemoryMb = Math.max(MIN_JM_MEMORY, ValueUtil.getInt(conProp.getProperty(JOBMANAGER_MEMORY_MB))); } - if(conProp.contains(TASKMANAGER_MEMORY_MB)){ + if(conProp.containsKey(TASKMANAGER_MEMORY_MB)){ taskmanagerMemoryMb = Math.max(MIN_JM_MEMORY, ValueUtil.getInt(conProp.getProperty(TASKMANAGER_MEMORY_MB))); } diff --git a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/PerJobSubmitter.java b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/PerJobSubmitter.java index d9025e2ed9..81c6387395 100644 --- a/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/PerJobSubmitter.java +++ 
b/flinkx-launcher/src/main/java/com/dtstack/flinkx/launcher/perjob/PerJobSubmitter.java @@ -21,7 +21,7 @@ import com.dtstack.flinkx.launcher.ClassLoaderType; import com.dtstack.flinkx.launcher.YarnConfLoader; import com.dtstack.flinkx.options.Options; -import com.dtstack.flinkx.util.MapUtil; +import com.dtstack.flinkx.util.GsonUtil; import org.apache.commons.lang.StringUtils; import org.apache.flink.client.deployment.ClusterSpecification; import org.apache.flink.client.program.ClusterClient; @@ -64,7 +64,7 @@ public static String submit(Options options, File jarFile, String[] programArgs) Configuration flinkConfig = StringUtils.isEmpty(options.getFlinkconf()) ? new Configuration() : GlobalConfiguration.loadConfiguration(options.getFlinkconf()); flinkConfig.setString("classloader.resolve-order", "child-first"); - Properties conProp = MapUtil.jsonStrToObject(options.getConfProp(), Properties.class); + Properties conProp = GsonUtil.GSON.fromJson(options.getConfProp(), Properties.class); ClusterSpecification clusterSpecification = FlinkPerJobResourceUtil.createClusterSpecification(conProp); clusterSpecification.setCreateProgramDelay(true); clusterSpecification.setConfiguration(flinkConfig); diff --git a/flinkx-launcher/src/test/java/bigdata/FlinkConfigTest.java b/flinkx-launcher/src/test/java/bigdata/FlinkConfigTest.java deleted file mode 100644 index b974511769..0000000000 --- a/flinkx-launcher/src/test/java/bigdata/FlinkConfigTest.java +++ /dev/null @@ -1,19 +0,0 @@ -package bigdata; - -import org.apache.flink.configuration.Configuration; -import org.apache.flink.configuration.GlobalConfiguration; -import org.apache.flink.configuration.JobManagerOptions; - -import java.io.File; - -/** - * Created by softfly on 18/4/24. - */ -public class FlinkConfigTest { - public static void main(String[] args) { - //Configuration config = GlobalConfiguration.loadConfiguration("/hadoop/flink-1.4.0/conf"); - //System.out.println(config.getString(JobManagerOptions.ADDRESS)); - String msg = "xxx" + File.separator; - System.out.println(msg); - } -} diff --git a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbClientUtil.java b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbClientUtil.java index 26438d51fe..b5d78ffecc 100644 --- a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbClientUtil.java +++ b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbClientUtil.java @@ -19,7 +19,13 @@ package com.dtstack.flinkx.mongodb; -import com.mongodb.*; +import com.mongodb.AuthenticationMechanism; +import com.mongodb.MongoClient; +import com.mongodb.MongoClientOptions; +import com.mongodb.MongoClientURI; +import com.mongodb.MongoCredential; +import com.mongodb.ServerAddress; +import com.mongodb.WriteConcern; import com.mongodb.client.MongoCursor; import org.apache.commons.lang.StringUtils; import org.bson.Document; diff --git a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java index 425dc21aa4..c419a2672e 100644 --- a/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java +++ b/flinkx-mongodb/flinkx-mongodb-core/src/main/java/com/dtstack/flinkx/mongodb/MongodbUtil.java @@ -29,9 +29,11 @@ import java.math.BigDecimal; import java.sql.Timestamp; import java.text.SimpleDateFormat; -import java.util.*; +import 
java.util.Arrays; +import java.util.Date; +import java.util.List; -import static com.dtstack.flinkx.enums.ColumnType.*; +import static com.dtstack.flinkx.enums.ColumnType.getType; /** * Utilities for mongodb database connection and data format conversion diff --git a/flinkx-mongodb/flinkx-mongodb-reader/pom.xml b/flinkx-mongodb/flinkx-mongodb-reader/pom.xml index 09814f6c3f..a9caa60251 100644 --- a/flinkx-mongodb/flinkx-mongodb-reader/pom.xml +++ b/flinkx-mongodb/flinkx-mongodb-reader/pom.xml @@ -56,8 +56,12 @@ shade.mongodbreader.io.netty - com.google - shade.mongodbreader.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormat.java b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormat.java index 3f3ec2d863..ac8e65d705 100644 --- a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormat.java +++ b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormat.java @@ -23,6 +23,7 @@ import com.dtstack.flinkx.mongodb.MongodbClientUtil; import com.dtstack.flinkx.mongodb.MongodbConfig; import com.dtstack.flinkx.reader.MetaColumn; +import com.dtstack.flinkx.util.ExceptionUtil; import com.dtstack.flinkx.util.StringUtil; import com.mongodb.BasicDBObject; import com.mongodb.MongoClient; @@ -37,7 +38,8 @@ import org.bson.conversions.Bson; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.List; /** * Read plugin for reading static data @@ -149,6 +151,9 @@ public InputSplit[] createInputSplitsInternal(int minNumSplits) throws IOExcepti if(size * minNumSplits < docNum){ splits.add(new MongodbInputSplit((int)(size * minNumSplits), (int)(docNum - size * minNumSplits))); } + } catch (Exception e){ + LOG.error("error to create inputSplits, e = {}", ExceptionUtil.getErrorMessage(e)); + throw e; } finally { MongodbClientUtil.close(client, null); } diff --git a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormatBuilder.java b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormatBuilder.java index 918fb6b8f3..e1661838c2 100644 --- a/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormatBuilder.java +++ b/flinkx-mongodb/flinkx-mongodb-reader/src/main/java/com/dtstack/flinkx/mongodb/reader/MongodbInputFormatBuilder.java @@ -18,6 +18,7 @@ package com.dtstack.flinkx.mongodb.reader; +import com.dtstack.flinkx.constants.ConstantValue; import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder; import com.dtstack.flinkx.mongodb.MongodbConfig; import com.dtstack.flinkx.reader.MetaColumn; @@ -55,5 +56,9 @@ protected void checkFormat() { if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){ throw new UnsupportedOperationException("This plugin not support restore from failed state"); } + + if (format.mongodbConfig.getFetchSize() > ConstantValue.MAX_BATCH_SIZE) { + throw new IllegalArgumentException("批量读取条数必须小于[200000]条"); + } } } diff --git a/flinkx-mongodb/flinkx-mongodb-writer/pom.xml b/flinkx-mongodb/flinkx-mongodb-writer/pom.xml index 1d81ef36ef..a411920d71 100644 --- a/flinkx-mongodb/flinkx-mongodb-writer/pom.xml +++ b/flinkx-mongodb/flinkx-mongodb-writer/pom.xml @@ 
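Note on the MongoDB reader hunks above: `createInputSplitsInternal` now logs and rethrows split-creation failures via `ExceptionUtil`, and the builder rejects a `fetchSize` above `ConstantValue.MAX_BATCH_SIZE` at build time (the error text, "批量读取条数必须小于[200000]条", i.e. "batch read count must be below 200000", suggests the limit is 200000). A minimal sketch of that pre-flight guard, assuming the 200000 figure:

```java
/** Illustrative builder-style validation mirroring the new fetchSize guard. */
public final class FetchSizeCheck {

    // Assumed from the error message in the patch; the real value lives in ConstantValue.
    public static final int MAX_BATCH_SIZE = 200_000;

    public static void checkFetchSize(int fetchSize) {
        if (fetchSize > MAX_BATCH_SIZE) {
            throw new IllegalArgumentException(
                    "fetchSize must not exceed " + MAX_BATCH_SIZE + ", but was " + fetchSize);
        }
    }

    public static void main(String[] args) {
        checkFetchSize(10_000);   // ok
        checkFetchSize(500_000);  // throws IllegalArgumentException
    }
}
```

The same guard appears again for the generic JDBC reader later in this patch.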
-56,8 +56,12 @@ shade.mongodbwriter.io.netty - com.google - shade.mongodbwriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-mysql/flinkx-mysql-dreader/pom.xml b/flinkx-mysql/flinkx-mysql-dreader/pom.xml index 047e273350..21e5e39d4f 100644 --- a/flinkx-mysql/flinkx-mysql-dreader/pom.xml +++ b/flinkx-mysql/flinkx-mysql-dreader/pom.xml @@ -62,8 +62,12 @@ shade.mysqldreader.io.netty - com.google - shade.mysqldreader.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java index 9eae9b3dee..60181af17c 100644 --- a/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java +++ b/flinkx-mysql/flinkx-mysql-dreader/src/main/java/com/dtstack/flinkx/mysqld/reader/MysqldReader.java @@ -29,7 +29,6 @@ import java.util.ArrayList; import java.util.Collections; -import java.util.List; /** * @author toutian @@ -47,8 +46,8 @@ protected DistributedJdbcInputFormatBuilder getBuilder(){ } @Override - protected List buildConnections(){ - List sourceList = new ArrayList<>(connectionConfigs.size()); + protected ArrayList buildConnections(){ + ArrayList sourceList = new ArrayList<>(connectionConfigs.size()); for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) { String curUsername = (connectionConfig.getUsername() == null || connectionConfig.getUsername().length() == 0) ? username : connectionConfig.getUsername(); diff --git a/flinkx-mysql/flinkx-mysql-reader/pom.xml b/flinkx-mysql/flinkx-mysql-reader/pom.xml index 8856cdf063..a52536b7ec 100644 --- a/flinkx-mysql/flinkx-mysql-reader/pom.xml +++ b/flinkx-mysql/flinkx-mysql-reader/pom.xml @@ -62,9 +62,13 @@ shade.mysqlreader.io.netty - com.google - shade.mysqlreader.com.google - + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + diff --git a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java index 9836df273a..148d4bbb95 100644 --- a/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java +++ b/flinkx-mysql/flinkx-mysql-reader/src/main/java/com/dtstack/flinkx/mysql/format/MysqlInputFormat.java @@ -18,11 +18,10 @@ package com.dtstack.flinkx.mysql.format; import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import com.dtstack.flinkx.rdb.inputformat.JdbcInputSplit; import com.dtstack.flinkx.rdb.util.DbUtil; import com.dtstack.flinkx.util.ClassUtil; import com.dtstack.flinkx.util.DateUtil; -import com.dtstack.flinkx.util.ExceptionUtil; -import com.google.gson.Gson; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.flink.core.io.InputSplit; @@ -51,7 +50,9 @@ public void openInternal(InputSplit inputSplit) throws IOException { String startLocation = incrementConfig.getStartLocation(); if (incrementConfig.isPolling()) { - endLocationAccumulator.add(Long.parseLong(startLocation)); + if (StringUtils.isNotEmpty(startLocation)) { + endLocationAccumulator.add(Long.parseLong(startLocation)); + } 
isTimestamp = "timestamp".equalsIgnoreCase(incrementConfig.getColumnType()); } else if ((incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc())) { getMaxValue(inputSplit); @@ -64,8 +65,10 @@ public void openInternal(InputSplit inputSplit) throws IOException { } querySql = buildQuerySql(inputSplit); - //MySQL流式读取 - fetchSize = Integer.MIN_VALUE; + JdbcInputSplit jdbcInputSplit = (JdbcInputSplit) inputSplit; + if (null != jdbcInputSplit.getStartLocation()) { + startLocation = jdbcInputSplit.getStartLocation(); + } executeQuery(startLocation); columnCount = resultSet.getMetaData().getColumnCount(); boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("("); @@ -112,7 +115,6 @@ public Row nextRecordInternal(Row row) throws IOException { } return super.nextRecordInternal(row); }catch (Exception e) { - LOG.error("error to get next record, row = {}, descColumnTypeList = {}, e = {}", row, new Gson().toJson(descColumnTypeList), ExceptionUtil.getErrorMessage(e)); throw new IOException("Couldn't read data - " + e.getMessage(), e); } } diff --git a/flinkx-mysql/flinkx-mysql-reader/src/test/java/bigdata/TestMysqlPk.java b/flinkx-mysql/flinkx-mysql-reader/src/test/java/bigdata/TestMysqlPk.java deleted file mode 100644 index 50dff8f343..0000000000 --- a/flinkx-mysql/flinkx-mysql-reader/src/test/java/bigdata/TestMysqlPk.java +++ /dev/null @@ -1,22 +0,0 @@ -package bigdata; - -import com.dtstack.flinkx.mysql.MySqlDatabaseMeta; -import com.dtstack.flinkx.rdb.util.DbUtil; - -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.util.Map; - - -public class TestMysqlPk { - public static void main(String[] args) throws ClassNotFoundException, SQLException { - MySqlDatabaseMeta databaseMeta = new MySqlDatabaseMeta(); - Class.forName(databaseMeta.getDriverClass()); - Connection conn = DriverManager.getConnection("jdbc:mysql://172.16.8.104:3306/test?useCursorFetch=true", "dtstack", "abc123"); - //List list = databaseMeta.listUniqueKeys("sb250", conn); - //System.out.println(list); - Map map = DbUtil.getPrimaryOrUniqueKeys("sb252", conn); - System.out.println(map); - } -} diff --git a/flinkx-mysql/flinkx-mysql-writer/pom.xml b/flinkx-mysql/flinkx-mysql-writer/pom.xml index 82e3aa55c4..94e5d95a50 100644 --- a/flinkx-mysql/flinkx-mysql-writer/pom.xml +++ b/flinkx-mysql/flinkx-mysql-writer/pom.xml @@ -62,8 +62,12 @@ shade.mysqlwriter.io.netty - com.google - shade.mysqlwriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-odps/flinkx-odps-core/src/main/java/com/dtstack/flinkx/odps/OdpsUtil.java b/flinkx-odps/flinkx-odps-core/src/main/java/com/dtstack/flinkx/odps/OdpsUtil.java index dd54b0ba18..1cb857e3ec 100644 --- a/flinkx-odps/flinkx-odps-core/src/main/java/com/dtstack/flinkx/odps/OdpsUtil.java +++ b/flinkx-odps/flinkx-odps-core/src/main/java/com/dtstack/flinkx/odps/OdpsUtil.java @@ -56,7 +56,7 @@ public class OdpsUtil { public static int MAX_RETRY_TIME = 3; - public static final long BUFFER_SIZE_DEFAULT = 64 * 1024 * 1024; + public static final long BUFFER_SIZE_DEFAULT = 64 * 1024 * 1024L; public static Odps initOdps(Map odpsConfig) { String odpsServer = odpsConfig.get(OdpsConfigKeys.KEY_ODPS_SERVER); diff --git a/flinkx-odps/flinkx-odps-reader/pom.xml b/flinkx-odps/flinkx-odps-reader/pom.xml index 866fd622fe..8007e473c0 100644 --- a/flinkx-odps/flinkx-odps-reader/pom.xml +++ 
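Note on the MysqlInputFormat hunk above: in polling mode the end-location accumulator is only seeded when a start location was actually configured (parsing an empty value used to fail), the hard-coded `fetchSize = Integer.MIN_VALUE` streaming hint is dropped (presumably because cursor fetch is now configured through the JDBC URL, see the DbUtil change later in this patch), and the effective start location is taken from the `JdbcInputSplit` when it carries one. A small sketch of the guard, with the fallback behaviour being an assumption of this example:

```java
import org.apache.commons.lang3.StringUtils;

/** Illustrative guard mirroring the polling-mode fix. */
public final class StartLocationGuard {

    public static long seedEndLocation(String startLocation) {
        // Before the fix, Long.parseLong(...) threw for jobs that start polling
        // without an explicit startLocation.
        if (StringUtils.isNotEmpty(startLocation)) {
            return Long.parseLong(startLocation);
        }
        return 0L; // nothing to seed; the accumulator keeps its initial value
    }

    public static void main(String[] args) {
        System.out.println(seedEndLocation("1589421600000")); // 1589421600000
        System.out.println(seedEndLocation(""));              // 0
    }
}
```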
b/flinkx-odps/flinkx-odps-reader/pom.xml @@ -50,6 +50,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsInputFormatBuilder.java b/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsInputFormatBuilder.java index 1bd4db402b..eba6fc7175 100644 --- a/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsInputFormatBuilder.java +++ b/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsInputFormatBuilder.java @@ -19,11 +19,11 @@ package com.dtstack.flinkx.odps.reader; import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder; +import com.dtstack.flinkx.odps.OdpsConfigKeys; import com.dtstack.flinkx.reader.MetaColumn; import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.odps.OdpsConfigKeys.*; /** * The Builder of OdpsInputFormat @@ -41,7 +41,7 @@ public OdpsInputFormatBuilder() { public void setOdpsConfig(Map odpsConfig) { format.odpsConfig = odpsConfig; - format.projectName = odpsConfig.get(KEY_PROJECT); + format.projectName = odpsConfig.get(OdpsConfigKeys.KEY_PROJECT); } public void setTableName(String tableName) { diff --git a/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsReader.java b/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsReader.java index 4a076ce122..f45ee2d76a 100644 --- a/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsReader.java +++ b/flinkx-odps/flinkx-odps-reader/src/main/java/com/dtstack/flinkx/odps/reader/OdpsReader.java @@ -29,7 +29,9 @@ import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.odps.OdpsConfigKeys.*; +import static com.dtstack.flinkx.odps.OdpsConfigKeys.KEY_ODPS_CONFIG; +import static com.dtstack.flinkx.odps.OdpsConfigKeys.KEY_PARTITION; +import static com.dtstack.flinkx.odps.OdpsConfigKeys.KEY_TABLE; /** * The reader plugin of Odps diff --git a/flinkx-odps/flinkx-odps-writer/pom.xml b/flinkx-odps/flinkx-odps-writer/pom.xml index b268bf43b1..f1622adc0b 100644 --- a/flinkx-odps/flinkx-odps-writer/pom.xml +++ b/flinkx-odps/flinkx-odps-writer/pom.xml @@ -55,6 +55,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormat.java b/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormat.java index 7e6510104d..cdf8b9c6e8 100644 --- a/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormat.java +++ b/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormat.java @@ -116,7 +116,7 @@ public void writeSingleRecordInternal(Row row) throws WriteRecordException{ @Override protected void writeMultipleRecordsInternal() throws Exception { - throw new UnsupportedOperationException(); + notSupportBatchWrite("OdpsWriter"); } private Record row2record(Row row, String[] columnTypes) throws WriteRecordException { diff --git a/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormatBuilder.java b/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormatBuilder.java index 95c7dad246..ee3d21d10a 100644 --- 
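Note on the OdpsUtil change earlier in this patch (`BUFFER_SIZE_DEFAULT = 64 * 1024 * 1024L`): 64 MB still fits in an `int`, so the `L` suffix is defensive rather than a live bug fix; it keeps the multiplication in 64-bit arithmetic so the constant cannot silently overflow if the factors are ever raised. A short demonstration of the difference, not FlinkX code:

```java
/** Why the 'L' suffix matters for constant arithmetic. */
public final class ConstantArithmetic {
    public static void main(String[] args) {
        // 64 MB fits in an int either way, so this particular constant is safe...
        long ok = 64 * 1024 * 1024;                 // 67108864
        // ...but without the suffix the multiplication is done in int and wraps
        // silently once the factors grow:
        long overflowed = 4 * 1024 * 1024 * 1024;   // 0, not 4294967296
        long correct    = 4 * 1024 * 1024 * 1024L;  // 4294967296
        System.out.println(ok + " " + overflowed + " " + correct);
    }
}
```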
a/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormatBuilder.java +++ b/flinkx-odps/flinkx-odps-writer/src/main/java/com/dtstack/flinkx/odps/writer/OdpsOutputFormatBuilder.java @@ -76,5 +76,7 @@ protected void checkFormat() { if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){ throw new UnsupportedOperationException("This plugin not support restore from failed state"); } + + notSupportBatchWrite("OdpsWriter"); } } diff --git a/flinkx-oracle/flinkx-oracle-core/src/main/java/com/dtstack/flinkx/oracle/OracleDatabaseMeta.java b/flinkx-oracle/flinkx-oracle-core/src/main/java/com/dtstack/flinkx/oracle/OracleDatabaseMeta.java index 5033bf64af..bc4b88f3ba 100644 --- a/flinkx-oracle/flinkx-oracle-core/src/main/java/com/dtstack/flinkx/oracle/OracleDatabaseMeta.java +++ b/flinkx-oracle/flinkx-oracle-core/src/main/java/com/dtstack/flinkx/oracle/OracleDatabaseMeta.java @@ -36,7 +36,7 @@ public class OracleDatabaseMeta extends BaseDatabaseMeta { public String quoteTable(String table) { table = table.replace("\"",""); String[] part = table.split("\\."); - if(part.length == 2) { + if(part.length == DB_TABLE_PART_SIZE) { table = getStartQuote() + part[0] + getEndQuote() + "." + getStartQuote() + part[1] + getEndQuote(); } else { table = getStartQuote() + table + getEndQuote(); diff --git a/flinkx-oracle/flinkx-oracle-reader/pom.xml b/flinkx-oracle/flinkx-oracle-reader/pom.xml index 52ff1880be..225d8e2554 100644 --- a/flinkx-oracle/flinkx-oracle-reader/pom.xml +++ b/flinkx-oracle/flinkx-oracle-reader/pom.xml @@ -62,8 +62,12 @@ shade.oracleReader.io.netty - com.google.guava - shade.oracleReader.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-oracle/flinkx-oracle-reader/src/test/java/com/dtstack/flinkx/oracle/reader/test/OracleLocalTest.java b/flinkx-oracle/flinkx-oracle-reader/src/test/java/com/dtstack/flinkx/oracle/reader/test/OracleLocalTest.java deleted file mode 100644 index d4227ff774..0000000000 --- a/flinkx-oracle/flinkx-oracle-reader/src/test/java/com/dtstack/flinkx/oracle/reader/test/OracleLocalTest.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.dtstack.flinkx.oracle.reader.test; - -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; - -/** - * Created by softfly on 18/2/1. 
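Note on the OracleDatabaseMeta hunk above: the magic number 2 in `quoteTable` becomes `DB_TABLE_PART_SIZE`, declared on `BaseDatabaseMeta` later in this patch; the method quotes a schema-qualified name part by part. An illustrative version of that logic, with the quote character passed in for the sake of the example:

```java
/** Illustrative schema.table quoting as done in OracleDatabaseMeta.quoteTable. */
public final class TableQuoting {

    public static final int DB_TABLE_PART_SIZE = 2; // schema + table

    public static String quoteTable(String table, String quote) {
        table = table.replace("\"", "");
        String[] part = table.split("\\.");
        if (part.length == DB_TABLE_PART_SIZE) {
            // schema-qualified: quote each part separately
            return quote + part[0] + quote + "." + quote + part[1] + quote;
        }
        return quote + table + quote;
    }

    public static void main(String[] args) {
        System.out.println(quoteTable("dev.user_info", "\"")); // "dev"."user_info"
        System.out.println(quoteTable("user_info", "\""));     // "user_info"
    }
}
```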
- */ -public class OracleLocalTest { - public static void main(String[] args) throws ClassNotFoundException, SQLException { - final String DRIVER = "oracle.jdbc.OracleDriver"; - final String URL = "jdbc:oracle:thin:dev/pass1234@172.16.8.121:1521:dtstack"; - final String USER = "dev"; - final String PASSWORD = "pass1234"; - Connection connection = null; - Class.forName(DRIVER); - connection = DriverManager.getConnection(URL); - - - } -} diff --git a/flinkx-oracle/flinkx-oracle-writer/pom.xml b/flinkx-oracle/flinkx-oracle-writer/pom.xml index c6fc7b4e2b..6e9c1850e7 100644 --- a/flinkx-oracle/flinkx-oracle-writer/pom.xml +++ b/flinkx-oracle/flinkx-oracle-writer/pom.xml @@ -63,8 +63,12 @@ shade.oracleWriter.io.netty - com.google.guava - shade.oracleWriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java b/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java index ed77a77ea4..98840d5b10 100644 --- a/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java +++ b/flinkx-oracle/flinkx-oracle-writer/src/main/java/com/dtstack/flinkx/oracle/format/OracleOutputFormat.java @@ -18,11 +18,16 @@ package com.dtstack.flinkx.oracle.format; import com.dtstack.flinkx.enums.ColumnType; +import com.dtstack.flinkx.oracle.OracleDatabaseMeta; import com.dtstack.flinkx.rdb.outputformat.JdbcOutputFormat; import com.dtstack.flinkx.util.DateUtil; import org.apache.flink.types.Row; -import java.sql.*; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashMap; @@ -63,7 +68,7 @@ protected List probeFullColumns(String table, Connection dbConn) throws String schema =null; String[] parts = table.split("\\."); - if(parts.length == 2) { + if(parts.length == OracleDatabaseMeta.DB_TABLE_PART_SIZE) { schema = parts[0].toUpperCase(); table = parts[1]; } @@ -79,24 +84,26 @@ protected List probeFullColumns(String table, Connection dbConn) throws @Override protected Map> probePrimaryKeys(String table, Connection dbConn) throws SQLException { Map> map = new HashMap<>(16); - PreparedStatement ps = dbConn.prepareStatement(String.format(GET_INDEX_SQL,table)); - ResultSet rs = ps.executeQuery(); - while(rs.next()) { - String indexName = rs.getString("INDEX_NAME"); - if(!map.containsKey(indexName)) { - map.put(indexName,new ArrayList<>()); + try (PreparedStatement ps = dbConn.prepareStatement(String.format(GET_INDEX_SQL, table)); + ResultSet rs = ps.executeQuery()) { + while(rs.next()) { + String indexName = rs.getString("INDEX_NAME"); + if(!map.containsKey(indexName)) { + map.put(indexName,new ArrayList<>()); + } + map.get(indexName).add(rs.getString("COLUMN_NAME")); } - map.get(indexName).add(rs.getString("COLUMN_NAME")); - } - Map> retMap = new HashMap<>((map.size()<<2)/3); - for(Map.Entry> entry: map.entrySet()) { - String k = entry.getKey(); - List v = entry.getValue(); - if(v!=null && v.size() != 0 && v.get(0) != null) { - retMap.put(k, v); + + Map> retMap = new HashMap<>((map.size()<<2)/3); + for(Map.Entry> entry: map.entrySet()) { + String k = entry.getKey(); + List v = entry.getValue(); + if(v!=null && v.size() != 0 && v.get(0) != null) { + retMap.put(k, v); + 
} } + return retMap; } - return retMap; } } diff --git a/flinkx-pgwal/flinkx-pgwal-reader/pom.xml b/flinkx-pgwal/flinkx-pgwal-reader/pom.xml index 182ea07067..04f327d64e 100644 --- a/flinkx-pgwal/flinkx-pgwal-reader/pom.xml +++ b/flinkx-pgwal/flinkx-pgwal-reader/pom.xml @@ -33,6 +33,16 @@ false + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-phoenix/flinkx-phoenix-reader/pom.xml b/flinkx-phoenix/flinkx-phoenix-reader/pom.xml index 180385dc01..b067a02a7e 100644 --- a/flinkx-phoenix/flinkx-phoenix-reader/pom.xml +++ b/flinkx-phoenix/flinkx-phoenix-reader/pom.xml @@ -56,6 +56,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-phoenix/flinkx-phoenix-reader/src/main/java/com/dtstack/flinkx/phoenix/format/PhoenixInputFormat.java b/flinkx-phoenix/flinkx-phoenix-reader/src/main/java/com/dtstack/flinkx/phoenix/format/PhoenixInputFormat.java index 6d6b416559..89bdc80968 100644 --- a/flinkx-phoenix/flinkx-phoenix-reader/src/main/java/com/dtstack/flinkx/phoenix/format/PhoenixInputFormat.java +++ b/flinkx-phoenix/flinkx-phoenix-reader/src/main/java/com/dtstack/flinkx/phoenix/format/PhoenixInputFormat.java @@ -29,7 +29,6 @@ import java.io.IOException; import java.sql.SQLException; -import java.sql.Statement; import static com.dtstack.flinkx.rdb.util.DbUtil.clobToString; @@ -65,7 +64,7 @@ public void openInternal(InputSplit inputSplit) throws IOException { // 部分驱动需要关闭事务自动提交,fetchSize参数才会起作用 dbConn.setAutoCommit(false); - Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); + statement = dbConn.createStatement(resultSetType, resultSetConcurrency); statement.setFetchSize(0); diff --git a/flinkx-phoenix/flinkx-phoenix-writer/pom.xml b/flinkx-phoenix/flinkx-phoenix-writer/pom.xml index b97dab27c8..1dc16435a8 100644 --- a/flinkx-phoenix/flinkx-phoenix-writer/pom.xml +++ b/flinkx-phoenix/flinkx-phoenix-writer/pom.xml @@ -56,6 +56,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-polardb/flinkx-polardb-dreader/pom.xml b/flinkx-polardb/flinkx-polardb-dreader/pom.xml index d477787cac..26060ebcf6 100644 --- a/flinkx-polardb/flinkx-polardb-dreader/pom.xml +++ b/flinkx-polardb/flinkx-polardb-dreader/pom.xml @@ -56,6 +56,14 @@ com.mysql shade.polardb.com.mysql + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + diff --git a/flinkx-polardb/flinkx-polardb-dreader/src/main/java/com/dtstack/flinkx/polardbd/reader/PolardbdReader.java b/flinkx-polardb/flinkx-polardb-dreader/src/main/java/com/dtstack/flinkx/polardbd/reader/PolardbdReader.java index b7b3e95855..5e21aec025 100644 --- a/flinkx-polardb/flinkx-polardb-dreader/src/main/java/com/dtstack/flinkx/polardbd/reader/PolardbdReader.java +++ b/flinkx-polardb/flinkx-polardb-dreader/src/main/java/com/dtstack/flinkx/polardbd/reader/PolardbdReader.java @@ -21,7 +21,6 @@ import com.dtstack.flinkx.mysqld.reader.MysqldReader; import com.dtstack.flinkx.polardbd.PolardbDatabaseMeta; import com.dtstack.flinkx.polardbd.format.PolardbdInputFormat; -import com.dtstack.flinkx.rdb.inputformat.DistributedJdbcInputFormat; import com.dtstack.flinkx.rdb.inputformat.DistributedJdbcInputFormatBuilder; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; diff --git 
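Note on the PhoenixInputFormat hunk above: the reader used to create its `Statement` in a local variable, shadowing the field that the format closes on tear-down, so the real statement was never released; the fix assigns to the field instead. The same change is applied to `JdbcInputFormat.executeQuery` later in this patch. A minimal sketch of the shadowing pattern, assuming the inherited `statement` field is what the cleanup path closes:

```java
import java.sql.Connection;
import java.sql.Statement;

/** Illustrative shadowing bug fixed in the Phoenix and JDBC readers. */
public abstract class ShadowingExample {

    protected Statement statement;   // released by closeInternal()

    protected void openBuggy(Connection conn) throws Exception {
        // BUG: declares a new local, so this.statement stays null and the
        // statement created here is never closed.
        Statement statement = conn.createStatement();
        statement.setFetchSize(1000);
    }

    protected void openFixed(Connection conn) throws Exception {
        // FIX: assign to the field so the cleanup path can close it.
        statement = conn.createStatement();
        statement.setFetchSize(1000);
    }

    protected void closeInternal() throws Exception {
        if (statement != null) {
            statement.close();
        }
    }
}
```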
a/flinkx-polardb/flinkx-polardb-reader/pom.xml b/flinkx-polardb/flinkx-polardb-reader/pom.xml index 093bdb5152..77ca955ae3 100644 --- a/flinkx-polardb/flinkx-polardb-reader/pom.xml +++ b/flinkx-polardb/flinkx-polardb-reader/pom.xml @@ -55,6 +55,14 @@ com.mysql shade.polardb.com.mysql + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + diff --git a/flinkx-polardb/flinkx-polardb-writer/pom.xml b/flinkx-polardb/flinkx-polardb-writer/pom.xml index d088128085..4ca3fb7511 100644 --- a/flinkx-polardb/flinkx-polardb-writer/pom.xml +++ b/flinkx-polardb/flinkx-polardb-writer/pom.xml @@ -56,6 +56,14 @@ com.mysql shade.polardb.com.mysql + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + diff --git a/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlDatabaseMeta.java b/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlDatabaseMeta.java index 985b99efaa..4af93b9480 100644 --- a/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlDatabaseMeta.java +++ b/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlDatabaseMeta.java @@ -20,10 +20,7 @@ import com.dtstack.flinkx.enums.EDatabaseType; import com.dtstack.flinkx.rdb.BaseDatabaseMeta; -import org.apache.commons.lang3.StringUtils; -import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.Map; diff --git a/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlTypeConverter.java b/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlTypeConverter.java index 190ff818b1..04f5616f23 100644 --- a/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlTypeConverter.java +++ b/flinkx-postgresql/flinkx-postgresql-core/src/main/java/com/dtstack/flinkx/postgresql/PostgresqlTypeConverter.java @@ -19,8 +19,10 @@ package com.dtstack.flinkx.postgresql; import com.dtstack.flinkx.rdb.type.TypeConverterInterface; +import org.apache.commons.lang3.StringUtils; import java.util.Arrays; +import java.util.Collections; import java.util.List; /** @@ -35,9 +37,9 @@ public class PostgresqlTypeConverter implements TypeConverterInterface { private List byteTypes = Arrays.asList("bytea","bit varying"); - private List bitTypes = Arrays.asList("bit"); + private List bitTypes = Collections.singletonList("bit"); - private List doubleTypes = Arrays.asList("money"); + private List doubleTypes = Collections.singletonList("money"); private List intTypes = Arrays.asList("int","int2","int4","int8"); @@ -46,19 +48,24 @@ public Object convert(Object data,String typeName) { if (data == null){ return null; } - + String dataValue = data.toString(); + if(stringTypes.contains(typeName)){ + return dataValue; + } + if(StringUtils.isBlank(dataValue)){ + return null; + } if(doubleTypes.contains(typeName)){ - data = Double.parseDouble(String.valueOf(data)); + if(StringUtils.startsWith(dataValue, "$")){ + dataValue = StringUtils.substring(dataValue, 1); + } + data = Double.parseDouble(dataValue); } else if(bitTypes.contains(typeName)){ // - } else if(stringTypes.contains(typeName)){ - data = String.valueOf(data); - } else if(byteTypes.contains(typeName)){ - data = Byte.valueOf(String.valueOf(data)); + }else if(byteTypes.contains(typeName)){ + 
data = Byte.valueOf(dataValue); } else if(intTypes.contains(typeName)){ - if(data instanceof String){ - data = Integer.parseInt(data.toString()); - } + data = Integer.parseInt(dataValue); } return data; diff --git a/flinkx-postgresql/flinkx-postgresql-reader/pom.xml b/flinkx-postgresql/flinkx-postgresql-reader/pom.xml index f103f55d85..6e3b8612d6 100644 --- a/flinkx-postgresql/flinkx-postgresql-reader/pom.xml +++ b/flinkx-postgresql/flinkx-postgresql-reader/pom.xml @@ -63,8 +63,12 @@ shade.postgresqlreader.io.netty - com.google - shade.postgresqlreader.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java index 5ba10faad7..790f66f7e9 100644 --- a/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java +++ b/flinkx-postgresql/flinkx-postgresql-reader/src/main/java/com/dtstack/flinkx/postgresql/format/PostgresqlInputFormat.java @@ -19,6 +19,7 @@ package com.dtstack.flinkx.postgresql.format; import com.dtstack.flinkx.rdb.inputformat.JdbcInputFormat; +import com.dtstack.flinkx.rdb.inputformat.JdbcInputSplit; import com.dtstack.flinkx.rdb.util.DbUtil; import com.dtstack.flinkx.util.ClassUtil; import org.apache.commons.collections.CollectionUtils; @@ -49,7 +50,9 @@ public void openInternal(InputSplit inputSplit) throws IOException { String startLocation = incrementConfig.getStartLocation(); if (incrementConfig.isPolling()) { - endLocationAccumulator.add(Long.parseLong(startLocation)); + if (StringUtils.isNotEmpty(startLocation)) { + endLocationAccumulator.add(Long.parseLong(startLocation)); + } isTimestamp = "timestamp".equalsIgnoreCase(incrementConfig.getColumnType()); } else if ((incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc())) { getMaxValue(inputSplit); @@ -62,6 +65,10 @@ public void openInternal(InputSplit inputSplit) throws IOException { } querySql = buildQuerySql(inputSplit); + JdbcInputSplit jdbcInputSplit = (JdbcInputSplit) inputSplit; + if (null != jdbcInputSplit.getStartLocation()) { + startLocation = jdbcInputSplit.getStartLocation(); + } executeQuery(startLocation); columnCount = resultSet.getMetaData().getColumnCount(); boolean splitWithRowCol = numPartitions > 1 && StringUtils.isNotEmpty(splitKey) && splitKey.contains("("); diff --git a/flinkx-postgresql/flinkx-postgresql-writer/pom.xml b/flinkx-postgresql/flinkx-postgresql-writer/pom.xml index 2264b0dddb..7f6857ea04 100644 --- a/flinkx-postgresql/flinkx-postgresql-writer/pom.xml +++ b/flinkx-postgresql/flinkx-postgresql-writer/pom.xml @@ -62,8 +62,12 @@ shade.postgresqlwriter.io.netty - com.google - shade.postgresqlwriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-pulsar/flinkx-pulsar-writer/pom.xml b/flinkx-pulsar/flinkx-pulsar-writer/pom.xml index 72c8493094..88396482f4 100644 --- a/flinkx-pulsar/flinkx-pulsar-writer/pom.xml +++ b/flinkx-pulsar/flinkx-pulsar-writer/pom.xml @@ -11,11 +11,8 @@ flinkx-pulsar-writer - - - org.apache.maven.plugins maven-shade-plugin diff --git a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/Constants.java 
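Note on the PostgresqlTypeConverter hunk above: string-typed values are now returned immediately, blank values map to null instead of failing a numeric parse, and `money` values have a leading "$" stripped before `Double.parseDouble`. A sketch of the money branch in isolation; note the patch only strips the leading symbol, so values containing thousands separators would still need extra handling.

```java
import org.apache.commons.lang3.StringUtils;

/** Illustrative conversion for PostgreSQL 'money' values as changed in this patch. */
public final class MoneyConversion {

    public static Object convertMoney(Object data) {
        if (data == null) {
            return null;
        }
        String value = data.toString();
        if (StringUtils.isBlank(value)) {
            return null;                             // blank now maps to null instead of failing
        }
        if (StringUtils.startsWith(value, "$")) {
            value = StringUtils.substring(value, 1); // drop the currency symbol the driver returns
        }
        return Double.parseDouble(value);
    }

    public static void main(String[] args) {
        System.out.println(convertMoney("$1024.50")); // 1024.5
        System.out.println(convertMoney(" "));        // null
    }
}
```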
b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/Constants.java index b12d1f8ece..28cd09eaad 100644 --- a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/Constants.java +++ b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/Constants.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.pulsar.writer; /** diff --git a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormat.java b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormat.java index 55aeee7dda..fdfab032da 100644 --- a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormat.java +++ b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormat.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.pulsar.writer; import com.dtstack.flinkx.exception.WriteRecordException; @@ -92,7 +109,7 @@ protected void emit(Map event) throws IOException { } @Override - protected void writeMultipleRecordsInternal() throws Exception { + protected void writeMultipleRecordsInternal() { throw new UnsupportedOperationException(); } diff --git a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormatBuilder.java b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormatBuilder.java index 637a97e67a..c6e35cc79c 100644 --- a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormatBuilder.java +++ b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarOutputFormatBuilder.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.dtstack.flinkx.pulsar.writer; import com.dtstack.flinkx.outputformat.BaseRichOutputFormatBuilder; diff --git a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarWriter.java b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarWriter.java index ae84b857f1..2068128992 100644 --- a/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarWriter.java +++ b/flinkx-pulsar/flinkx-pulsar-writer/src/main/java/com/dtstack/flinkx/pulsar/writer/PulsarWriter.java @@ -1,8 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package com.dtstack.flinkx.pulsar.writer; import com.dtstack.flinkx.config.DataTransferConfig; import com.dtstack.flinkx.writer.BaseDataWriter; -import static com.dtstack.flinkx.pulsar.writer.Constants.*; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.datastream.DataStreamSink; import org.apache.flink.types.Row; @@ -10,6 +26,8 @@ import java.util.List; import java.util.Map; +import static com.dtstack.flinkx.pulsar.writer.Constants.*; + /** * @author: pierre @@ -22,6 +40,7 @@ public class PulsarWriter extends BaseDataWriter { protected List tableFields; protected Map producerSettings; + @SuppressWarnings("unchecked") public PulsarWriter(DataTransferConfig config){ super(config); topic = config.getJob().getContent().get(0).getWriter().getParameter().getStringVal(KEY_TOPIC); diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/BaseDatabaseMeta.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/BaseDatabaseMeta.java index ad667ceb13..758be23ace 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/BaseDatabaseMeta.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/BaseDatabaseMeta.java @@ -21,7 +21,11 @@ import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import java.io.Serializable; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; /** * Abstract base parent class of other database prototype implementations @@ -31,6 +35,8 @@ */ public abstract class BaseDatabaseMeta implements DatabaseInterface, Serializable { + public static final int DB_TABLE_PART_SIZE = 2; + @Override public String getStartQuote() { return "\""; diff --git a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DbUtil.java b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DbUtil.java index 26269c7c34..88477b16b3 100644 --- a/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DbUtil.java +++ b/flinkx-rdb/flinkx-rdb-core/src/main/java/com/dtstack/flinkx/rdb/util/DbUtil.java @@ -18,21 +18,17 @@ package com.dtstack.flinkx.rdb.util; import com.dtstack.flinkx.constants.ConstantValue; -import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.ParameterValuesProvider; -import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.ClassUtil; import com.dtstack.flinkx.util.ExceptionUtil; import com.dtstack.flinkx.util.SysUtil; import com.dtstack.flinkx.util.TelnetUtil; -import com.google.gson.Gson; import org.apache.commons.lang.StringUtils; import org.apache.flink.util.CollectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; -import java.math.BigDecimal; import java.sql.*; import java.util.ArrayList; import java.util.HashMap; @@ -41,9 +37,9 @@ import java.util.regex.Pattern; /** + * * Utilities for relational database connection and sql execution * company: www.dtstack.com - * * @author huyifan_zju@ */ public class DbUtil { @@ -72,6 +68,9 @@ public class DbUtil { */ private static int NANOS_LENGTH = 19; + public static int NANOS_PART_LENGTH = 9; + private static int FORMAT_TIME_NANOS_LENGTH = 29; + /** * jdbc连接URL的分割正则,用于获取URL?后的连接参数 */ @@ -93,16 +92,15 @@ public class DbUtil { /** * 获取jdbc连接(超时10S) - * - * @param url url - * @param username 账号 - * @param password 密码 + * @param url url + * @param username 账号 + * 
@param password 密码 * @return * @throws SQLException */ private static Connection getConnectionInternal(String url, String username, String password) throws SQLException { Connection dbConn; - synchronized (ClassUtil.LOCK_STR) { + synchronized (ClassUtil.LOCK_STR){ DriverManager.setLoginTimeout(10); // telnet @@ -120,10 +118,9 @@ private static Connection getConnectionInternal(String url, String username, Str /** * 获取jdbc连接(重试3次) - * - * @param url url - * @param username 账号 - * @param password 密码 + * @param url url + * @param username 账号 + * @param password 密码 * @return * @throws SQLException */ @@ -136,8 +133,10 @@ public static Connection getConnection(String url, String username, String passw for (int i = 0; i < MAX_RETRY_TIMES && failed; ++i) { try { dbConn = getConnectionInternal(url, username, password); - dbConn.createStatement().execute("select 111"); - failed = false; + try (Statement statement = dbConn.createStatement()){ + statement.execute("select 111"); + failed = false; + } } catch (Exception e) { if (dbConn != null) { dbConn.close(); @@ -157,10 +156,9 @@ public static Connection getConnection(String url, String username, String passw /** * 关闭连接资源 - * - * @param rs ResultSet - * @param stmt Statement - * @param conn Connection + * @param rs ResultSet + * @param stmt Statement + * @param conn Connection * @param commit */ public static void closeDbResources(ResultSet rs, Statement stmt, Connection conn, boolean commit) { @@ -182,7 +180,7 @@ public static void closeDbResources(ResultSet rs, Statement stmt, Connection con if (null != conn) { try { - if (commit) { + if(commit){ commit(conn); } @@ -195,76 +193,49 @@ public static void closeDbResources(ResultSet rs, Statement stmt, Connection con /** * 手动提交事物 - * * @param conn Connection */ - public static void commit(Connection conn) { + public static void commit(Connection conn){ try { - if (!conn.isClosed() && !conn.getAutoCommit()) { + if (!conn.isClosed() && !conn.getAutoCommit()){ conn.commit(); } - } catch (SQLException e) { + } catch (SQLException e){ LOG.warn("commit error:{}", ExceptionUtil.getErrorMessage(e)); } } /** * 批量执行sql - * * @param dbConn Connection * @param sqls sql列表 */ public static void executeBatch(Connection dbConn, List sqls) { - if (sqls == null || sqls.size() == 0) { + if(sqls == null || sqls.size() == 0) { return; } - try { - Statement stmt = dbConn.createStatement(); - for (String sql : sqls) { + try (Statement stmt = dbConn.createStatement()) { + for(String sql : sqls) { stmt.addBatch(sql); } stmt.executeBatch(); } catch (SQLException e) { - throw new RuntimeException("execute batch sql error:{}", e); + throw new RuntimeException("execute batch sql error:{}",e); } finally { commit(dbConn); } } - /** - * 获取某数据库某表的主键和唯一索引 - * - * @param table 表名 - * @param dbConn 数据库连接 - * @return - * @throws SQLException - */ - public static Map> getPrimaryOrUniqueKeys(String table, Connection dbConn) throws SQLException { - Map> keyMap = new HashMap<>(16); - DatabaseMetaData meta = dbConn.getMetaData(); - ResultSet rs = meta.getIndexInfo(null, null, table, true, false); - while (rs.next()) { - String pkName = rs.getString(6); - String columnName = rs.getString(9); - if (!keyMap.containsKey(pkName)) { - keyMap.put(pkName, new ArrayList<>()); - } - keyMap.get(pkName).add(columnName); - } - return keyMap; - } - /** * 封装channel通道顺序 - * * @param channels * @return */ - public static Object[][] getParameterValues(final int channels) { + public static Object[][] getParameterValues(final int channels){ 
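Note on the DbUtil hunks above: `getConnection` still retries up to three times and validates with `select 111`, but the validation `Statement` (and the one in `executeBatch`) now lives in try-with-resources instead of leaking on every call, and the unused `getPrimaryOrUniqueKeys` helper is removed. A self-contained sketch of the retry-and-validate pattern; the structure follows the visible code, but it is a simplified stand-in, not the FlinkX method itself.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;

/** Illustrative retry-and-validate connection helper in the spirit of DbUtil.getConnection. */
public final class RetryingConnect {

    private static final int MAX_RETRY_TIMES = 3;

    public static Connection getConnection(String url, String user, String pass) throws SQLException {
        SQLException last = null;
        for (int i = 0; i < MAX_RETRY_TIMES; i++) {
            Connection conn = null;
            try {
                DriverManager.setLoginTimeout(10);
                conn = DriverManager.getConnection(url, user, pass);
                // Validation statement is closed by try-with-resources; before the
                // patch it leaked on every successful call.
                try (Statement st = conn.createStatement()) {
                    st.execute("select 111");
                }
                return conn;
            } catch (SQLException e) {
                last = e;
                if (conn != null) {
                    try {
                        conn.close();              // don't leak half-opened connections
                    } catch (SQLException ignore) {
                        // best effort
                    }
                }
            }
        }
        throw last;
    }
}
```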
ParameterValuesProvider provider = () -> { Integer[][] parameters = new Integer[channels][]; - for (int i = 0; i < channels; ++i) { + for(int i = 0; i < channels; ++i) { parameters[i] = new Integer[2]; parameters[i][0] = channels; parameters[i][1] = i; @@ -295,62 +266,20 @@ public static List analyzeColumnType(ResultSet resultSet){ return columnTypeList; } - /** - * 占位符设值 - * - * @param param 参数 - * @param statement PreparedStatement - * @param i 占位符位置 - * @throws SQLException - */ - public static void setParameterValue(Object param, PreparedStatement statement, int i) throws SQLException { - if (param instanceof String) { - statement.setString(i + 1, (String) param); - } else if (param instanceof Long) { - statement.setLong(i + 1, (Long) param); - } else if (param instanceof Integer) { - statement.setInt(i + 1, (Integer) param); - } else if (param instanceof Double) { - statement.setDouble(i + 1, (Double) param); - } else if (param instanceof Boolean) { - statement.setBoolean(i + 1, (Boolean) param); - } else if (param instanceof Float) { - statement.setFloat(i + 1, (Float) param); - } else if (param instanceof BigDecimal) { - statement.setBigDecimal(i + 1, (BigDecimal) param); - } else if (param instanceof Byte) { - statement.setByte(i + 1, (Byte) param); - } else if (param instanceof Short) { - statement.setShort(i + 1, (Short) param); - } else if (param instanceof Date) { - statement.setDate(i + 1, (Date) param); - } else if (param instanceof Time) { - statement.setTime(i + 1, (Time) param); - } else if (param instanceof Timestamp) { - statement.setTimestamp(i + 1, (Timestamp) param); - } else if (param instanceof Array) { - statement.setArray(i + 1, (Array) param); - } else { - //extends with other types if needed - throw new IllegalArgumentException("open() failed. 
Parameter " + i + " of type " + param.getClass() + " is not handled (yet)."); - } - } - /** * clob转string - * - * @param obj clob + * @param obj clob * @return * @throws Exception */ - public static Object clobToString(Object obj) throws Exception { + public static Object clobToString(Object obj) throws Exception{ String dataStr; - if (obj instanceof Clob) { - Clob clob = (Clob) obj; + if(obj instanceof Clob){ + Clob clob = (Clob)obj; BufferedReader bf = new BufferedReader(clob.getCharacterStream()); StringBuilder stringBuilder = new StringBuilder(); String line; - while ((line = bf.readLine()) != null) { + while ((line = bf.readLine()) != null){ stringBuilder.append(line); } dataStr = stringBuilder.toString(); @@ -363,34 +292,32 @@ public static Object clobToString(Object obj) throws Exception { /** * 获取纳秒字符串 - * - * @param timeStr + * @param timeStr 2020-03-23 11:03:22.000000000 * @return */ - public static String getNanosTimeStr(String timeStr) { - if (timeStr.length() < 29) { - timeStr += StringUtils.repeat("0", 29 - timeStr.length()); + public static String getNanosTimeStr(String timeStr){ + if(timeStr.length() < FORMAT_TIME_NANOS_LENGTH){ + timeStr += StringUtils.repeat("0",FORMAT_TIME_NANOS_LENGTH - timeStr.length()); } return timeStr; } /** * 将边界位置时间转换成对应饿的纳秒时间 - * * @param startLocation 边界位置(起始/结束) * @return */ - public static int getNanos(long startLocation) { + public static int getNanos(long startLocation){ String timeStr = String.valueOf(startLocation); int nanos; - if (timeStr.length() == SECOND_LENGTH) { + if (timeStr.length() == SECOND_LENGTH){ nanos = 0; - } else if (timeStr.length() == MILLIS_LENGTH) { - nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH, MILLIS_LENGTH)) * 1000000; - } else if (timeStr.length() == MICRO_LENGTH) { - nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH, MICRO_LENGTH)) * 1000; - } else if (timeStr.length() == NANOS_LENGTH) { - nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH, NANOS_LENGTH)); + } else if (timeStr.length() == MILLIS_LENGTH){ + nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH,MILLIS_LENGTH)) * 1000000; + } else if (timeStr.length() == MICRO_LENGTH){ + nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH,MICRO_LENGTH)) * 1000; + } else if (timeStr.length() == NANOS_LENGTH){ + nanos = Integer.parseInt(timeStr.substring(SECOND_LENGTH,NANOS_LENGTH)); } else { throw new IllegalArgumentException("Unknown time unit:startLocation=" + startLocation); } @@ -400,20 +327,19 @@ public static int getNanos(long startLocation) { /** * 将边界位置时间转换成对应饿的毫秒时间 - * - * @param startLocation 边界位置(起始/结束) + * @param startLocation 边界位置(起始/结束) * @return */ - public static long getMillis(long startLocation) { + public static long getMillis(long startLocation){ String timeStr = String.valueOf(startLocation); long millisSecond; - if (timeStr.length() == SECOND_LENGTH) { + if (timeStr.length() == SECOND_LENGTH){ millisSecond = startLocation * 1000; - } else if (timeStr.length() == MILLIS_LENGTH) { + } else if (timeStr.length() == MILLIS_LENGTH){ millisSecond = startLocation; - } else if (timeStr.length() == MICRO_LENGTH) { + } else if (timeStr.length() == MICRO_LENGTH){ millisSecond = startLocation / 1000; - } else if (timeStr.length() == NANOS_LENGTH) { + } else if (timeStr.length() == NANOS_LENGTH){ millisSecond = startLocation / 1000000; } else { throw new IllegalArgumentException("Unknown time unit:startLocation=" + startLocation); @@ -424,24 +350,23 @@ public static long getMillis(long startLocation) { /** * 格式化jdbc连接 - * - * 
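Note on the DbUtil time helpers above: `getNanosTimeStr` pads a formatted timestamp to 29 characters (date-time plus nine nano digits), and `getNanos` decides the precision of a numeric location by its digit count. The constant for 19 digits (nanoseconds) is visible in the patch; mapping 10/13/16 digits to seconds/millis/micros is inferred from the unit conversions. A worked example:

```java
/** Worked example of the second/milli/micro/nano location lengths used by DbUtil. */
public final class LocationPrecision {

    public static int getNanos(long startLocation) {
        String s = String.valueOf(startLocation);
        switch (s.length()) {
            case 10: return 0;                                             // seconds
            case 13: return Integer.parseInt(s.substring(10)) * 1_000_000; // millis -> nanos
            case 16: return Integer.parseInt(s.substring(10)) * 1_000;     // micros -> nanos
            case 19: return Integer.parseInt(s.substring(10));             // already nanos
            default: throw new IllegalArgumentException("Unknown time unit: " + startLocation);
        }
    }

    public static void main(String[] args) {
        System.out.println(getNanos(1589421600L));          // 0
        System.out.println(getNanos(1589421600123L));       // 123000000
        System.out.println(getNanos(1589421600123456L));    // 123456000
        System.out.println(getNanos(1589421600123456789L)); // 123456789
    }
}
```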
@param dbUrl 原jdbc连接 - * @param extParamMap 需要额外添加的参数 - * @return 格式化后jdbc连接URL字符串 + * @param dbUrl 原jdbc连接 + * @param extParamMap 需要额外添加的参数 + * @return 格式化后jdbc连接URL字符串 */ - public static String formatJdbcUrl(String dbUrl, Map extParamMap) { + public static String formatJdbcUrl(String dbUrl, Map extParamMap){ String[] splits = DB_PATTERN.split(dbUrl); - Map paramMap = new HashMap<>(16); - if (splits.length > 1) { + Map paramMap = new HashMap<>(16); + if(splits.length > 1) { String[] pairs = splits[1].split("&"); - for (String pair : pairs) { + for(String pair : pairs) { String[] leftRight = pair.split("="); paramMap.put(leftRight[0], leftRight[1]); } } - if (!CollectionUtil.isNullOrEmpty(extParamMap)) { + if(!CollectionUtil.isNullOrEmpty(extParamMap)){ paramMap.putAll(extParamMap); } paramMap.put("useCursorFetch", "true"); @@ -450,8 +375,8 @@ public static String formatJdbcUrl(String dbUrl, Map extParamMap StringBuffer sb = new StringBuffer(dbUrl.length() + 128); sb.append(splits[0]).append("?"); int index = 0; - for (Map.Entry entry : paramMap.entrySet()) { - if (index != 0) { + for(Map.Entry entry : paramMap.entrySet()) { + if(index != 0) { sb.append("&"); } sb.append(entry.getKey()).append("=").append(entry.getValue()); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java index 4faa32e0ce..f4442111d7 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.datareader/DistributedJdbcDataReader.java @@ -107,8 +107,8 @@ protected DistributedJdbcInputFormatBuilder getBuilder(){ throw new RuntimeException("子类必须覆盖getBuilder方法"); } - protected List buildConnections(){ - List sourceList = new ArrayList<>(connectionConfigs.size()); + protected ArrayList buildConnections(){ + ArrayList sourceList = new ArrayList<>(connectionConfigs.size()); for (ReaderConfig.ParameterConfig.ConnectionConfig connectionConfig : connectionConfigs) { String curUsername = (StringUtils.isBlank(connectionConfig.getUsername())) ? username : connectionConfig.getUsername(); String curPassword = (StringUtils.isBlank(connectionConfig.getPassword())) ? 
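Note on `formatJdbcUrl` above: it splits the URL at its query string, merges any extra parameters, forces `useCursorFetch=true` (which for MySQL Connector/J makes `setFetchSize` use a server-side cursor), and rebuilds the URL. The sketch below mirrors that flow but splits on "?" directly, whereas the real code uses a precompiled pattern; treat it as an approximation.

```java
import java.util.HashMap;
import java.util.Map;

/** Illustrative merge-and-rebuild of a JDBC URL's query parameters. */
public final class JdbcUrlFormatter {

    public static String formatJdbcUrl(String dbUrl, Map<String, String> extParams) {
        String[] splits = dbUrl.split("\\?", 2);
        Map<String, String> params = new HashMap<>(16);
        if (splits.length > 1) {
            for (String pair : splits[1].split("&")) {
                String[] kv = pair.split("=", 2);
                params.put(kv[0], kv.length > 1 ? kv[1] : "");
            }
        }
        if (extParams != null) {
            params.putAll(extParams);
        }
        params.put("useCursorFetch", "true");   // forced so fetchSize streams rows from MySQL

        StringBuilder sb = new StringBuilder(splits[0]).append('?');
        int i = 0;
        for (Map.Entry<String, String> e : params.entrySet()) {
            if (i++ > 0) {
                sb.append('&');
            }
            sb.append(e.getKey()).append('=').append(e.getValue());
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        System.out.println(formatJdbcUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false", null));
        // e.g. jdbc:mysql://127.0.0.1:3306/test?useSSL=false&useCursorFetch=true (order may vary)
    }
}
```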
password : connectionConfig.getPassword(); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java index 2d33a29161..6e9534b554 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormat.java @@ -30,7 +30,11 @@ import org.apache.flink.core.io.InputSplit; import org.apache.flink.types.Row; -import java.io.*; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; @@ -65,7 +69,7 @@ public class DistributedJdbcInputFormat extends BaseRichInputFormat { protected List descColumnTypeList; - protected List sourceList; + protected ArrayList sourceList; protected transient int sourceIndex; @@ -197,7 +201,7 @@ protected void closeCurrentSource(){ @Override protected void closeInternal() throws IOException { - + closeCurrentSource(); } @Override @@ -208,7 +212,7 @@ public InputSplit[] createInputSplitsInternal(int minPart) throws IOException { Object[][] parmeter = DbUtil.getParameterValues(numPartitions); for (int j = 0; j < numPartitions; j++) { DistributedJdbcInputSplit split = new DistributedJdbcInputSplit(j,numPartitions); - List sourceCopy = deepCopyList(sourceList); + ArrayList sourceCopy = deepCopyList(sourceList); for (int i = 0; i < sourceCopy.size(); i++) { sourceCopy.get(i).setSplitByKey(true); sourceCopy.get(i).setParameterValues(parmeter[j]); @@ -221,7 +225,9 @@ public InputSplit[] createInputSplitsInternal(int minPart) throws IOException { if (partNum == 0){ for (int i = 0; i < sourceList.size(); i++) { DistributedJdbcInputSplit split = new DistributedJdbcInputSplit(i,numPartitions); - split.setSourceList(Arrays.asList(sourceList.get(i))); + ArrayList arrayList = new ArrayList<>(); + arrayList.add(sourceList.get(i)); + split.setSourceList(arrayList); inputSplits[i] = split; } } else { @@ -250,7 +256,7 @@ public boolean reachedEnd() throws IOException { return readNextRecord(); } - public List deepCopyList(List src) throws IOException{ + public ArrayList deepCopyList(ArrayList src) throws IOException{ try { ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); ObjectOutputStream out = new ObjectOutputStream(byteOut); @@ -258,7 +264,7 @@ public List deepCopyList(List src) throws IOException{ ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray()); ObjectInputStream in = new ObjectInputStream(byteIn); - List dest = (List) in.readObject(); + ArrayList dest = (ArrayList) in.readObject(); return dest; } catch (Exception e){ diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java index 28f2c2c5f1..0c8b21c07f 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputFormatBuilder.java @@ -24,6 +24,7 @@ import com.dtstack.flinkx.reader.MetaColumn; import 
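Note on the DistributedJdbcInputFormat hunk above: `closeInternal` now actually closes the current source, and `deepCopyList` is narrowed to `ArrayList` while keeping its copy-by-serialization approach, which gives every input split its own independent copy of the data-source list. A generic sketch of that technique:

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;

/** Illustrative deep copy by serialization, as used when each split gets its own source list. */
public final class DeepCopy {

    @SuppressWarnings("unchecked")
    public static <T extends Serializable> ArrayList<T> deepCopyList(ArrayList<T> src) throws Exception {
        ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(byteOut)) {
            out.writeObject(src);               // every element must be Serializable
        }
        try (ObjectInputStream in =
                     new ObjectInputStream(new ByteArrayInputStream(byteOut.toByteArray()))) {
            return (ArrayList<T>) in.readObject();
        }
    }

    public static void main(String[] args) throws Exception {
        ArrayList<String> src = new ArrayList<>();
        src.add("jdbc:mysql://host-1:3306/db");
        ArrayList<String> copy = deepCopyList(src);
        copy.add("jdbc:mysql://host-2:3306/db");
        System.out.println(src.size() + " vs " + copy.size()); // 1 vs 2
    }
}
```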
org.apache.commons.lang.StringUtils; +import java.util.ArrayList; import java.util.List; /** @@ -65,7 +66,7 @@ public void setSplitKey(String splitKey){ format.splitKey = splitKey; } - public void setSourceList(List sourceList){ + public void setSourceList(ArrayList sourceList){ format.sourceList = sourceList; } diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputSplit.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputSplit.java index 724f9532d5..43df7f5c88 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputSplit.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/DistributedJdbcInputSplit.java @@ -32,7 +32,7 @@ */ public class DistributedJdbcInputSplit extends GenericInputSplit { - private List sourceList; + private ArrayList sourceList; public DistributedJdbcInputSplit(int partitionNumber, int totalNumberOfPartitions) { super(partitionNumber, totalNumberOfPartitions); @@ -54,11 +54,11 @@ public void addSource(DataSource source){ this.sourceList.add(source); } - public List getSourceList() { + public ArrayList getSourceList() { return sourceList; } - public void setSourceList(List sourceList) { + public void setSourceList(ArrayList sourceList) { this.sourceList = sourceList; } } diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java index 706600d712..d71fa41d16 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormat.java @@ -27,12 +27,8 @@ import com.dtstack.flinkx.rdb.type.TypeConverterInterface; import com.dtstack.flinkx.rdb.util.DbUtil; import com.dtstack.flinkx.reader.MetaColumn; -import com.dtstack.flinkx.util.*; import com.dtstack.flinkx.restore.FormatState; -import com.dtstack.flinkx.util.ClassUtil; -import com.dtstack.flinkx.util.DateUtil; -import com.dtstack.flinkx.util.StringUtil; -import com.dtstack.flinkx.util.UrlUtil; +import com.dtstack.flinkx.util.*; import com.google.gson.Gson; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.accumulators.LongMaximum; @@ -48,7 +44,9 @@ import java.io.IOException; import java.sql.*; import java.util.Date; -import java.util.*; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; /** @@ -161,7 +159,9 @@ public void openInternal(InputSplit inputSplit) throws IOException { initMetric(inputSplit); String startLocation = incrementConfig.getStartLocation(); if (incrementConfig.isPolling()) { - endLocationAccumulator.add(Long.parseLong(startLocation)); + if (StringUtils.isNotEmpty(startLocation)) { + endLocationAccumulator.add(Long.parseLong(startLocation)); + } isTimestamp = "timestamp".equalsIgnoreCase(incrementConfig.getColumnType()); } else if ((incrementConfig.isIncrement() && incrementConfig.isUseMaxFunc())) { getMaxValue(inputSplit); @@ -252,6 +252,12 @@ public Row nextRecordInternal(Row row) throws IOException { boolean isUpdateLocation = incrementConfig.isPolling() || (incrementConfig.isIncrement() && !incrementConfig.isUseMaxFunc()); if (isUpdateLocation) { Object incrementVal = resultSet.getObject(incrementConfig.getColumnName()); + 
if(incrementVal != null) { + if((incrementVal instanceof java.util.Date + || incrementVal.getClass().getSimpleName().toUpperCase().contains("TIMESTAMP")) ) { + incrementVal = resultSet.getTimestamp(incrementConfig.getColumnName()); + } + } String location; if(incrementConfig.isPolling()){ location = String.valueOf(incrementVal); @@ -371,7 +377,7 @@ public String getMaxValueFromApi(){ } String url = monitorUrls; - if (monitorUrls.startsWith("http")) { + if (monitorUrls.startsWith(ConstantValue.PROTOCOL_HTTP)) { url = String.format("%s/jobs/%s/accumulators", monitorUrls, jobId); } @@ -707,10 +713,10 @@ private String getLocation(String columnType, Object columnVal) { long time = ((Timestamp) columnVal).getTime() / 1000; String nanosStr = String.valueOf(((Timestamp) columnVal).getNanos()); - if (nanosStr.length() == 9) { + if (nanosStr.length() == DbUtil.NANOS_PART_LENGTH) { location = time + nanosStr; } else { - String fillZeroStr = StringUtils.repeat("0", 9 - nanosStr.length()); + String fillZeroStr = StringUtils.repeat("0", DbUtil.NANOS_PART_LENGTH - nanosStr.length()); location = time + fillZeroStr + nanosStr; } } else { @@ -808,7 +814,7 @@ protected void executeQuery(String startLocation) throws SQLException { queryForPolling(startLocation); } } else { - Statement statement = dbConn.createStatement(resultSetType, resultSetConcurrency); + statement = dbConn.createStatement(resultSetType, resultSetConcurrency); statement.setFetchSize(fetchSize); statement.setQueryTimeout(queryTimeOut); resultSet = statement.executeQuery(querySql); diff --git a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java index cb48f98b59..ae788328f2 100644 --- a/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java +++ b/flinkx-rdb/flinkx-rdb-reader/src/main/java/com.dtstack.flinkx.rdb.inputformat/JdbcInputFormatBuilder.java @@ -18,6 +18,7 @@ package com.dtstack.flinkx.rdb.inputformat; +import com.dtstack.flinkx.constants.ConstantValue; import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder; import com.dtstack.flinkx.rdb.DatabaseInterface; import com.dtstack.flinkx.rdb.datareader.IncrementConfig; @@ -128,6 +129,10 @@ protected void checkFormat() { if (StringUtils.isEmpty(format.splitKey) && format.numPartitions > 1){ throw new IllegalArgumentException("Must specify the split column when the channel is greater than 1"); } + + if (format.fetchSize > ConstantValue.MAX_BATCH_SIZE) { + throw new IllegalArgumentException("批量读取条数必须小于[200000]条"); + } } } diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java index 2a94afb8ff..a07eb3473c 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com.dtstack.flinkx.rdb.datawriter/JdbcDataWriter.java @@ -32,7 +32,15 @@ import java.util.List; import java.util.Map; -import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.*; +import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_BATCH_SIZE; +import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_FULL_COLUMN; +import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_INSERT_SQL_MODE; +import static 
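The getLocation change above only swaps the literal 9 for DbUtil.NANOS_PART_LENGTH; the encoding itself is unchanged: epoch seconds concatenated with the nanosecond part zero-padded to nine digits. A self-contained sketch of that encoding (assumes commons-lang3 on the classpath; the timestamps are sample values):

```java
import java.sql.Timestamp;

import org.apache.commons.lang3.StringUtils;

public class TimestampLocationDemo {

    private static final int NANOS_PART_LENGTH = 9;

    // Encode a timestamp as <epoch seconds><nanos padded to 9 digits>, the shape
    // the incremental reader stores as its "location" for timestamp columns.
    static String toLocation(Timestamp ts) {
        long seconds = ts.getTime() / 1000;
        String nanos = String.valueOf(ts.getNanos());
        String padding = StringUtils.repeat("0", NANOS_PART_LENGTH - nanos.length());
        return seconds + padding + nanos;
    }

    public static void main(String[] args) {
        // Fraction >= 0.1s: getNanos() already has nine digits, no padding needed.
        System.out.println(toLocation(Timestamp.valueOf("2020-03-12 10:15:30.123456789")));
        // Fraction < 0.1s: nanos is "50000000" (8 digits) and gets one leading zero,
        // so the location stays equal to seconds * 10^9 + nanos as a single number.
        System.out.println(toLocation(Timestamp.valueOf("2020-03-12 10:15:30.05")));
    }
}
```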
com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_PASSWORD; +import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_POST_SQL; +import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_PRE_SQL; +import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_UPDATE_KEY; +import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_USERNAME; +import static com.dtstack.flinkx.rdb.datawriter.JdbcConfigKeys.KEY_WRITE_MODE; /** * The Writer plugin for any database that can be connected via JDBC. diff --git a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java index bd2dc5afe1..47ab767ef3 100644 --- a/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java +++ b/flinkx-rdb/flinkx-rdb-writer/src/main/java/com/dtstack/flinkx/rdb/outputformat/JdbcOutputFormat.java @@ -30,12 +30,19 @@ import com.google.gson.Gson; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.ObjectUtils; +import org.apache.commons.lang.StringUtils; import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.sql.*; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -107,6 +114,7 @@ public class JdbcOutputFormat extends BaseRichOutputFormat { "AND t.table_name = '%s'"; protected final static String CONN_CLOSE_ERROR_MSG = "No operations allowed"; + protected static List STRING_TYPES = Arrays.asList("CHAR", "VARCHAR","TINYBLOB","TINYTEXT","BLOB","TEXT", "MEDIUMBLOB", "MEDIUMTEXT", "LONGBLOB", "LONGTEXT"); protected PreparedStatement prepareTemplates() throws SQLException { if(CollectionUtils.isEmpty(fullColumn)) { @@ -202,7 +210,13 @@ protected void writeSingleRecordInternal(Row row) throws WriteRecordException { int index = 0; try { for (; index < row.getArity(); index++) { - preparedStatement.setObject(index+1,getField(row,index)); + Object object = row.getField(index); + if( object instanceof String && StringUtils.isBlank((String) object)){ + if(!STRING_TYPES.contains(columnType.get(index))){ + object = null; + } + } + preparedStatement.setObject(index+1, object); } preparedStatement.execute(); @@ -233,8 +247,14 @@ protected String recordConvertDetailErrorMessage(int pos, Row row) { protected void writeMultipleRecordsInternal() throws Exception { try { for (Row row : rows) { - for (int j = 0; j < row.getArity(); ++j) { - preparedStatement.setObject(j + 1, getField(row, j)); + for (int index = 0; index < row.getArity(); index++) { + Object object = row.getField(index); + if( object instanceof String && StringUtils.isBlank((String) object)){ + if(!STRING_TYPES.contains(columnType.get(index))){ + object = null; + } + } + preparedStatement.setObject(index+1, object); } preparedStatement.addBatch(); diff --git a/flinkx-redis/flinkx-redis-core/src/main/java/com/dtstack/flinkx/redis/JedisUtil.java b/flinkx-redis/flinkx-redis-core/src/main/java/com/dtstack/flinkx/redis/JedisUtil.java index 31f40ecb64..9aed6f527b 100644 --- a/flinkx-redis/flinkx-redis-core/src/main/java/com/dtstack/flinkx/redis/JedisUtil.java +++ 
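The two JdbcOutputFormat hunks above apply the same rule in both write paths: a blank String bound to a non-character column is turned into NULL before PreparedStatement.setObject, while character-typed columns (CHAR, VARCHAR, TEXT, ...) keep the blank value. A standalone sketch of that normalization rule (column types and sample values here are made up for illustration):

```java
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class BlankToNullDemo {

    // Column types that may keep blank strings, mirroring the STRING_TYPES list above.
    private static final Set<String> STRING_TYPES = new HashSet<>(Arrays.asList(
            "CHAR", "VARCHAR", "TINYBLOB", "TINYTEXT", "BLOB", "TEXT",
            "MEDIUMBLOB", "MEDIUMTEXT", "LONGBLOB", "LONGTEXT"));

    // Blank (empty or whitespace-only) strings become NULL unless the target column
    // is a character type; every other value passes through untouched.
    static Object normalize(Object value, String columnType) {
        if (value instanceof String && ((String) value).trim().isEmpty()
                && !STRING_TYPES.contains(columnType)) {
            return null;
        }
        return value;
    }

    public static void main(String[] args) {
        System.out.println(normalize("  ", "INT"));      // null
        System.out.println(normalize("  ", "VARCHAR"));  // the blank string is kept
        System.out.println(normalize("abc", "INT"));     // abc
    }
}
```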
b/flinkx-redis/flinkx-redis-core/src/main/java/com/dtstack/flinkx/redis/JedisUtil.java @@ -19,13 +19,18 @@ package com.dtstack.flinkx.redis; import com.dtstack.flinkx.util.TelnetUtil; -import redis.clients.jedis.*; +import redis.clients.jedis.Jedis; +import redis.clients.jedis.JedisPool; +import redis.clients.jedis.JedisPoolConfig; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; -import static com.dtstack.flinkx.redis.RedisConfigKeys.*; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_DB; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_HOST_PORT; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_PASSWORD; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_TIMEOUT; /** * Utilities for redis database connection diff --git a/flinkx-redis/flinkx-redis-writer/pom.xml b/flinkx-redis/flinkx-redis-writer/pom.xml index 4a92436c6b..891f05acf5 100644 --- a/flinkx-redis/flinkx-redis-writer/pom.xml +++ b/flinkx-redis/flinkx-redis-writer/pom.xml @@ -56,8 +56,12 @@ shade.rediswriter.io.netty - com.google - shade.rediswriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormat.java b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormat.java index 7fe2fe9358..2fc85d1fd3 100644 --- a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormat.java +++ b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormat.java @@ -30,9 +30,15 @@ import java.io.IOException; import java.text.SimpleDateFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Properties; -import static com.dtstack.flinkx.redis.RedisConfigKeys.*; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_DB; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_HOST_PORT; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_PASSWORD; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_TIMEOUT; /** * OutputFormat for writing data to redis database. 
@@ -70,6 +76,8 @@ public class RedisOutputFormat extends BaseRichOutputFormat { private static final int CRITICAL_TIME = 60 * 60 * 24 * 30; + private static final int KEY_VALUE_SIZE = 2; + @Override public void configure(Configuration parameters) { super.configure(parameters); @@ -138,7 +146,7 @@ private void processTimeFormat(Row row){ } private List getFieldAndValue(Row row){ - if(row.getArity() - keyIndexes.size() != 2){ + if(row.getArity() - keyIndexes.size() != KEY_VALUE_SIZE){ throw new IllegalArgumentException("Each row record can have only one pair of attributes and values except key"); } @@ -185,7 +193,7 @@ private String concatKey(Row row){ @Override protected void writeMultipleRecordsInternal() throws Exception { - // Still not supported + notSupportBatchWrite("RedisWriter"); } @Override diff --git a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormatBuilder.java b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormatBuilder.java index e9480525ae..3b291ece1c 100644 --- a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormatBuilder.java +++ b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisOutputFormatBuilder.java @@ -95,5 +95,7 @@ protected void checkFormat() { if (format.getRestoreConfig() != null && format.getRestoreConfig().isRestore()){ throw new UnsupportedOperationException("This plugin not support restore from failed state"); } + + notSupportBatchWrite("RedisWriter"); } } diff --git a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisWriter.java b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisWriter.java index c64e2a3b29..fa33f2c739 100644 --- a/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisWriter.java +++ b/flinkx-redis/flinkx-redis-writer/src/main/java/com/dtstack/flinkx/redis/writer/RedisWriter.java @@ -31,7 +31,18 @@ import java.util.ArrayList; import java.util.List; -import static com.dtstack.flinkx.redis.RedisConfigKeys.*; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_BATCH_SIZE; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_DATE_FORMAT; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_DB; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_EXPIRE_TIME; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_HOST_PORT; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_KEY_FIELD_DELIMITER; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_KEY_INDEXES; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_MODE; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_PASSWORD; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_TIMEOUT; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_TYPE; +import static com.dtstack.flinkx.redis.RedisConfigKeys.KEY_VALUE_FIELD_DELIMITER; /** * The writer plugin for redis database diff --git a/flinkx-restapi/flinkx-restapi-core/pom.xml b/flinkx-restapi/flinkx-restapi-core/pom.xml new file mode 100644 index 0000000000..742c77488d --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-core/pom.xml @@ -0,0 +1,20 @@ + + + + flinkx-restapi + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-restapi-core + + + org.apache.httpcomponents + httpclient + ${http.version} + + + \ No newline at end of file diff --git 
a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpMethod.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpMethod.java new file mode 100644 index 0000000000..338ba84b1c --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpMethod.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flinkx.restapi.common; + +/** + * @author : tiezhu + * @date : 2020/3/13 + */ +public enum HttpMethod { + // http 请求方式 + GET, + POST, + PUT, + PATCH, + DELETE, + COPY, + HEAD, + OPTIONS, + LINK, + UNLINK, + PURGE, + LOCK, + UNLOCK, + PROPFIND, + VIEW + ; + +} diff --git a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpUtil.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpUtil.java new file mode 100644 index 0000000000..b37770ecb6 --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/HttpUtil.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dtstack.flinkx.restapi.common; + +import com.dtstack.flinkx.util.GsonUtil; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpRequestBase; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Map; + +/** + * @author : tiezhu + * @date : 2020/3/16 + */ +public class HttpUtil { + protected static final Logger LOG = LoggerFactory.getLogger(HttpUtil.class); + private static final int COUNT = 32; + private static final int TOTAL_COUNT = 1000; + private static final int TIME_OUT = 5000; + private static final int EXECUTION_COUNT = 5; + + public static CloseableHttpClient getHttpClient() { + // 设置自定义的重试策略 + MyServiceUnavailableRetryStrategy strategy = new MyServiceUnavailableRetryStrategy + .Builder() + .executionCount(EXECUTION_COUNT) + .retryInterval(1000) + .build(); + // 设置自定义的重试Handler + MyHttpRequestRetryHandler retryHandler = new MyHttpRequestRetryHandler + .Builder() + .executionCount(EXECUTION_COUNT) + .build(); + // 设置超时时间 + RequestConfig requestConfig = RequestConfig.custom() + .setConnectTimeout(TIME_OUT) + .setConnectionRequestTimeout(TIME_OUT) + .setSocketTimeout(TIME_OUT) + .build(); + // 设置Http连接池 + PoolingHttpClientConnectionManager pcm = new PoolingHttpClientConnectionManager(); + pcm.setDefaultMaxPerRoute(COUNT); + pcm.setMaxTotal(TOTAL_COUNT); + + return HttpClientBuilder.create() + .setServiceUnavailableRetryStrategy(strategy) + .setRetryHandler(retryHandler) + .setDefaultRequestConfig(requestConfig) + .setConnectionManager(pcm) + .build(); +// return HttpClientBuilder.create().build(); + } + + public static HttpRequestBase getRequest(String method, + Map requestBody, + Map header, + String url) { + LOG.debug("current request url: {} current method:{} \n", url, method); + HttpRequestBase request = null; + + if (HttpMethod.GET.name().equalsIgnoreCase(method)) { + request = new HttpGet(url); + } else if (HttpMethod.POST.name().equalsIgnoreCase(method)) { + HttpPost post = new HttpPost(url); + post.setEntity(getEntityData(requestBody)); + request = post; + } else { + throw new RuntimeException("Unsupported method:" + method); + } + + for (Map.Entry entry : header.entrySet()) { + request.addHeader(entry.getKey(), entry.getValue()); + } + return request; + } + + public static void closeClient(CloseableHttpClient httpClient) { + try { + httpClient.close(); + } catch (IOException e) { + throw new RuntimeException("close client error"); + } + } + + public static StringEntity getEntityData(Map body) { + StringEntity stringEntity = new StringEntity(GsonUtil.GSON.toJson(body), StandardCharsets.UTF_8); + stringEntity.setContentEncoding(StandardCharsets.UTF_8.name()); + return stringEntity; + } +} diff --git a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyHttpRequestRetryHandler.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyHttpRequestRetryHandler.java new file mode 100644 index 0000000000..e0689cb658 --- /dev/null +++ 
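HttpUtil above centralizes client construction (pooled connections, the three timeouts, two retry hooks) and request building for GET and POST. For orientation, a minimal GET with the stock HttpClient 4.x API, roughly what the reader ends up doing with the client HttpUtil hands it; the URL, header, and timeout values below are placeholders:

```java
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

public class HttpGetDemo {
    public static void main(String[] args) throws Exception {
        // The same three timeouts HttpUtil configures (values here are illustrative).
        RequestConfig config = RequestConfig.custom()
                .setConnectTimeout(5000)
                .setConnectionRequestTimeout(5000)
                .setSocketTimeout(5000)
                .build();

        try (CloseableHttpClient client = HttpClients.custom()
                .setDefaultRequestConfig(config)
                .build()) {
            HttpGet get = new HttpGet("http://localhost:8080/ping"); // placeholder endpoint
            get.addHeader("Accept", "application/json");

            try (CloseableHttpResponse response = client.execute(get)) {
                System.out.println(response.getStatusLine().getStatusCode());
                System.out.println(EntityUtils.toString(response.getEntity()));
            }
        }
    }
}
```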
b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyHttpRequestRetryHandler.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.restapi.common; + +import org.apache.http.HttpEntityEnclosingRequest; +import org.apache.http.HttpRequest; +import org.apache.http.NoHttpResponseException; +import org.apache.http.client.HttpRequestRetryHandler; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.protocol.HttpContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.SSLException; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.UnknownHostException; + +/** + * @author : tiezhu + * @date : 2020/3/12 + */ +public class MyHttpRequestRetryHandler implements HttpRequestRetryHandler { + protected static final Logger LOG = LoggerFactory.getLogger(MyHttpRequestRetryHandler.class); + + private int executionMaxCount; + + public MyHttpRequestRetryHandler(Builder builder) { + this.executionMaxCount = builder.executionMaxCount; + } + + @Override + public boolean retryRequest(IOException exception, int executionCount, HttpContext context) { + LOG.info("第" + executionCount + "次重试"); + + if (executionCount >= this.executionMaxCount) { + // Do not retry if over max retry count + return false; + } + if (exception instanceof InterruptedIOException) { + // Timeout + return true; + } + if (exception instanceof UnknownHostException) { + // Unknown host + return true; + } + if (exception instanceof SSLException) { + // SSL handshake exception + return true; + } + if (exception instanceof NoHttpResponseException) { + // No response + return true; + } + + HttpClientContext clientContext = HttpClientContext.adapt(context); + HttpRequest request = clientContext.getRequest(); + boolean idempotent = !(request instanceof HttpEntityEnclosingRequest); + // Retry if the request is considered idempotent + return !idempotent; + } + + + public static final class Builder { + private int executionMaxCount; + + public Builder() { + executionMaxCount = 5; + } + + public Builder executionCount(int executionCount) { + this.executionMaxCount = executionCount; + return this; + } + + public MyHttpRequestRetryHandler build() { + return new MyHttpRequestRetryHandler(this); + } + } +} diff --git a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyServiceUnavailableRetryStrategy.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyServiceUnavailableRetryStrategy.java new file mode 100644 index 0000000000..c215926588 --- /dev/null +++ 
b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/MyServiceUnavailableRetryStrategy.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.restapi.common; + +import org.apache.http.HttpResponse; +import org.apache.http.client.ServiceUnavailableRetryStrategy; +import org.apache.http.protocol.HttpContext; + +/** + * @author : tiezhu + * @date : 2020/3/12 + * 自定义httpClient重试策略,默认重试次数为5,重试时间间隔为2s + */ +public class MyServiceUnavailableRetryStrategy implements ServiceUnavailableRetryStrategy { + private int executionCount; + private long retryInterval; + + public MyServiceUnavailableRetryStrategy(Builder builder) { + this.executionCount = builder.executionCount; + this.retryInterval = builder.retryInterval; + } + + @Override + public boolean retryRequest(HttpResponse httpResponse, int executionCount, HttpContext httpContext) { + int successCode = 200; + return httpResponse.getStatusLine().getStatusCode() != successCode + && executionCount < this.executionCount; + } + + @Override + public long getRetryInterval() { + return this.retryInterval; + } + + public static final class Builder { + private int executionCount; + private long retryInterval; + + public Builder() { + executionCount = 5; + retryInterval = 2000; + } + + public Builder executionCount(int executionCount) { + this.executionCount = executionCount; + return this; + } + + public Builder retryInterval(long retryInterval) { + this.retryInterval = retryInterval; + return this; + } + + public MyServiceUnavailableRetryStrategy build() { + return new MyServiceUnavailableRetryStrategy(this); + } + } +} diff --git a/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/RestapiKeys.java b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/RestapiKeys.java new file mode 100644 index 0000000000..8821e3777c --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-core/src/main/java/com/dtstack/flinkx/restapi/common/RestapiKeys.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
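The two classes above plug into different layers of HttpClient: MyHttpRequestRetryHandler decides whether to resend after an IOException (the server was never reached or the connection died), while MyServiceUnavailableRetryStrategy decides whether to resend after a response that is not 200, pausing getRetryInterval() between attempts (2 s by default). A sketch of how equivalent inline hooks are wired into a client; the conditions are simplified, since the patch's handler also checks idempotency and a few more exception types:

```java
import java.io.IOException;
import java.io.InterruptedIOException;

import org.apache.http.HttpResponse;
import org.apache.http.NoHttpResponseException;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.ServiceUnavailableRetryStrategy;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.protocol.HttpContext;

public class RetryWiringDemo {

    public static CloseableHttpClient build(int maxAttempts, long intervalMillis) {
        // Transport-level hook: called only when execute() throws an IOException.
        HttpRequestRetryHandler ioRetry = (IOException e, int count, HttpContext ctx) ->
                count < maxAttempts
                        && (e instanceof NoHttpResponseException || e instanceof InterruptedIOException);

        // Response-level hook: called when the server answered, but not with HTTP 200.
        ServiceUnavailableRetryStrategy statusRetry = new ServiceUnavailableRetryStrategy() {
            @Override
            public boolean retryRequest(HttpResponse response, int count, HttpContext ctx) {
                return response.getStatusLine().getStatusCode() != 200 && count < maxAttempts;
            }

            @Override
            public long getRetryInterval() {
                return intervalMillis; // fixed pause between response-level attempts
            }
        };

        return HttpClientBuilder.create()
                .setRetryHandler(ioRetry)
                .setServiceUnavailableRetryStrategy(statusRetry)
                .build();
    }
}
```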
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.restapi.common; + +/** + * @author : tiezhu + * @date : 2020/3/19 + */ +public class RestapiKeys { + public static final String KEY_METHOD = "method"; + public static final String KEY_HEADER = "header"; + public static final String KEY_BODY = "body"; + public static final String KEY_PARAMS = "params"; + public static final String KEY_COLUMN = "column"; + public static final String KEY_URL = "url"; + public static final String KEY_BATCH_INTERVAL = "batchInterval"; +} diff --git a/flinkx-restapi/flinkx-restapi-reader/pom.xml b/flinkx-restapi/flinkx-restapi-reader/pom.xml new file mode 100644 index 0000000000..8e27c23493 --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-reader/pom.xml @@ -0,0 +1,99 @@ + + + + flinkx-restapi + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-restapi-reader + + + + com.dtstack.flinkx + flinkx-restapi-core + 1.6 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.1.0 + + + package + + shade + + + false + + + org.slf4j:slf4j-api + log4j:log4j + ch.qos.logback:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + io.netty + shade.restapireader.io.netty + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormat.java b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormat.java new file mode 100644 index 0000000000..59c6b535de --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormat.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dtstack.flinkx.restapi.inputformat; + +import com.dtstack.flinkx.inputformat.BaseRichInputFormat; +import com.dtstack.flinkx.restapi.common.HttpUtil; +import com.dtstack.flinkx.util.GsonUtil; +import org.apache.flink.core.io.GenericInputSplit; +import org.apache.flink.core.io.InputSplit; +import org.apache.flink.types.Row; +import org.apache.http.HttpEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.util.EntityUtils; + +import java.io.IOException; +import java.util.Map; + +/** + * @author : tiezhu + * @date : 2020/3/12 + */ +public class RestapiInputFormat extends BaseRichInputFormat { + + protected String url; + + protected String method; + + protected transient CloseableHttpClient httpClient; + + protected Map header; + + protected Map entityDataToMap; + + protected boolean getData; + + @Override + public void openInputFormat() throws IOException { + super.openInputFormat(); + httpClient = HttpUtil.getHttpClient(); + } + + @Override + public void closeInputFormat() { + HttpUtil.closeClient(httpClient); + } + + + @Override + @SuppressWarnings("unchecked") + protected void openInternal(InputSplit inputSplit) throws IOException { + HttpUriRequest request = HttpUtil.getRequest(method, header,null, url); + try { + CloseableHttpResponse httpResponse = httpClient.execute(request); + HttpEntity entity = httpResponse.getEntity(); + if (entity != null) { + String entityData = EntityUtils.toString(entity); + entityDataToMap = GsonUtil.GSON.fromJson(entityData, Map.class); + getData = true; + } else { + throw new RuntimeException("entity is null"); + } + } catch (Exception e) { + throw new RuntimeException("get entity error"); + } + } + + @Override + protected InputSplit[] createInputSplitsInternal(int minNumSplits) throws Exception { + InputSplit[] inputSplits = new InputSplit[minNumSplits]; + for (int i = 0; i < minNumSplits; i++) { + inputSplits[i] = new GenericInputSplit(i, minNumSplits); + } + return inputSplits; + } + + @Override + protected Row nextRecordInternal(Row row) throws IOException { + row = new Row(1); + row.setField(0, entityDataToMap); + getData = false; + return row; + } + + @Override + protected void closeInternal() throws IOException { + } + + @Override + public boolean reachedEnd() throws IOException { + return !getData; + } +} diff --git a/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormatBuilder.java b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormatBuilder.java new file mode 100644 index 0000000000..ac636b4038 --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/inputformat/RestapiInputFormatBuilder.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
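RestapiInputFormat above does one request per split: openInternal executes the call, Gson turns the response body into a Map, nextRecordInternal ships that whole map as a single-field Row, and reachedEnd flips as soon as the flag is cleared. A compressed sketch of that one-request-one-record flow (the JSON payload is made up; assumes gson and flink-core on the classpath):

```java
import java.util.Map;

import com.google.gson.Gson;
import org.apache.flink.types.Row;

public class SingleShotReadDemo {
    public static void main(String[] args) {
        // Stand-in for EntityUtils.toString(entity) on the HTTP response.
        String entityData = "{\"code\":0,\"data\":{\"id\":1,\"name\":\"foo\"}}";

        @SuppressWarnings("unchecked")
        Map<String, Object> parsed = new Gson().fromJson(entityData, Map.class);

        boolean getData = true;              // set after a successful request
        while (getData) {                    // reachedEnd() is simply !getData
            Row row = new Row(1);            // the whole response map travels as one field
            row.setField(0, parsed);
            System.out.println(row);
            getData = false;                 // one response == one record per split
        }
    }
}
```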
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.restapi.inputformat; + +import com.dtstack.flinkx.inputformat.BaseRichInputFormatBuilder; + +import java.util.Map; + +/** + * @author : tiezhu + * @date : 2020/3/12 + */ +public class RestapiInputFormatBuilder extends BaseRichInputFormatBuilder { + protected RestapiInputFormat format; + + public RestapiInputFormatBuilder(){ super.format = format = new RestapiInputFormat();} + + public void setUrl(String url){this.format.url = url;} + public void setHeader(Map header){ this.format.header = header;} + public void setMethod(String method){ this.format.method = method;} + + @Override + protected void checkFormat() { + if(format.url.isEmpty()){ + throw new IllegalArgumentException("缺少url"); + } + if (format.method.isEmpty()) { + throw new IllegalArgumentException("缺少method"); + } + } +} diff --git a/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/reader/RestapiReader.java b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/reader/RestapiReader.java new file mode 100644 index 0000000000..162de36054 --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-reader/src/main/java/com/dtstack/flinkx/restapi/reader/RestapiReader.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dtstack.flinkx.restapi.reader; + +import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.config.ReaderConfig; +import com.dtstack.flinkx.reader.BaseDataReader; +import com.dtstack.flinkx.restapi.inputformat.RestapiInputFormatBuilder; +import com.google.common.collect.Maps; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.types.Row; + +import java.util.ArrayList; +import java.util.Map; + +/** + * @author : tiezhu + * @date : 2020/3/12 + */ +public class RestapiReader extends BaseDataReader { + + private String url; + + private String method; + + private Map header = Maps.newHashMap(); + + private ArrayList> temp; + + @SuppressWarnings("unchecked") + public RestapiReader(DataTransferConfig config, StreamExecutionEnvironment env) { + super(config, env); + ReaderConfig readerConfig = config.getJob().getContent().get(0).getReader(); + + url = readerConfig.getParameter().getStringVal("url"); + method = readerConfig.getParameter().getStringVal("method"); + temp = (ArrayList>) readerConfig.getParameter().getVal("header"); + if (temp != null) { + for (Map map : temp) { + header.putAll(map); + } + } + } + + @Override + public DataStream readData() { + RestapiInputFormatBuilder builder = new RestapiInputFormatBuilder(); + + builder.setHeader(header); + builder.setMethod(method); + builder.setUrl(url); + + return createInput(builder.finish()); + } +} diff --git a/flinkx-restapi/flinkx-restapi-writer/pom.xml b/flinkx-restapi/flinkx-restapi-writer/pom.xml new file mode 100644 index 0000000000..00a801c2b1 --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-writer/pom.xml @@ -0,0 +1,99 @@ + + + + flinkx-restapi + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-restapi-writer + + + + com.dtstack.flinkx + flinkx-restapi-core + 1.6 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.1.0 + + + package + + shade + + + false + + + org.slf4j:slf4j-api + log4j:log4j + ch.qos.logback:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + io.netty + shade.restapiwriter.io.netty + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormat.java b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormat.java new file mode 100644 index 0000000000..ce96f57a74 --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormat.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
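In RestapiReader above, the job's "header" parameter arrives as a list of small maps and is flattened into one map before requests are built. A tiny sketch of that merge (header names and values are placeholders):

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class HeaderMergeDemo {
    public static void main(String[] args) {
        // In the job json this is a list of one-entry maps, e.g.
        // "header": [{"Content-Type": "application/json"}, {"Authorization": "..."}]
        List<Map<String, String>> raw = new ArrayList<>();
        Map<String, String> h1 = new HashMap<>();
        h1.put("Content-Type", "application/json");
        Map<String, String> h2 = new HashMap<>();
        h2.put("Authorization", "Bearer <token>"); // placeholder value
        raw.add(h1);
        raw.add(h2);

        // Flatten into one map, mirroring the putAll loop the reader and writer run.
        Map<String, String> header = new HashMap<>();
        for (Map<String, String> entry : raw) {
            header.putAll(entry);
        }
        System.out.println(header.size()); // 2 -- both headers end up in one map
    }
}
```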
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flinkx.restapi.outputformat; + +import com.dtstack.flinkx.exception.WriteRecordException; +import com.dtstack.flinkx.outputformat.BaseRichOutputFormat; +import com.dtstack.flinkx.restapi.common.HttpUtil; +import com.dtstack.flinkx.util.GsonUtil; +import com.google.common.collect.Maps; +import org.apache.flink.types.Row; +import org.apache.http.HttpStatus; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpRequestBase; +import org.apache.http.impl.client.CloseableHttpClient; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * @author : tiezhu + * @date : 2020/3/12 + * 当前只考虑了元数据读取,和带有字段名column读取的情况,其他情况暂未考虑 + */ +public class RestapiOutputFormat extends BaseRichOutputFormat { + + protected String url; + + protected String method; + + protected ArrayList column; + + protected Map params; + + protected Map body; + + protected Map header; + + @Override + protected void openInternal(int taskNumber, int numTasks) throws IOException { + // Nothing to do + } + + @Override + protected void writeSingleRecordInternal(Row row) throws WriteRecordException { + LOG.info("start write single record"); + CloseableHttpClient httpClient = HttpUtil.getHttpClient(); + int index = 0; + Map requestBody = Maps.newHashMap(); + Object dataRow; + try { + dataRow = getDataFromRow(row, column); + if (!params.isEmpty()) { + Iterator iterator = params.entrySet().iterator(); + while (iterator.hasNext()) { + Map.Entry entry = (Map.Entry) iterator.next(); + body.put((String) entry.getKey(), entry.getValue()); + } + } + body.put("data", dataRow); + requestBody.put("json", body); + LOG.debug("当前发送的数据为:{}", GsonUtil.GSON.toJson(requestBody)); + sendRequest(httpClient, requestBody, method, header, url); + } catch (Exception e) { + requestErrorMessage(e, index, row); + } finally { + // 最后不管发送是否成功,都要关闭client + HttpUtil.closeClient(httpClient); + } + } + + @Override + protected void writeMultipleRecordsInternal() throws Exception { + LOG.info("start write multiple records"); + try { + CloseableHttpClient httpClient = HttpUtil.getHttpClient(); + List dataRow = new ArrayList<>(); + Map requestBody = Maps.newHashMap(); + for (Row row : rows) { + dataRow.add(getDataFromRow(row, column)); + } + if (!params.isEmpty()) { + Iterator iterator = params.entrySet().iterator(); + while (iterator.hasNext()) { + Map.Entry entry = (Map.Entry) iterator.next(); + body.put((String) entry.getKey(), entry.getValue()); + } + } + body.put("data", dataRow); + requestBody.put("json", body); + LOG.debug("当前发送的数据为:{}", GsonUtil.GSON.toJson(requestBody)); + sendRequest(httpClient, requestBody, method, header, url); + } catch (Exception e) { + LOG.warn("write record error !", e); + } + } + + private void requestErrorMessage(Exception e, int index, Row row) { + if (index < row.getArity()) { + recordConvertDetailErrorMessage(index, row); + LOG.warn("添加脏数据:" + row.getField(index)); + } + } + + private Object getDataFromRow(Row row, List column) throws IOException { + Map 
columnData = Maps.newHashMap(); + int index = 0; + if (!column.isEmpty()) { + // 如果column不为空,那么将数据和字段名一一对应 + for (; index < row.getArity(); index++) { + columnData.put(column.get(index), row.getField(index)); + } + return GsonUtil.GSON.toJson(columnData); + } else { + return row.getField(index); + } + } + + + private void sendRequest(CloseableHttpClient httpClient, + Map requestBody, + String method, + Map header, + String url) throws IOException { + LOG.debug("当前发送的数据为:{}", GsonUtil.GSON.toJson(requestBody)); + HttpRequestBase request = HttpUtil.getRequest(method, requestBody, header, url); + CloseableHttpResponse httpResponse = httpClient.execute(request); + // 重试之后返回状态码不为200 + if (httpResponse.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { + LOG.warn("重试之后当前请求状态码为" + httpResponse.getStatusLine().getStatusCode()); + } + } +} diff --git a/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormatBuilder.java b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormatBuilder.java new file mode 100644 index 0000000000..3ab4751ea0 --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/outputformat/RestapiOutputFormatBuilder.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
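The writer above builds its payload in two steps: getDataFromRow zips the configured column names with the row's fields, and the write methods wrap the result under "data" and then "json" before sending. A standalone sketch of the resulting shape (column names and values are made up; the real getDataFromRow serializes the column/value map to a JSON string first, which this sketch skips for readability):

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import com.google.gson.Gson;
import org.apache.flink.types.Row;

public class RequestBodyDemo {
    public static void main(String[] args) {
        List<String> column = Arrays.asList("id", "name");

        Row row = new Row(2);
        row.setField(0, 1);
        row.setField(1, "foo");

        // Zip column names with row fields when a column list is configured.
        Map<String, Object> data = new LinkedHashMap<>();
        for (int i = 0; i < row.getArity(); i++) {
            data.put(column.get(i), row.getField(i));
        }

        // Wrap as body.data, then requestBody.json, the same nesting the write paths use.
        Map<String, Object> body = new HashMap<>();
        body.put("data", data);
        Map<String, Object> requestBody = new HashMap<>();
        requestBody.put("json", body);

        System.out.println(new Gson().toJson(requestBody));
        // {"json":{"data":{"id":1,"name":"foo"}}}
    }
}
```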
+ */ +package com.dtstack.flinkx.restapi.outputformat; + +import com.dtstack.flinkx.outputformat.BaseRichOutputFormatBuilder; + +import java.util.ArrayList; +import java.util.Map; + +/** + * @author : tiezhu + * @date : 2020/3/12 + */ +public class RestapiOutputFormatBuilder extends BaseRichOutputFormatBuilder { + + private RestapiOutputFormat format; + + public RestapiOutputFormatBuilder() { + super.format = format = new RestapiOutputFormat(); + } + + public void setUrl(String url) { + this.format.url = url; + } + + public void setHeader(Map header) { + this.format.header = header; + } + + public void setMethod(String method) { + this.format.method = method; + } + + public void setBody(Map body) { + this.format.body = body; + } + + public void setColumn(ArrayList column) { + format.column = column; + } + + public void setParams(Map params){ + format.params = params; + } + + + @Override + protected void checkFormat() { + if (format.url.isEmpty()) { + throw new IllegalArgumentException("缺少url"); + } + if (format.method.isEmpty()) { + throw new IllegalArgumentException("缺少method"); + } + } +} diff --git a/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/writer/RestapiWriter.java b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/writer/RestapiWriter.java new file mode 100644 index 0000000000..f050abb1b9 --- /dev/null +++ b/flinkx-restapi/flinkx-restapi-writer/src/main/java/com/dtstack/flinkx/restapi/writer/RestapiWriter.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dtstack.flinkx.restapi.writer; + +import com.dtstack.flinkx.config.DataTransferConfig; +import com.dtstack.flinkx.config.WriterConfig; +import com.dtstack.flinkx.restapi.common.RestapiKeys; +import com.dtstack.flinkx.restapi.outputformat.RestapiOutputFormatBuilder; +import com.dtstack.flinkx.writer.BaseDataWriter; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.datastream.DataStreamSink; +import org.apache.flink.types.Row; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +/** + * @author : tiezhu + * @date : 2020/3/12 + */ +public class RestapiWriter extends BaseDataWriter { + + protected String url; + + protected String method; + + protected Map header = Maps.newHashMap(); + + protected Map body =Maps.newHashMap(); + + protected ArrayList column = Lists.newArrayList(); + + protected Map params = Maps.newHashMap(); + + protected int batchInterval; + + @SuppressWarnings("unchecked") + public RestapiWriter(DataTransferConfig config) { + super(config); + Object tempObj; + + WriterConfig writerConfig = config.getJob().getContent().get(0).getWriter(); + + url = writerConfig.getParameter().getStringVal(RestapiKeys.KEY_URL); + method = writerConfig.getParameter().getStringVal(RestapiKeys.KEY_METHOD); + batchInterval = writerConfig.getParameter().getIntVal(RestapiKeys.KEY_BATCH_INTERVAL, 1); + tempObj = writerConfig.getParameter().getVal(RestapiKeys.KEY_COLUMN); + if (tempObj != null) { + column.addAll((ArrayList) tempObj); + } + + tempObj = writerConfig.getParameter().getVal(RestapiKeys.KEY_HEADER); + if (tempObj != null) { + for (Map map : (ArrayList>) tempObj) { + header.putAll(map); + } + } + + tempObj = writerConfig.getParameter().getVal(RestapiKeys.KEY_BODY); + if (tempObj != null) { + for (Map map : (ArrayList>) tempObj) { + body.putAll(map); + } + } + tempObj = writerConfig.getParameter().getVal(RestapiKeys.KEY_PARAMS); + if (tempObj != null) { + params = (HashMap) tempObj; + } + } + + @Override + public DataStreamSink writeData(DataStream dataSet) { + RestapiOutputFormatBuilder builder = new RestapiOutputFormatBuilder(); + + builder.setHeader(header); + builder.setMethod(method); + builder.setUrl(url); + builder.setBody(body); + builder.setColumn(column); + builder.setParams(params); + builder.setBatchInterval(batchInterval); + + return createOutput(dataSet, builder.finish()); + } +} diff --git a/flinkx-restapi/pom.xml b/flinkx-restapi/pom.xml new file mode 100644 index 0000000000..876877c87a --- /dev/null +++ b/flinkx-restapi/pom.xml @@ -0,0 +1,28 @@ + + + + flinkx-all + com.dtstack.flinkx + 1.6 + + 4.0.0 + + flinkx-restapi + pom + + flinkx-restapi-core + flinkx-restapi-writer + flinkx-restapi-reader + + + + + com.dtstack.flinkx + flinkx-core + 1.6 + provided + + + \ No newline at end of file diff --git a/flinkx-saphana/flinkx-saphana-core/pom.xml b/flinkx-saphana/flinkx-saphana-core/pom.xml index bed09b714f..5ac7f520b5 100644 --- a/flinkx-saphana/flinkx-saphana-core/pom.xml +++ b/flinkx-saphana/flinkx-saphana-core/pom.xml @@ -1,6 +1,6 @@ - flinkx-saphana diff --git a/flinkx-saphana/flinkx-saphana-reader/pom.xml b/flinkx-saphana/flinkx-saphana-reader/pom.xml index ba9e5c4375..8b170aecf7 100644 --- a/flinkx-saphana/flinkx-saphana-reader/pom.xml +++ b/flinkx-saphana/flinkx-saphana-reader/pom.xml @@ -1,6 +1,6 @@ - flinkx-saphana diff --git a/flinkx-saphana/flinkx-saphana-writer/pom.xml 
b/flinkx-saphana/flinkx-saphana-writer/pom.xml index 8c19fad040..233c1fc5ae 100644 --- a/flinkx-saphana/flinkx-saphana-writer/pom.xml +++ b/flinkx-saphana/flinkx-saphana-writer/pom.xml @@ -1,6 +1,6 @@ - flinkx-saphana diff --git a/flinkx-saphana/pom.xml b/flinkx-saphana/pom.xml index a9ecb0501e..e36168c903 100644 --- a/flinkx-saphana/pom.xml +++ b/flinkx-saphana/pom.xml @@ -1,6 +1,6 @@ - flinkx-all diff --git a/flinkx-sqlserver/flinkx-sqlserver-reader/pom.xml b/flinkx-sqlserver/flinkx-sqlserver-reader/pom.xml index 7300b9251f..1cd8f983b1 100644 --- a/flinkx-sqlserver/flinkx-sqlserver-reader/pom.xml +++ b/flinkx-sqlserver/flinkx-sqlserver-reader/pom.xml @@ -62,8 +62,12 @@ shade.sqlserverreader.io.netty - com.google - shade.sqlserverreader.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-sqlserver/flinkx-sqlserver-writer/pom.xml b/flinkx-sqlserver/flinkx-sqlserver-writer/pom.xml index e4f6ed9b36..f9d45aeb84 100644 --- a/flinkx-sqlserver/flinkx-sqlserver-writer/pom.xml +++ b/flinkx-sqlserver/flinkx-sqlserver-writer/pom.xml @@ -63,8 +63,12 @@ shade.sqlserverwriter.io.netty - com.google - shade.sqlserverwriter.com.google + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty diff --git a/flinkx-stream/flinkx-stream-reader/pom.xml b/flinkx-stream/flinkx-stream-reader/pom.xml index d2dfa853cc..3586e32383 100644 --- a/flinkx-stream/flinkx-stream-reader/pom.xml +++ b/flinkx-stream/flinkx-stream-reader/pom.xml @@ -49,6 +49,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-stream/flinkx-stream-writer/pom.xml b/flinkx-stream/flinkx-stream-writer/pom.xml index f106065763..8466152676 100644 --- a/flinkx-stream/flinkx-stream-writer/pom.xml +++ b/flinkx-stream/flinkx-stream-writer/pom.xml @@ -45,6 +45,16 @@ + + + com.google.common + shade.core.com.google.common + + + com.google.thirdparty + shade.core.com.google.thirdparty + + diff --git a/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamOutputFormat.java b/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamOutputFormat.java index da9a167b59..ac578d2c86 100644 --- a/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamOutputFormat.java +++ b/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamOutputFormat.java @@ -48,7 +48,12 @@ protected void openInternal(int taskNumber, int numTasks) throws IOException { @Override protected void writeSingleRecordInternal(Row row) throws WriteRecordException { if (print) { - LOG.info("subTaskIndex[{}]:{}", taskNumber, row); + LOG.info("subTaskIndex[{}]:{}", taskNumber, rowToStringWithDelimiter(row, writeDelimiter)); + } + + if (restoreConfig.isRestore()) { + formatState.setState(row.getField(restoreConfig.getRestoreColumnIndex())); + LOG.info("print data subTaskIndex[{}]:{}", taskNumber, rowToStringWithDelimiter(row, writeDelimiter)); } } @@ -56,8 +61,19 @@ protected void writeSingleRecordInternal(Row row) throws WriteRecordException { protected void writeMultipleRecordsInternal() throws Exception { if (print) { for (Row row : rows) { - LOG.info(String.valueOf(row)); + LOG.info(rowToStringWithDelimiter(row, writeDelimiter)); + } + } + } + + public String rowToStringWithDelimiter(Row row, String writeDelimiter) { + StringBuilder 
sb = new StringBuilder(); + for (int i = 0; i < row.getArity(); i++) { + if (i > 0) { + sb.append(writeDelimiter); } + sb.append(StringUtils.arrayAwareToString(row.getField(i))); } + return sb.toString(); } } diff --git a/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamWriter.java b/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamWriter.java index 246a4eb8bc..05344b6494 100644 --- a/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamWriter.java +++ b/flinkx-stream/flinkx-stream-writer/src/main/java/com/dtstack/flinkx/stream/writer/StreamWriter.java @@ -46,7 +46,7 @@ public StreamWriter(DataTransferConfig config) { super(config); print = config.getJob().getContent().get(0).getWriter().getParameter().getBooleanVal("print",false); writeDelimiter = config.getJob().getContent().get(0).getWriter().getParameter().getStringVal("writeDelimiter", "|"); - batchInterval = config.getJob().getContent().get(0).getWriter().getParameter().getIntVal("batchInterval", 20); + batchInterval = config.getJob().getContent().get(0).getWriter().getParameter().getIntVal("batchInterval", 1); List column = config.getJob().getContent().get(0).getWriter().getParameter().getColumn(); metaColumns = MetaColumn.getMetaColumns(column); diff --git a/flinkx-teradata/flinkx-teradata-core/src/main/java/com/dtstack/flinkx/teradata/util/DBUtil.java b/flinkx-teradata/flinkx-teradata-core/src/main/java/com/dtstack/flinkx/teradata/util/DBUtil.java index 3b42c064c3..1c84adcfa4 100644 --- a/flinkx-teradata/flinkx-teradata-core/src/main/java/com/dtstack/flinkx/teradata/util/DBUtil.java +++ b/flinkx-teradata/flinkx-teradata-core/src/main/java/com/dtstack/flinkx/teradata/util/DBUtil.java @@ -1,15 +1,10 @@ package com.dtstack.flinkx.teradata.util; -import com.dtstack.flinkx.rdb.DatabaseInterface; -import com.dtstack.flinkx.rdb.util.DbUtil; -import com.dtstack.flinkx.reader.MetaColumn; import com.dtstack.flinkx.util.ClassUtil; -import java.sql.*; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; /** * @author wuhui @@ -37,38 +32,4 @@ public static Connection getConnection(String url, String username, String passw return dbConn; } - - /** - * 获取表列名类型列表 - * @param dbURL jdbc url - * @param username 数据库账号 - * @param password 数据库密码 - * @param databaseInterface DatabaseInterface - * @param table 表名 - * @param sql sql - * @return - */ - public static List analyzeTable(String dbURL, String username, String password, DatabaseInterface databaseInterface, - String table, String sql) { - List descColumnTypeList = new ArrayList<>(); - Connection dbConn = null; - Statement stmt = null; - ResultSet rs = null; - try { - dbConn = getConnection(dbURL, username, password); - stmt = dbConn.createStatement(); - rs = stmt.executeQuery(databaseInterface.getSqlQuerySqlFields(sql)); - ResultSetMetaData rd = rs.getMetaData(); - - for (int i = 1; i <= rd.getColumnCount(); i++) { - descColumnTypeList.add(rd.getColumnTypeName(i)); - } - } catch (SQLException e) { - throw new RuntimeException(e); - } finally { - DbUtil.closeDbResources(rs, stmt, dbConn, false); - } - - return descColumnTypeList; - } } diff --git a/flinkx-test/pom.xml b/flinkx-test/pom.xml index 0d36c0ef17..b37ab81b88 100644 --- a/flinkx-test/pom.xml +++ b/flinkx-test/pom.xml @@ -16,351 +16,432 @@ + ch.qos.logback logback-classic 1.1.7 + 
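rowToStringWithDelimiter above makes the stream writer's log output readable: fields are joined with the configured writeDelimiter, and arrayAwareToString prints array-typed fields by content rather than by identity. A standalone sketch of the same join (assumes Flink's org.apache.flink.util.StringUtils is the StringUtils in scope, as the arrayAwareToString call suggests):

```java
import org.apache.flink.types.Row;
import org.apache.flink.util.StringUtils;

public class RowPrintDemo {

    static String rowToString(Row row, String delimiter) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < row.getArity(); i++) {
            if (i > 0) {
                sb.append(delimiter); // delimiter only between fields, not before the first
            }
            sb.append(StringUtils.arrayAwareToString(row.getField(i)));
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        Row row = new Row(3);
        row.setField(0, 42);
        row.setField(1, "foo");
        row.setField(2, new int[]{1, 2, 3});

        System.out.println(rowToString(row, "|")); // 42|foo|[1, 2, 3]
    }
}
```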
diff --git a/flinkx-test/pom.xml b/flinkx-test/pom.xml
index 0d36c0ef17..b37ab81b88 100644
--- a/flinkx-test/pom.xml
+++ b/flinkx-test/pom.xml
@@ -16,351 +16,432 @@
+        ch.qos.logback logback-classic 1.1.7
+        com.google.guava guava 19.0
+        com.google.code.gson gson 2.7
+        org.apache.hadoop hadoop-mapreduce-client-core ${hadoop.version}
+        org.apache.hadoop hadoop-common ${hadoop.version}
+        org.apache.hadoop hadoop-hdfs ${hadoop.version}
-        org.apache.flink flink-metrics-core ${flink.version}
+        org.apache.flink flink-metrics-core 1.8.1
         org.apache.flink flink-metrics-prometheus_2.11 ${flink.version}
         io.prometheus simpleclient ${prometheus.version}
         io.prometheus simpleclient_httpserver ${prometheus.version}
         io.prometheus simpleclient_pushgateway ${prometheus.version}
-        com.dtstack.flinkx flinkx-core ${flinkx.version}
+        com.dtstack.flinkx flinkx-core 1.6
-        com.dtstack.flinkx flinkx-stream-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-stream-reader 1.6
-        com.dtstack.flinkx flinkx-stream-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-stream-writer 1.6
-        com.dtstack.flinkx flinkx-carbondata-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-carbondata-reader 1.6
-        com.dtstack.flinkx flinkx-carbondata-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-carbondata-writer 1.6
-        com.dtstack.flinkx flinkx-db2-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-db2-reader 1.6
-        com.dtstack.flinkx flinkx-db2-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-db2-writer 1.6
-        com.dtstack.flinkx flinkx-es-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-es-reader 1.6
-        com.dtstack.flinkx flinkx-es-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-es-writer 1.6
-        com.dtstack.flinkx flinkx-ftp-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-ftp-reader 1.6
-        com.dtstack.flinkx flinkx-ftp-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-ftp-writer 1.6
-        com.dtstack.flinkx flinkx-hbase-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-hbase-reader 1.6
-        com.dtstack.flinkx flinkx-hbase-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-hbase-writer 1.6
-        com.dtstack.flinkx flinkx-hdfs-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-hdfs-reader 1.6
-        com.dtstack.flinkx flinkx-hdfs-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-hdfs-writer 1.6
-        com.dtstack.flinkx flinkx-mongodb-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-mongodb-reader 1.6
-        com.dtstack.flinkx flinkx-mongodb-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-mongodb-writer 1.6
-        com.dtstack.flinkx flinkx-mysql-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-mysql-reader 1.6
-        com.dtstack.flinkx flinkx-mysql-dreader ${flinkx.version}
+        com.dtstack.flinkx flinkx-mysql-dreader 1.6
-        com.dtstack.flinkx flinkx-mysql-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-mysql-writer 1.6
-        com.dtstack.flinkx flinkx-odps-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-odps-reader 1.6
-        com.dtstack.flinkx flinkx-odps-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-odps-writer 1.6
-        com.dtstack.flinkx flinkx-oracle-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-oracle-reader 1.6
-        com.dtstack.flinkx flinkx-oracle-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-oracle-writer 1.6
-        com.dtstack.flinkx flinkx-postgresql-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-postgresql-reader 1.6
-        com.dtstack.flinkx flinkx-postgresql-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-postgresql-writer 1.6
-        com.dtstack.flinkx flinkx-sqlserver-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-sqlserver-reader 1.6
-        com.dtstack.flinkx flinkx-sqlserver-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-sqlserver-writer 1.6
-        com.dtstack.flinkx flinkx-redis-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-redis-writer 1.6
-        com.dtstack.flinkx flinkx-rdb-core ${flinkx.version}
+        com.dtstack.flinkx flinkx-rdb-core 1.6
-        com.dtstack.flinkx flinkx-rdb-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-rdb-reader 1.6
-        com.dtstack.flinkx flinkx-rdb-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-rdb-writer 1.6
-        com.dtstack.flinkx flinkx-gbase-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-gbase-reader 1.6
-        com.dtstack.flinkx flinkx-gbase-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-gbase-writer 1.6
-        com.dtstack.flinkx flinkx-binlog-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-binlog-reader 1.6
-        com.dtstack.flinkx flinkx-hive-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-hive-writer 1.6
         org.apache.hive hive-serde 2.1.0
-        com.dtstack.flinkx flinkx-kafka11-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-kafka11-reader 1.6
-        com.dtstack.flinkx flinkx-kafka11-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-kafka11-writer 1.6
-        com.dtstack.flinkx flinkx-kafka10-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-kafka10-reader 1.6
-        com.dtstack.flinkx flinkx-kafka10-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-kafka10-writer 1.6
-        com.dtstack.flinkx flinkx-kafka09-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-kafka09-reader 1.6
-        com.dtstack.flinkx flinkx-kafka09-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-kafka09-writer 1.6
-        com.dtstack.flinkx flinkx-kudu-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-kudu-reader 1.6
-        com.dtstack.flinkx flinkx-kudu-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-kudu-writer 1.6
-        com.dtstack.flinkx flinkx-kafka-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-kafka-reader 1.6
-        com.dtstack.flinkx flinkx-kafka-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-kafka-writer 1.6
-        com.dtstack.flinkx flinkx-clickhouse-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-clickhouse-reader 1.6
-        com.dtstack.flinkx flinkx-clickhouse-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-clickhouse-writer 1.6
-        com.dtstack.flinkx flinkx-polardb-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-polardb-reader 1.6
-        com.dtstack.flinkx flinkx-polardb-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-polardb-writer 1.6
-        com.dtstack.flinkx flinkx-phoenix-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-phoenix-reader 1.6
-        com.dtstack.flinkx flinkx-phoenix-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-phoenix-writer 1.6
+        org.apache.hadoop hadoop-hdfs ${hadoop.version} test-jar
+        org.apache.hadoop hadoop-common ${hadoop.version} test-jar
+        org.apache.derby derby 10.14.2.0 test
-        com.dtstack.flinkx flinkx-emqx-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-emqx-reader 1.6
-        com.dtstack.flinkx flinkx-emqx-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-emqx-writer 1.6
+        com.dtstack.flinkx flinkx-restapi-writer 1.6
-        com.dtstack.flinkx flinkx-dm-reader ${flinkx.version}
+        com.dtstack.flinkx flinkx-dm-reader 1.6
-        com.dtstack.flinkx flinkx-dm-writer ${flinkx.version}
+        com.dtstack.flinkx flinkx-dm-writer 1.6
+        com.dtstack.flinkx flinkx-greenplum-reader 1.6
+        com.dtstack.flinkx flinkx-greenplum-writer 1.6
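Among the changes above, flinkx-test gains test-scoped Apache Derby and Hadoop test-jar dependencies, which allow JDBC helpers to be exercised without an external database. A minimal, hypothetical JUnit 4 sketch under that assumption (class name and URL are illustrative, not part of this patch):

```java
import org.junit.Assert;
import org.junit.Test;

import java.sql.Connection;
import java.sql.DriverManager;

public class EmbeddedJdbcTest {
    @Test
    public void connectsToInMemoryDerby() throws Exception {
        // Derby's in-memory URL creates a throwaway database for the duration of the test run.
        try (Connection conn = DriverManager.getConnection("jdbc:derby:memory:flinkx_test;create=true")) {
            Assert.assertTrue(conn.isValid(5));
        }
    }
}
```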
diff --git a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java
index 33fc1dd035..27883b37a4 100644
--- a/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java
+++ b/flinkx-test/src/main/java/com/dtstack/flinkx/test/LocalTest.java
@@ -41,6 +41,8 @@ import com.dtstack.flinkx.ftp.writer.FtpWriter;
 import com.dtstack.flinkx.gbase.reader.GbaseReader;
 import com.dtstack.flinkx.gbase.writer.GbaseWriter;
+import com.dtstack.flinkx.greenplum.reader.GreenplumReader;
+import com.dtstack.flinkx.greenplum.writer.GreenplumWriter;
 import com.dtstack.flinkx.hbase.reader.HbaseReader;
 import com.dtstack.flinkx.hbase.writer.HbaseWriter;
 import com.dtstack.flinkx.hdfs.reader.HdfsReader;
@@ -73,6 +75,7 @@ import com.dtstack.flinkx.postgresql.writer.PostgresqlWriter;
 import com.dtstack.flinkx.reader.BaseDataReader;
 import com.dtstack.flinkx.redis.writer.RedisWriter;
+import com.dtstack.flinkx.restapi.writer.RestapiWriter;
 import com.dtstack.flinkx.sqlserver.reader.SqlserverReader;
 import com.dtstack.flinkx.sqlserver.writer.SqlserverWriter;
 import com.dtstack.flinkx.stream.reader.StreamReader;
@@ -84,7 +87,9 @@ import org.apache.flink.api.common.restartstrategy.RestartStrategies;
 import org.apache.flink.api.common.time.Time;
 import org.apache.flink.configuration.Configuration;
+import org.apache.flink.core.fs.Path;
 import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;
+import org.apache.flink.runtime.state.filesystem.FsStateBackend;
 import org.apache.flink.streaming.api.CheckpointingMode;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.datastream.DataStreamSink;
@@ -112,21 +117,18 @@ public class LocalTest {
     public static Logger LOG = LoggerFactory.getLogger(LocalTest.class);
     public static Configuration conf = new Configuration();

-    public static void main(String[] args) throws Exception {
-        setLogLevel(Level.INFO.toString());
-
+    public static void main(String[] args) throws Exception{
+        setLogLevel(Level.DEBUG.toString());
         Properties confProperties = new Properties();
 //        confProperties.put("flink.checkpoint.interval", "10000");
 //        confProperties.put("flink.checkpoint.stateBackend", "file:///tmp/flinkx_checkpoint");
-//
-        conf.setString("akka.ask.timeout", "180 s");
-        conf.setString("web.timeout", "100000");
+        //
 //        conf.setString("metrics.reporter.promgateway.class","org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporter");
-//        conf.setString("metrics.reporter.promgateway.host","172.16.8.178");
+//        conf.setString("metrics.reporter.promgateway.host","127.0.0.1");
 //        conf.setString("metrics.reporter.promgateway.port","9091");
-//        conf.setString("metrics.reporter.promgateway.jobName","kanata");
+//        conf.setString("metrics.reporter.promgateway.jobName","108job");
 //        conf.setString("metrics.reporter.promgateway.randomJobNameSuffix","true");
-//        conf.setString("metrics.reporter.promgateway.deleteOnShutdown","false");
+//        conf.setString("metrics.reporter.promgateway.deleteOnShutdown","true");

         String jobPath = "D:\\dtstack\\flinkx-all\\flinkx-examples\\examples\\clickhouse_stream.json";
         String savePointPath = "";
conf.setString("metrics.reporter.promgateway.class","org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporter"); -// conf.setString("metrics.reporter.promgateway.host","172.16.8.178"); +// conf.setString("metrics.reporter.promgateway.host","127.0.0.1"); // conf.setString("metrics.reporter.promgateway.port","9091"); -// conf.setString("metrics.reporter.promgateway.jobName","kanata"); +// conf.setString("metrics.reporter.promgateway.jobName","108job"); // conf.setString("metrics.reporter.promgateway.randomJobNameSuffix","true"); -// conf.setString("metrics.reporter.promgateway.deleteOnShutdown","false"); +// conf.setString("metrics.reporter.promgateway.deleteOnShutdown","true"); String jobPath = "D:\\dtstack\\flinkx-all\\flinkx-examples\\examples\\clickhouse_stream.json"; String savePointPath = ""; @@ -134,17 +136,29 @@ public static void main(String[] args) throws Exception { ResultPrintUtil.printResult(result); } - public static JobExecutionResult runJob(File jobFile, Properties confProperties, String savePointPath) throws Exception { + public static JobExecutionResult runJob(File jobFile, Properties confProperties, String savepointPath) throws Exception{ String jobContent = readJob(jobFile); - return runJob(jobContent, confProperties, savePointPath); + return runJob(jobContent, confProperties, savepointPath); } - public static JobExecutionResult runJob(String job, Properties confProperties, String savePointPath) throws Exception { + public static JobExecutionResult runJob(String job, Properties confProperties, String savepointPath) throws Exception{ DataTransferConfig config = DataTransferConfig.parse(job); + + conf.setString("akka.ask.timeout", "180 s"); + conf.setString("web.timeout", String.valueOf(100000)); + MyLocalStreamEnvironment env = new MyLocalStreamEnvironment(conf); + openCheckpointConf(env, confProperties); + env.setParallelism(config.getJob().getSetting().getSpeed().getChannel()); - env.setRestartStrategy(RestartStrategies.noRestart()); + + if (needRestart(config)) { + env.setRestartStrategy(RestartStrategies.fixedDelayRestart( + 10, + Time.of(10, TimeUnit.SECONDS) + )); + } BaseDataReader reader = buildDataReader(config, env); DataStream dataStream = reader.readData(); @@ -163,209 +177,110 @@ public static JobExecutionResult runJob(String job, Properties confProperties, S dataStreamSink.setParallelism(speedConfig.getWriterChannel()); } - if (StringUtils.isNotEmpty(savePointPath)) { - env.setSettings(SavepointRestoreSettings.forPath(savePointPath)); + if(StringUtils.isNotEmpty(savepointPath)){ + env.setSettings(SavepointRestoreSettings.forPath(savepointPath)); } return env.execute(); } + private static boolean needRestart(DataTransferConfig config){ + return config.getJob().getSetting().getRestoreConfig().isRestore(); + } + private static String readJob(File file) { - try(FileInputStream in = new FileInputStream(file);) { + try(FileInputStream in = new FileInputStream(file)) { byte[] fileContent = new byte[(int) file.length()]; in.read(fileContent); return new String(fileContent, StandardCharsets.UTF_8); - } catch (Exception e) { + } catch (Exception e){ throw new RuntimeException(e); } } - private static BaseDataReader buildDataReader(DataTransferConfig config, StreamExecutionEnvironment env) { + private static BaseDataReader buildDataReader(DataTransferConfig config, StreamExecutionEnvironment env){ String readerName = config.getJob().getContent().get(0).getReader().getName(); - BaseDataReader reader; - switch (readerName) { - case 
-            case PluginNameConstants.STREAM_READER:
-                reader = new StreamReader(config, env);
-                break;
-            case PluginNameConstants.CARBONDATA_READER:
-                reader = new CarbondataReader(config, env);
-                break;
-            case PluginNameConstants.ORACLE_READER:
-                reader = new OracleReader(config, env);
-                break;
-            case PluginNameConstants.POSTGRESQL_READER:
-                reader = new PostgresqlReader(config, env);
-                break;
-            case PluginNameConstants.SQLSERVER_READER:
-                reader = new SqlserverReader(config, env);
-                break;
-            case PluginNameConstants.MYSQLD_READER:
-                reader = new MysqldReader(config, env);
-                break;
-            case PluginNameConstants.MYSQL_READER:
-                reader = new MysqlReader(config, env);
-                break;
-            case PluginNameConstants.DB2_READER:
-                reader = new Db2Reader(config, env);
-                break;
-            case PluginNameConstants.GBASE_READER:
-                reader = new GbaseReader(config, env);
-                break;
-            case PluginNameConstants.ES_READER:
-                reader = new EsReader(config, env);
-                break;
-            case PluginNameConstants.FTP_READER:
-                reader = new FtpReader(config, env);
-                break;
-            case PluginNameConstants.HBASE_READER:
-                reader = new HbaseReader(config, env);
-                break;
-            case PluginNameConstants.HDFS_READER:
-                reader = new HdfsReader(config, env);
-                break;
-            case PluginNameConstants.MONGODB_READER:
-                reader = new MongodbReader(config, env);
-                break;
-            case PluginNameConstants.ODPS_READER:
-                reader = new OdpsReader(config, env);
-                break;
-            case PluginNameConstants.BINLOG_READER:
-                reader = new BinlogReader(config, env);
-                break;
-            case PluginNameConstants.KAFKA09_READER:
-                reader = new Kafka09Reader(config, env);
-                break;
-            case PluginNameConstants.KAFKA10_READER:
-                reader = new Kafka10Reader(config, env);
-                break;
-            case PluginNameConstants.KAFKA11_READER:
-                reader = new Kafka11Reader(config, env);
-                break;
-            case PluginNameConstants.KAFKA_READER:
-                reader = new KafkaReader(config, env);
-                break;
-            case PluginNameConstants.KUDU_READER:
-                reader = new KuduReader(config, env);
-                break;
-            case PluginNameConstants.CLICKHOUSE_READER:
-                reader = new ClickhouseReader(config, env);
-                break;
-            case PluginNameConstants.POLARDB_READER:
-                reader = new PolardbReader(config, env);
-                break;
-            case PluginNameConstants.PHOENIX_READER:
-                reader = new PhoenixReader(config, env);
-                break;
-            case PluginNameConstants.EMQX_READER:
-                reader = new EmqxReader(config, env);
-                break;
-            case PluginNameConstants.DM_READER:
-                reader = new DmReader(config, env);
-                break;
-            default:
-                throw new IllegalArgumentException("Can not find reader by name:" + readerName);
+        BaseDataReader reader ;
+        switch (readerName){
+            case PluginNameConstants.STREAM_READER : reader = new StreamReader(config, env); break;
+            case PluginNameConstants.CARBONDATA_READER : reader = new CarbondataReader(config, env); break;
+            case PluginNameConstants.ORACLE_READER : reader = new OracleReader(config, env); break;
+            case PluginNameConstants.POSTGRESQL_READER : reader = new PostgresqlReader(config, env); break;
+            case PluginNameConstants.SQLSERVER_READER : reader = new SqlserverReader(config, env); break;
+            case PluginNameConstants.MYSQLD_READER : reader = new MysqldReader(config, env); break;
+            case PluginNameConstants.MYSQL_READER : reader = new MysqlReader(config, env); break;
+            case PluginNameConstants.DB2_READER : reader = new Db2Reader(config, env); break;
+            case PluginNameConstants.GBASE_READER : reader = new GbaseReader(config, env); break;
+            case PluginNameConstants.ES_READER : reader = new EsReader(config, env); break;
+            case PluginNameConstants.FTP_READER : reader = new FtpReader(config, env); break;
+            case PluginNameConstants.HBASE_READER : reader = new HbaseReader(config, env); break;
+            case PluginNameConstants.HDFS_READER : reader = new HdfsReader(config, env); break;
+            case PluginNameConstants.MONGODB_READER : reader = new MongodbReader(config, env); break;
+            case PluginNameConstants.ODPS_READER : reader = new OdpsReader(config, env); break;
+            case PluginNameConstants.BINLOG_READER : reader = new BinlogReader(config, env); break;
+            case PluginNameConstants.KAFKA09_READER : reader = new Kafka09Reader(config, env); break;
+            case PluginNameConstants.KAFKA10_READER : reader = new Kafka10Reader(config, env); break;
+            case PluginNameConstants.KAFKA11_READER : reader = new Kafka11Reader(config, env); break;
+            case PluginNameConstants.KAFKA_READER : reader = new KafkaReader(config, env); break;
+            case PluginNameConstants.KUDU_READER : reader = new KuduReader(config, env); break;
+            case PluginNameConstants.CLICKHOUSE_READER : reader = new ClickhouseReader(config, env); break;
+            case PluginNameConstants.POLARDB_READER : reader = new PolardbReader(config, env); break;
+            case PluginNameConstants.PHOENIX_READER : reader = new PhoenixReader(config, env); break;
+            case PluginNameConstants.EMQX_READER : reader = new EmqxReader(config, env); break;
+            case PluginNameConstants.DM_READER : reader = new DmReader(config, env); break;
+            case PluginNameConstants.GREENPLUM_READER : reader = new GreenplumReader(config, env); break;
+            default:throw new IllegalArgumentException("Can not find reader by name:" + readerName);
         }
         return reader;
     }

-    private static BaseDataWriter buildDataWriter(DataTransferConfig config) {
+    private static BaseDataWriter buildDataWriter(DataTransferConfig config){
         String writerName = config.getJob().getContent().get(0).getWriter().getName();
         BaseDataWriter writer;
-        switch (writerName) {
-            case PluginNameConstants.STREAM_WRITER:
-                writer = new StreamWriter(config);
-                break;
-            case PluginNameConstants.CARBONDATA_WRITER:
-                writer = new CarbondataWriter(config);
-                break;
-            case PluginNameConstants.MYSQL_WRITER:
-                writer = new MysqlWriter(config);
-                break;
-            case PluginNameConstants.SQLSERVER_WRITER:
-                writer = new SqlserverWriter(config);
-                break;
-            case PluginNameConstants.ORACLE_WRITER:
-                writer = new OracleWriter(config);
-                break;
-            case PluginNameConstants.POSTGRESQL_WRITER:
-                writer = new PostgresqlWriter(config);
-                break;
-            case PluginNameConstants.DB2_WRITER:
-                writer = new Db2Writer(config);
-                break;
-            case PluginNameConstants.GBASE_WRITER:
-                writer = new GbaseWriter(config);
-                break;
-            case PluginNameConstants.ES_WRITER:
-                writer = new EsWriter(config);
-                break;
-            case PluginNameConstants.FTP_WRITER:
-                writer = new FtpWriter(config);
-                break;
-            case PluginNameConstants.HBASE_WRITER:
-                writer = new HbaseWriter(config);
-                break;
-            case PluginNameConstants.HDFS_WRITER:
-                writer = new HdfsWriter(config);
-                break;
-            case PluginNameConstants.MONGODB_WRITER:
-                writer = new MongodbWriter(config);
-                break;
-            case PluginNameConstants.ODPS_WRITER:
-                writer = new OdpsWriter(config);
-                break;
-            case PluginNameConstants.REDIS_WRITER:
-                writer = new RedisWriter(config);
-                break;
-            case PluginNameConstants.HIVE_WRITER:
-                writer = new HiveWriter(config);
-                break;
-            case PluginNameConstants.KAFKA09_WRITER:
-                writer = new Kafka09Writer(config);
-                break;
-            case PluginNameConstants.KAFKA10_WRITER:
-                writer = new Kafka10Writer(config);
-                break;
-            case PluginNameConstants.KAFKA11_WRITER:
-                writer = new Kafka11Writer(config);
-                break;
-            case PluginNameConstants.KUDU_WRITER:
-                writer = new KuduWriter(config);
-                break;
-            case PluginNameConstants.CLICKHOUSE_WRITER:
-                writer = new ClickhouseWriter(config);
-                break;
-            case PluginNameConstants.POLARDB_WRITER:
-                writer = new PolardbWriter(config);
-                break;
-            case PluginNameConstants.KAFKA_WRITER:
-                writer = new KafkaWriter(config);
-                break;
-            case PluginNameConstants.PHOENIX_WRITER:
-                writer = new PhoenixWriter(config);
-                break;
-            case PluginNameConstants.EMQX_WRITER:
-                writer = new EmqxWriter(config);
-                break;
-            case PluginNameConstants.DM_WRITER:
-                writer = new DmWriter(config);
-                break;
-            default:
-                throw new IllegalArgumentException("Can not find writer by name:" + writerName);
+        switch (writerName){
+            case PluginNameConstants.STREAM_WRITER : writer = new StreamWriter(config); break;
+            case PluginNameConstants.CARBONDATA_WRITER : writer = new CarbondataWriter(config); break;
+            case PluginNameConstants.MYSQL_WRITER : writer = new MysqlWriter(config); break;
+            case PluginNameConstants.SQLSERVER_WRITER : writer = new SqlserverWriter(config); break;
+            case PluginNameConstants.ORACLE_WRITER : writer = new OracleWriter(config); break;
+            case PluginNameConstants.POSTGRESQL_WRITER : writer = new PostgresqlWriter(config); break;
+            case PluginNameConstants.DB2_WRITER : writer = new Db2Writer(config); break;
+            case PluginNameConstants.GBASE_WRITER : writer = new GbaseWriter(config); break;
+            case PluginNameConstants.ES_WRITER : writer = new EsWriter(config); break;
+            case PluginNameConstants.FTP_WRITER : writer = new FtpWriter(config); break;
+            case PluginNameConstants.HBASE_WRITER : writer = new HbaseWriter(config); break;
+            case PluginNameConstants.HDFS_WRITER : writer = new HdfsWriter(config); break;
+            case PluginNameConstants.MONGODB_WRITER : writer = new MongodbWriter(config); break;
+            case PluginNameConstants.ODPS_WRITER : writer = new OdpsWriter(config); break;
+            case PluginNameConstants.REDIS_WRITER : writer = new RedisWriter(config); break;
+            case PluginNameConstants.HIVE_WRITER : writer = new HiveWriter(config); break;
+            case PluginNameConstants.KAFKA09_WRITER : writer = new Kafka09Writer(config); break;
+            case PluginNameConstants.KAFKA10_WRITER : writer = new Kafka10Writer(config); break;
+            case PluginNameConstants.KAFKA11_WRITER : writer = new Kafka11Writer(config); break;
+            case PluginNameConstants.KUDU_WRITER : writer = new KuduWriter(config); break;
+            case PluginNameConstants.CLICKHOUSE_WRITER : writer = new ClickhouseWriter(config); break;
+            case PluginNameConstants.POLARDB_WRITER : writer = new PolardbWriter(config); break;
+            case PluginNameConstants.KAFKA_WRITER : writer = new KafkaWriter(config); break;
+            case PluginNameConstants.PHOENIX_WRITER : writer = new PhoenixWriter(config); break;
+            case PluginNameConstants.EMQX_WRITER : writer = new EmqxWriter(config); break;
+            case PluginNameConstants.RESTAPI_WRITER : writer = new RestapiWriter(config);break;
+            case PluginNameConstants.DM_WRITER : writer = new DmWriter(config); break;
+            case PluginNameConstants.GREENPLUM_WRITER : writer = new GreenplumWriter(config); break;
+            default:throw new IllegalArgumentException("Can not find writer by name:" + writerName);
         }
         return writer;
     }

-    private static void openCheckpointConf(StreamExecutionEnvironment env, Properties properties) {
-        if (properties == null) {
+    private static void openCheckpointConf(StreamExecutionEnvironment env, Properties properties){
+        if(properties == null){
             return;
         }

-        if (properties.getProperty(ConfigConstant.FLINK_CHECKPOINT_INTERVAL_KEY) == null) {
+        if(properties.getProperty(ConfigConstant.FLINK_CHECKPOINT_INTERVAL_KEY) == null){
             return;
-        } else {
+        }else{
             long interval = Long.parseLong(properties.getProperty(ConfigConstant.FLINK_CHECKPOINT_INTERVAL_KEY).trim());

             //start checkpoint every ${interval}
@@ -375,7 +290,7 @@ private static void openCheckpointConf(StreamExecutionEnvironment env, Propertie
         }

         String checkpointTimeoutStr = properties.getProperty(ConfigConstant.FLINK_CHECKPOINT_TIMEOUT_KEY);
-        if (checkpointTimeoutStr != null) {
+        if(checkpointTimeoutStr != null){
             long checkpointTimeout = Long.parseLong(checkpointTimeoutStr);
             //checkpoints have to complete within one min,or are discard
             env.getCheckpointConfig().setCheckpointTimeout(checkpointTimeout);
@@ -387,6 +302,8 @@ private static void openCheckpointConf(StreamExecutionEnvironment env, Propertie
         env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
         env.getCheckpointConfig().enableExternalizedCheckpoints(
                 CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
+
+        env.setStateBackend(new FsStateBackend(new Path("file:///tmp/flinkx_checkpoint")));

         env.setRestartStrategy(RestartStrategies.failureRateRestart(
                 FAILURE_RATE,
                 Time.of(FAILURE_INTERVAL, TimeUnit.MINUTES),
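The LocalTest changes above wire in a file-based checkpoint backend and a fixed-delay restart strategy when the job's restore config is enabled. A condensed, hypothetical sketch of the same Flink 1.8-style setup (interval, retry counts and path are illustrative, not the project's canonical values):

```java
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.concurrent.TimeUnit;

public class CheckpointSetup {
    public static void configure(StreamExecutionEnvironment env, boolean restoreEnabled) {
        // Checkpoint every 10s and keep state in a local directory (illustrative path).
        env.enableCheckpointing(10_000, CheckpointingMode.EXACTLY_ONCE);
        env.setStateBackend(new FsStateBackend(new Path("file:///tmp/flinkx_checkpoint")));

        if (restoreEnabled) {
            // Retry up to 10 times, 10 seconds apart, so a restored run can recover from transient failures.
            env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, Time.of(10, TimeUnit.SECONDS)));
        } else {
            env.setRestartStrategy(RestartStrategies.noRestart());
        }
    }
}
```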
diff --git a/flinkx-test/src/main/java/com/dtstack/flinkx/test/PluginNameConstants.java b/flinkx-test/src/main/java/com/dtstack/flinkx/test/PluginNameConstants.java
index f0f047e6b5..21d07e7c9e 100644
--- a/flinkx-test/src/main/java/com/dtstack/flinkx/test/PluginNameConstants.java
+++ b/flinkx-test/src/main/java/com/dtstack/flinkx/test/PluginNameConstants.java
@@ -53,6 +53,10 @@ public class PluginNameConstants {
     public static final String SQLSERVER_CDC_READER = "sqlservercdcreader";
     public static final String METADATAHIVE2_READER = "metadatahive2reader";
     public static final String DM_READER = "dmreader";
+    public static final String METADATATIDB_READER = "metadatatidbreader";
+    public static final String METADATAORACLE_READER = "metadataoraclereader";
+    public static final String METADATAMYSQL_READER = "metadatamysqlreader";
+    public static final String GREENPLUM_READER = "greenplumreader";

     public static final String STREAM_WRITER = "streamwriter";
     public static final String CARBONDATA_WRITER = "carbondatawriter";
@@ -81,4 +85,5 @@ public class PluginNameConstants {
     public static final String EMQX_WRITER = "emqxwriter";
     public static final String RESTAPI_WRITER = "restapiwriter";
     public static final String DM_WRITER = "dmwriter";
+    public static final String GREENPLUM_WRITER = "greenplumwriter";
 }
diff --git a/pom.xml b/pom.xml
index 8524112890..821397ba25 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,7 +29,7 @@
         flinkx-clickhouse
         flinkx-saphana
         flinkx-teradata
-        flinkx-cassandra
+        flinkx-greenplum

         flinkx-hdfs
         flinkx-hive
@@ -40,22 +40,22 @@
         flinkx-phoenix
         flinkx-carbondata
         flinkx-kudu
+        flinkx-cassandra

         flinkx-redis
         flinkx-mongodb

         flinkx-binlog
+        flinkx-pgwal
         flinkx-kb
         flinkx-kafka09
         flinkx-kafka10
         flinkx-kafka11
         flinkx-kafka
-
         flinkx-emqx
-        flinkx-pgwal
         flinkx-pulsar
-
+        flinkx-restapi
@@ -66,7 +66,7 @@
         2.7.3
         4.5.3
         ${basedir}/dev
-        release_1.8.6
+        release_1.8.7
@@ -125,9 +125,16 @@
-            org.testng
-            testng
-            6.14.3
+            junit
+            junit
+            4.12
+            test
+
+            org.mockito
+            mockito-core
+            3.0.0
             test
@@ -143,9 +150,24 @@
+            org.jacoco
+            jacoco-maven-plugin
+            0.7.8
+                prepare-agent
+                report
+
             org.apache.maven.plugins
             maven-source-plugin
+            3.2.1
                 attach-sources
@@ -168,4 +190,20 @@
+        org.jacoco
+        jacoco-maven-plugin
+            report
\ No newline at end of file
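The root pom now pulls in JUnit 4.12 and Mockito 3.0.0 for tests and attaches the JaCoCo agent for coverage reports, so module tests can follow the usual JUnit/Mockito pattern. A minimal, hypothetical example under that assumption (the mocked interface is illustrative only, not a real FlinkX type):

```java
import org.junit.Test;
import org.mockito.Mockito;

import static org.junit.Assert.assertEquals;

public class MockitoStyleTest {
    /** Illustrative collaborator; not part of the FlinkX codebase. */
    interface NameLookup {
        String pluginNameFor(String key);
    }

    @Test
    public void returnsStubbedPluginName() {
        NameLookup lookup = Mockito.mock(NameLookup.class);
        Mockito.when(lookup.pluginNameFor("gp")).thenReturn("greenplumreader");

        assertEquals("greenplumreader", lookup.pluginNameFor("gp"));
        Mockito.verify(lookup).pluginNameFor("gp");
    }
}
```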