Skip to content

Commit 5a32cfb

Browse files
committed
Merge branch 'feat_1.8_modifyDoc' of http://gitlab.prod.dtstack.cn/dt-insight-engine/flinkStreamSQL into feat_1.8_modifyDoc
2 parents bbb4e9c + b7bc66f commit 5a32cfb

File tree

4 files changed

+407
-129
lines changed

4 files changed

+407
-129
lines changed

docs/plugin/impalaSide.md

Lines changed: 125 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
21
## 1.格式:
3-
```
2+
3+
```sql
44
CREATE TABLE tableName(
55
colName cloType,
66
...
@@ -20,51 +20,65 @@
2020
);
2121
```
2222

23-
# 2.支持版本
23+
## 2.支持版本
24+
2425
2.10.0-cdh5.13.0
25-
26+
2627
## 3.表结构定义
27-
28-
|参数名称|含义|
29-
|----|---|
30-
| tableName | 注册到flink的表名称|
31-
| colName | 列名称|
32-
| colType | 列类型 [colType支持的类型](docs/colType.md)|
33-
| PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息|
34-
| PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开|
35-
36-
## 4.参数
37-
38-
|参数名称|含义|是否必填|默认值|
39-
|----|---|---|----|
40-
| type | 表明维表的类型[impala] |||
41-
| url | 连接postgresql数据库 jdbcUrl |||
42-
| userName | postgresql连接用户名 |||
43-
| password | postgresql连接密码|||
44-
| tableName | postgresql表名称|||
45-
| authMech | 身份验证机制 (0, 1, 2, 3), 暂不支持kerberos ||0|
46-
| principal | kerberos用于登录的principal(authMech=1时独有) |authMech=1为必填|
47-
| keyTabFilePath | keytab文件的路径(authMech=1时独有) |authMech=1为必填 ||
48-
| krb5FilePath | krb5.conf文件路径(authMech=1时独有) |authMech=1为必填||
49-
| krbServiceName | Impala服务器的Kerberos principal名称(authMech=1时独有) |authMech=1为必填||
50-
| krbRealm | Kerberos的域名(authMech=1时独有) || HADOOP.COM |
51-
| enablePartition | 是否支持分区||false|
52-
| partitionfields | 分区字段名|否,enablePartition='true'时为必填||
53-
| partitionFieldTypes | 分区字段类型 |否,enablePartition='true'时为必填||
54-
| partitionValues | 分区值|否,enablePartition='true'时为必填||
55-
| cache | 维表缓存策略(NONE/LRU/ALL)||NONE|
56-
| partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)||false|
57-
58-
----------
59-
> 缓存策略
60-
* NONE: 不做内存缓存
61-
* LRU:
62-
* cacheSize: 缓存的条目数量
63-
* cacheTTLMs:缓存的过期时间(ms)
64-
65-
66-
## 5.样例
28+
29+
[维表参数信息](docs/plugin/sideParams.md)
30+
31+
impala独有的参数配置
32+
33+
| 参数名称 | 含义 | 是否必填 | 默认值 |
34+
| ------------------- | ------------------------------------------------------------ | --------------------------------- | ---------- |
35+
| type | 表明维表的类型[impala] || |
36+
| url | 连接postgresql数据库 jdbcUrl || |
37+
| userName | postgresql连接用户名 || |
38+
| password | postgresql连接密码 || |
39+
| tableName | postgresql表名称 || |
40+
| authMech | 身份验证机制 (0, 1, 2, 3), 暂不支持kerberos || 0 |
41+
| principal | kerberos用于登录的principal(authMech=1时独有) | authMech=1为必填 | |
42+
| keyTabFilePath | keytab文件的路径(authMech=1时独有) | authMech=1为必填 | |
43+
| krb5FilePath | krb5.conf文件路径(authMech=1时独有) | authMech=1为必填 | |
44+
| krbServiceName | Impala服务器的Kerberos principal名称(authMech=1时独有) | authMech=1为必填 | |
45+
| krbRealm | Kerberos的域名(authMech=1时独有) || HADOOP.COM |
46+
| enablePartition | 是否支持分区 || false |
47+
| partitionfields | 分区字段名 | 否,enablePartition='true'时为必填 | |
48+
| partitionFieldTypes | 分区字段类型 | 否,enablePartition='true'时为必填 | |
49+
| partitionValues | 分区值 | 否,enablePartition='true'时为必填 | |
50+
| cache | 维表缓存策略(NONE/LRU/ALL) || NONE |
51+
| partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量) || false |
52+
53+
## 4.样例
54+
55+
### ALL全量维表定义
56+
57+
```sql
58+
// 定义全量维表
59+
CREATE TABLE sideTable(
60+
id INT,
61+
name VARCHAR,
62+
PRIMARY KEY(id) ,
63+
PERIOD FOR SYSTEM_TIME
64+
)WITH(
65+
type ='mysql',
66+
url ='jdbc:impala://localhost:21050/mqtest',
67+
userName ='dtstack',
68+
password ='1abc123',
69+
tableName ='test_impala_all',
70+
authMech='3',
71+
cache ='ALL',
72+
cacheTTLMs ='60000',
73+
parallelism ='2',
74+
partitionedJoin='false'
75+
);
76+
6777
```
78+
79+
### LRU异步维表定义
80+
81+
```sql
6882
create table sideTable(
6983
channel varchar,
7084
xccount int,
@@ -84,14 +98,79 @@ create table sideTable(
8498
partitionedJoin='false'
8599
);
86100

101+
```
102+
103+
### MySQL异步维表关联
104+
105+
```sql
106+
CREATE TABLE MyTable(
107+
id int,
108+
name varchar
109+
)WITH(
110+
type ='kafka11',
111+
bootstrapServers ='172.16.8.107:9092',
112+
zookeeperQuorum ='172.16.8.107:2181/kafka',
113+
offsetReset ='latest',
114+
topic ='cannan_yctest01',
115+
timezone='Asia/Shanghai',
116+
enableKeyPartitions ='false',
117+
topicIsPattern ='false',
118+
parallelism ='1'
119+
);
120+
121+
CREATE TABLE MyResult(
122+
id INT,
123+
name VARCHAR
124+
)WITH(
125+
type='impala',
126+
url='jdbc:impala://localhost:21050/mytest',
127+
userName='dtstack',
128+
password='abc123',
129+
tableName ='test_impala_zf',
130+
updateMode ='append',
131+
parallelism ='1',
132+
batchSize ='100',
133+
batchWaitInterval ='1000'
134+
);
135+
136+
CREATE TABLE sideTable(
137+
id INT,
138+
name VARCHAR,
139+
PRIMARY KEY(id) ,
140+
PERIOD FOR SYSTEM_TIME
141+
)WITH(
142+
type='impala',
143+
url='jdbc:impala://localhost:21050/mytest',
144+
userName='dtstack',
145+
password='abc123',
146+
tableName ='test_impala_10',
147+
partitionedJoin ='false',
148+
cache ='LRU',
149+
cacheSize ='10000',
150+
cacheTTLMs ='60000',
151+
asyncPoolSize ='3',
152+
parallelism ='1'
153+
);
154+
155+
insert
156+
into
157+
MyResult
158+
select
159+
m.id,
160+
s.name
161+
from
162+
MyTable m
163+
join
164+
sideTable s
165+
on m.id=s.id;
87166

88167
```
89168

90-
## 6.分区样例
169+
### 分区样例
91170

92171
注:分区字段放在最后面,如下,name是分区字段,放在channel,xccount字段的后面
93172

94-
```
173+
```sql
95174
create table sideTable(
96175
channel varchar,
97176
xccount int,
@@ -118,4 +197,3 @@ create table sideTable(
118197

119198
```
120199

121-

docs/plugin/mongoSide.md

Lines changed: 106 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -26,53 +26,123 @@
2626

2727
## 3.表结构定义
2828

29-
|参数名称|含义|
30-
|----|---|
31-
| tableName | 注册到flink的表名称(可选填;不填默认和hbase对应的表名称相同)|
32-
| colName | 列名称|
33-
| colType | 列类型 [colType支持的类型](docs/colType.md)|
34-
| PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息|
35-
| PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开|
29+
[通用维表参数信息](docs/plugin/sideParams.md)
3630

37-
## 4.参数
38-
39-
|参数名称|含义|是否必填|默认值|
40-
|----|---|---|----|
41-
| type |表明 输出表类型 mongo|||
42-
| address | 连接mongo数据库 jdbcUrl |||
43-
| userName | mongo连接用户名|||
44-
| password | mongo连接密码|||
45-
| tableName | mongo表名称|||
46-
| database | mongo表名称|||
47-
| cache | 维表缓存策略(NONE/LRU)||NONE|
48-
| partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)||false|
49-
50-
----------
51-
> 缓存策略
52-
* NONE: 不做内存缓存
53-
* LRU:
54-
* cacheSize: 缓存的条目数量
55-
* cacheTTLMs:缓存的过期时间(ms)
56-
31+
32+
mongo相关参数配置:
33+
34+
|参数名称|含义|是否必填|默认值|
35+
|----|---|---|----|
36+
| type |表明 输出表类型 mongo|||
37+
| address | 连接mongo数据库 jdbcUrl |||
38+
| userName | mongo连接用户名|||
39+
| password | mongo连接密码|||
40+
| tableName | mongo表名称|||
41+
| database | mongo表名称|||
42+
43+
## 4.样例
5744

58-
## 5.样例
45+
46+
### 全量维表结构
47+
48+
```aidl
49+
CREATE TABLE source2(
50+
id int,
51+
address VARCHAR,
52+
PERIOD FOR SYSTEM_TIME
53+
)WITH(
54+
type ='mongo',
55+
address ='172.16.8.193:27017',
56+
database ='dtstack',
57+
tableName ='userInfo',
58+
cache ='ALL',
59+
parallelism ='1',
60+
partitionedJoin='false'
61+
);
5962
```
60-
create table sideTable(
61-
CHANNEL varchar,
62-
XCCOUNT int,
63-
PRIMARY KEY(channel),
63+
64+
### 异步维表结构
65+
66+
```aidl
67+
CREATE TABLE source2(
68+
id int,
69+
address VARCHAR,
6470
PERIOD FOR SYSTEM_TIME
65-
)WITH(
71+
)WITH(
6672
type ='mongo',
67-
address ='172.21.32.1:27017,172.21.32.1:27017',
68-
database ='test',
69-
tableName ='sidetest',
73+
address ='172.16.8.193:27017',
74+
database ='dtstack',
75+
tableName ='userInfo',
7076
cache ='LRU',
7177
parallelism ='1',
7278
partitionedJoin='false'
79+
);
80+
81+
```
82+
83+
### 异步维表关联样例
84+
85+
```
86+
87+
CREATE TABLE source1 (
88+
id int,
89+
name VARCHAR
90+
)WITH(
91+
type ='kafka11',
92+
bootstrapServers ='172.16.8.107:9092',
93+
zookeeperQuorum ='172.16.8.107:2181/kafka',
94+
offsetReset ='latest',
95+
topic ='mqTest03',
96+
timezone='Asia/Shanghai',
97+
topicIsPattern ='false'
7398
);
7499
75100
101+
CREATE TABLE source2(
102+
id int,
103+
address VARCHAR,
104+
PERIOD FOR SYSTEM_TIME
105+
)WITH(
106+
type ='mongo',
107+
address ='172.16.8.193:27017',
108+
database ='dtstack',
109+
tableName ='userInfo',
110+
cache ='ALL',
111+
parallelism ='1',
112+
partitionedJoin='false'
113+
);
114+
115+
116+
CREATE TABLE MyResult(
117+
id int,
118+
name VARCHAR,
119+
address VARCHAR,
120+
primary key (id)
121+
)WITH(
122+
type='console'
123+
);
124+
125+
insert into MyResult
126+
select
127+
s1.id,
128+
s1.name,
129+
s2.address
130+
from
131+
source1 s1
132+
left join
133+
source2 s2
134+
on
135+
s1.id = s2.id
136+
137+
76138
```
77139

78140

141+
维表数据:{"id": 1001,"address":"hz""}
142+
143+
源表数据:{"name":"maqi","id":1001}
144+
145+
146+
输出结果: (1001,maqi,hz)
147+
148+

0 commit comments

Comments
 (0)