Skip to content

Commit c598a9b

Browse files
committed
MCOL-5480 LOAD DATA INFILE incorrectly loads values for MEDIUMINT datatype.
The internal memory representation of the MEDIUMINT datatype uses 24 bits, in both the MariaDB server and ColumnStore. The MCS plugin code uses the TypeHandlerSInt24 and TypeHandlerUInt24 classes to convert the binary representation of signed and unsigned MEDIUMINT values, respectively, as passed by the server to the plugin. The plugin then outputs the text representation of these values into an open file descriptor which is piped to cpimport for the final load into the MCS db files. The TypeHandlerXInt24 classes were previously using the WriteBatchField::ColWriteBatchXInt32() functions, which operate on a 4-byte buffer; this was incorrect and resulted in wrong parsing of MEDIUMINT values. As a fix, we implement WriteBatchField::ColWriteBatchXInt24() functions, which correctly handle the 24-bit input buffer used for the MEDIUMINT datatype.
1 parent a90535e commit c598a9b

File tree

4 files changed

+316
-2
lines changed

4 files changed

+316
-2
lines changed

datatypes/mcs_datatype.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -1017,6 +1017,8 @@ class WriteBatchField
10171017
virtual size_t ColWriteBatchUInt64(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0;
10181018
virtual size_t ColWriteBatchSInt32(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0;
10191019
virtual size_t ColWriteBatchUInt32(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0;
1020+
virtual size_t ColWriteBatchSInt24(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0;
1021+
virtual size_t ColWriteBatchUInt24(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0;
10201022
virtual size_t ColWriteBatchSInt16(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0;
10211023
virtual size_t ColWriteBatchUInt16(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0;
10221024
virtual size_t ColWriteBatchSInt8(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0;
@@ -1296,7 +1298,7 @@ class TypeHandlerSInt24 : public TypeHandlerInt
12961298
size_t ColWriteBatch(WriteBatchField* field, const unsigned char* buf, bool nullVal,
12971299
ColBatchWriter& writer) const override
12981300
{
1299-
return field->ColWriteBatchSInt32(buf, nullVal, writer);
1301+
return field->ColWriteBatchSInt24(buf, nullVal, writer);
13001302
}
13011303
int storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const override
13021304
{
@@ -1569,7 +1571,7 @@ class TypeHandlerUInt24 : public TypeHandlerInt
15691571
size_t ColWriteBatch(WriteBatchField* field, const unsigned char* buf, bool nullVal,
15701572
ColBatchWriter& writer) const override
15711573
{
1572-
return field->ColWriteBatchUInt32(buf, nullVal, writer);
1574+
return field->ColWriteBatchUInt24(buf, nullVal, writer);
15731575
}
15741576
int storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const override
15751577
{

dbcon/mysql/ha_mcs_datatype.h

+34
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,40 @@ class WriteBatchFieldMariaDB : public WriteBatchField
453453
return 4;
454454
}
455455

456+
size_t ColWriteBatchSInt24(const uchar* buf, bool nullVal, ColBatchWriter& ci) override
457+
{
458+
if (nullVal && (m_type.constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT))
459+
{
460+
fprintf(ci.filePtr(), "%c", ci.delimiter());
461+
}
462+
else
463+
{
464+
int32_t tmp = (
465+
(*const_cast<uint8_t*>(buf) << 8) |
466+
(*const_cast<uint8_t*>(buf+1) << 16) |
467+
(*const_cast<uint8_t*>(buf+2) << 24)
468+
) >> 8;
469+
fprintf(ci.filePtr(), "%d%c", tmp, ci.delimiter());
470+
}
471+
return 3;
472+
}
473+
474+
size_t ColWriteBatchUInt24(const uchar* buf, bool nullVal, ColBatchWriter& ci) override
475+
{
476+
if (nullVal && (m_type.constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT))
477+
fprintf(ci.filePtr(), "%c", ci.delimiter());
478+
else
479+
{
480+
uint32_t tmp = (
481+
(*const_cast<uint8_t*>(buf)) |
482+
(*const_cast<uint8_t*>(buf+1) << 8) |
483+
(*const_cast<uint8_t*>(buf+2) << 16)
484+
);
485+
fprintf(ci.filePtr(), "%u%c", tmp, ci.delimiter());
486+
}
487+
return 3;
488+
}
489+
456490
size_t ColWriteBatchSInt16(const uchar* buf, bool nullVal, ColBatchWriter& ci) override
457491
{
458492
if (nullVal && (m_type.constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
DROP DATABASE IF EXISTS mcol_5480;
2+
CREATE DATABASE mcol_5480;
3+
USE mcol_5480;
4+
SHOW VARIABLES LIKE 'columnstore_use_import_for_batchinsert';
5+
Variable_name Value
6+
columnstore_use_import_for_batchinsert ON
7+
CREATE TABLE t1 (cmediumint MEDIUMINT, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
8+
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
9+
SELECT * FROM t1;
10+
cmediumint ctimestamp ctime
11+
-8388608 2020-08-13 03:14:07 11:58:28
12+
-8388607 2020-08-14 03:14:07 11:58:38
13+
-1000 2020-08-15 03:14:07 11:58:48
14+
-1 2020-08-16 03:14:07 11:58:58
15+
1 2020-08-13 03:14:08 12:58:28
16+
1000 2020-08-13 03:14:09 13:58:28
17+
8388607 2020-08-13 03:14:10 14:58:28
18+
NULL 2020-08-13 03:14:11 15:58:28
19+
DROP TABLE t1;
20+
CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT, ctime TIME) engine=columnstore;
21+
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
22+
SELECT * FROM t1;
23+
ctimestamp cmediumint ctime
24+
2020-08-13 03:14:07 -8388608 11:58:28
25+
2020-08-14 03:14:07 -8388607 11:58:38
26+
2020-08-15 03:14:07 -1000 11:58:48
27+
2020-08-16 03:14:07 -1 11:58:58
28+
2020-08-13 03:14:08 1 12:58:28
29+
2020-08-13 03:14:09 1000 13:58:28
30+
2020-08-13 03:14:10 8388607 14:58:28
31+
2020-08-13 03:14:11 NULL 15:58:28
32+
DROP TABLE t1;
33+
CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
34+
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
35+
SELECT * FROM t1;
36+
cmediumint ctimestamp ctime
37+
0 2020-08-12 03:14:08 12:58:18
38+
1 2020-08-13 03:14:08 12:58:28
39+
1000 2020-08-13 03:14:09 13:58:28
40+
8388607 2020-08-13 03:14:10 14:58:28
41+
NULL 2020-08-13 03:14:11 15:58:28
42+
16777215 2020-08-13 03:14:11 15:58:28
43+
DROP TABLE t1;
44+
CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT UNSIGNED, ctime TIME) engine=columnstore;
45+
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
46+
SELECT * FROM t1;
47+
ctimestamp cmediumint ctime
48+
2020-08-12 03:14:08 0 12:58:18
49+
2020-08-13 03:14:08 1 12:58:28
50+
2020-08-13 03:14:09 1000 13:58:28
51+
2020-08-13 03:14:10 8388607 14:58:28
52+
2020-08-13 03:14:11 NULL 15:58:28
53+
2020-08-13 03:14:11 16777215 15:58:28
54+
DROP TABLE t1;
55+
CREATE TABLE t1 (cmediumint MEDIUMINT) engine=columnstore;
56+
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
57+
SELECT * FROM t1;
58+
cmediumint
59+
-8388608
60+
-8388607
61+
-1000
62+
-1
63+
1
64+
1000
65+
8388607
66+
NULL
67+
DROP TABLE t1;
68+
CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED) engine=columnstore;
69+
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
70+
SELECT * FROM t1;
71+
cmediumint
72+
0
73+
1
74+
1000
75+
8388607
76+
NULL
77+
16777215
78+
DROP TABLE t1;
79+
CREATE TABLE t1 (ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
80+
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
81+
SELECT * FROM t1;
82+
ctimestamp ctime
83+
2020-08-13 03:14:07 11:58:28
84+
2020-08-14 03:14:07 11:58:38
85+
2020-08-15 03:14:07 11:58:48
86+
2020-08-16 03:14:07 11:58:58
87+
2020-08-13 03:14:08 12:58:28
88+
2020-08-13 03:14:09 13:58:28
89+
2020-08-13 03:14:10 14:58:28
90+
2020-08-13 03:14:11 15:58:28
91+
DROP TABLE t1;
92+
CREATE TABLE t1 (ctimestamp TIMESTAMP) engine=columnstore;
93+
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
94+
SELECT * FROM t1;
95+
ctimestamp
96+
2020-08-13 03:14:07
97+
2020-08-14 03:14:07
98+
2020-08-15 03:14:07
99+
2020-08-16 03:14:07
100+
2020-08-13 03:14:08
101+
2020-08-13 03:14:09
102+
2020-08-13 03:14:10
103+
2020-08-13 03:14:11
104+
DROP TABLE t1;
105+
CREATE TABLE t1 (ctime TIME) engine=columnstore;
106+
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
107+
SELECT * FROM t1;
108+
ctime
109+
11:58:28
110+
11:58:38
111+
11:58:48
112+
11:58:58
113+
12:58:28
114+
13:58:28
115+
14:58:28
116+
15:58:28
117+
DROP TABLE t1;
118+
DROP DATABASE mcol_5480;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
#
2+
# MCOL-5480 LDI loads values incorrectly for MEDIUMINT, TIME and TIMESTAMP
3+
# when cpimport is used for batch insert
4+
#
5+
6+
--source ../include/have_columnstore.inc
7+
8+
let $DATADIR=`SELECT @@datadir`;
9+
10+
--disable_warnings
11+
DROP DATABASE IF EXISTS mcol_5480;
12+
--enable_warnings
13+
CREATE DATABASE mcol_5480;
14+
USE mcol_5480;
15+
SHOW VARIABLES LIKE 'columnstore_use_import_for_batchinsert';
16+
17+
# Test for signed medium int with timestamp and time data types.
18+
## Signed medium int as the first column
19+
CREATE TABLE t1 (cmediumint MEDIUMINT, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
20+
--exec rm -f $DATADIR/mcol5480.txt
21+
--exec echo "-8388608|2020-08-13 03:14:07|11:58:28|" >> $DATADIR/mcol5480.txt
22+
--exec echo "-8388607|2020-08-14 03:14:07|11:58:38|" >> $DATADIR/mcol5480.txt
23+
--exec echo "-1000|2020-08-15 03:14:07|11:58:48|" >> $DATADIR/mcol5480.txt
24+
--exec echo "-1|2020-08-16 03:14:07|11:58:58|" >> $DATADIR/mcol5480.txt
25+
--exec echo "1|2020-08-13 03:14:08|12:58:28|" >> $DATADIR/mcol5480.txt
26+
--exec echo "1000|2020-08-13 03:14:09|13:58:28|" >> $DATADIR/mcol5480.txt
27+
--exec echo "8388607|2020-08-13 03:14:10|14:58:28|" >> $DATADIR/mcol5480.txt
28+
--exec echo "\N|2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt
29+
--replace_result $DATADIR DATADIR
30+
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
31+
SELECT * FROM t1;
32+
DROP TABLE t1;
33+
34+
## Signed medium int as the middle column
35+
CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT, ctime TIME) engine=columnstore;
36+
--exec rm -f $DATADIR/mcol5480.txt
37+
--exec echo "2020-08-13 03:14:07|-8388608|11:58:28|" >> $DATADIR/mcol5480.txt
38+
--exec echo "2020-08-14 03:14:07|-8388607|11:58:38|" >> $DATADIR/mcol5480.txt
39+
--exec echo "2020-08-15 03:14:07|-1000|11:58:48|" >> $DATADIR/mcol5480.txt
40+
--exec echo "2020-08-16 03:14:07|-1|11:58:58|" >> $DATADIR/mcol5480.txt
41+
--exec echo "2020-08-13 03:14:08|1|12:58:28|" >> $DATADIR/mcol5480.txt
42+
--exec echo "2020-08-13 03:14:09|1000|13:58:28|" >> $DATADIR/mcol5480.txt
43+
--exec echo "2020-08-13 03:14:10|8388607|14:58:28|" >> $DATADIR/mcol5480.txt
44+
--exec echo "2020-08-13 03:14:11|\N|15:58:28|" >> $DATADIR/mcol5480.txt
45+
--replace_result $DATADIR DATADIR
46+
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
47+
SELECT * FROM t1;
48+
DROP TABLE t1;
49+
50+
# Test for unsigned medium int with timestamp and time data types.
51+
## Unsigned medium int as the first column
52+
CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
53+
--exec rm -f $DATADIR/mcol5480.txt
54+
--exec echo "0|2020-08-12 03:14:08|12:58:18|" >> $DATADIR/mcol5480.txt
55+
--exec echo "1|2020-08-13 03:14:08|12:58:28|" >> $DATADIR/mcol5480.txt
56+
--exec echo "1000|2020-08-13 03:14:09|13:58:28|" >> $DATADIR/mcol5480.txt
57+
--exec echo "8388607|2020-08-13 03:14:10|14:58:28|" >> $DATADIR/mcol5480.txt
58+
--exec echo "\N|2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt
59+
--exec echo "16777215|2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt
60+
--replace_result $DATADIR DATADIR
61+
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
62+
SELECT * FROM t1;
63+
DROP TABLE t1;
64+
65+
## Unsigned medium int as the middle column
66+
CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT UNSIGNED, ctime TIME) engine=columnstore;
67+
--exec rm -f $DATADIR/mcol5480.txt
68+
--exec echo "2020-08-12 03:14:08|0|12:58:18|" >> $DATADIR/mcol5480.txt
69+
--exec echo "2020-08-13 03:14:08|1|12:58:28|" >> $DATADIR/mcol5480.txt
70+
--exec echo "2020-08-13 03:14:09|1000|13:58:28|" >> $DATADIR/mcol5480.txt
71+
--exec echo "2020-08-13 03:14:10|8388607|14:58:28|" >> $DATADIR/mcol5480.txt
72+
--exec echo "2020-08-13 03:14:11|\N|15:58:28|" >> $DATADIR/mcol5480.txt
73+
--exec echo "2020-08-13 03:14:11|16777215|15:58:28|" >> $DATADIR/mcol5480.txt
74+
--replace_result $DATADIR DATADIR
75+
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
76+
SELECT * FROM t1;
77+
DROP TABLE t1;
78+
79+
# Test for only signed medium int data type
80+
CREATE TABLE t1 (cmediumint MEDIUMINT) engine=columnstore;
81+
--exec rm -f $DATADIR/mcol5480.txt
82+
--exec echo "-8388608|" >> $DATADIR/mcol5480.txt
83+
--exec echo "-8388607|" >> $DATADIR/mcol5480.txt
84+
--exec echo "-1000|" >> $DATADIR/mcol5480.txt
85+
--exec echo "-1|" >> $DATADIR/mcol5480.txt
86+
--exec echo "1|" >> $DATADIR/mcol5480.txt
87+
--exec echo "1000|" >> $DATADIR/mcol5480.txt
88+
--exec echo "8388607|" >> $DATADIR/mcol5480.txt
89+
--exec echo "\N|" >> $DATADIR/mcol5480.txt
90+
--replace_result $DATADIR DATADIR
91+
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
92+
SELECT * FROM t1;
93+
DROP TABLE t1;
94+
95+
# Test for only unsigned medium int data type
96+
CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED) engine=columnstore;
97+
--exec rm -f $DATADIR/mcol5480.txt
98+
--exec echo "0|" >> $DATADIR/mcol5480.txt
99+
--exec echo "1|" >> $DATADIR/mcol5480.txt
100+
--exec echo "1000|" >> $DATADIR/mcol5480.txt
101+
--exec echo "8388607|" >> $DATADIR/mcol5480.txt
102+
--exec echo "\N|" >> $DATADIR/mcol5480.txt
103+
--exec echo "16777215|" >> $DATADIR/mcol5480.txt
104+
--replace_result $DATADIR DATADIR
105+
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
106+
SELECT * FROM t1;
107+
DROP TABLE t1;
108+
109+
# Test for timestamp and time data types
110+
CREATE TABLE t1 (ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
111+
--exec rm -f $DATADIR/mcol5480.txt
112+
--exec echo "2020-08-13 03:14:07|11:58:28|" >> $DATADIR/mcol5480.txt
113+
--exec echo "2020-08-14 03:14:07|11:58:38|" >> $DATADIR/mcol5480.txt
114+
--exec echo "2020-08-15 03:14:07|11:58:48|" >> $DATADIR/mcol5480.txt
115+
--exec echo "2020-08-16 03:14:07|11:58:58|" >> $DATADIR/mcol5480.txt
116+
--exec echo "2020-08-13 03:14:08|12:58:28|" >> $DATADIR/mcol5480.txt
117+
--exec echo "2020-08-13 03:14:09|13:58:28|" >> $DATADIR/mcol5480.txt
118+
--exec echo "2020-08-13 03:14:10|14:58:28|" >> $DATADIR/mcol5480.txt
119+
--exec echo "2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt
120+
--replace_result $DATADIR DATADIR
121+
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
122+
SELECT * FROM t1;
123+
DROP TABLE t1;
124+
125+
# Test for only timestamp data type
126+
CREATE TABLE t1 (ctimestamp TIMESTAMP) engine=columnstore;
127+
--exec rm -f $DATADIR/mcol5480.txt
128+
--exec echo "2020-08-13 03:14:07|" >> $DATADIR/mcol5480.txt
129+
--exec echo "2020-08-14 03:14:07|" >> $DATADIR/mcol5480.txt
130+
--exec echo "2020-08-15 03:14:07|" >> $DATADIR/mcol5480.txt
131+
--exec echo "2020-08-16 03:14:07|" >> $DATADIR/mcol5480.txt
132+
--exec echo "2020-08-13 03:14:08|" >> $DATADIR/mcol5480.txt
133+
--exec echo "2020-08-13 03:14:09|" >> $DATADIR/mcol5480.txt
134+
--exec echo "2020-08-13 03:14:10|" >> $DATADIR/mcol5480.txt
135+
--exec echo "2020-08-13 03:14:11|" >> $DATADIR/mcol5480.txt
136+
--replace_result $DATADIR DATADIR
137+
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
138+
SELECT * FROM t1;
139+
DROP TABLE t1;
140+
141+
# Test for only time data type
142+
CREATE TABLE t1 (ctime TIME) engine=columnstore;
143+
--exec rm -f $DATADIR/mcol5480.txt
144+
--exec echo "11:58:28|" >> $DATADIR/mcol5480.txt
145+
--exec echo "11:58:38|" >> $DATADIR/mcol5480.txt
146+
--exec echo "11:58:48|" >> $DATADIR/mcol5480.txt
147+
--exec echo "11:58:58|" >> $DATADIR/mcol5480.txt
148+
--exec echo "12:58:28|" >> $DATADIR/mcol5480.txt
149+
--exec echo "13:58:28|" >> $DATADIR/mcol5480.txt
150+
--exec echo "14:58:28|" >> $DATADIR/mcol5480.txt
151+
--exec echo "15:58:28|" >> $DATADIR/mcol5480.txt
152+
--replace_result $DATADIR DATADIR
153+
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
154+
SELECT * FROM t1;
155+
DROP TABLE t1;
156+
157+
--exec rm -f $DATADIR/mcol5480.txt
158+
--disable_warnings
159+
DROP DATABASE mcol_5480;
160+
--enable_warnings

0 commit comments

Comments
 (0)