Skip to content

Commit 8af23b2

Browse files
committed
feat: Update Serde to Hive 4.0.0
1 parent 53c8c96 commit 8af23b2

File tree

4 files changed

+25
-53
lines changed

4 files changed

+25
-53
lines changed

CHANGELOG.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ This is intended as an overview of the major changes
22

33
v5.11.0-SNAPSHOT
44
===
5-
- ...
5+
- Update Serde to Hive 4.0.0
66

77
v5.10.0
88
===

httpdlog/httpdlog-serde/src/main/java/nl/basjes/parse/httpdlog/ApacheHttpdlogDeserializer.java

+13-9
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
import nl.basjes.parse.core.exceptions.MissingDissectorsException;
2727
import org.apache.hadoop.conf.Configuration;
2828
import org.apache.hadoop.hive.serde.serdeConstants;
29-
import org.apache.hadoop.hive.serde2.AbstractDeserializer;
29+
import org.apache.hadoop.hive.serde2.AbstractSerDe;
3030
import org.apache.hadoop.hive.serde2.SerDeException;
3131
import org.apache.hadoop.hive.serde2.SerDeStats;
3232
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -101,7 +101,7 @@
101101
// RegexSerDe.INPUT_REGEX, RegexSerDe.OUTPUT_FORMAT_STRING,
102102
// RegexSerDe.INPUT_REGEX_CASE_SENSITIVE
103103
//})
104-
public class ApacheHttpdlogDeserializer extends AbstractDeserializer {
104+
public class ApacheHttpdlogDeserializer extends AbstractSerDe {
105105
private static final Logger LOG = LoggerFactory.getLogger(ApacheHttpdlogDeserializer.class);
106106
private static final String FIELD = "field:";
107107

@@ -134,19 +134,18 @@ static class ColumnToGetterMapping {
134134
private final List<ColumnToGetterMapping> columnToGetterMappings = new ArrayList<>();
135135

136136
@Override
137-
public void initialize(Configuration conf, Properties props)
138-
throws SerDeException {
137+
public void initialize(Configuration conf, Properties tableProperties, Properties partitionProperties) throws SerDeException {
139138

140139
boolean usable = true;
141140
linesInput = 0;
142141
linesBad = 0;
143142

144-
String logformat = props.getProperty("logformat");
143+
String logformat = tableProperties.getProperty("logformat");
145144

146145
Map<String, Set<String>> typeRemappings = new HashMap<>();
147146
List<Dissector> additionalDissectors = new ArrayList<>();
148147

149-
for (Map.Entry<Object, Object> property: props.entrySet()){
148+
for (Map.Entry<Object, Object> property: tableProperties.entrySet()){
150149
String key = (String)property.getKey();
151150

152151
if (key.startsWith(MAP_FIELD)) {
@@ -192,8 +191,8 @@ public void initialize(Configuration conf, Properties props)
192191
// List<String> fieldList;
193192
int numColumns;
194193

195-
String columnNameProperty = props.getProperty(serdeConstants.LIST_COLUMNS);
196-
String columnTypeProperty = props.getProperty(serdeConstants.LIST_COLUMN_TYPES);
194+
String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
195+
String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
197196
List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
198197
List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
199198
assert columnNames.size() == columnTypes.size();
@@ -211,7 +210,7 @@ public void initialize(Configuration conf, Properties props)
211210
String columnName = columnNames.get(columnNr);
212211
TypeInfo columnType = columnTypes.get(columnNr);
213212

214-
String fieldValue = props.getProperty(FIELD + columnName);
213+
String fieldValue = tableProperties.getProperty(FIELD + columnName);
215214

216215
if (fieldValue == null) {
217216
LOG.error("MUST have Field value for column \"{}\".", columnName);
@@ -315,6 +314,11 @@ public Object deserialize(Writable writable) throws SerDeException {
315314
return row;
316315
}
317316

317+
@Override
318+
public Class<? extends Writable> getSerializedClass() {
319+
return null; // This is NOT a Serializer, ONLY a Deserializer!
320+
}
321+
318322
@Override
319323
public SerDeStats getSerDeStats() {
320324
return new SerDeStats();

httpdlog/httpdlog-serde/src/test/java/nl/basjes/parse/httpdlog/TestAllDissectorTypes.java

+5-21
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
import org.apache.hadoop.conf.Configuration;
2121
import org.apache.hadoop.hive.serde.serdeConstants;
22-
import org.apache.hadoop.hive.serde2.AbstractDeserializer;
22+
import org.apache.hadoop.hive.serde2.AbstractSerDe;
2323
import org.apache.hadoop.hive.serde2.SerDeException;
2424
import org.apache.hadoop.io.Text;
2525
import org.junit.jupiter.api.Test;
@@ -36,26 +36,10 @@ public class TestAllDissectorTypes {
3636

3737
private static final Logger LOG = LoggerFactory.getLogger(TestAllDissectorTypes.class);
3838

39-
/**
40-
* Returns the union of table and partition properties,
41-
* with partition properties taking precedence.
42-
* @param tblProps table properties
43-
* @param partProps partitioning properties
44-
* @return the overlayed properties
45-
*/
46-
private static Properties createOverlayedProperties(Properties tblProps, Properties partProps) {
47-
Properties props = new Properties();
48-
props.putAll(tblProps);
49-
if (partProps != null) {
50-
props.putAll(partProps);
51-
}
52-
return props;
53-
}
54-
5539
@Test
5640
void testAllDissectorOutputTypes() throws Throwable {
5741
// Create the SerDe
58-
AbstractDeserializer serDe = getTestSerDe();
42+
AbstractSerDe serDe = getTestSerDe();
5943

6044
// Data
6145
Text t = new Text("Doesn't matter");
@@ -96,7 +80,7 @@ void testAllDissectorOutputTypes() throws Throwable {
9680
assertEquals(42D, rowArray.get(++index)); // double_double
9781
}
9882

99-
private AbstractDeserializer getTestSerDe() throws SerDeException {
83+
private AbstractSerDe getTestSerDe() throws SerDeException {
10084
// Create the SerDe
10185
Properties schema = new Properties();
10286
schema.setProperty(serdeConstants.LIST_COLUMNS,
@@ -163,8 +147,8 @@ private AbstractDeserializer getTestSerDe() throws SerDeException {
163147
schema.setProperty("field:double_long", "DOUBLE:double");
164148
schema.setProperty("field:double_double", "DOUBLE:double");
165149

166-
AbstractDeserializer serDe = new ApacheHttpdlogDeserializer();
167-
serDe.initialize(new Configuration(), createOverlayedProperties(schema, null));
150+
AbstractSerDe serDe = new ApacheHttpdlogDeserializer();
151+
serDe.initialize(new Configuration(), schema, null);
168152
return serDe;
169153
}
170154

httpdlog/httpdlog-serde/src/test/java/nl/basjes/parse/httpdlog/TestApacheHttpdlogDeserializer.java

+6-22
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
import org.apache.hadoop.conf.Configuration;
2121
import org.apache.hadoop.hive.serde.serdeConstants;
22-
import org.apache.hadoop.hive.serde2.AbstractDeserializer;
22+
import org.apache.hadoop.hive.serde2.AbstractSerDe;
2323
import org.apache.hadoop.hive.serde2.SerDeException;
2424
import org.apache.hadoop.io.Text;
2525
import org.junit.jupiter.api.Test;
@@ -39,22 +39,6 @@ class TestApacheHttpdlogDeserializer {
3939

4040
private static final Logger LOG = LoggerFactory.getLogger(TestApacheHttpdlogDeserializer.class);
4141

42-
/**
43-
* Returns the union of table and partition properties,
44-
* with partition properties taking precedence.
45-
* @param tblProps table properties
46-
* @param partProps partitioning properties
47-
* @return the overlayed properties
48-
*/
49-
private static Properties createOverlayedProperties(Properties tblProps, Properties partProps) {
50-
Properties props = new Properties();
51-
props.putAll(tblProps);
52-
if (partProps != null) {
53-
props.putAll(partProps);
54-
}
55-
return props;
56-
}
57-
5842
private final String logformat = "%h %a %A %l %u %t \"%r\" " +
5943
"%>s %b %p \"%q\" \"%{Referer}i\" %D \"%{User-agent}i\" " +
6044
"\"%{Cookie}i\" " +
@@ -75,7 +59,7 @@ private static Properties createOverlayedProperties(Properties tblProps, Propert
7559
@Test
7660
void testBasicParse() throws Throwable {
7761
// Create the SerDe
78-
AbstractDeserializer serDe = getTestSerDe();
62+
AbstractSerDe serDe = getTestSerDe();
7963

8064
// Data
8165
Text t = new Text(testLogLine);
@@ -97,7 +81,7 @@ void testBasicParse() throws Throwable {
9781

9882
@Test
9983
void testHighFailRatio1() throws Throwable {
100-
AbstractDeserializer serDe = getTestSerDe();
84+
AbstractSerDe serDe = getTestSerDe();
10185

10286
// Data
10387
Text goodLine = new Text(testLogLine);
@@ -127,7 +111,7 @@ void testHighFailRatio1() throws Throwable {
127111
});
128112
}
129113

130-
private AbstractDeserializer getTestSerDe() throws SerDeException {
114+
private AbstractSerDe getTestSerDe() throws SerDeException {
131115
// Create the SerDe
132116
Properties schema = new Properties();
133117
schema.setProperty(serdeConstants.LIST_COLUMNS,
@@ -144,8 +128,8 @@ private AbstractDeserializer getTestSerDe() throws SerDeException {
144128
schema.setProperty("field:screenWidth", "SCREENWIDTH:request.firstline.uri.query.s.width");
145129
schema.setProperty("field:screenHeight", "SCREENHEIGHT:request.firstline.uri.query.s.height");
146130

147-
AbstractDeserializer serDe = new ApacheHttpdlogDeserializer();
148-
serDe.initialize(new Configuration(), createOverlayedProperties(schema, null));
131+
AbstractSerDe serDe = new ApacheHttpdlogDeserializer();
132+
serDe.initialize(new Configuration(), schema, null);
149133
return serDe;
150134
}
151135

0 commit comments

Comments
 (0)