PIG-4764: Make Pig work with Hive 3.1 (szita)
git-svn-id: https://svn.apache.org/repos/asf/pig/trunk@1873947 13f79535-47bb-0310-9956-ffa450edef68
szlta committed Feb 12, 2020
1 parent f89575f commit 9db0980
Showing 15 changed files with 594 additions and 151 deletions.
2 changes: 2 additions & 0 deletions CHANGES.txt
@@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley via rohini)

IMPROVEMENTS

PIG-4764: Make Pig work with Hive 3.1 (szita)

PIG-5352: Please add OWASP Dependency Check to the build ivy.xml (knoguchi)

PIG-5385: Skip calling extra gc() before spilling large bag when unnecessary (knoguchi)
17 changes: 13 additions & 4 deletions build.xml
@@ -154,7 +154,7 @@
<condition property="isWindows">
<os family="windows"/>
</condition>

<target name="setTezEnv">
<propertyreset name="test.timeout" value="900000" />
<propertyreset name="hadoopversion" value="2" />
@@ -241,13 +241,15 @@
</if>
<property name="hbaseversion" value="1" />
<property name="sparkversion" value="1" />
<property name="hiveversion" value="1" />

<condition property="src.exclude.dir" value="**/Spark2*.java" else="**/Spark1*.java">
<equals arg1="${sparkversion}" arg2="1"/>
</condition>

<property name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
<property name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" />
<property name="src.hive.shims.dir" value="${basedir}/shims/src/hive${hiveversion}" />

<property name="asfrepo" value="https://repository.apache.org"/>
<property name="asfsnapshotrepo" value="${asfrepo}/content/repositories/snapshots"/>
@@ -353,6 +355,7 @@
<source path="${test.e2e.dir}/udfs/java"/>
<source path="${src.shims.dir}"/>
<source path="${src.shims.test.dir}"/>
<source path="${src.hive.shims.dir}"/>
<source path="tutorial/src"/>
<source path="${test.src.dir}" excluding="e2e/pig/udfs/java/|resources/|perf/"/>
<output path="${build.dir.eclipse-main-classes}" />
@@ -568,8 +571,8 @@
<echo>*** Building Main Sources ***</echo>
<echo>*** To compile with all warnings enabled, supply -Dall.warnings=1 on command line ***</echo>
<echo>*** Else, you will only be warned about deprecations ***</echo>
<echo>*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ; Spark version used: ${sparkversion} ***</echo>
<compileSources sources="${src.dir};${src.gen.dir};${src.lib.dir}/bzip2;${src.shims.dir}"
<echo>*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ; Spark version used: ${sparkversion} ; Hive version used: ${hiveversion} ***</echo>
<compileSources sources="${src.dir};${src.gen.dir};${src.lib.dir}/bzip2;${src.shims.dir};${src.hive.shims.dir}"
excludes="${src.exclude.dir}" dist="${build.classes}" cp="classpath" warnings="${javac.args.warnings}" />
<copy todir="${build.classes}/META-INF">
<fileset dir="${src.dir}/META-INF" includes="**"/>
@@ -734,6 +737,7 @@
<fileset dir="${ivy.lib.dir}" includes="metrics-core-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="hbase-*.jar" excludes="hbase-*tests.jar,hbase-*hadoop2*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="hive-*.jar" excludes="hive-shims-0.*.jar, hive-contrib*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="minlog-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="protobuf-java-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="zookeeper-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="accumulo-*.jar" excludes="accumulo-minicluster*.jar"/>
@@ -1161,6 +1165,10 @@
<fileset dir="${basedir}/shims" />
</copy>

<copy todir="${tar.dist.dir}/hive-shims" includeEmptyDirs="true">
<fileset dir="${basedir}/hive-shims" />
</copy>

<copy todir="${tar.dist.dir}/lib-src" includeEmptyDirs="true">
<fileset dir="${src.lib.dir}" />
</copy>
@@ -1236,6 +1244,7 @@
<include name="lib-src/**"/>
<include name="license/**"/>
<include name="shims/**"/>
<include name="hive-shims/**"/>
<include name="src/**"/>
<include name="test/**"/>
<exclude name="test/**/*.jar"/>
@@ -1723,7 +1732,7 @@

<target name="ivy-resolve" depends="ivy-init" unless="ivy.resolved" description="Resolve Ivy dependencies">
<property name="ivy.resolved" value="true"/>
<echo>*** Ivy resolve with Hadoop ${hadoopversion}, Spark ${sparkversion} and HBase ${hbaseversion} ***</echo>
<echo>*** Ivy resolve with Hadoop ${hadoopversion}, Spark ${sparkversion}, HBase ${hbaseversion}, Hive ${hiveversion} ***</echo>
<ivy:resolve log="${loglevel}" settingsRef="${ant.project.name}.ivy.settings" conf="compile"/>
<ivy:report toDir="build/ivy/report"/>
</target>
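Note: the new hiveversion property defaults to 1, and the matching shim sources under shims/src/hive${hiveversion} are compiled together with the main sources and added to the Eclipse classpath. Presumably a Hive 3 build is selected simply by overriding this property on the Ant command line (for example -Dhiveversion=3), although the exact invocation is not spelled out in this diff.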
43 changes: 35 additions & 8 deletions ivy.xml
@@ -31,7 +31,7 @@
<conf name="default" extends="master,runtime"/>
<conf name="runtime" extends="compile,test" description="runtime but not the artifact" />
<!--Private configurations. -->
<conf name="compile" extends="hadoop${hadoopversion},hbase${hbaseversion}" visibility="private" description="compile artifacts"/>
<conf name="compile" extends="hadoop${hadoopversion},hbase${hbaseversion},hive${hiveversion}" visibility="private" description="compile artifacts"/>
<conf name="test" extends="compile" visibility="private"/>
<conf name="javadoc" visibility="private" extends="compile,test"/>
<conf name="releaseaudit" visibility="private"/>
@@ -43,6 +43,8 @@
<conf name="hbase2" visibility="private"/>
<conf name="spark1" visibility="private" />
<conf name="spark2" visibility="private" />
<conf name="hive1" visibility="private"/>
<conf name="hive3" visibility="private"/>
<conf name="owasp" visibility="private" description="Artifacts required for owasp target"/>
</configurations>
<publications>
@@ -525,23 +527,48 @@
<!-- for piggybank -->
<dependency org="org.hsqldb" name="hsqldb" rev="${hsqldb.version}"
conf="test->default" />
<dependency org="org.apache.hive" name="hive-exec" rev="${hive.version}" conf="compile->master" changing="true">

<!-- Hive 1 -->
<dependency org="org.apache.hive" name="hive-exec" rev="${hive1.version}" conf="hive1->master" changing="true">
<artifact name="hive-exec" m:classifier="core" />
</dependency>
<dependency org="org.apache.hive" name="hive-serde" rev="${hive1.version}" changing="true"
conf="hive1->master" />
<dependency org="org.apache.hive" name="hive-common" rev="${hive1.version}" changing="true"
conf="hive1->master" />
<dependency org="org.apache.hive.shims" name="hive-shims-common" rev="${hive1.version}" changing="true"
conf="hive1->master" />
<dependency org="org.apache.hive" name="hive-contrib" rev="${hive1.version}" changing="true"
conf="test->master" />
<dependency org="org.apache.hive.shims" name="hive-shims-0.23" rev="${hive1.version}" changing="true"
conf="hive1->master" />

<!-- Hive 3 -->
<dependency org="org.apache.hive" name="hive-exec" rev="${hive.version}" conf="hive3->master" changing="true">
<artifact name="hive-exec" m:classifier="core" />
</dependency>
<dependency org="org.apache.hive" name="hive-serde" rev="${hive.version}" changing="true"
conf="compile->master" />
conf="hive3->master" />
<dependency org="org.apache.hive" name="hive-common" rev="${hive.version}" changing="true"
conf="compile->master" />
conf="hive3->master" />
<dependency org="org.apache.hive.shims" name="hive-shims-common" rev="${hive.version}" changing="true"
conf="compile->master" />
conf="hive3->master" />
<dependency org="org.apache.hive" name="hive-contrib" rev="${hive.version}" changing="true"
conf="test->master" />
<dependency org="org.apache.hive" name="hive-llap-common" rev="${hive.version}" changing="true"
conf="hive3->master" />
<dependency org="org.apache.hive.shims" name="hive-shims-0.23" rev="${hive.version}" changing="true"
conf="hadoop2->master" />
conf="hive3->master" />


<dependency org="org.apache.orc" name="orc-core" rev="${orc.version}" changing="true" conf="hive3->default" />
<dependency org="org.apache.hive" name="hive-storage-api" rev="${hive-storage-api.version}" changing="true" conf="hive3->master" />
<dependency org="org.iq80.snappy" name="snappy" rev="${snappy.version}"
conf="test->master" />
<dependency org="com.esotericsoftware.kryo" name="kryo" rev="${kryo.version}"
conf="compile->master" />
<dependency org="com.esotericsoftware" name="kryo-shaded" rev="${kryo.version}"
conf="hive3->default" />
<dependency org="com.esotericsoftware.kryo" name="kryo" rev="2.22"
conf="hive1->default" />
<dependency org="org.apache.commons" name="commons-lang3" rev="${commons-lang3.version}"
conf="compile->master" />

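Note: because the compile configuration now extends hive${hiveversion} instead of pulling the Hive artifacts directly, Ivy resolves only one of the two dependency sets at a time: the hive1 conf keeps the 1.2-line jars and kryo 2.22, while the hive3 conf pulls the 3.x jars plus hive-llap-common, hive-storage-api, orc-core and kryo-shaded.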
7 changes: 5 additions & 2 deletions ivy/libraries.properties
@@ -41,7 +41,10 @@ hadoop-mapreduce.version=2.7.3
hbase1.version=1.2.4
hbase2.version=2.0.0
hsqldb.version=2.4.0
hive.version=1.2.1
hive1.version=1.2.1
hive.version=3.1.2
hive-storage-api.version=2.7.0
orc.version=1.5.6
httpcomponents.version=4.4
jackson.version=1.9.13
jackson-pig-3039-test.version=1.9.9
@@ -59,7 +62,7 @@ json-simple.version=1.1
junit.version=4.11
jruby.version=1.7.26
jython.version=2.7.1
kryo.version=2.22
kryo.version=3.0.3
rhino.version=1.7R2
antlr.version=3.4
stringtemplate.version=4.0.4
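Note: the version properties split accordingly: hive.version now refers to the Hive 3 line (3.1.2) while the legacy 1.2.1 release moves to the new hive1.version key, hive-storage-api and orc versions are introduced for the Hive 3 build, and the default kryo version is bumped from 2.22 to 3.0.3, presumably to line up with the kryo-shaded version Hive 3 itself depends on.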
171 changes: 171 additions & 0 deletions shims/src/hive1/org/apache/pig/hive/HiveShims.java
@@ -0,0 +1,171 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.hive;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.sql.Timestamp;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.udf.generic.Collector;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.shims.HadoopShimsSecure;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapreduce.Job;

import com.esotericsoftware.kryo.Serializer;
import com.esotericsoftware.kryo.io.Input;

import org.joda.time.DateTime;

public class HiveShims {
    public static String normalizeOrcVersionName(String version) {
        return Version.byName(version).getName();
    }

    public static void addLessThanOpToBuilder(SearchArgument.Builder builder,
            String columnName, PredicateLeaf.Type columnType, Object value) {
        builder.lessThan(columnName, value);
    }

    public static void addLessThanEqualsOpToBuilder(SearchArgument.Builder builder,
            String columnName, PredicateLeaf.Type columnType, Object value) {
        builder.lessThanEquals(columnName, value);
    }

    public static void addEqualsOpToBuilder(SearchArgument.Builder builder,
            String columnName, PredicateLeaf.Type columnType, Object value) {
        builder.equals(columnName, value);
    }

    public static void addBetweenOpToBuilder(SearchArgument.Builder builder,
            String columnName, PredicateLeaf.Type columnType, Object low, Object high) {
        builder.between(columnName, low, high);
    }

    public static void addIsNullOpToBuilder(SearchArgument.Builder builder,
            String columnName, PredicateLeaf.Type columnType) {
        builder.isNull(columnName);
    }

    public static Class[] getOrcDependentClasses(Class hadoopVersionShimsClass) {
        return new Class[]{OrcFile.class, HiveConf.class, AbstractSerDe.class,
                org.apache.hadoop.hive.shims.HadoopShims.class, HadoopShimsSecure.class, hadoopVersionShimsClass,
                Input.class};
    }

    public static Class[] getHiveUDFDependentClasses(Class hadoopVersionShimsClass) {
        return new Class[]{GenericUDF.class,
                PrimitiveObjectInspector.class, HiveConf.class, Serializer.class, ShimLoader.class,
                hadoopVersionShimsClass, HadoopShimsSecure.class, Collector.class};
    }

    public static Object getSearchArgObjValue(Object value) {
        if (value instanceof BigInteger) {
            return new BigDecimal((BigInteger) value);
        } else if (value instanceof DateTime) {
            return new Timestamp(((DateTime) value).getMillis());
        } else {
            return value;
        }
    }

    public static void setOrcConfigOnJob(Job job, Long stripeSize, Integer rowIndexStride, Integer bufferSize, Boolean blockPadding, CompressionKind compress, String versionName) {
        if (stripeSize != null) {
            job.getConfiguration().setLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname, stripeSize);
        }
        if (rowIndexStride != null) {
            job.getConfiguration().setInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE.varname, rowIndexStride);
        }
        if (bufferSize != null) {
            job.getConfiguration().setInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE.varname, bufferSize);
        }
        if (blockPadding != null) {
            job.getConfiguration().setBoolean(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING.varname, blockPadding);
        }
        if (compress != null) {
            job.getConfiguration().set(HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname, compress.toString());
        }
        if (versionName != null) {
            job.getConfiguration().set(HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.varname, versionName);
        }
    }

    public static class PigJodaTimeStampObjectInspector extends
            AbstractPrimitiveJavaObjectInspector implements TimestampObjectInspector {

        public PigJodaTimeStampObjectInspector() {
            super(TypeInfoFactory.timestampTypeInfo);
        }

        @Override
        public TimestampWritable getPrimitiveWritableObject(Object o) {
            return o == null ? null : new TimestampWritable(new Timestamp(((DateTime) o).getMillis()));
        }

        @Override
        public Timestamp getPrimitiveJavaObject(Object o) {
            return o == null ? null : new Timestamp(((DateTime) o).getMillis());
        }
    }

    public static GenericUDAFParameterInfo newSimpleGenericUDAFParameterInfo(ObjectInspector[] arguments,
            boolean distinct, boolean allColumns) {
        return new SimpleGenericUDAFParameterInfo(arguments, distinct, allColumns);
    }

    public static class TimestampShim {

        public static Timestamp cast(Object ts) {
            return (Timestamp) ts;
        }

        public static long millisFromTimestamp(Object ts) {
            return cast(ts).getTime();
        }
    }

    public static class TimestampWritableShim {

        public static boolean isAssignableFrom(Object object) {
            return object instanceof TimestampWritable;
        }

        public static TimestampWritable cast(Object ts) {
            return (TimestampWritable) ts;
        }

        public static long millisFromTimestampWritable(Object ts) {
            return cast(ts).getTimestamp().getTime();
        }
    }
}
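For context, here is a minimal, hypothetical sketch (not part of this commit) of how a loader such as Pig's ORC storage code could drive the shim when pushing a filter predicate down to ORC. The example class, column names and literal values are invented for illustration; only the HiveShims calls mirror the API shown above. The reason the column type is threaded through the shim is that the Hive 1 SearchArgument builder takes (column, value) while the Hive 3 builder takes (column, type, value), so the caller can always pass the type and let each shim adapt.

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

import org.apache.pig.hive.HiveShims;

public class HiveShimsUsageSketch {
    // Builds an ORC SearchArgument for the predicate: age < 30 AND name = 'alice'.
    public static SearchArgument buildFilter() {
        SearchArgument.Builder builder = SearchArgumentFactory.newBuilder();
        builder.startAnd();
        // The columnType argument is ignored by the hive1 shim but required by the hive3 one.
        HiveShims.addLessThanOpToBuilder(builder, "age", PredicateLeaf.Type.LONG,
                HiveShims.getSearchArgObjValue(30L));
        HiveShims.addEqualsOpToBuilder(builder, "name", PredicateLeaf.Type.STRING,
                HiveShims.getSearchArgObjValue("alice"));
        builder.end();
        return builder.build();
    }
}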
(Diffs for the remaining 10 changed files are not shown.)
