Skip to content

Commit 48afb33

Browse files
committed
read lzo
1 parent 8e2d685 commit 48afb33

File tree

2 files changed

+60
-22
lines changed

2 files changed

+60
-22
lines changed

hadoop-file-type/pom.xml

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
<transformers>
4343
<transformer
4444
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
45-
<mainClass>com.wxmimperio.hadoop.GetFileOrPathSize</mainClass>
45+
<mainClass>com.wxmimperio.hadoop.ReadLzoFile</mainClass>
4646
</transformer>
4747
</transformers>
4848
</configuration>
@@ -66,16 +66,15 @@
6666
<properties>
6767
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
6868
<maven.test.skip>true</maven.test.skip>
69-
<hadoop.version>2.6.0-cdh5.11.1</hadoop.version>
69+
<hadoop.version>2.8.4</hadoop.version>
7070
<hadoop.scope>compile</hadoop.scope>
7171
</properties>
7272

7373
<dependencies>
7474
<dependency>
75-
<groupId>com.hadoop.lzo</groupId>
75+
<groupId>com.hadoop.compression</groupId>
7676
<artifactId>hadoop-lzo</artifactId>
77-
<version>0.4.15</version>
78-
<scope>${hadoop.scope}</scope>
77+
<version>0.4.20</version>
7978
</dependency>
8079
<dependency>
8180
<groupId>org.apache.hadoop</groupId>
@@ -106,21 +105,4 @@
106105
<scope>test</scope>
107106
</dependency>
108107
</dependencies>
109-
110-
<repositories>
111-
<repository>
112-
<id>nexus-public</id>
113-
<url>http://nexus.dw.sdo.com/content/groups/public/</url>
114-
</repository>
115-
116-
<repository>
117-
<id>dw-releases</id>
118-
<url>http://nexus.dw.sdo.com/content/repositories/dw-releases/</url>
119-
</repository>
120-
121-
<repository>
122-
<id>dw-snapshots</id>
123-
<url>http://nexus.dw.sdo.com/content/repositories/dw-snapshots/</url>
124-
</repository>
125-
</repositories>
126108
</project>
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package com.wxmimperio.hadoop;
2+
3+
import com.hadoop.compression.lzo.LzopCodec;
4+
import org.apache.commons.lang.StringUtils;
5+
import org.apache.hadoop.conf.Configuration;
6+
7+
import java.io.*;
8+
import java.util.Arrays;
9+
10+
/**
11+
* Created with IntelliJ IDEA.
12+
*
13+
* @author weiximing
14+
* @version 1.0.0
15+
* @className ReadLzoFile.java
16+
* @description This is the description of ReadLzoFile.java
17+
* @createTime 2021-01-27 17:07:00
18+
*/
19+
public class ReadLzoFile {
20+
private final static String ORC = ".orc";
21+
private final static String LZO = ".lzo";
22+
23+
public static void main(String[] args) throws Exception {
24+
25+
String delimiter = "\t";
26+
27+
String localPath = args[0];
28+
File[] localFiles = new File(localPath).listFiles();
29+
for (File localFile : localFiles) {
30+
if (localFile.isHidden()) {
31+
continue;
32+
}
33+
try (InputStream inputStream = wrapInputStream(new FileInputStream(localFile), localFile.getName());
34+
BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) {
35+
String line;
36+
while ((line = br.readLine()) != null) {
37+
String[] fields = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, delimiter);
38+
System.out.println(Arrays.toString(fields));
39+
}
40+
}
41+
}
42+
}
43+
44+
private static InputStream wrapInputStream(InputStream origin, String fileName) throws IOException {
45+
if (StringUtils.endsWithIgnoreCase(fileName, LZO)) {
46+
LzopCodec codec = new LzopCodec();
47+
codec.setConf(new Configuration());
48+
return codec.createInputStream(origin);
49+
}
50+
if (StringUtils.endsWithIgnoreCase(fileName, ORC)) {
51+
// todo
52+
return origin;
53+
}
54+
return origin;
55+
}
56+
}

0 commit comments

Comments
 (0)