Skip to content

Commit a2c9430

Browse files
committed
Generate X random integers, sort them, and write the first Y of the sorted integers to HDFS via a YARN application
To make it work, see the issue description in sequenceiq/hadoop-docker#56. Also includes minor pom-file fixes.
1 parent 59b176c commit a2c9430

File tree

11 files changed

+135
-70
lines changed

11 files changed

+135
-70
lines changed

Homework1/pom.xml

-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@
140140
timeout="120000"
141141
usepty="true"
142142
/>
143-
<!--/etc/bootstrap.sh -bash-->
144143
<echo message="Hadoop cluster in demon mode is running" />
145144

146145
<!--TODO add \r\n -> \n sed script removal in case of emergency moving from Win to Linux and wondering WTF?-->

Homework2/pom.xml

-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,6 @@
182182
timeout="120000"
183183
usepty="true"
184184
/>
185-
<!--/etc/bootstrap.sh -bash-->
186185
<echo message="Hadoop cluster in demon mode is running" />
187186

188187
<!--TODO add \r\n -> \n sed script removal in case of emergency moving from Win to Linux and wondering WTF?-->

Homework3/pom.xml

-1
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,6 @@
195195
timeout="120000"
196196
usepty="true"
197197
/>
198-
<!--/etc/bootstrap.sh -bash-->
199198
<echo message="Hadoop cluster in demon mode is running" />
200199

201200
<!--TODO add \r\n -> \n sed script removal in case of emergency moving from Win to Linux and wondering WTF?-->

Homework4/pom.xml

-1
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,6 @@
199199
timeout="120000"
200200
usepty="true"
201201
/>
202-
<!--/etc/bootstrap.sh -bash-->
203202
<echo message="Hadoop cluster in demon mode is running" />
204203

205204
<!--TODO add \r\n -> \n sed script removal in case of emergency moving from Win to Linux and wondering WTF?-->

Homework5/gs-yarn-basic-container/src/main/java/hello/container/ContainerApplication.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@ public static void main(String[] args) {
1414
}
1515

1616
@Bean
17-
public HelloPojo helloPojo() {
18-
return new HelloPojo();
17+
public DigitsGeneratorSorter createDigitsGeneratorSorter() {
18+
// TODO YarnContainerBuilder
19+
return new DigitsGeneratorSorter();
1920
}
2021

2122
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package hello.container;
2+
3+
import org.apache.commons.logging.Log;
4+
import org.apache.commons.logging.LogFactory;
5+
import org.apache.hadoop.conf.Configuration;
6+
import org.apache.hadoop.fs.FileSystem;
7+
import org.apache.hadoop.fs.Path;
8+
import org.springframework.beans.factory.annotation.Autowired;
9+
import org.springframework.yarn.annotation.OnContainerStart;
10+
import org.springframework.yarn.annotation.YarnComponent;
11+
12+
import java.io.BufferedWriter;
13+
import java.io.IOException;
14+
import java.io.OutputStream;
15+
import java.io.OutputStreamWriter;
16+
import java.net.URI;
17+
import java.net.URISyntaxException;
18+
import java.util.*;
19+
20+
@YarnComponent
21+
public class DigitsGeneratorSorter {
22+
23+
private static final int NUMBER_OF_GENERATED_RANDOM_DIGITS = 100000;
24+
private static final int NUMBER_OF_DIGITS_WRITE_TO_HDFS = 100;
25+
private static final Log LOGGER = LogFactory.getLog(DigitsGeneratorSorter.class);
26+
private static final String OUTPUT_FILE_NAME = "result";
27+
28+
@Autowired
29+
private Configuration configuration;
30+
31+
@OnContainerStart
32+
public void onContainerStart() throws Exception {
33+
List<Integer> integerList = createAndSortDigits(NUMBER_OF_GENERATED_RANDOM_DIGITS);
34+
writeRecordsToHDFS(integerList, NUMBER_OF_DIGITS_WRITE_TO_HDFS);
35+
}
36+
37+
private List<Integer> createAndSortDigits(int numberOfDigits) {
38+
LOGGER.info("Creating list of " + NUMBER_OF_GENERATED_RANDOM_DIGITS + " sorted digits ...");
39+
Random random = new Random();
40+
List<Integer> list = new ArrayList<Integer>();
41+
for (int i = 0; i < numberOfDigits; i++) {
42+
list.add(Math.abs(random.nextInt()));
43+
}
44+
45+
LOGGER.info("Sorting created list of " + NUMBER_OF_GENERATED_RANDOM_DIGITS + " ...");
46+
Collections.sort(list);
47+
LOGGER.info("Sorting created list of " + NUMBER_OF_GENERATED_RANDOM_DIGITS + " complete");
48+
49+
LOGGER.info("Creation list of " + NUMBER_OF_GENERATED_RANDOM_DIGITS + " sorted digits complete");
50+
return list;
51+
}
52+
53+
private void writeRecordsToHDFS(List<Integer> integerList, int numberOfIntegerToWrite) throws URISyntaxException, IOException {
54+
LOGGER.info("Writing " + numberOfIntegerToWrite + " in sorted list to HDFS in /" + OUTPUT_FILE_NAME + " ...");
55+
// TODO can be enhanced via
56+
// String hostname = System.getenv("HOSTNAME");
57+
58+
String hostname = "172.17.0.2";
59+
Configuration configuration = new Configuration();
60+
FileSystem fileSystem = FileSystem.get(new URI("hdfs://" + hostname + ":9000"), configuration);
61+
Path file = new Path("hdfs://" + hostname + ":9000/" + OUTPUT_FILE_NAME);
62+
if (fileSystem.exists(file)) {
63+
fileSystem.delete(file, true);
64+
}
65+
66+
OutputStream outputStream = fileSystem.create(file);
67+
BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"));
68+
// TODO can be rewritten via streams
69+
for (int i = 0; i < numberOfIntegerToWrite; i++) {
70+
bufferedWriter.write(String.valueOf(integerList.get(i)));
71+
bufferedWriter.write("\n");
72+
}
73+
74+
bufferedWriter.close();
75+
fileSystem.close();
76+
77+
LOGGER.info("Writing " + numberOfIntegerToWrite + " in sorted list to HDFS in /" + OUTPUT_FILE_NAME + " complete");
78+
}
79+
80+
}

Homework5/gs-yarn-basic-container/src/main/java/hello/container/HelloPojo.java

-50
This file was deleted.

Homework5/gs-yarn-basic-dist/execute-job.sh

-12
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,10 @@ echo "NodeManager started. Hadoop cluster initialized. Leaving NameNode from Saf
1212
bin/hdfs dfsadmin -safemode leave
1313
echo "NameNode leaved SafeMode state"
1414

15-
16-
#echo "Port 9000" >> /etc/ssh/sshd_config
17-
#service sshd restart
18-
19-
#cd $HADOOP_PREFIX/etc/hadoop
20-
#sed 's/<value>.*/<value>hdfs:\/\/localhost:9000<\/value>/' core-site.xml.template > core-site.xml
21-
#
22-
#cd $HADOOP_PREFIX/sbin
23-
#sh stop-all.sh
24-
#sh start-all.sh
25-
2615
echo "Staring history server"
2716
./sbin/mr-jobhistory-daemon.sh start historyserver
2817
echo "History server started"
2918

30-
export HADOOP_CLIENT_OPTS="-Xmx4g -Xmn1g -Xms4g $HADOOP_CLIENT_OPTS"
3119
echo "Running a job ..."
3220
java -jar /opt/gs-yarn-basic-client-1.0-SNAPSHOT.jar
3321
echo "Job has finished"

Homework5/gs-yarn-basic-dist/pom.xml

+7-2
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@
111111
todir="${remote.host.name}:${remote.host.password}@${remote.host.ip}:/root/" />
112112
<echo message="Coping ${file.name.to.copy.yarn.client} completed" />
113113

114+
<echo message="Coping ${yarn.site.settings.file.name} ..." />
115+
<scp trust="true" failonerror="true" verbose="off" sftp="true"
116+
file="./${yarn.site.settings.file.name}"
117+
todir="${remote.host.name}:${remote.host.password}@${remote.host.ip}:/root/" />
118+
<echo message="Coping ${yarn.site.settings.file.name} completed" />
119+
114120
<!--Coping job executor script -->
115121
<echo message="Coping execute job file: ${job.executor.script.name} ..." />
116122
<scp trust="true" failonerror="true" verbose="off" sftp="true"
@@ -134,11 +140,10 @@
134140
host="${remote.host.ip}"
135141
username="${remote.host.name}"
136142
password="${remote.host.password}"
137-
command="docker run -d -it -v $PWD:/opt --name ${docker.container.name} sequenceiq/hadoop-docker:2.7.1"
143+
command="docker run -d -it -v $PWD:/opt -v $PWD/yarn-site.xml:/usr/local/hadoop/etc/hadoop/yarn-site.xml --name ${docker.container.name} -p 8088:8088 -p 8042:8042 sequenceiq/hadoop-docker:2.7.1"
138144
timeout="120000"
139145
usepty="true"
140146
/>
141-
<!--/etc/bootstrap.sh -bash-->
142147
<echo message="Hadoop cluster in demon mode is running" />
143148

144149
<!--TODO add \r\n -> \n sed script removal in case of emergency moving from Win to Linux and wondering WTF?-->
+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
<!--Patched YARN settings for the NodeManager: virtual memory limits are not enforced for containers (yarn.nodemanager.vmem-check-enabled=false)-->
2+
<!--see https://github.com/sequenceiq/hadoop-docker/issues/56 for more info-->
3+
4+
<configuration>
5+
<property>
6+
<name>yarn.nodemanager.aux-services</name>
7+
<value>mapreduce_shuffle</value>
8+
</property>
9+
10+
<property>
11+
<name>yarn.application.classpath</name>
12+
<value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
13+
</property>
14+
15+
<property>
16+
<description>
17+
Number of seconds after an application finishes before the nodemanager's
18+
DeletionService will delete the application's localized file directory
19+
and log directory.
20+
21+
To diagnose Yarn application problems, set this property's value large
22+
enough (for example, to 600 = 10 minutes) to permit examination of these
23+
directories. After changing the property's value, you must restart the
24+
nodemanager in order for it to have an effect.
25+
26+
The roots of Yarn applications' work directories is configurable with
27+
the yarn.nodemanager.local-dirs property (see below), and the roots
28+
of the Yarn applications' log directories is configurable with the
29+
yarn.nodemanager.log-dirs property (see also below).
30+
</description>
31+
<name>yarn.nodemanager.delete.debug-delay-sec</name>
32+
<value>600</value>
33+
</property>
34+
<property>
35+
<name>yarn.nodemanager.vmem-check-enabled</name>
36+
<value>false</value>
37+
<description>Whether virtual memory limits will be enforced for containers</description>
38+
</property>
39+
<property>
40+
<name>yarn.nodemanager.vmem-pmem-ratio</name>
41+
<value>4</value>
42+
<description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
43+
</property>
44+
</configuration>

Homework5/pom.xml

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
<remote.host.password>12345678</remote.host.password>
3737
<docker.container.name>homework5</docker.container.name>
3838
<job.executor.script.name>execute-job.sh</job.executor.script.name>
39+
<yarn.site.settings.file.name>yarn-site.xml</yarn.site.settings.file.name>
3940

4041
<container.module.name>gs-yarn-basic-container</container.module.name>
4142
<app.master.module.name>gs-yarn-basic-appmaster</app.master.module.name>

0 commit comments

Comments
 (0)