Skip to content

Commit 20ff392

Browse files
authored
Add files via upload
1 parent d036c18 commit 20ff392

36 files changed

+2695
-0
lines changed

dhadoop/dockerfile

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# syntax=docker/dockerfile:1
# Single-node Hadoop 3.3.1 image for arm64 (expects ./hadoop-3.3.1, ./ssh_config
# and ./entrypoint.sh in the build context).
FROM arm64v8/ubuntu:22.04

LABEL \
    maintainer="TrungNghia <[email protected]>" \
    version="1.0"

WORKDIR /root

# Pin the container timezone so JVM/Hadoop logs carry local timestamps.
ENV TZ=Asia/Ho_Chi_Minh
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Bug fix: the original "apt-utils\" had no space before the line continuation,
# so apt was handed the single bogus package name "apt-utilsopenssh-server".
# apt lists are removed in the same layer to keep the image smaller.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        apt-utils \
        openjdk-8-jdk \
        openssh-server \
        vim-gtk \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Passwordless root-to-root SSH, required by the Hadoop start-*.sh scripts.
RUN ssh-keygen -t rsa -f /root/.ssh/id_rsa -P '' && \
    cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys

COPY hadoop-3.3.1 /usr/local/hadoop

# key=value ENV form (the legacy space-separated form is deprecated).
# JAVA_HOME/HADOOP_HOME are set first so the next ENV instruction can expand them.
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64 \
    HADOOP_HOME=/usr/local/hadoop
ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop \
    HADOOP_INSTALL=$HADOOP_HOME \
    HADOOP_MAPRED_HOME=$HADOOP_HOME \
    HADOOP_COMMON_HOME=$HADOOP_HOME \
    HADOOP_HDFS_HOME=$HADOOP_HOME \
    HADOOP_YARN_HOME=$HADOOP_HOME \
    HADOOP_COMMON_LIB_NATIVE=$HADOOP_HOME/lib/native \
    HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native" \
    PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

RUN chmod +x $HADOOP_HOME/sbin/start-dfs.sh && \
    chmod +x $HADOOP_HOME/sbin/start-yarn.sh

# Bug fix: COPY does not expand "~" — the original "COPY ssh_config ~/.ssh/config"
# created a literal "~" directory under /root instead of writing the ssh config.
COPY ssh_config /root/.ssh/config

# Format the NameNode once, at image build time.
RUN /usr/local/hadoop/bin/hdfs namenode -format

COPY entrypoint.sh /root/entrypoint.sh
RUN chmod +x /root/entrypoint.sh

# The original also had CMD ["sh","-c","service ssh start; bash"], which is dead
# weight: with an ENTRYPOINT present, CMD only supplies arguments, and
# entrypoint.sh (which already starts sshd itself) ignores its arguments.
ENTRYPOINT ["/root/entrypoint.sh"]

dhadoop/entrypoint.sh

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/bash
# Container entrypoint: start sshd, bring up HDFS + YARN, then keep the
# container in the foreground.
service ssh start
start-all.sh
# Bug fix: use exec so tail replaces this shell; otherwise tail runs as a child
# of the PID-1 bash, which does not forward SIGTERM, and `docker stop` has to
# wait for the kill timeout.
exec tail -f /dev/null

dhadoop/log4j.properties

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
log4j.appender.stdout.layout: org.apache.log4j.PatternLayout
2+
log4j.rootLogger: INFO, stdout
3+
log4j.appender.stdout: org.apache.log4j.ConsoleAppender
4+
log4j.appender.stdout.layout.ConversionPattern: %d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n

dhadoop/ssh_config

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Host namenode
2+
StrictHostKeyChecking no
3+
4+
Host 0.0.0.0
5+
StrictHostKeyChecking no
6+
7+
Host datanode
8+
StrictHostKeyChecking no
9+
UserKnownHostsFile=/dev/null
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
<!--
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License. See accompanying LICENSE file.
13+
-->
14+
<configuration>
15+
16+
<property>
17+
<name>yarn.scheduler.capacity.maximum-applications</name>
18+
<value>10000</value>
19+
<description>
20+
Maximum number of applications that can be pending and running.
21+
</description>
22+
</property>
23+
24+
<property>
25+
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
26+
<value>0.1</value>
27+
<description>
28+
Maximum percent of resources in the cluster which can be used to run
29+
application masters i.e. controls number of concurrent running
30+
applications.
31+
</description>
32+
</property>
33+
34+
<property>
35+
<name>yarn.scheduler.capacity.resource-calculator</name>
36+
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
37+
<description>
38+
The ResourceCalculator implementation to be used to compare
39+
Resources in the scheduler.
40+
The default i.e. DefaultResourceCalculator only uses Memory while
41+
DominantResourceCalculator uses dominant-resource to compare
42+
multi-dimensional resources such as Memory, CPU etc.
43+
</description>
44+
</property>
45+
46+
<property>
47+
<name>yarn.scheduler.capacity.root.queues</name>
48+
<value>default</value>
49+
<description>
50+
The queues at this level (root is the root queue).
51+
</description>
52+
</property>
53+
54+
<property>
55+
<name>yarn.scheduler.capacity.root.default.capacity</name>
56+
<value>100</value>
57+
<description>Default queue target capacity.</description>
58+
</property>
59+
60+
<property>
61+
<name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
62+
<value>1</value>
63+
<description>
64+
Default queue user limit factor, as a percentage from 0.0 to 1.0.
65+
</description>
66+
</property>
67+
68+
<property>
69+
<name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
70+
<value>100</value>
71+
<description>
72+
The maximum capacity of the default queue.
73+
</description>
74+
</property>
75+
76+
<property>
77+
<name>yarn.scheduler.capacity.root.default.state</name>
78+
<value>RUNNING</value>
79+
<description>
80+
The state of the default queue. State can be one of RUNNING or STOPPED.
81+
</description>
82+
</property>
83+
84+
<property>
85+
<name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
86+
<value>*</value>
87+
<description>
88+
The ACL of who can submit jobs to the default queue.
89+
</description>
90+
</property>
91+
92+
<property>
93+
<name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
94+
<value>*</value>
95+
<description>
96+
The ACL of who can administer jobs on the default queue.
97+
</description>
98+
</property>
99+
100+
<property>
101+
<name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name>
102+
<value>*</value>
103+
<description>
104+
The ACL of who can submit applications with configured priority.
105+
For e.g, [user={name} group={name} max_priority={priority} default_priority={priority}]
106+
</description>
107+
</property>
108+
109+
<property>
110+
<name>yarn.scheduler.capacity.root.default.maximum-application-lifetime
111+
</name>
112+
<value>-1</value>
113+
<description>
114+
Maximum lifetime of an application which is submitted to a queue
115+
in seconds. Any value less than or equal to zero will be considered as
116+
disabled.
117+
This will be a hard time limit for all applications in this
118+
queue. If positive value is configured then any application submitted
119+
to this queue will be killed after exceeds the configured lifetime.
120+
User can also specify lifetime per application basis in
121+
application submission context. But user lifetime will be
122+
overridden if it exceeds queue maximum lifetime. It is point-in-time
123+
configuration.
124+
Note : Configuring too low value will result in killing application
125+
sooner. This feature is applicable only for leaf queue.
126+
</description>
127+
</property>
128+
129+
<property>
130+
<name>yarn.scheduler.capacity.root.default.default-application-lifetime
131+
</name>
132+
<value>-1</value>
133+
<description>
134+
Default lifetime of an application which is submitted to a queue
135+
in seconds. Any value less than or equal to zero will be considered as
136+
disabled.
137+
If the user has not submitted application with lifetime value then this
138+
value will be taken. It is point-in-time configuration.
139+
Note : Default lifetime can't exceed maximum lifetime. This feature is
140+
applicable only for leaf queue.
141+
</description>
142+
</property>
143+
144+
<property>
145+
<name>yarn.scheduler.capacity.node-locality-delay</name>
146+
<value>40</value>
147+
<description>
148+
Number of missed scheduling opportunities after which the CapacityScheduler
149+
attempts to schedule rack-local containers.
150+
When setting this parameter, the size of the cluster should be taken into account.
151+
We use 40 as the default value, which is approximately the number of nodes in one rack.
152+
Note, if this value is -1, the locality constraint in the container request
153+
will be ignored, which disables the delay scheduling.
154+
</description>
155+
</property>
156+
157+
<property>
158+
<name>yarn.scheduler.capacity.rack-locality-additional-delay</name>
159+
<value>-1</value>
160+
<description>
161+
Number of additional missed scheduling opportunities over the node-locality-delay
162+
ones, after which the CapacityScheduler attempts to schedule off-switch containers,
163+
instead of rack-local ones.
164+
Example: with node-locality-delay=40 and rack-locality-delay=20, the scheduler will
165+
attempt rack-local assignments after 40 missed opportunities, and off-switch assignments
166+
after 40+20=60 missed opportunities.
167+
When setting this parameter, the size of the cluster should be taken into account.
168+
We use -1 as the default value, which disables this feature. In this case, the number
169+
of missed opportunities for assigning off-switch containers is calculated based on
170+
the number of containers and unique locations specified in the resource request,
171+
as well as the size of the cluster.
172+
</description>
173+
</property>
174+
175+
<property>
176+
<name>yarn.scheduler.capacity.queue-mappings</name>
177+
<value></value>
178+
<description>
179+
A list of mappings that will be used to assign jobs to queues
180+
The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
181+
Typically this list will be used to map users to queues,
182+
for example, u:%user:%user maps all users to queues with the same name
183+
as the user.
184+
</description>
185+
</property>
186+
187+
<property>
188+
<name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
189+
<value>false</value>
190+
<description>
191+
If a queue mapping is present, will it override the value specified
192+
by the user? This can be used by administrators to place jobs in queues
193+
that are different than the one specified by the user.
194+
The default is false.
195+
</description>
196+
</property>
197+
198+
<property>
199+
<name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name>
200+
<value>1</value>
201+
<description>
202+
Controls the number of OFF_SWITCH assignments allowed
203+
during a node's heartbeat. Increasing this value can improve
204+
scheduling rate for OFF_SWITCH containers. Lower values reduce
205+
"clumping" of applications on particular nodes. The default is 1.
206+
Legal values are 1-MAX_INT. This config is refreshable.
207+
</description>
208+
</property>
209+
210+
211+
<property>
212+
<name>yarn.scheduler.capacity.application.fail-fast</name>
213+
<value>false</value>
214+
<description>
215+
Whether RM should fail during recovery if previous applications'
216+
queue is no longer valid.
217+
</description>
218+
</property>
219+
220+
<property>
221+
<name>yarn.scheduler.capacity.workflow-priority-mappings</name>
222+
<value></value>
223+
<description>
224+
A list of mappings that will be used to override application priority.
225+
The syntax for this list is
226+
[workflowId]:[full_queue_name]:[priority][,next mapping]*
227+
where an application submitted (or mapped to) queue "full_queue_name"
228+
and workflowId "workflowId" (as specified in application submission
229+
context) will be given priority "priority".
230+
</description>
231+
</property>
232+
233+
<property>
234+
<name>yarn.scheduler.capacity.workflow-priority-mappings-override.enable</name>
235+
<value>false</value>
236+
<description>
237+
If a priority mapping is present, will it override the value specified
238+
by the user? This can be used by administrators to give applications a
239+
priority that is different than the one specified by the user.
240+
The default is false.
241+
</description>
242+
</property>
243+
244+
</configuration>
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?xml version="1.0"?>
2+
<!--
3+
Licensed to the Apache Software Foundation (ASF) under one or more
4+
contributor license agreements. See the NOTICE file distributed with
5+
this work for additional information regarding copyright ownership.
6+
The ASF licenses this file to You under the Apache License, Version 2.0
7+
(the "License"); you may not use this file except in compliance with
8+
the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
-->
18+
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
19+
<xsl:output method="html"/>
20+
<xsl:template match="configuration">
21+
<html>
22+
<body>
23+
<table border="1">
24+
<tr>
25+
<td>name</td>
26+
<td>value</td>
27+
<td>description</td>
28+
</tr>
29+
<xsl:for-each select="property">
30+
<tr>
31+
<td><a name="{name}"><xsl:value-of select="name"/></a></td>
32+
<td><xsl:value-of select="value"/></td>
33+
<td><xsl:value-of select="description"/></td>
34+
</tr>
35+
</xsl:for-each>
36+
</table>
37+
</body>
38+
</html>
39+
</xsl:template>
40+
</xsl:stylesheet>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group
2+
banned.users=#comma separated list of users who can not run applications
3+
min.user.id=1000#Prevent other super-users
4+
allowed.system.users=##comma separated list of system users who CAN run applications
5+
feature.tc.enabled=false
6+
7+
# The configs below deal with settings for Docker
8+
#[docker]
9+
# module.enabled=## enable/disable the module. set to "true" to enable, disabled by default
10+
# docker.binary=/usr/bin/docker
11+
# docker.allowed.capabilities=## comma separated capabilities that can be granted, e.g CHOWN,DAC_OVERRIDE,FSETID,FOWNER,MKNOD,NET_RAW,SETGID,SETUID,SETFCAP,SETPCAP,NET_BIND_SERVICE,SYS_CHROOT,KILL,AUDIT_WRITE
12+
# docker.allowed.devices=## comma separated list of devices that can be mounted into a container
13+
# docker.allowed.networks=## comma separated networks that can be used. e.g bridge,host,none
14+
# docker.allowed.ro-mounts=## comma separated volumes that can be mounted as read-only
15+
# docker.allowed.rw-mounts=## comma separate volumes that can be mounted as read-write, add the yarn local and log dirs to this list to run Hadoop jobs
16+
# docker.privileged-containers.enabled=false
17+
# docker.allowed.volume-drivers=## comma separated list of allowed volume-drivers
18+
# docker.no-new-privileges.enabled=## enable/disable the no-new-privileges flag for docker run. Set to "true" to enable, disabled by default
19+
# docker.allowed.runtimes=## comma separated runtimes that can be used.
20+
21+
# The configs below deal with settings for FPGA resource
22+
#[fpga]
23+
# module.enabled=## Enable/Disable the FPGA resource handler module. set to "true" to enable, disabled by default
24+
# fpga.major-device-number=## Major device number of FPGA, by default is 246. Strongly recommend setting this
25+
# fpga.allowed-device-minor-numbers=## Comma separated allowed minor device numbers, empty means all FPGA devices managed by YARN.
26+
27+
# The configs below deal with settings for resource handled by pluggable device plugin framework
28+
#[devices]
29+
# module.enabled=## Enable/Disable the device resource handler module for isolation. Disabled by default.
30+
# devices.denied-numbers=## Blacklisted devices not permitted to use. The format is comma separated "majorNumber:minorNumber". For instance, "195:1,195:2". Leave it empty means default devices reported by device plugin are all allowed.
31+
32+
# The configs below deal with settings for GPU resource
33+
#[gpu]
34+
# module.enabled=## Enable/Disable GPU resource handler module. Set to "true" to enable, disabled by default

0 commit comments

Comments
 (0)