Skip to content

Commit 1baf3ef

Browse files
authored
Add files via upload
1 parent 20ff392 commit 1baf3ef

9 files changed

+553
-0
lines changed

dspark/dockerfile

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
# syntax=docker/dockerfile:1
# Spark 3.2.0 standalone node (master or worker) on ARM64 Ubuntu 22.04.
# SPARK_WORKLOAD selects the role; start-spark.sh is the entry point.
FROM arm64v8/ubuntu:22.04

LABEL maintainer="TrungNghia <[email protected]>"

# Set the timezone non-interactively so tzdata's installer cannot block the build.
ENV TZ=Asia/Ho_Chi_Minh
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# OS tools, Python scientific stack, and JDK 8 (required by Spark 3.2).
# `update` and `install` are combined in one layer (avoids the stale-cache bug),
# --no-install-recommends and the apt-list cleanup keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        ca-certificates \
        curl \
        net-tools \
        openjdk-8-jdk \
        python3 \
        python3-matplotlib \
        python3-numpy \
        python3-pandas \
        python3-pip \
        python3-scipy \
        python3-simpy \
        rsync \
        software-properties-common \
        ssh \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Passwordless SSH so cluster containers can reach each other.
RUN ssh-keygen -t rsa -f ~/.ssh/id_rsa -P '' && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

# SECURITY NOTE(review): a hard-coded root password is baked into the image —
# acceptable only for an isolated dev cluster; use key-only auth for anything shared.
# Fix: the original `service ssh restart` was removed from this layer — it only
# (re)starts sshd inside the *build* container and has no effect on the running
# container; start-spark.sh must start sshd at runtime.
RUN echo 'root:ren294' | chpasswd && \
    sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
    sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config

# key=value ENV form (the legacy space-separated form is deprecated).
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64
ENV SPARK_VERSION=3.2.0 \
    HADOOP_VERSION=3.2 \
    SPARK_HOME=/opt/spark \
    PYTHONHASHSEED=1

# Make `python` resolve to python3 for Spark's Python tooling.
RUN update-alternatives --install "/usr/bin/python" "python" "$(which python3)" 1

# Pre-built Spark distribution expected in the build context.
COPY spark-3.2.0-bin-hadoop3.2 /opt/spark

# Warm the ivy cache: pre-fetch the Kafka connector jars at build time so
# streaming jobs do not download them on first run.
RUN $SPARK_HOME/bin/spark-shell --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.2.0,org.apache.kafka:kafka-clients:3.2.0 -i /dev/null

WORKDIR /opt/spark

# Runtime configuration consumed by start-spark.sh.
# (Fix: duplicate SPARK_HOME assignment removed — already set above.)
ENV SPARK_MASTER_PORT=7077 \
    SPARK_MASTER_WEBUI_PORT=8080 \
    SPARK_LOG_DIR=/opt/spark/logs \
    SPARK_MASTER_LOG=/opt/spark/logs/spark-master.out \
    SPARK_WORKER_LOG=/opt/spark/logs/spark-worker.out \
    SPARK_WORKER_WEBUI_PORT=8080 \
    SPARK_WORKER_PORT=7000 \
    SPARK_MASTER="spark://spark-master:7077" \
    SPARK_WORKLOAD="master"

ENV PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin

# Route the Spark daemon log files to the container's stdout.
RUN mkdir -p $SPARK_LOG_DIR && \
    touch $SPARK_MASTER_LOG && \
    touch $SPARK_WORKER_LOG && \
    ln -sf /dev/stdout $SPARK_MASTER_LOG && \
    ln -sf /dev/stdout $SPARK_WORKER_LOG

# Documentation only — EXPOSE does not publish ports.
EXPOSE 22 7000 7077 8080

COPY start-spark.sh /

CMD ["/bin/bash", "/start-spark.sh"]

dspark/dockerfile1

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
# syntax=docker/dockerfile:1
# Spark 3.2.0 standalone node (master or worker) on ARM64 Ubuntu 22.04 —
# variant without the Kafka ivy-cache warmup; start-spark.sh is the entry point.
FROM arm64v8/ubuntu:22.04

LABEL maintainer="TrungNghia <[email protected]>"

# Set the timezone non-interactively so tzdata's installer cannot block the build.
ENV TZ=Asia/Ho_Chi_Minh
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# OS tools, Python scientific stack, and JDK 8 (required by Spark 3.2).
# Fix: the original had `rsync\` with no space before the continuation
# backslash — fragile; one sorted package per line, update+install in a single
# layer, and the apt lists removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        ca-certificates \
        curl \
        net-tools \
        openjdk-8-jdk \
        python3 \
        python3-matplotlib \
        python3-numpy \
        python3-pandas \
        python3-pip \
        python3-scipy \
        python3-simpy \
        rsync \
        software-properties-common \
        ssh \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Passwordless SSH so cluster containers can reach each other.
RUN ssh-keygen -t rsa -f ~/.ssh/id_rsa -P '' && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

# key=value ENV form (the legacy space-separated form is deprecated).
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64
ENV SPARK_VERSION=3.2.0 \
    HADOOP_VERSION=3.2 \
    SPARK_HOME=/opt/spark \
    PYTHONHASHSEED=1

# Make `python` resolve to python3 for Spark's Python tooling.
RUN update-alternatives --install "/usr/bin/python" "python" "$(which python3)" 1

# Pre-built Spark distribution expected in the build context.
COPY spark-3.2.0-bin-hadoop3.2 /opt/spark

WORKDIR /opt/spark

# Runtime configuration consumed by start-spark.sh.
# (Fix: duplicate SPARK_HOME assignment removed — already set above.)
ENV SPARK_MASTER_PORT=7077 \
    SPARK_MASTER_WEBUI_PORT=8080 \
    SPARK_LOG_DIR=/opt/spark/logs \
    SPARK_MASTER_LOG=/opt/spark/logs/spark-master.out \
    SPARK_WORKER_LOG=/opt/spark/logs/spark-worker.out \
    SPARK_WORKER_WEBUI_PORT=8080 \
    SPARK_WORKER_PORT=7000 \
    SPARK_MASTER="spark://spark-master:7077" \
    SPARK_WORKLOAD="master"

ENV PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin

# Route the Spark daemon log files to the container's stdout.
RUN mkdir -p $SPARK_LOG_DIR && \
    touch $SPARK_MASTER_LOG && \
    touch $SPARK_WORKER_LOG && \
    ln -sf /dev/stdout $SPARK_MASTER_LOG && \
    ln -sf /dev/stdout $SPARK_WORKER_LOG

# Documentation only — EXPOSE does not publish ports.
EXPOSE 7000 7077 8080

COPY start-spark.sh /

CMD ["/bin/bash", "/start-spark.sh"]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
<?xml version="1.0"?>

<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<!--
  Spark FAIR-scheduler pool definitions (spark.scheduler.allocation.file).
  Each pool sets its intra-pool scheduling mode, its weight relative to other
  pools, and a minimum share of CPU cores it is entitled to.
-->
<allocations>
  <pool name="production">
    <schedulingMode>FAIR</schedulingMode>
    <weight>1</weight>
    <minShare>2</minShare>
  </pool>
  <pool name="test">
    <schedulingMode>FIFO</schedulingMode>
    <weight>2</weight>
    <minShare>3</minShare>
  </pool>
</allocations>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Root logger: everything at INFO and above goes to a console appender on stderr.
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Interactive shells (spark-shell / spark-sql) are quieter than batch apps:
# these two loggers override the root level so the REPL defaults to WARN
# without affecting regular Spark applications.
log4j.logger.org.apache.spark.repl.Main=WARN
log4j.logger.org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver=WARN

# Tone down chatty third-party libraries.
log4j.logger.org.sparkproject.jetty=WARN
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR

# For deploying Spark ThriftServer
# SPARK-34128:Suppress undesirable TTransportException warnings involved in THRIFT-4805
log4j.appender.console.filter.1=org.apache.log4j.varia.StringMatchFilter
log4j.appender.console.filter.1.StringToMatch=Thrift error occurred during processing of message
log4j.appender.console.filter.1.AcceptOnMatch=false

0 commit comments

Comments
 (0)