A Detailed Guide to Installing a Hadoop Cluster with Docker

Follow the steps below to quickly install and start a Hadoop cluster under Docker.

Prepare the following installation packages

hadoop-3.2.1.tar.gz
jdk1.8.tar.gz
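The Dockerfile below unpacks jdk1.8.tar.gz into /usr/local/ and sets JAVA_HOME=/usr/local/jdk8, so the JDK tarball's top-level directory must be named jdk8. A minimal preparation sketch, assuming you start from a stock JDK 8 archive (jdk-8u281-linux-x64.tar.gz and jdk1.8.0_281 are hypothetical names; substitute your own):

# Repack a stock JDK 8 archive so it extracts to a top-level directory named jdk8
tar -zxf jdk-8u281-linux-x64.tar.gz
mv jdk1.8.0_281 jdk8
tar -zcf jdk1.8.tar.gz jdk8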

Hadoop startup scripts

hadoop-master.sh

#!/bin/sh
HADOOP_HOME=/usr/local/hadoop-3.2.1
/usr/sbin/sshd
cd ${HADOOP_HOME}
# Format the NameNode on the first run only; remove this line afterwards
${HADOOP_HOME}/bin/hdfs namenode -format
${HADOOP_HOME}/sbin/start-dfs.sh
${HADOOP_HOME}/sbin/start-yarn.sh
tail -f /dev/null
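As written, the script reformats the NameNode on every container start, which would wipe HDFS metadata. A minimal idempotent sketch, assuming the dfs.namenode.name.dir configured in hdfs-site.xml below (/usr/local/hadoop-3.2.1/namenode): format only when the directory has not been initialized yet.

# Replace the unconditional format in hadoop-master.sh with this guard
if [ ! -f ${HADOOP_HOME}/namenode/current/VERSION ]; then
    ${HADOOP_HOME}/bin/hdfs namenode -format
fi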

hadoop-slave.sh

#!/bin/sh
HADOOP_HOME=/usr/local/hadoop-3.2.1
/usr/sbin/sshd
cd ${HADOOP_HOME}
${HADOOP_HOME}/sbin/start-dfs.sh
${HADOOP_HOME}/sbin/start-yarn.sh
tail -f /dev/null
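Because start-dfs.sh and start-yarn.sh use SSH to start daemons on every host listed in workers, running them on the master alone already brings up the whole cluster; on the workers these calls are largely redundant. A minimal alternative slave script, assuming the master's script is the one that starts all daemons:

#!/bin/sh
# The workers only need a running sshd so the master can reach them
/usr/sbin/sshd
tail -f /dev/null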

hadoop-history.sh

#!/bin/sh
HADOOP_HOME=/usr/local/hadoop-3.2.1
/usr/sbin/sshd
cd ${HADOOP_HOME}
${HADOOP_HOME}/sbin/start-dfs.sh
${HADOOP_HOME}/sbin/start-yarn.sh
${HADOOP_HOME}/sbin/mr-jobhistory-daemon.sh start historyserver
tail -f /dev/null
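Note that mr-jobhistory-daemon.sh lives under sbin/ and is deprecated in Hadoop 3.x; the compose file below only uses the master and slave scripts, so this one is optional. A sketch of the modern equivalent line, assuming the same HADOOP_HOME:

# Hadoop 3.x replacement for mr-jobhistory-daemon.sh
${HADOOP_HOME}/bin/mapred --daemon start historyserver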

Docker files

Dockerfile

FROM centos:latest
ARG tar_file
ARG tar_name
ADD ${tar_file}.tar.gz /usr/local/
ADD hadoop-master.sh /opt/
ADD hadoop-slave.sh /opt/
ADD hadoop-history.sh /opt/
ADD jdk1.8.tar.gz /usr/local/
RUN chmod a+x /opt/hadoop-master.sh /opt/hadoop-slave.sh /opt/hadoop-history.sh \
    && yum install -y passwd openssl openssh-server openssh-clients lsof vim which sudo \
    && /bin/cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && echo 'Asia/Shanghai' > /etc/timezone \
    && ssh-keygen -t rsa -P '' -f /root/.ssh/id_rsa \
    && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys \
    && ssh-keygen -t dsa -N '' -f /etc/ssh/ssh_host_dsa_key \
    && ssh-keygen -t rsa -N '' -f /etc/ssh/ssh_host_rsa_key \
    && ssh-keygen -t ecdsa -N '' -f /etc/ssh/ssh_host_ecdsa_key \
    && ssh-keygen -t ed25519 -N '' -f /etc/ssh/ssh_host_ed25519_key \
    && chmod 700 /root/.ssh/ \
    && chmod 600 /root/.ssh/authorized_keys \
    && cd /usr/local/${tar_name} \
    && mkdir -p tmp namenode datanode
ENV HDFS_DATANODE_USER root
ENV HDFS_NAMENODE_USER root
ENV HDFS_SECONDARYNAMENODE_USER root
ENV HDFS_DATANODE_SECURE_USER hdfs
ENV YARN_RESOURCEMANAGER_USER root
ENV HADOOP_SECURE_DN_USER yarn
ENV YARN_NODEMANAGER_USER root
ENV JAVA_HOME /usr/local/jdk8
ENV CLASSPATH .:$JAVA_HOME/lib
ENV PATH $PATH:$JAVA_HOME/bin
ENV HADOOP_HOME /usr/local/${tar_name}
ENV HADOOP_INSTALL $HADOOP_HOME
ENV HADOOP_MAPRED_HOME $HADOOP_HOME
ENV HADOOP_COMMON_HOME $HADOOP_HOME
ENV HADOOP_HDFS_HOME $HADOOP_HOME
ENV YARN_HOME $HADOOP_HOME
ENV HADOOP_LIBEXEC_DIR $HADOOP_HOME/libexec
ENV HADOOP_COMMON_LIB_NATIVE_DIR $HADOOP_HOME/lib/native
ENV PATH $HADOOP_HOME/sbin:$HADOOP_HOME/bin:$JAVA_HOME/bin:$PATH
# Note: ENV performs no command substitution, so the $( ) below is stored literally
ENV CLASSPATH $($HADOOP_HOME/bin/hadoop classpath):$CLASSPATH
# ENTRYPOINT ["/opt/hadoop-master.sh"]
# docker build -t kala/hadoop:2.0 .
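The trailing comment shows the plain build command; when building outside of docker-compose, the two ARG values also have to be supplied explicitly (compose passes them via build.args in the file below):

docker build --build-arg tar_file=hadoop-3.2.1 --build-arg tar_name=hadoop-3.2.1 -t kala/hadoop:2.0 .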

version: "3.7" services: hadoop-master: build: context: . args: tar_file: hadoop-3.2.1 tar_name: hadoop-3.2.1 image: kala/hadoop:2.0 container_name: kala-hadoop-master-container hostname: hadoopmaster command: - /bin/sh - -c - | /opt/hadoop-master.sh extra_hosts: - "hadoopslave1:172.16.0.16" - "hadoopslave2:172.16.0.17" ports: - "19888:19888" - "18088:18088" - "9870:9870" - "9000:9000" - "8088:8088" volumes: - ../volumes/conf:/usr/local/hadoop-3.2.1/etc/hadoop networks: docker_net: ipv4_address: 172.16.0.15 hadoop-slave1: image: kala/hadoop:2.0 container_name: kala-hadoop-slave1-container hostname: hadoopslave1 command: - /bin/sh - -c - | /opt/hadoop-slave.sh extra_hosts: - "hadoopmaster:172.16.0.15" - "hadoopslave2:172.16.0.17" volumes: - ../volumes/conf:/usr/local/hadoop-3.2.1/etc/hadoop networks: docker_net: ipv4_address: 172.16.0.16 hadoop-slave2: image: kala/hadoop:2.0 container_name: kala-hadoop-slave2-container hostname: hadoopslave2 command: - /bin/sh - -c - | /opt/hadoop-slave.sh extra_hosts: - "hadoopmaster:172.16.0.15" - "hadoopslave1:172.16.0.16" volumes: - ../volumes/conf:/usr/local/hadoop-3.2.1/etc/hadoop networks: docker_net: ipv4_address: 172.16.0.17 networks: docker_net: ipam: driver: default config: - subnet: "172.16.0.0/24" external: name: docker-networks

Hadoop configuration files

Every container bind-mounts ../volumes/conf over /usr/local/hadoop-3.2.1/etc/hadoop, so the files below all go into that directory on the host.
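A minimal preparation sketch, assuming the layout implied by the relative path ../volumes/conf (compose files in the build directory, volumes one level up): seed the mounted directory with the stock configuration from the tarball, then apply the edits below.

# Seed ../volumes/conf with the default configuration shipped in the tarball
mkdir -p ../volumes/conf
tar -zxf hadoop-3.2.1.tar.gz -C ../volumes/conf --strip-components=3 hadoop-3.2.1/etc/hadoop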

hadoop-env.sh

# Replace with the path to your JDK
export JAVA_HOME=/usr/local/jdk8

yarn-env.sh

# Replace with the path to your JDK
export JAVA_HOME=/usr/local/jdk8

core-site.xml

<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop-3.2.1/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoopmaster:18088</value>
  </property>
</configuration>

hdfs-site.xml

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hadoopmaster:9870</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hadoop-3.2.1/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hadoop-3.2.1/datanode</value>
  </property>
</configuration>

mapred-site.xml

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoopmaster:19888</value>
  </property>
  <property>
    <name>mapreduce.application.classpath</name>
    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
  </property>
</configuration>

yarn-site.xml

<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoopmaster</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>

workers

hadoopmaster
hadoopslave1
hadoopslave2

Note that hadoopmaster is listed here as well, so the master node also runs a DataNode and a NodeManager.

Cluster startup and shutdown

Start the Hadoop Docker services

docker-compose -f Docker-Compose.yml up
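Once the containers are up, it is worth checking that all daemons started. A quick verification sketch, run from the host (jps ships with the JDK, and both it and hdfs are on the image's PATH):

# List the Java daemons running on the master
docker exec kala-hadoop-master-container jps
# Confirm the DataNodes registered with the NameNode
docker exec kala-hadoop-master-container hdfs dfsadmin -report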

Stop the Hadoop Docker services

docker-compose -f Docker-Compose.yml down

Accessing the Hadoop cluster

In the URLs below, replace nn_host with the address of the Docker host; all three ports are published in Docker-Compose.yml.

Access the NameNode

http://nn_host:9870/

Access the ResourceManager

http://nn_host:8088/

Access the MapReduce JobHistory Server

http://nn_host:19888/
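To see an entry in the JobHistory UI, submit any MapReduce job. A minimal smoke test using the examples jar that ships with the 3.2.1 distribution:

# Run the bundled pi estimator (2 maps, 10 samples each) on the cluster
docker exec kala-hadoop-master-container \
    yarn jar /usr/local/hadoop-3.2.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar pi 2 10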
