CentOS 6.8 minimal

```bash
yum install -y vim tar rsync openssh openssh-clients libaio net-tools
```
```bash
service iptables stop
chkconfig iptables off
vim /etc/hosts
```

```
192.168.2.100 hadoop100
192.168.2.102 hadoop102
192.168.2.103 hadoop103
192.168.2.104 hadoop104
192.168.2.105 hadoop105
192.168.2.106 hadoop106
```
```bash
useradd tian
passwd tian
vim /etc/sudoers
```
```bash
sudo mkdir /opt/module
sudo mkdir /opt/software
sudo chown tian:tian /opt/module/ /opt/software -R
```
```bash
vim /home/tian/bin/xsync
vim /home/tian/bin/copy-ssh
vim /home/tian/bin/xcall
```
```bash
#!/bin/bash
# xsync: distribute a file or directory to the other cluster nodes
pcount=$#
if ((pcount==0)); then
    echo no args
    exit
fi
p1=$1
fname=$(basename "$p1")
echo fname=$fname
pdir=$(cd -P $(dirname "$p1"); pwd)
echo pdir=$pdir
user=$(whoami)
for ((host=102; host<=104; host++)); do
    echo -e "\033[31m ------------ hadoop$host ------------ \033[0m"
    rsync -av $pdir/$fname $user@hadoop$host:$pdir
done
```
```bash
#!/bin/bash
# copy-ssh: generate a key pair and push the public key to each node
ssh-keygen
for i in hadoop102 hadoop103 hadoop104
do
    echo -e "\033[31m ======== $i ======== \033[0m"
    ssh-copy-id $i
done
```
```bash
#!/bin/bash
# xcall: run the same command on every node
for i in hadoop102 hadoop103 hadoop104
do
    echo -e "\033[31m ---------- $i ---------- \033[0m"
    ssh $i "$*"
done
```
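Make the three helper scripts executable and verify them (a quick usage sketch; the chmod step and the sample arguments are assumptions, not from the original notes):

```bash
chmod +x /home/tian/bin/xsync /home/tian/bin/copy-ssh /home/tian/bin/xcall
copy-ssh                         # push this node's public key to hadoop102-104
xsync /opt/module/hadoop-2.7.2   # distribute a directory to the same path on the other nodes
xcall jps                        # run one command on every node
```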
```bash
vim /etc/udev/rules.d/70-persistent-net.rules
vim /etc/sysconfig/network-scripts/ifcfg-eth0
vim /etc/sysconfig/network
```
Configure passwordless SSH login between all nodes.
Hadoop

```bash
vim core-site.xml
vim hdfs-site.xml
vim yarn-site.xml
cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
vim /opt/module/hadoop-2.7.2/etc/hadoop/slaves
```
```xml
<!-- core-site.xml -->
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop102:9000</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/module/hadoop-2.7.2/data/tmp</value>
</property>
```
```xml
<!-- hdfs-site.xml -->
<property>
    <name>dfs.replication</name>
    <value>3</value>
</property>
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop104:50090</value>
</property>
```
```xml
<!-- yarn-site.xml -->
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop103</value>
</property>
<property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
</property>
<property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
</property>
```
```xml
<!-- mapred-site.xml -->
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop102:10020</value>
</property>
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop102:19888</value>
</property>
```
```
hadoop102
hadoop103
hadoop104
```
Lines added to this file must not have trailing spaces, and the file must not contain blank lines.
Distribute the configuration across the cluster.
Start the whole cluster
```bash
bin/hdfs namenode -format
sbin/start-dfs.sh
sbin/mr-jobhistory-daemon.sh start historyserver
sbin/start-yarn.sh
jpsall
```
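jpsall is a custom helper used above but not defined in these notes; a minimal sketch (implementation assumed):

```bash
#!/bin/bash
# jpsall: list the Java processes on every node
for host in hadoop102 hadoop103 hadoop104
do
    echo "=============== $host ==============="
    ssh $host "jps"
done
```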
Check in the browser: the HDFS file system web UI, the SecondaryNameNode page, the YARN web UI, the JobHistory page, and the aggregated logs.
Zookeeper installation and deployment
```bash
tar -zxvf zookeeper-3.4.10.tar.gz -C /opt/module/
mkdir -p zkData
vim myid
xsync myid
mv zoo_sample.cfg zoo.cfg
vim zoo.cfg
xsync zoo.cfg
```
```properties
# change the data directory
dataDir=/opt/module/zookeeper/zkData
# add the following cluster configuration
#######################cluster##########################
server.1=hadoop102:2888:3888
server.2=hadoop103:2888:3888
server.3=hadoop104:2888:3888
```
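Each node's myid must hold the id matching its server.N line above (a minimal sketch; writing the remote files over ssh like this is an assumption):

```bash
echo 1 > /opt/module/zookeeper/zkData/myid                  # on hadoop102
ssh hadoop103 "echo 2 > /opt/module/zookeeper/zkData/myid"
ssh hadoop104 "echo 3 > /opt/module/zookeeper/zkData/myid"
```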
Start/stop test
```bash
bin/zkServer.sh start
jps
bin/zkServer.sh status
bin/zkCli.sh
quit
bin/zkServer.sh stop
```
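A group start/stop helper avoids running zkServer.sh on each node by hand (a sketch; the script name zk.sh is an assumption):

```bash
#!/bin/bash
# zk.sh: run a zkServer.sh action on all three nodes
case $1 in
start|stop|status)
    for host in hadoop102 hadoop103 hadoop104
    do
        echo "--------- zookeeper $host $1 ---------"
        ssh $host "/opt/module/zookeeper/bin/zkServer.sh $1"
    done
    ;;
*)
    echo "Usage: zk.sh {start|stop|status}"
    ;;
esac
```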
Flume

```bash
tar -zxvf apache-flume-1.7.0-bin.tar.gz -C ../module/
mv apache-flume-1.7.0-bin flume
mv flume-env.sh.template flume-env.sh
vim flume-env.sh
```
Kafka

```bash
tar -zxvf kafka_2.11-0.11.0.0.tgz -C /opt/module/
mv kafka_2.11-0.11.0.0/ kafka
mkdir logs
cd config/
vim server.properties
xsync /opt/module/kafka/
```
```properties
broker.id=0
delete.topic.enable=true
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
log.dirs=/opt/module/kafka/logs
num.partitions=1
num.recovery.threads.per.data.dir=1
log.retention.hours=168
zookeeper.connect=hadoop102:2181,hadoop103:2181,hadoop104:2181
```
Start/stop test
```bash
kafka-server-start.sh -daemon config/server.properties
kafka-server-stop.sh
```
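After xsync, broker.id must be changed to a unique value on each broker (e.g. 1 on hadoop103, 2 on hadoop104). A group script can then start and stop all brokers at once (a sketch; kf.sh is an assumed helper name):

```bash
#!/bin/bash
# kf.sh: start or stop Kafka on every broker node
for host in hadoop102 hadoop103 hadoop104
do
    case $1 in
    start)
        echo "--------- starting kafka on $host ---------"
        ssh $host "/opt/module/kafka/bin/kafka-server-start.sh -daemon /opt/module/kafka/config/server.properties"
        ;;
    stop)
        echo "--------- stopping kafka on $host ---------"
        ssh $host "/opt/module/kafka/bin/kafka-server-stop.sh"
        ;;
    esac
done
```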
HBase

```bash
tar -zxvf hbase-1.3.1-bin.tar.gz -C /opt/module
mv hbase-1.3.1/ hbase/
vim hbase-env.sh
vim hbase-site.xml
vim regionservers
ln -s /opt/module/hadoop-2.7.2/etc/hadoop/core-site.xml /opt/module/hbase/conf/core-site.xml
ln -s /opt/module/hadoop-2.7.2/etc/hadoop/hdfs-site.xml /opt/module/hbase/conf/hdfs-site.xml
xsync /opt/module/hbase/
hbase-daemon.sh start master
hbase-daemon.sh start regionserver
start-hbase.sh
stop-hbase.sh
hbase shell
```
```bash
export JAVA_HOME=/opt/module/jdk1.8.0_144
export HBASE_MANAGES_ZK=false
```
```xml
<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://hadoop102:9000/hbase</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.master.port</name>
        <value>16000</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>hadoop102,hadoop103,hadoop104</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/opt/module/zookeeper/zkData</value>
    </property>
</configuration>
```
```
hadoop102
hadoop103
hadoop104
```
HBase web UI
Redis

```bash
tar -zxvf file.tar.gz -C /opt/module
yum -y install gcc-c++
make
make install
vim /etc/profile
```
```conf
bind 192.168.2.102
protected-mode no
port 6379
tcp-backlog 511
timeout 0
tcp-keepalive 300
daemonize yes
supervised no
pidfile /var/run/redis_6379.pid
loglevel notice
logfile ""
databases 16
save 900 1
save 300 10
save 60 10000
```
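With daemonize yes, Redis can be started against this file and checked with a ping (a minimal sketch; the config path is an assumption):

```bash
redis-server /opt/module/redis/redis.conf      # start the daemon
redis-cli -h 192.168.2.102 -p 6379 ping        # expect PONG
redis-cli -h 192.168.2.102 -p 6379 shutdown    # stop it again
```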
MySQL

```bash
rpm -qa | grep mysql
sudo rpm -e --nodeps mysql-libs-5.1.73-8.el6_8.x86_64
sudo rpm -ivh MySQL-client-5.6.24-1.el6.x86_64.rpm
sudo rpm -ivh MySQL-server-5.6.24-1.el6.x86_64.rpm
mysqladmin --version
rpm -qa | grep MySQL
sudo service mysql restart
cat /root/.mysql_secret
mysqladmin -u root password
```
```sql
SET PASSWORD=PASSWORD('root');
show databases;
use mysql;
show tables;
desc user;
select User, Host, Password from user;
update user set host='%' where host='localhost';
delete from user where Host='hadoop101';
delete from user where Host='127.0.0.1';
delete from user where Host='::1';
flush privileges;
\q
```
MySQL HA
If the Hive metastore is stored in MySQL, change javax.jdo.option.ConnectionURL in hive-site.xml to point at the virtual IP.
One master, one slave

| hadoop102 | hadoop103 | hadoop104 |
| :-------- | :-------- | :-------- |
| Master    | Slave     |           |
On hadoop102 (master):

```bash
sudo vim /usr/my.cnf
sudo service mysql restart
mysql -uroot -proot
```
```ini
[mysqld]
log_bin=mysql-bin
server_id=1
```
On hadoop103 (slave):

```bash
sudo vim /usr/my.cnf
sudo service mysql restart
mysql -uroot -proot
```
```ini
[mysqld]
server_id=2
relay-log=mysql-relay
```
```sql
CHANGE MASTER TO
  MASTER_HOST='hadoop102',
  MASTER_USER='root',
  MASTER_PASSWORD='root',
  MASTER_LOG_FILE='mysql-bin.000001',
  MASTER_LOG_POS=120;
start slave;
show slave status\G
```
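MASTER_LOG_FILE and MASTER_LOG_POS come from SHOW MASTER STATUS on hadoop102; after start slave, both replication threads should report Yes (a quick check, run on hadoop103):

```bash
mysql -uroot -proot -e "show slave status\G" | grep -E "Slave_IO_Running|Slave_SQL_Running"
# expected output:
#   Slave_IO_Running: Yes
#   Slave_SQL_Running: Yes
```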
Master-master

| hadoop102      | hadoop103      | hadoop104 |
| :------------- | :------------- | :-------- |
| Master (Slave) | Slave (Master) |           |
On hadoop103:

```bash
sudo vim /usr/my.cnf
sudo service mysql restart
mysql -uroot -proot
```
```ini
[mysqld]
log_bin=mysql-bin
server_id=2
relay-log=mysql-relay
```
On hadoop102:

```bash
sudo vim /usr/my.cnf
sudo service mysql restart
mysql -uroot -proot
```
```ini
[mysqld]
server_id=1
log_bin=mysql-bin
relay-log=mysql-relay
```
```sql
CHANGE MASTER TO
  MASTER_HOST='hadoop102',
  MASTER_USER='root',
  MASTER_PASSWORD='root',
  MASTER_LOG_FILE='mysql-bin.000001',
  MASTER_LOG_POS=107;
```
Install and configure Keepalived on both nodes.
```bash
sudo yum install -y keepalived
sudo chkconfig keepalived on
sudo vim /etc/keepalived/keepalived.conf
sudo vim /var/lib/mysql/killkeepalived.sh
sudo service keepalived start
```
```conf
global_defs {
    router_id MySQL-ha
}
vrrp_instance VI_1 {
    state master            # initial state
    interface eth0          # network interface
    virtual_router_id 51    # virtual router id
    priority 100            # priority
    advert_int 1            # keepalived heartbeat interval
    nopreempt               # only on the higher-priority node: the original master does not take over again after it recovers
    authentication {        # authentication
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.2.100       # virtual ip
    }
}
virtual_server 192.168.2.100 3306 {
    delay_loop 6
    persistence_timeout 30
    protocol TCP
    real_server 192.168.2.102 3306 {
        notify_down /var/lib/mysql/killkeepalived.sh   # script called when the real server goes down
        TCP_CHECK {
            connect_timeout 3       # timeout
            nb_get_retry 1          # retry count
            delay_before_retry 1    # interval between retries
        }
    }
}
```
```bash
#!/bin/bash
sudo service keepalived stop
```
```bash
sudo yum install -y keepalived
sudo chkconfig keepalived on
sudo vim /etc/keepalived/keepalived.conf
sudo vim /var/lib/mysql/killkeepalived.sh
sudo service keepalived start
```
```conf
! Configuration File for keepalived
global_defs {
    router_id MySQL-ha
}
vrrp_instance VI_1 {
    state master            # initial state
    interface eth0          # network interface
    virtual_router_id 51    # virtual router id
    priority 100            # priority
    advert_int 1            # keepalived heartbeat interval
    nopreempt               # only on the higher-priority node: the original master does not take over again after it recovers
    authentication {        # authentication
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.2.100       # virtual ip
    }
}
# declare the virtual server
virtual_server 192.168.2.100 3306 {
    delay_loop 6
    persistence_timeout 30
    protocol TCP
    # declare the real server
    real_server 192.168.2.103 3306 {
        notify_down /var/lib/mysql/killkeepalived.sh   # script called when the real server goes down
        TCP_CHECK {
            connect_timeout 3       # timeout
            nb_get_retry 1          # retry count
            delay_before_retry 1    # interval between retries
        }
    }
}
```
```bash
#!/bin/bash
sudo service keepalived stop
```
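A quick failover check once both keepalived instances are running (a sketch; 192.168.2.100 is the virtual IP configured above):

```bash
ip addr show eth0 | grep 192.168.2.100                        # the VIP should be bound on the active node
mysql -h192.168.2.100 -uroot -proot -e "select @@hostname;"   # connect through the VIP
sudo service mysql stop                                       # stop MySQL on the active node...
mysql -h192.168.2.100 -uroot -proot -e "select @@hostname;"   # ...the VIP should now answer from the other node
```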
Hive

```bash
tar -zxvf apache-hive-1.2.1-bin.tar.gz -C /opt/module/
mv apache-hive-1.2.1-bin/ hive
mv hive-env.sh.template hive-env.sh
bin/hadoop fs -mkdir /tmp
bin/hadoop fs -mkdir -p /user/hive/warehouse
bin/hadoop fs -chmod g+w /tmp
bin/hadoop fs -chmod g+w /user/hive/warehouse
```
Configure the Hive metastore in MySQL
If MySQL HA is configured, change javax.jdo.option.ConnectionURL in hive-site.xml to the virtual IP.
```bash
tar -zxvf mysql-connector-java-5.1.27.tar.gz
cp mysql-connector-java-5.1.27-bin.jar /opt/module/hive/lib/
```
```bash
touch hive-site.xml
vim hive-site.xml
pwd
mv hive-log4j.properties.template hive-log4j.properties
vim hive-log4j.properties
```
Configure the parameters in hive-site.xml according to the official documentation.
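A minimal sketch of the metastore-related properties (the database name metastore and the root/root credentials are assumptions matching the MySQL install above, not values from these notes):

```bash
# write a minimal hive-site.xml pointing the metastore at MySQL on hadoop102
cat > /opt/module/hive/conf/hive-site.xml <<'EOF'
<?xml version="1.0"?>
<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://hadoop102:3306/metastore?createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>root</value>
    </property>
</configuration>
EOF
```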
Tez

```bash
tar -zxvf apache-tez-0.9.1-bin.tar.gz -C /opt/module/
mv apache-tez-0.9.1-bin/ tez-0.9.1
vim hive-env.sh
vim hive-site.xml
```
```bash
export HADOOP_HOME=/opt/module/hadoop-2.7.2
export HIVE_CONF_DIR=/opt/module/hive/conf
export TEZ_HOME=/opt/module/tez-0.9.1    # the directory where tez was unpacked
export TEZ_JARS=""
for jar in `ls $TEZ_HOME | grep jar`; do
    export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/$jar
done
for jar in `ls $TEZ_HOME/lib`; do
    export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/lib/$jar
done
export HIVE_AUX_JARS_PATH=/opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar$TEZ_JARS
```
```xml
<property>
    <name>hive.execution.engine</name>
    <value>tez</value>
</property>
```
Add tez-site.xml under /opt/module/hive/conf.
```bash
vim $HADOOP_HOME/etc/hadoop/yarn-site.xml
```
```xml
<property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
</property>
```
```bash
hadoop fs -mkdir /tez
hadoop fs -put /opt/module/tez-0.9.1/ /tez
hadoop fs -ls /tez
# /tez/tez-0.9.1
hive
```
```sql
create table student(id int, name string);
insert into student values(1, "lisi");
select * from student;
```
Spark

```bash
tar -zxvf spark-2.1.1-bin-hadoop2.7.tgz -C /opt/module/
mv spark-2.1.1-bin-hadoop2.7 spark-local
bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --executor-memory 1G \
  --total-executor-cores 2 \
  ./examples/jars/spark-examples_2.11-2.1.1.jar \
  100
bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master local[2] \
  ./examples/jars/spark-examples_2.11-2.1.1.jar 100
bin/run-example SparkPi 100
```
ElasticSearch

Canal

Phoenix

Flink

```bash
tar -zxvf flink-1.7.2-bin-hadoop27-scala_2.11.tgz -C ../module
vim conf/flink-conf.yaml
vim conf/slaves
```
```yaml
jobmanager.rpc.address: hadoop102
```
```
hadoop102
hadoop103
hadoop104
```
Flink web UI
Sqoop

```bash
tar -zxf sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz -C /opt/module/
mv sqoop-1.4.6.bin__hadoop-2.0.4-alpha/ sqoop/
vim /etc/profile
source /etc/profile
mv sqoop-env-template.sh sqoop-env.sh
vim sqoop-env.sh
cp mysql-connector-java-5.1.27-bin.jar /opt/module/sqoop/lib/
```
```bash
export HADOOP_COMMON_HOME=/opt/module/hadoop-2.7.2
export HADOOP_MAPRED_HOME=/opt/module/hadoop-2.7.2
export HIVE_HOME=/opt/module/hive
export ZOOKEEPER_HOME=/opt/module/zookeeper
export ZOOCFGDIR=/opt/module/zookeeper/conf
export HBASE_HOME=/opt/module/hbase
```
```bash
bin/sqoop list-databases --connect jdbc:mysql://hadoop102:3306/ --username root --password root
```
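A hedged import example to verify data movement end to end (the gmall database and user_info table are placeholders, not from these notes):

```bash
bin/sqoop import \
  --connect jdbc:mysql://hadoop102:3306/gmall \
  --username root \
  --password root \
  --table user_info \
  --target-dir /origin_data/user_info \
  --num-mappers 1 \
  --fields-terminated-by '\t'
```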
Azkaban

==Download address==
```bash
mkdir /opt/module/azkaban
tar -zxvf azkaban-web-server-2.5.0.tar.gz -C /opt/module/azkaban/
tar -zxvf azkaban-executor-server-2.5.0.tar.gz -C /opt/module/azkaban/
tar -zxvf azkaban-sql-script-2.5.0.tar.gz -C /opt/module/azkaban/
mv azkaban-web-2.5.0/ server
mv azkaban-executor-2.5.0/ executor
mysql -uroot -proot
keytool -keystore keystore -alias jetty -genkey -keyalg RSA
tzselect
```
```sql
create database azkaban;
use azkaban;
source /opt/module/azkaban/azkaban-2.5.0/create-all-sql-2.5.0.sql;
```
```bash
vim /opt/module/azkaban/server/conf/azkaban.properties
vim /opt/module/azkaban/server/conf/azkaban-users.xml
```
```properties
web.resource.dir=/opt/module/azkaban/server/web/
default.timezone.id=Asia/Shanghai
user.manager.xml.file=/opt/module/azkaban/server/conf/azkaban-users.xml
executor.global.properties=/opt/module/azkaban/executor/conf/global.properties
mysql.host=hadoop101
mysql.user=root
mysql.password=000000
jetty.keystore=/opt/module/azkaban/server/keystore
jetty.password=000000
jetty.keypassword=000000
jetty.truststore=/opt/module/azkaban/server/keystore
jetty.trustpassword=000000
mail.sender=Tiankx1003@gmail.com
mail.host=smtp.gmail.com
mail.user=Tiankx1003@gmail.com
mail.password=Tt181024
job.failure.email=
job.success.email=
```
```xml
<azkaban-users>
    <user username="azkaban" password="azkaban" roles="admin" groups="azkaban"/>
    <user username="metrics" password="metrics" roles="metrics"/>
    <user username="admin" password="admin" roles="admin,metrics"/>
    <role name="admin" permissions="ADMIN"/>
    <role name="metrics" permissions="METRICS"/>
</azkaban-users>
```
```bash
vim /opt/module/azkaban/executor/conf/azkaban.properties
```
```properties
default.timezone.id=Asia/Shanghai
executor.global.properties=/opt/module/azkaban/executor/conf/global.properties
mysql.host=hadoop101
mysql.database=azkaban
mysql.user=root
mysql.password=000000
```
Start the executor first, then the web server, so the web server does not fail to start because it cannot find an executor.
```bash
bin/azkaban-executor-start.sh
bin/azkaban-web-start.sh
jps
bin/azkaban-executor-shutdown.sh
bin/azkaban-web-shutdown.sh
```
==View the web page: https://hadoop101:8443==
Oozie

Presto

Presto Server
```bash
tar -zxvf presto-server-0.196.tar.gz -C /opt/module/
mv presto-server-0.196/ presto
mkdir data
mkdir etc
vim jvm.config
vim catalog/hive.properties
xsync /opt/module/presto
xcall vim /opt/module/presto/etc/node.properties
xcall vim /opt/module/presto/etc/config.properties
```
```
-server
-Xmx16G
-XX:+UseG1GC
-XX:G1HeapRegionSize=32M
-XX:+UseGCOverheadLimit
-XX:+ExplicitGCInvokesConcurrent
-XX:+HeapDumpOnOutOfMemoryError
-XX:+ExitOnOutOfMemoryError
```
```properties
connector.name=hive-hadoop2
hive.metastore.uri=thrift://hadoop102:9083
```
```properties
node.environment=production
node.id=ffffffff-ffff-ffff-ffff-ffffffffffff
node.data-dir=/opt/module/presto/data

node.environment=production
node.id=ffffffff-ffff-ffff-ffff-fffffffffffe
node.data-dir=/opt/module/presto/data

node.environment=production
node.id=ffffffff-ffff-ffff-ffff-fffffffffffd
node.data-dir=/opt/module/presto/data
```
```properties
coordinator=true
node-scheduler.include-coordinator=false
http-server.http.port=8881
query.max-memory=50GB
discovery-server.enabled=true
discovery.uri=http://hadoop102:8881

coordinator=false
http-server.http.port=8881
query.max-memory=50GB
discovery.uri=http://hadoop102:8881

coordinator=false
http-server.http.port=8881
query.max-memory=50GB
discovery.uri=http://hadoop102:8881
```
```bash
xcall /opt/module/presto/bin/launcher run
xcall /opt/module/presto/bin/launcher start
```
Presto Client
```bash
mv presto-cli-0.196-executable.jar prestocli
chmod +x prestocli
./prestocli --server hadoop102:8881 --catalog hive --schema default
```
The Presto command line works much like the Hive command line, except that every table must be qualified with its schema, e.g. select * from schema.table limit 100.
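A one-off query can also be run non-interactively (a sketch; show tables is just a placeholder query):

```bash
./prestocli --server hadoop102:8881 --catalog hive --schema default --execute "show tables;"
```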
Presto visual client
```bash
unzip yanagishima-18.0.zip
cd yanagishima-18.0
vim yanagishima.properties
nohup bin/yanagishima-start.sh > y.log 2>&1 &
```
```properties
jetty.port=7080
presto.datasources=atiguigu-presto
presto.coordinator.server.atiguigu-presto=http://hadoop102:8881
catalog.atiguigu-presto=hive
schema.atiguigu-presto=default
sql.query.engines=presto
```
View the web page: http://hadoop102:7080
Druid

Impala

Kylin

Official site: http://kylin.apache.org/cn/
Documentation: http://kylin.apache.org/cn/docs/
Download: http://kylin.apache.org/cn/download/
```bash
tar -zxvf apache-kylin-2.5.1-bin-hbase1x.tar.gz -C /opt/module/
start-dfs.sh
start-yarn.sh
mr-jobhistory-daemon.sh start historyserver
start-zk
start-hbase.sh
jpsall
```
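With the dependencies up, Kylin is started from its own bin directory (a sketch; the KYLIN_HOME path is an assumption):

```bash
export KYLIN_HOME=/opt/module/apache-kylin-2.5.1-bin-hbase1x
$KYLIN_HOME/bin/kylin.sh start    # start Kylin
$KYLIN_HOME/bin/kylin.sh stop     # stop Kylin
```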
Web page: http://hadoop101:7070/kylin/