一、下载
1.下载hadoop
[root@master ~]# cd /usr/local/src/
[root@master src]# wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz
2.官网下载地址(可获最新)
http://hadoop.apache.org/releases.html#Download
二、安装
1.解压
[root@master src]# tar zxvf hadoop-2.7.2.tar.gz
2.移动
[root@master src]# mv hadoop-2.7.2 /usr/local/hadoop
3.向所有slave拷贝
[root@master src]# rsync -av /usr/local/hadoop slave1:/usr/local/
[root@master src]# rsync -av /usr/local/hadoop slave2:/usr/local/
[root@master src]# rsync -av /usr/local/hadoop slave3:/usr/local/
[root@master src]# rsync -av /usr/local/hadoop slave4:/usr/local/
[root@master src]# rsync -av /usr/local/hadoop slave5:/usr/local/
[root@master src]# rsync -av /usr/local/hadoop slave6:/usr/local/
三、配置
1.创建目录
[root@master ~]# mkdir -p /data/hadoop
[root@master ~]# cd !$ && mkdir tmp dfs dfs/data dfs/name
在所有slave完成相同操作。
2.配置core-site.xml文件
[root@master hadoop]# vim /usr/local/hadoop/etc/hadoop/core-site.xml
添加以下内容:
<configuration> <property> <name>fs.defaultFS</name> <value>hdfs://192.168.0.194:9000</value> </property> <property> <name>hadoop.tmp.dir</name> <value>file:/data/hadoop/tmp/</value> </property> <property> <name>io.file.buffer.size</name> <value>131702</value> </property> </configuration> |
注意:
①hdfs后面的IP是master的ip
②file后面跟的路径是创建tmp目录路径
3.配置hdfs-site.xml文件
[root@master hadoop]# vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
添加以下内容:
<configuration> <property> <name>dfs.namenode.name.dir</name> <value>file:/data/hadoop/dfs/name</value> </property> <property> <name>dfs.datanode.data.dir</name> <value>file:/data/hadoop/dfs/data</value> </property> <property> <name>dfs.replication</name> <value>2</value> </property> <property> <name>dfs.namenode.secondary.http-address</name> <value>192.168.0.194:9001</value> </property> <property> <name>dfs.webhdfs.enabled</name> <value>true</value> </property> </configuration> |
4.配置mapred-site.xml文件
[root@master hadoop]# cp /usr/local/hadoop/etc/hadoop/mapred-site.xml.template /usr/local/hadoop/etc/hadoop/mapred-site.xml
[root@master hadoop]# vim /usr/local/hadoop/etc/hadoop/mapred-site.xml
添加以下内容:
<configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> <property> <name>mapreduce.jobhistory.address</name> <value>192.168.0.194:10020</value> </property> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>192.168.0.194:19888</value> </property> </configuration> |
5.配置yarn-site.xml文件
[root@master hadoop]# vim /usr/local/hadoop/etc/hadoop/yarn-site.xml
添加以下内容:
<configuration> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.resourcemanager.address</name> <value>192.168.0.194:8032</value> </property> <property> <name>yarn.resourcemanager.scheduler.address</name> <value>192.168.0.194:8030</value> </property> <property> <name>yarn.resourcemanager.resource-tracker.address</name> <value>192.168.0.194:8031</value> </property> <property> <name>yarn.resourcemanager.admin.address</name> <value>192.168.0.194:8033</value> </property> <property> <name>yarn.resourcemanager.webapp.address</name> <value>192.168.0.194:8088</value> </property> <property> <name>yarn.nodemanager.resource.memory-mb</name> <value>2048</value> </property> </configuration>
注意:
2048表示NodeManager可用的内存大小(单位MB),建议设置为2G以上,否则在分析时可能会卡死。
6.修改配置hadoop-env.sh文件
[root@master hadoop]# vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
修改以下内容:
将:export JAVA_HOME=${JAVA_HOME} 改为:export JAVA_HOME=/usr/local/jdk1.7.0_79 |
7.修改配置yarn-env.sh文件
[root@master hadoop]# vim /usr/local/hadoop/etc/hadoop/yarn-env.sh
在第一个if语句前添加以下内容:
export JAVA_HOME=/usr/local/jdk1.7.0_79 |
8.修改配置slaves文件
[root@master hadoop]# vim /usr/local/hadoop/etc/hadoop/slaves
将所有slave的IP添加进去(每行一个IP),例如:
192.168.0.2 192.168.0.3 …….. |
9.同步配置文件
[root@master hadoop]# rsync -av /usr/local/hadoop/etc/ slave1:/usr/local/hadoop/etc
[root@master hadoop]# rsync -av /usr/local/hadoop/etc/ slave2:/usr/local/hadoop/etc
[root@master hadoop]# rsync -av /usr/local/hadoop/etc/ slave3:/usr/local/hadoop/etc
[root@master hadoop]# rsync -av /usr/local/hadoop/etc/ slave4:/usr/local/hadoop/etc
[root@master hadoop]# rsync -av /usr/local/hadoop/etc/ slave5:/usr/local/hadoop/etc
[root@master hadoop]# rsync -av /usr/local/hadoop/etc/ slave6:/usr/local/hadoop/etc
四、启动服务
1.初始化
[root@master hadoop]# /usr/local/hadoop/bin/hdfs namenode -format
[root@master hadoop]# echo $?
0
注意:0表示无任何错误;如果返回值大于0,说明初始化出错,请根据输出内容排查问题。
2.启动服务(yarn)
[root@master hadoop]# /usr/local/hadoop/sbin/start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /usr/local/hadoop/logs/yarn-root-resourcemanager-master.out
192.168.0.175: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-slave1.out
192.168.0.191: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-slave6.out
192.168.0.176: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-slave2.out
192.168.0.184: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-slave5.out
192.168.0.178: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-slave3.out
192.168.0.183: starting nodemanager, logging to /usr/local/hadoop/logs/yarn-root-nodemanager-slave4.out
注意:2.7.1的版本启动服务使用的脚本是start-all.sh(2.7.2已经弃用了)
3.停止服务(yarn)
[root@master hadoop]# /usr/local/hadoop/sbin/stop-yarn.sh
4.启动服务(hdfs)
[root@master ~]# /usr/local/hadoop/sbin/start-dfs.sh
5.停止服务(hdfs)
[root@master ~]# /usr/local/hadoop/sbin/stop-dfs.sh
6.slave上查看是否启动hadoop
[root@slave1 ~]# ps aux |grep java
[root@slave1 ~]# netstat -nlp |grep java
注意:如果没有netstat命令,请先执行 yum -y install net-tools 进行安装。
7.浏览器访问
http://192.168.0.194:8088
http://192.168.0.194:50070
五、测试
1.创建目录
[root@master ~]# cd /usr/local/hadoop/
[root@master hadoop]# bin/hdfs dfs -mkdir /123
2.查看目录
[root@master hadoop]# bin/hdfs dfs -ls /
3.拷贝测试文件
[root@master hadoop]# bin/hdfs dfs -copyFromLocal ./LICENSE.txt /123
4.分析文件(统计文件中有多少个单词)
[root@master hadoop]# bin/hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar wordcount /123/LICENSE.txt /111/123
/111/123:表示分析结果存放的目录
jar:表示使用什么类型文件
2.7.2.jar:表示具体使用的文件,这个最好TAB补全,不同的版本名称不一样。
5.查看分析结果
[root@master hadoop]# bin/hdfs dfs -cat /111/123/part-r-00000
6.删除目录
[root@master hadoop]# bin/hdfs dfs -rm -r /123
六、扩展内容
1.启停单个节点(hdfs)
启动Namenode:
$HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs start namenode
停止Namenode:
$HADOOP_PREFIX/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script hdfs stop namenode
启动Datanode:
$HADOOP_PREFIX/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script hdfs start datanode
停止Datanode:
$HADOOP_PREFIX/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script hdfs stop datanode
2.启停单个节点(yarn)
启动ResourceManager:
$HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager
停止ResourceManager:
$HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop resourcemanager
启动NodeManager:
$HADOOP_YARN_HOME/sbin/yarn-daemons.sh --config $HADOOP_CONF_DIR start nodemanager
停止NodeManager:
$HADOOP_YARN_HOME/sbin/yarn-daemons.sh --config $HADOOP_CONF_DIR stop nodemanager
启动WebAppProxy:
$HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start proxyserver
停止WebAppProxy:
$HADOOP_YARN_HOME/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR stop proxyserver
3.参考文献
官网2.7.2文档网址:http://hadoop.apache.org/docs/r2.7.2/ (最新稳定版文档:http://hadoop.apache.org/docs/stable/)
原创文章,作者:奋斗,如若转载,请注明出处:https://blog.ytso.com/191972.html