01 hadoop伪分布式模型
配置环境
node1: 192.168.1.131 CentOS Linux release 7.2
1、安装hadoop
#安装
[root@node1 ~]# vim /etc/profile.d/java.sh
export JAVA_HOME=/usr
[root@node1 ~]# yum install java-1.7.0-openjdk-devel.x86_64
[root@node1 ~]# mkdir /bdapps
[root@node1 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/
[root@node1 ~]# cd /bdapps/
[root@node1 bdapps]# ln -s hadoop-2.6.2/ hadoop
[root@node1 bdapps]# ll
total 0
lrwxrwxrwx 1 root root 13 Jan 7 09:24 hadoop -> hadoop-2.6.2/
drwxr-xr-x 9 10011 10011 139 Oct 22 2015 hadoop-2.6.2
[root@node1 bdapps]# cd hadoop
[root@node1 hadoop]# vim /etc/profile.d/hadoop.sh
export HADOOP_PREFIX=/bdapps/hadoop
export PATH=$PATH:${HADOOP_PREFIX}/bin:${HADOOP_PREFIX}/sbin
export HADOOP_YARN_HOME=${HADOOP_PREFIX}
export HADOOP_MAPRED_HOME=${HADOOP_PREFIX}
export HADOOP_COMMON_HOME=${HADOOP_PREFIX}
export HADOOP_HDFS_HOME=${HADOOP_PREFIX}
[root@node1 hadoop]# . /etc/profile.d/hadoop.sh
[root@node1 hadoop]# pwd
/bdapps/hadoop
[root@node1 hadoop]# ls
bin etc include lib libexec LICENSE.txt NOTICE.txt README.txt sbin share
[root@node1 hadoop]# ls bin/
container-executor hdfs mapred.cmd yarn
hadoop hdfs.cmd rcc yarn.cmd
hadoop.cmd mapred test-container-executor
[root@node1 hadoop]# ls include/
hdfs.h Pipes.hh SerialUtils.hh StringUtils.hh TemplateFactory.hh
[root@node1 hadoop]# ls lib
native
[root@node1 hadoop]# ls libexec/
hadoop-config.cmd hdfs-config.sh mapred-config.cmd yarn-config.sh
hadoop-config.sh httpfs-config.sh mapred-config.sh
hdfs-config.cmd kms-config.sh yarn-config.cmd
[root@node1 hadoop]# ls sbin/
distribute-exclude.sh start-all.cmd stop-balancer.sh
hadoop-daemon.sh start-all.sh stop-dfs.cmd
hadoop-daemons.sh start-balancer.sh stop-dfs.sh
hdfs-config.cmd start-dfs.cmd stop-secure-dns.sh
hdfs-config.sh start-dfs.sh stop-yarn.cmd
httpfs.sh start-secure-dns.sh stop-yarn.sh
kms.sh start-yarn.cmd yarn-daemon.sh
mr-jobhistory-daemon.sh start-yarn.sh yarn-daemons.sh
refresh-namenodes.sh stop-all.cmd
slaves.sh stop-all.sh
[root@node1 hadoop]# ls etc/hadoop/
capacity-scheduler.xml httpfs-env.sh mapred-env.sh
configuration.xsl httpfs-log4j.properties mapred-queues.xml.template
container-executor.cfg httpfs-signature.secret mapred-site.xml.template
core-site.xml httpfs-site.xml slaves
hadoop-env.cmd kms-acls.xml ssl-client.xml.example
hadoop-env.sh kms-env.sh ssl-server.xml.example
hadoop-metrics2.properties kms-log4j.properties yarn-env.cmd
hadoop-metrics.properties kms-site.xml yarn-env.sh
hadoop-policy.xml log4j.properties yarn-site.xml
hdfs-site.xml mapred-env.cmd
#创建运行Hadoop进程的用户和相关目录
[root@node1 hadoop]# groupadd hadoop
[root@node1 hadoop]# useradd -g hadoop yarn
[root@node1 hadoop]# useradd -g hadoop hdfs
[root@node1 hadoop]# useradd -g hadoop mapred
#创建数据和日志目录
[root@node1 hadoop]# mkdir -p /data/hadoop/hdfs/{nn,snn,dn}
[root@node1 hadoop]# chown -R hdfs:hadoop /data/hadoop/hdfs/
[root@node1 hadoop]# ll /data/hadoop/hdfs/
total 0
drwxr-xr-x 2 hdfs hadoop 6 Jan 7 09:48 dn
drwxr-xr-x 2 hdfs hadoop 6 Jan 7 09:48 nn
drwxr-xr-x 2 hdfs hadoop 6 Jan 7 09:48 snn
[root@node1 hadoop]# mkdir logs
[root@node1 hadoop]# chmod g+w logs
[root@node1 hadoop]# chown -R yarn:hadoop ./*
[root@node1 hadoop]# ll
total 36
drwxr-xr-x 2 yarn hadoop 4096 Oct 22 2015 bin
drwxr-xr-x 3 yarn hadoop 19 Oct 22 2015 etc
drwxr-xr-x 2 yarn hadoop 101 Oct 22 2015 include
drwxr-xr-x 3 yarn hadoop 19 Oct 22 2015 lib
drwxr-xr-x 2 yarn hadoop 4096 Oct 22 2015 libexec
-rw-r--r-- 1 yarn hadoop 15429 Oct 22 2015 LICENSE.txt
drwxrwxr-x 2 yarn hadoop 6 Jan 7 09:51 logs
-rw-r--r-- 1 yarn hadoop 101 Oct 22 2015 NOTICE.txt
-rw-r--r-- 1 yarn hadoop 1366 Oct 22 2015 README.txt
drwxr-xr-x 2 yarn hadoop 4096 Oct 22 2015 sbin
drwxr-xr-x 4 yarn hadoop 29 Oct 22 2015 share
#配置hadoop
[root@node1 hadoop]# cd etc/hadoop/
[root@node1 hadoop]# vim core-site.xml
在末尾添加:
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:8020</value>
<final>true</final>
</property>
</configuration>
[root@node1 hadoop]# vim hdfs-site.xml
在末尾添加:
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/hadoop/hdfs/nn</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///data/hadoop/hdfs/dn</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>file:///data/hadoop/hdfs/snn</value>
</property>
<property>
<name>fs.checkpoint.edits.dir</name>
<value>file:///data/hadoop/hdfs/snn</value>
</property>
</configuration>
[root@node1 hadoop]# cp mapred-site.xml.template mapred-site.xml
[root@node1 hadoop]# vim mapred-site.xml
在末尾添加:
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
[root@node1 hadoop]# vim yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.address</name>
<value>localhost:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>localhost:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>localhost:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
<property>
<name></name>
<value></value>
</property>
<property>
<name></name>
<value></value>
</property>
<property>
<name></name>
<value></value>
</property>
</configuration>
#格式化HDFS
[root@node1 hadoop]# su - hdfs
[hdfs@node1 ~]$ hdfs namenode -format
[hdfs@node1 ~]$ ls /data/hadoop/hdfs/nn/current/
fsimage_0000000000000000000 fsimage_0000000000000000000.md5 seen_txid VERSION
#启动Hadoop
[hdfs@node1 ~]$ hadoop-daemon.sh start namenode
starting namenode, logging to /bdapps/hadoop/logs/hadoop-hdfs-namenode-node1.out
[hdfs@node1 ~]$ ls /bdapps/hadoop/logs/
hadoop-hdfs-namenode-node1.log SecurityAuth-hdfs.audit
hadoop-hdfs-namenode-node1.out
[hdfs@node1 ~]$ jps
41937 NameNode
42025 Jps
[hdfs@node1 ~]$ hadoop-daemon.sh start secondarynamenode
starting secondarynamenode, logging to /bdapps/hadoop/logs/hadoop-hdfs-secondarynamenode-node1.out
[hdfs@node1 ~]$ jps
41937 NameNode
42090 SecondaryNameNode
42133 Jps
[hdfs@node1 ~]$ hadoop-daemon.sh start datanode
starting datanode, logging to /bdapps/hadoop/logs/hadoop-hdfs-datanode-node1.out
[hdfs@node1 ~]$ jps
41937 NameNode
42242 Jps
42166 DataNode
42090 SecondaryNameNode
[hdfs@node1 ~]$ hdfs dfs -mkdir /test
[hdfs@node1 ~]$ hdfs dfs -ls /
Found 1 items
drwxr-xr-x - hdfs supergroup 0 2017-01-07 11:08 /test
[hdfs@node1 ~]$ hdfs dfs -put /etc/fstab /test/fstab
[hdfs@node1 ~]$ hdfs dfs -ls /test
Found 1 items
-rw-r--r-- 1 hdfs supergroup 465 2017-01-07 11:11 /test/fstab
[hdfs@node1 ~]$ hdfs dfs -lsr /
lsr: DEPRECATED: Please use 'ls -R' instead.
drwxr-xr-x - hdfs supergroup 0 2017-01-07 11:11 /test
-rw-r--r-- 1 hdfs supergroup 465 2017-01-07 11:11 /test/fstab
[hdfs@node1 ~]$ ls /data/hadoop/hdfs/dn/current/BP-1017498243-192.168.1.131-1483757917078/current/finalized/subdir0/subdir0/
blk_1073741825 blk_1073741825_1001.meta
[hdfs@node1 ~]$ file /data/hadoop/hdfs/dn/current/BP-1017498243-192.168.1.131-1483757917078/current/finalized/subdir0/subdir0/blk_1073741825
/data/hadoop/hdfs/dn/current/BP-1017498243-192.168.1.131-1483757917078/current/finalized/subdir0/subdir0/blk_1073741825: ASCII text
[hdfs@node1 ~]$ cat /data/hadoop/hdfs/dn/current/BP-1017498243-192.168.1.131-1483757917078/current/finalized/subdir0/subdir0/blk_1073741825
#
# /etc/fstab
# Created by anaconda on Fri Sep 2 00:27:27 2016
#
# Accessible filesystems, by reference, are maintained under ‘/dev/disk’
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root / xfs defaults 0 0
UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot xfs defaults 0 0
/dev/mapper/centos-swap swap swap defaults 0 0
[hdfs@node1 ~]$ hdfs dfs -cat /test/fstab
#
# /etc/fstab
# Created by anaconda on Fri Sep 2 00:27:27 2016
#
# Accessible filesystems, by reference, are maintained under ‘/dev/disk’
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root / xfs defaults 0 0
UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot xfs defaults 0 0
/dev/mapper/centos-swap swap swap defaults 0 0
[hdfs@node1 ~]$ exit
[root@node1 hadoop]# su - yarn
[yarn@node1 ~]$ yarn-daemon.sh start resourcemanager
starting resourcemanager, logging to /bdapps/hadoop/logs/yarn-yarn-resourcemanager-node1.out
[yarn@node1 ~]$ jps
42932 ResourceManager
43161 Jps
[yarn@node1 ~]$ yarn-daemon.sh start nodemanager
starting nodemanager, logging to /bdapps/hadoop/logs/yarn-yarn-nodemanager-node1.out
[root@node1 hadoop]# pwd
/bdapps/hadoop
[root@node1 hadoop]# cd share/hadoop/mapreduce/
[root@node1 mapreduce]# ls
hadoop-mapreduce-client-app-2.6.2.jar
hadoop-mapreduce-client-common-2.6.2.jar
hadoop-mapreduce-client-core-2.6.2.jar
hadoop-mapreduce-client-hs-2.6.2.jar
hadoop-mapreduce-client-hs-plugins-2.6.2.jar
hadoop-mapreduce-client-jobclient-2.6.2.jar
hadoop-mapreduce-client-jobclient-2.6.2-tests.jar
hadoop-mapreduce-client-shuffle-2.6.2.jar
hadoop-mapreduce-examples-2.6.2.jar
lib
lib-examples
sources
[root@node1 mapreduce]# su - hdfs
Last login: Sat Jan 7 10:40:42 CST 2017 on pts/0
[hdfs@node1 ~]$ yarn jar /bdapps/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.2.jar
An example program must be given as the first argument.
Valid program names are:
aggregatewordcount: An Aggregate based map/reduce program that counts the words in the input files.
aggregatewordhist: An Aggregate based map/reduce program that computes the histogram of the words in the input files.
bbp: A map/reduce program that uses Bailey-Borwein-Plouffe to compute exact digits of Pi.
dbcount: An example job that count the pageview counts from a database.
distbbp: A map/reduce program that uses a BBP-type formula to compute exact bits of Pi.
grep: A map/reduce program that counts the matches of a regex in the input.
join: A job that effects a join over sorted, equally partitioned datasets
multifilewc: A job that counts words from several files.
pentomino: A map/reduce tile laying program to find solutions to pentomino problems.
pi: A map/reduce program that estimates Pi using a quasi-Monte Carlo method.
randomtextwriter: A map/reduce program that writes 10GB of random textual data per node.
randomwriter: A map/reduce program that writes 10GB of random data per node.
secondarysort: An example defining a secondary sort to the reduce.
sort: A map/reduce program that sorts the data written by the random writer.
sudoku: A sudoku solver.
teragen: Generate data for the terasort
terasort: Run the terasort
teravalidate: Checking results of terasort
wordcount: A map/reduce program that counts the words in the input files.
wordmean: A map/reduce program that counts the average length of the words in the input files.
wordmedian: A map/reduce program that counts the median length of the words in the input files.
wordstandarddeviation: A map/reduce program that counts the standard deviation of the length of the words in the input files.
[hdfs@node1 ~]$ yarn jar /bdapps/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.2.jar wordcount /test/fstab /test/fstab.out
17/01/07 11:43:42 INFO client.RMProxy: Connecting to ResourceManager at localhost/127.0.0.1:8032
17/01/07 11:43:43 INFO input.FileInputFormat: Total input paths to process : 1
17/01/07 11:43:43 INFO mapreduce.JobSubmitter: number of splits:1
17/01/07 11:43:44 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1483759378332_0001
17/01/07 11:43:45 INFO impl.YarnClientImpl: Submitted application application_1483759378332_0001
17/01/07 11:43:45 INFO mapreduce.Job: The url to track the job: http://node1:8088/proxy/application_1483759378332_0001/
17/01/07 11:43:45 INFO mapreduce.Job: Running job: job_1483759378332_0001
17/01/07 11:43:55 INFO mapreduce.Job: Job job_1483759378332_0001 running in uber mode : false
17/01/07 11:43:55 INFO mapreduce.Job: map 0% reduce 0%
17/01/07 11:44:03 INFO mapreduce.Job: map 100% reduce 0%
17/01/07 11:44:10 INFO mapreduce.Job: map 100% reduce 100%
17/01/07 11:44:11 INFO mapreduce.Job: Job job_1483759378332_0001 completed successfully
17/01/07 11:44:11 INFO mapreduce.Job: Counters: 49
File System Counters
FILE: Number of bytes read=554
FILE: Number of bytes written=214131
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=562
HDFS: Number of bytes written=396
HDFS: Number of read operations=6
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Launched reduce tasks=1
Data-local map tasks=1
Total time spent by all maps in occupied slots (ms)=5413
Total time spent by all reduces in occupied slots (ms)=4521
Total time spent by all map tasks (ms)=5413
Total time spent by all reduce tasks (ms)=4521
Total vcore-seconds taken by all map tasks=5413
Total vcore-seconds taken by all reduce tasks=4521
Total megabyte-seconds taken by all map tasks=5542912
Total megabyte-seconds taken by all reduce tasks=4629504
Map-Reduce Framework
Map input records=11
Map output records=54
Map output bytes=588
Map output materialized bytes=554
Input split bytes=97
Combine input records=54
Combine output records=38
Reduce input groups=38
Reduce shuffle bytes=554
Reduce input records=38
Reduce output records=38
Spilled Records=76
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=258
CPU time spent (ms)=3020
Physical memory (bytes) snapshot=429760512
Virtual memory (bytes) snapshot=4256686080
Total committed heap usage (bytes)=301465600
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=465
File Output Format Counters
Bytes Written=396
[hdfs@node1 ~]$ hdfs dfs -ls /test/fstab.out
Found 2 items
-rw-r--r-- 1 hdfs supergroup 0 2017-01-07 11:44 /test/fstab.out/_SUCCESS
-rw-r--r-- 1 hdfs supergroup 396 2017-01-07 11:44 /test/fstab.out/part-r-00000
[hdfs@node1 ~]$ hdfs dfs -cat /test/fstab.out/part-r-00000
# 7
‘/dev/disk’ 1
/ 1
/boot 1
/dev/mapper/centos-root 1
/dev/mapper/centos-swap 1
/etc/fstab 1
0 6
00:27:27 1
2 1
2016 1
Accessible 1
Created 1
Fri 1
See 1
Sep 1
UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 1
anaconda 1
and/or 1
are 1
blkid(8) 1
by 2
defaults 3
filesystems, 1
findfs(8), 1
for 1
fstab(5), 1
info 1
maintained 1
man 1
more 1
mount(8) 1
on 1
pages 1
reference, 1
swap 2
under 1
xfs 2
02 Hadoop 2分布式集群
配置环境
node1: 192.168.1.130 CentOS Linux release 7.2
node2: 192.168.1.131 CentOS Linux release 7.2
node3: 192.168.1.132 CentOS Linux release 7.2
node4: 192.168.1.133 CentOS Linux release 7.2
[root@node1 ~]# vim /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.1.130 node1 master
192.168.1.131 node2
192.168.1.132 node3
192.168.1.133 node4
[root@node1 ~]# scp /etc/hosts node2:/etc/
[root@node1 ~]# scp /etc/hosts node3:/etc/
[root@node1 ~]# scp /etc/hosts node4:/etc/
[root@node1 ~]# vim /etc/profile.d/java.sh
export JAVA_HOME=/usr
[root@node1 ~]# yum makecache
[root@node1 ~]# yum install java-1.7.0-openjdk-devel.x86_64 -y
[root@node1 ~]# java -version
openjdk version "1.8.0_101"
OpenJDK Runtime Environment (build 1.8.0_101-b13)
OpenJDK 64-Bit Server VM (build 25.101-b13, mixed mode)
[root@node1 ~]# scp /etc/profile.d/java.sh node2:/etc/profile.d/
[root@node1 ~]# scp /etc/profile.d/java.sh node3:/etc/profile.d/
[root@node1 ~]# scp /etc/profile.d/java.sh node4:/etc/profile.d/
[root@node1 ~]# groupadd hadoop
[root@node1 ~]# useradd -g hadoop hadoop
[root@node1 ~]# echo hadoop:mageedu | chpasswd
[root@node2 ~]# yum install java-1.7.0-openjdk-devel.x86_64 -y
[root@node2 ~]# useradd hadoop
[root@node2 ~]# echo hadoop:mageedu | chpasswd
[root@node3 ~]# yum install java-1.7.0-openjdk-devel.x86_64 -y
[root@node3 ~]# useradd hadoop
[root@node3 ~]# echo hadoop:mageedu | chpasswd
[root@node4 ~]# yum install java-1.7.0-openjdk-devel.x86_64 -y
[root@node4 ~]# useradd hadoop
[root@node4 ~]# echo hadoop:mageedu | chpasswd
[hadoop@node1 ~]$ ssh-keygen -t rsa -P ''
Generating public/private rsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_rsa):
Created directory ‘/home/hadoop/.ssh’.
Your identification has been saved in /home/hadoop/.ssh/id_rsa.
Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.
The key fingerprint is:
2b:fc:62:6d:4e:ff:0d:01:6a:9e:4b:9f:55:a7:af:53 hadoop@node1
The key's randomart image is:
+--[ RSA 2048]----+
| |
| |
| . |
| . . |
| S . . .|
| . o o o oE|
| o.* o .. |
| o=o+ o o.. |
| . ++ +.. oo.|
+-----------------+
[hadoop@node1 ~]$ for i in 2 3 4;do ssh-copy-id -i .ssh/id_rsa.pub hadoop@node${i};done
[hadoop@node1 ~]$ exit
logout
[root@node1 ~]# mkdir -p /bdapps /data/hadoop/hdfs/{nn,snn,dn}
[root@node1 ~]# chown -R hadoop.hadoop /data/hadoop/hdfs/
[root@node1 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/
[root@node1 ~]# cd /bdapps/
[root@node1 bdapps]# ls
hadoop-2.6.2
[root@node1 bdapps]# ln -s hadoop-2.6.2/ hadoop
[root@node1 bdapps]# cd hadoop
[root@node1 hadoop]# mkdir logs
[root@node1 hadoop]# chmod g+w logs
[root@node1 hadoop]# chown -R hadoop.hadoop ./*
[root@node1 hadoop]# ll
total 36
drwxr-xr-x 2 hadoop hadoop 4096 Oct 22 2015 bin
drwxr-xr-x 3 hadoop hadoop 19 Oct 22 2015 etc
drwxr-xr-x 2 hadoop hadoop 101 Oct 22 2015 include
drwxr-xr-x 3 hadoop hadoop 19 Oct 22 2015 lib
drwxr-xr-x 2 hadoop hadoop 4096 Oct 22 2015 libexec
-rw-r--r-- 1 hadoop hadoop 15429 Oct 22 2015 LICENSE.txt
drwxrwxr-x 2 hadoop hadoop 6 Jan 7 14:21 logs
-rw-r--r-- 1 hadoop hadoop 101 Oct 22 2015 NOTICE.txt
-rw-r--r-- 1 hadoop hadoop 1366 Oct 22 2015 README.txt
drwxr-xr-x 2 hadoop hadoop 4096 Oct 22 2015 sbin
drwxr-xr-x 4 hadoop hadoop 29 Oct 22 2015 share
[root@node1 hadoop]# cd etc/hadoop/
[root@node1 hadoop]# vim core-site.xml
末行添加
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:8020</value>
<final>true</final>
</property>
</configuration>
[root@node1 hadoop]# vim yarn-site.xml
末尾添加
<configuration>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
<property>
<name></name>
<value></value>
</property>
<property>
<name></name>
<value></value>
</property>
<property>
<name></name>
<value></value>
</property>
</configuration>
[root@node1 hadoop]# vim hdfs-site.xml
末尾添加
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/hadoop/hdfs/nn</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///data/hadoop/hdfs/dn</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>file:///data/hadoop/hdfs/snn</value>
</property>
<property>
<name>fs.checkpoint.edits.dir</name>
<value>file:///data/hadoop/hdfs/snn</value>
</property>
</configuration>
[root@node1 hadoop]# vim slaves
node2
node3
node4
[root@node2 ~]# mkdir -p /bdapps /data/hadoop/hdfs/{nn,snn,dn}
[root@node2 ~]# chown -R hadoop.hadoop /data/hadoop/hdfs/
[root@node3 ~]# mkdir -p /bdapps /data/hadoop/hdfs/{nn,snn,dn}
[root@node3 ~]# chown -R hadoop.hadoop /data/hadoop/hdfs/
[root@node4 ~]# mkdir -p /bdapps /data/hadoop/hdfs/{nn,snn,dn}
[root@node4 ~]# chown -R hadoop.hadoop /data/hadoop/hdfs/
[root@node2 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/
[root@node2 ~]# cd /bdapps/
[root@node2 bdapps]# ln -s hadoop-2.6.2/ hadoop
[root@node2 bdapps]# cd hadoop
[root@node2 hadoop]# mkdir logs
[root@node2 hadoop]# chmod g+w logs/
[root@node2 hadoop]# chown hadoop.hadoop -R ./*
[root@node3 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/
[root@node3 ~]# cd /bdapps/
[root@node3 bdapps]# ln -s hadoop-2.6.2/ hadoop
[root@node3 bdapps]# cd hadoop
[root@node3 hadoop]# mkdir logs
[root@node3 hadoop]# chmod g+w logs
[root@node3 hadoop]# chown hadoop.hadoop -R ./*
[root@node4 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/
[root@node4 ~]# cd /bdapps/
[root@node4 bdapps]# ln -s hadoop-2.6.2/ hadoop
[root@node4 bdapps]# cd hadoop
[root@node4 hadoop]# mkdir logs
[root@node4 hadoop]# chmod g+w logs/
[root@node4 hadoop]# chown hadoop.hadoop -R ./*
[root@node1 ~]# su - hadoop
[hadoop@node1 ~]$ scp /bdapps/hadoop/etc/hadoop/* node2:/bdapps/hadoop/etc/hadoop/
[hadoop@node1 ~]$ scp /bdapps/hadoop/etc/hadoop/* node3:/bdapps/hadoop/etc/hadoop/
[hadoop@node1 ~]$ scp /bdapps/hadoop/etc/hadoop/* node4:/bdapps/hadoop/etc/hadoop/
[hadoop@node1 ~]$ exit
logout
[root@node1 ~]# vim /etc/profile.d/hadoop.sh
export HADOOP_PREFIX=/bdapps/hadoop
export PATH=$PATH:${HADOOP_PREFIX}/bin:${HADOOP_PREFIX}/sbin
export HADOOP_COMMON_HOME=${HADOOP_PREFIX}
export HADOOP_YARN_HOME=${HADOOP_PREFIX}
export HADOOP_HDFS_HOME=${HADOOP_PREFIX}
export HADOOP_MAPRED_HOME=${HADOOP_PREFIX}
[root@node1 ~]# scp /etc/profile.d/hadoop.sh node2:/etc/profile.d/
[root@node1 ~]# scp /etc/profile.d/hadoop.sh node3:/etc/profile.d/
[root@node1 ~]# scp /etc/profile.d/hadoop.sh node4:/etc/profile.d/
#格式化HDFS
[root@node1 ~]# su - hadoop
[hadoop@node1 ~]$ hdfs namenode -format
[hadoop@node1 ~]$ ls /data/hadoop/hdfs/nn/
current
#启动服务
[hadoop@node1 ~]$ start-dfs.sh
[hadoop@node1 ~]$ jps
35957 NameNode
36153 SecondaryNameNode
36292 Jps
#停止服务(仅在需要停止HDFS时执行;后续步骤要求HDFS保持运行)
[hadoop@node1 ~]$ stop-dfs.sh
[root@node2 hadoop]# su - hadoop
[hadoop@node2 ~]$ jps
35527 DataNode
35639 Jps
[root@node3 hadoop]# su - hadoop
[hadoop@node3 ~]$ jps
35113 DataNode
35241 Jps
[root@node4 hadoop]# su - hadoop
[hadoop@node4 ~]$ jps
35113 DataNode
35242 Jps
#创建一个目录
[hadoop@node1 ~]$ hdfs dfs -mkdir /test
#向hadoop上传一个文件
[hadoop@node1 ~]$ hdfs dfs -put /etc/fstab /test/fstab
#查看上传的文件
[hadoop@node1 ~]$ hdfs dfs -lsr /test
lsr: DEPRECATED: Please use 'ls -R' instead.
-rw-r--r-- 2 hadoop supergroup 465 2017-01-07 15:16 /test/fstab
#查看上传的文件内容
[hadoop@node1 ~]$ hdfs dfs -cat /test/fstab
#
# /etc/fstab
# Created by anaconda on Fri Sep 2 00:27:27 2016
#
# Accessible filesystems, by reference, are maintained under ‘/dev/disk’
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root / xfs defaults 0 0
UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot xfs defaults 0 0
/dev/mapper/centos-swap swap swap defaults 0 0
#上传文件在hadoop中的存储位置
[hadoop@node2 ~]$ cat /data/hadoop/hdfs/dn/current/BP-1026073846-192.168.1.130-1483772708588/current/finalized/subdir0/subdir0/blk_1073741825
#
# /etc/fstab
# Created by anaconda on Fri Sep 2 00:27:27 2016
#
# Accessible filesystems, by reference, are maintained under ‘/dev/disk’
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root / xfs defaults 0 0
UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot xfs defaults 0 0
/dev/mapper/centos-swap swap swap defaults 0 0
[hadoop@node3 ~]$ cat /data/hadoop/hdfs/dn/current/BP-1026073846-192.168.1.130-1483772708588/current/finalized/subdir0/subdir0/blk_1073741825
#
# /etc/fstab
# Created by anaconda on Fri Sep 2 00:27:27 2016
#
# Accessible filesystems, by reference, are maintained under ‘/dev/disk’
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info
#
/dev/mapper/centos-root / xfs defaults 0 0
UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot xfs defaults 0 0
/dev/mapper/centos-swap swap swap defaults 0 0
[hadoop@node4 ~]$ ls /data/hadoop/hdfs/dn/current/BP-1026073846-192.168.1.130-1483772708588/current/finalized/
[hadoop@node1 ~]$ start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /bdapps/hadoop/logs/yarn-hadoop-resourcemanager-node1.out
node4: starting nodemanager, logging to /bdapps/hadoop/logs/yarn-hadoop-nodemanager-node4.out
node3: starting nodemanager, logging to /bdapps/hadoop/logs/yarn-hadoop-nodemanager-node3.out
node2: starting nodemanager, logging to /bdapps/hadoop/logs/yarn-hadoop-nodemanager-node2.out
[hadoop@node1 ~]$ jps
35957 NameNode
36153 SecondaryNameNode
36980 Jps
36715 ResourceManager
[hadoop@node2 ~]$ jps
35996 Jps
35527 DataNode
35880 NodeManager
[hadoop@node3 ~]$ jps
35113 DataNode
35474 NodeManager
35591 Jps
[hadoop@node4 ~]$ jps
35477 NodeManager
35113 DataNode
35600 Jps
[root@node1 ~]# ss -tnl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 *:50090 *:*
LISTEN 0 128 192.168.1.130:8020 *:*
LISTEN 0 5 192.168.122.1:53 *:*
LISTEN 0 128 *:50070 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 128 127.0.0.1:631 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 128 :::22 :::*
LISTEN 0 128 ::1:631 :::*
LISTEN 0 128 ::ffff:192.168.1.130:8088 :::*
LISTEN 0 100 ::1:25 :::*
LISTEN 0 128 :::8030 :::*
LISTEN 0 128 ::ffff:192.168.1.130:8031 :::*
LISTEN 0 128 ::ffff:192.168.1.130:8032 :::*
LISTEN 0 128 ::ffff:192.168.1.130:8033 :::*
[hadoop@node1 ~]$ hdfs dfs -put /etc/rc.d/init.d/functions /test
[hadoop@node1 ~]$ hdfs dfs -ls /test
Found 2 items
-rw-r--r-- 2 hadoop supergroup 465 2017-01-07 15:16 /test/fstab
-rw-r--r-- 2 hadoop supergroup 13948 2017-01-07 15:35 /test/functions
[hadoop@node1 ~]$ hdfs dfs -put hadoop-2.6.2.tar.gz /test/
#统计文本文件
[hadoop@node1 ~]$ yarn jar /bdapps/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.2.jar wordcount /test/fstab /test/functions /test/wc
[hadoop@node1 ~]$ hdfs dfs -ls /test/wc
[hadoop@node1 ~]$ hdfs dfs -cat /test/wc/part-r-00000
hadoop应用
http://192.168.1.130:8088
管理hadoop
http://192.168.1.130:50070
2、YARN集群管理命令
[hadoop@node1 ~]$ yarn application -list
17/01/07 17:11:25 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8032
Total number of applications (application-types: [] and states: [SUBMITTED, ACCEPTED, RUNNING]):0
Application-Id Application-Name Application-Type User Queue State Final-State Progress Tracking-URL
[hadoop@node1 ~]$ yarn application -list -appStates=all
17/01/07 17:12:13 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8032
Total number of applications (application-types: [] and states: [NEW, NEW_SAVING, SUBMITTED, ACCEPTED, RUNNING, FINISHED, FAILED, KILLED]):0
Application-Id Application-Name Application-Type User Queue State Final-State Progress Tracking-URL
[hadoop@node1 ~]$ yarn application -status <Application-Id>
[hadoop@node1 ~]$ yarn node -list
17/01/07 17:16:47 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8032
Total Nodes:3
Node-Id Node-State Node-Http-Address Number-of-Running-Containers
node2:40860 RUNNING node2:8042 0
node3:47737 RUNNING node3:8042 0
node4:45637 RUNNING node4:8042 0
[hadoop@node1 ~]$ yarn node -status node3:47737
17/01/07 17:20:41 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8032
Node Report :
Node-Id : node3:47737
Rack : /default-rack
Node-State : RUNNING
Node-Http-Address : node3:8042
Last-Health-Update : Sat 07/Jan/17 05:18:54:160CST
Health-Report :
Containers : 0
Memory-Used : 0MB
Memory-Capacity : 8192MB
CPU-Used : 0 vcores
CPU-Capacity : 8 vcores
Node-Labels :
[hadoop@node1 ~]$ yarn logs -applicationId <Application-Id>
[hadoop@node1 ~]$ yarn rmadmin -refreshNodes
17/01/07 17:27:20 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8033
原创文章,作者:Maggie-Hunter,如若转载,请注明出处:https://blog.ytso.com/tech/opensource/186874.html