Hadoop Advanced

01 Hadoop Pseudo-Distributed Mode

Environment

node1: 192.168.1.131 CentOS Linux release 7.2

1. Install Hadoop

# Installation

[root@node1 ~]# vim /etc/profile.d/java.sh

export JAVA_HOME=/usr

[root@node1 ~]# yum install java-1.7.0-openjdk-devel.x86_64

[root@node1 ~]# mkdir /bdapps

[root@node1 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/

[root@node1 ~]# cd /bdapps/

[root@node1 bdapps]# ln -s hadoop-2.6.2/ hadoop

[root@node1 bdapps]# ll

total 0

lrwxrwxrwx 1 root  root   13 Jan  7 09:24 hadoop -> hadoop-2.6.2/

drwxr-xr-x 9 10011 10011 139 Oct 22  2015 hadoop-2.6.2

[root@node1 hadoop]# vim /etc/profile.d/hadoop.sh

export HADOOP_PREFIX=/bdapps/hadoop

export PATH=$PATH:${HADOOP_PREFIX}/bin:${HADOOP_PREFIX}/sbin

export HADOOP_YARN_HOME=${HADOOP_PREFIX}

export HADOOP_MAPRED_HOME=${HADOOP_PREFIX}

export HADOOP_COMMON_HOME=${HADOOP_PREFIX}

export HADOOP_HDFS_HOME=${HADOOP_PREFIX}

[root@node1 hadoop]# . /etc/profile.d/hadoop.sh
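With both profile scripts sourced (hadoop.sh above, and java.sh for JAVA_HOME), a quick sanity check can confirm the binaries are on PATH; a minimal verification, the exact build string depends on your tarball:

[root@node1 hadoop]# hadoop version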

[root@node1 hadoop]# pwd

/bdapps/hadoop

[root@node1 hadoop]# ls

bin  etc  include  lib  libexec  LICENSE.txt  NOTICE.txt  README.txt  sbin  share

[root@node1 hadoop]# ls bin/

container-executor  hdfs      mapred.cmd               yarn

hadoop              hdfs.cmd  rcc                      yarn.cmd

hadoop.cmd          mapred    test-container-executor

[root@node1 hadoop]# ls include/

hdfs.h  Pipes.hh  SerialUtils.hh  StringUtils.hh  TemplateFactory.hh

[root@node1 hadoop]# ls lib

native

[root@node1 hadoop]# ls libexec/

hadoop-config.cmd  hdfs-config.sh    mapred-config.cmd  yarn-config.sh

hadoop-config.sh   httpfs-config.sh  mapred-config.sh

hdfs-config.cmd    kms-config.sh     yarn-config.cmd

[root@node1 hadoop]# ls sbin/

distribute-exclude.sh    start-all.cmd        stop-balancer.sh

hadoop-daemon.sh         start-all.sh         stop-dfs.cmd

hadoop-daemons.sh        start-balancer.sh    stop-dfs.sh

hdfs-config.cmd          start-dfs.cmd        stop-secure-dns.sh

hdfs-config.sh           start-dfs.sh         stop-yarn.cmd

httpfs.sh                start-secure-dns.sh  stop-yarn.sh

kms.sh                   start-yarn.cmd       yarn-daemon.sh

mr-jobhistory-daemon.sh  start-yarn.sh        yarn-daemons.sh

refresh-namenodes.sh     stop-all.cmd

slaves.sh                stop-all.sh

[root@node1 hadoop]# ls etc/hadoop/

capacity-scheduler.xml      httpfs-env.sh            mapred-env.sh

configuration.xsl           httpfs-log4j.properties  mapred-queues.xml.template

container-executor.cfg      httpfs-signature.secret  mapred-site.xml.template

core-site.xml               httpfs-site.xml          slaves

hadoop-env.cmd              kms-acls.xml             ssl-client.xml.example

hadoop-env.sh               kms-env.sh               ssl-server.xml.example

hadoop-metrics2.properties  kms-log4j.properties     yarn-env.cmd

hadoop-metrics.properties   kms-site.xml             yarn-env.sh

hadoop-policy.xml           log4j.properties         yarn-site.xml

hdfs-site.xml               mapred-env.cmd

# Create the users and directories for running the Hadoop processes

[root@node1 hadoop]# groupadd hadoop

[root@node1 hadoop]# useradd -g hadoop yarn

[root@node1 hadoop]# useradd -g hadoop hdfs

[root@node1 hadoop]# useradd -g hadoop mapred

# Create the data and log directories

[root@node1 hadoop]# mkdir -p /data/hadoop/hdfs/{nn,snn,dn}

[root@node1 hadoop]# chown -R hdfs:hadoop /data/hadoop/hdfs/   

[root@node1 hadoop]# ll /data/hadoop/hdfs/

total 0

drwxr-xr-x 2 hdfs hadoop 6 Jan  7 09:48 dn

drwxr-xr-x 2 hdfs hadoop 6 Jan  7 09:48 nn

drwxr-xr-x 2 hdfs hadoop 6 Jan  7 09:48 snn

[root@node1 hadoop]# mkdir logs

[root@node1 hadoop]# chmod g+w logs

[root@node1 hadoop]# chown -R yarn:hadoop ./*

[root@node1 hadoop]# ll

total 36

drwxr-xr-x 2 yarn hadoop  4096 Oct 22  2015 bin

drwxr-xr-x 3 yarn hadoop    19 Oct 22  2015 etc

drwxr-xr-x 2 yarn hadoop   101 Oct 22  2015 include

drwxr-xr-x 3 yarn hadoop    19 Oct 22  2015 lib

drwxr-xr-x 2 yarn hadoop  4096 Oct 22  2015 libexec

-rw-r--r-- 1 yarn hadoop 15429 Oct 22  2015 LICENSE.txt

drwxrwxr-x 2 yarn hadoop     6 Jan  7 09:51 logs

-rw-r--r-- 1 yarn hadoop   101 Oct 22  2015 NOTICE.txt

-rw-r--r-- 1 yarn hadoop  1366 Oct 22  2015 README.txt

drwxr-xr-x 2 yarn hadoop  4096 Oct 22  2015 sbin

drwxr-xr-x 4 yarn hadoop    29 Oct 22  2015 share

# Configure Hadoop

[root@node1 hadoop]# cd etc/hadoop/

[root@node1 hadoop]# vim core-site.xml 

Add the following (the <property> blocks belong inside the file's existing <configuration> element):

<configuration>

<property>

<name>fs.defaultFS</name>

<value>hdfs://localhost:8020</value>

<final>true</final>

</property>

</configuration>

[root@node1 hadoop]# vim hdfs-site.xml 

Add the following (inside the existing <configuration> element):

<configuration>

<property>

<name>dfs.replication</name>

<value>1</value>

</property>

<property>

<name>dfs.namenode.name.dir</name>

<value>file:///data/hadoop/hdfs/nn</value>

</property>

<property>

<name>dfs.datanode.data.dir</name>

<value>file:///data/hadoop/hdfs/dn</value>

</property>

<property>

<name>dfs.checkpoint.dir</name>

<value>file:///data/hadoop/hdfs/snn</value>

</property>

<property>

<name>fs.checkpoint.edits.dir</name>

<value>file:///data/hadoop/hdfs/snn</value>

</property>

</configuration>

[root@node1 hadoop]# cp mapred-site.xml.template mapred-site.xml

[root@node1 hadoop]# vim mapred-site.xml

Add the following (inside the existing <configuration> element):

<configuration>

<property>

<name>mapreduce.framework.name</name>

<value>yarn</value>

</property>

</configuration>

[root@node1 hadoop]# vim yarn-site.xml 

<configuration>

<property>

<name>yarn.resourcemanager.address</name>

<value>localhost:8032</value>

</property>

<property>

<name>yarn.resourcemanager.scheduler.address</name>

<value>localhost:8030</value>

</property>

<property>

<name>yarn.resourcemanager.resource-tracker.address</name>

<value>localhost:8031</value>

</property>

<property>

<name>yarn.resourcemanager.admin.address</name>

<value>localhost:8033</value>

</property>

<property>

<name>yarn.resourcemanager.webapp.address</name>

<value>localhost:8088</value>

</property>

<property>

<name>yarn.nodemanager.aux-services</name>

<value>mapreduce_shuffle</value>

</property>

<property>

<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>

<value>org.apache.hadoop.mapred.ShuffleHandler</value>

</property>

<property>

<name>yarn.resourcemanager.scheduler.class</name>

<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>

</property>


</configuration>
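Before formatting, it is worth confirming the edited XML files are well-formed; a quick check, assuming libxml2's xmllint is installed (no output means the files parse cleanly):

[root@node1 hadoop]# xmllint --noout core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml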

# Format HDFS

[root@node1 hadoop]# su - hdfs

[hdfs@node1 ~]$ hdfs namenode -format

[hdfs@node1 ~]$ ls /data/hadoop/hdfs/nn/current/

fsimage_0000000000000000000  fsimage_0000000000000000000.md5  seen_txid  VERSION
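If you want to peek inside the freshly created fsimage, Hadoop ships an offline image viewer; a sketch, run as the hdfs user, with /tmp/fsimage.xml as an arbitrary example output path:

[hdfs@node1 ~]$ hdfs oiv -p XML -i /data/hadoop/hdfs/nn/current/fsimage_0000000000000000000 -o /tmp/fsimage.xml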

# Start Hadoop

[hdfs@node1 ~]$ hadoop-daemon.sh start namenode

starting namenode, logging to /bdapps/hadoop/logs/hadoop-hdfs-namenode-node1.out

[hdfs@node1 ~]$ ls /bdapps/hadoop/logs/

hadoop-hdfs-namenode-node1.log  SecurityAuth-hdfs.audit

hadoop-hdfs-namenode-node1.out

[hdfs@node1 ~]$ jps

41937 NameNode

42025 Jps

[hdfs@node1 ~]$ hadoop-daemon.sh start secondarynamenode

starting secondarynamenode, logging to /bdapps/hadoop/logs/hadoop-hdfs-secondarynamenode-node1.out

[hdfs@node1 ~]$ jps

41937 NameNode

42090 SecondaryNameNode

42133 Jps

[hdfs@node1 ~]$ hadoop-daemon.sh start datanode

starting datanode, logging to /bdapps/hadoop/logs/hadoop-hdfs-datanode-node1.out

[hdfs@node1 ~]$ jps

41937 NameNode

42242 Jps

42166 DataNode

42090 SecondaryNameNode
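With the NameNode, SecondaryNameNode, and DataNode all running, a quick cluster health check (a minimal sketch; the report should show one live DataNode):

[hdfs@node1 ~]$ hdfs dfsadmin -report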

[hdfs@node1 ~]$ hdfs dfs -mkdir /test

[hdfs@node1 ~]$ hdfs dfs -ls /

Found 1 items

drwxr-xr-x   - hdfs supergroup          0 2017-01-07 11:08 /test

[hdfs@node1 ~]$ hdfs dfs -put /etc/fstab /test/fstab

[hdfs@node1 ~]$ hdfs dfs -ls /test

Found 1 items

-rw-r--r--   1 hdfs supergroup        465 2017-01-07 11:11 /test/fstab

[hdfs@node1 ~]$ hdfs dfs -lsr /

lsr: DEPRECATED: Please use 'ls -R' instead.

drwxr-xr-x   - hdfs supergroup          0 2017-01-07 11:11 /test

-rw-r--r--   1 hdfs supergroup        465 2017-01-07 11:11 /test/fstab

[hdfs@node1 ~]$ ls /data/hadoop/hdfs/dn/current/BP-1017498243-192.168.1.131-1483757917078/current/finalized/subdir0/subdir0/

blk_1073741825  blk_1073741825_1001.meta

[hdfs@node1 ~]$ file /data/hadoop/hdfs/dn/current/BP-1017498243-192.168.1.131-1483757917078/current/finalized/subdir0/subdir0/blk_1073741825

/data/hadoop/hdfs/dn/current/BP-1017498243-192.168.1.131-1483757917078/current/finalized/subdir0/subdir0/blk_1073741825: ASCII text

[hdfs@node1 ~]$ cat /data/hadoop/hdfs/dn/current/BP-1017498243-192.168.1.131-1483757917078/current/finalized/subdir0/subdir0/blk_1073741825

#

# /etc/fstab

# Created by anaconda on Fri Sep  2 00:27:27 2016

#

# Accessible filesystems, by reference, are maintained under '/dev/disk'

# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info

#

/dev/mapper/centos-root /                       xfs     defaults        0 0

UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot                   xfs     defaults        0 0

/dev/mapper/centos-swap swap                    swap    defaults        0 0

[hdfs@node1 ~]$ hdfs dfs -cat /test/fstab

#

# /etc/fstab

# Created by anaconda on Fri Sep  2 00:27:27 2016

#

# Accessible filesystems, by reference, are maintained under '/dev/disk'

# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info

#

/dev/mapper/centos-root /                       xfs     defaults        0 0

UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot                   xfs     defaults        0 0

/dev/mapper/centos-swap swap                    swap    defaults        0 0

[hdfs@node1 ~]$ exit

[root@node1 hadoop]# su - yarn

[yarn@node1 ~]$ yarn-daemon.sh start resourcemanager

starting resourcemanager, logging to /bdapps/hadoop/logs/yarn-yarn-resourcemanager-node1.out

[yarn@node1 ~]$ jps

42932 ResourceManager

43161 Jps

[yarn@node1 ~]$ yarn-daemon.sh start nodemanager

starting nodemanager, logging to /bdapps/hadoop/logs/yarn-yarn-nodemanager-node1.out
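Both YARN daemons should now be up; you can confirm the NodeManager registered with the ResourceManager (a quick check, output omitted here; the node name and ephemeral port will vary):

[yarn@node1 ~]$ yarn node -list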

[root@node1 hadoop]# pwd

/bdapps/hadoop

[root@node1 hadoop]# cd share/hadoop/mapreduce/

[root@node1 mapreduce]# ls

hadoop-mapreduce-client-app-2.6.2.jar

hadoop-mapreduce-client-common-2.6.2.jar

hadoop-mapreduce-client-core-2.6.2.jar

hadoop-mapreduce-client-hs-2.6.2.jar

hadoop-mapreduce-client-hs-plugins-2.6.2.jar

hadoop-mapreduce-client-jobclient-2.6.2.jar

hadoop-mapreduce-client-jobclient-2.6.2-tests.jar

hadoop-mapreduce-client-shuffle-2.6.2.jar

hadoop-mapreduce-examples-2.6.2.jar

lib

lib-examples

sources

[root@node1 mapreduce]# su - hdfs

Last login: Sat Jan  7 10:40:42 CST 2017 on pts/0

[hdfs@node1 ~]$ yarn jar /bdapps/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.2.jar 

An example program must be given as the first argument.

Valid program names are:

  aggregatewordcount: An Aggregate based map/reduce program that counts the words in the input files.

  aggregatewordhist: An Aggregate based map/reduce program that computes the histogram of the words in the input files.

  bbp: A map/reduce program that uses Bailey-Borwein-Plouffe to compute exact digits of Pi.

  dbcount: An example job that count the pageview counts from a database.

  distbbp: A map/reduce program that uses a BBP-type formula to compute exact bits of Pi.

  grep: A map/reduce program that counts the matches of a regex in the input.

  join: A job that effects a join over sorted, equally partitioned datasets

  multifilewc: A job that counts words from several files.

  pentomino: A map/reduce tile laying program to find solutions to pentomino problems.

  pi: A map/reduce program that estimates Pi using a quasi-Monte Carlo method.

  randomtextwriter: A map/reduce program that writes 10GB of random textual data per node.

  randomwriter: A map/reduce program that writes 10GB of random data per node.

  secondarysort: An example defining a secondary sort to the reduce.

  sort: A map/reduce program that sorts the data written by the random writer.

  sudoku: A sudoku solver.

  teragen: Generate data for the terasort

  terasort: Run the terasort

  teravalidate: Checking results of terasort

  wordcount: A map/reduce program that counts the words in the input files.

  wordmean: A map/reduce program that counts the average length of the words in the input files.

  wordmedian: A map/reduce program that counts the median length of the words in the input files.

  wordstandarddeviation: A map/reduce program that counts the standard deviation of the length of the words in the input files.

[hdfs@node1 ~]$ yarn jar /bdapps/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.2.jar wordcount /test/fstab /test/fstab.out

17/01/07 11:43:42 INFO client.RMProxy: Connecting to ResourceManager at localhost/127.0.0.1:8032

17/01/07 11:43:43 INFO input.FileInputFormat: Total input paths to process : 1

17/01/07 11:43:43 INFO mapreduce.JobSubmitter: number of splits:1

17/01/07 11:43:44 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1483759378332_0001

17/01/07 11:43:45 INFO impl.YarnClientImpl: Submitted application application_1483759378332_0001

17/01/07 11:43:45 INFO mapreduce.Job: The url to track the job: http://node1:8088/proxy/application_1483759378332_0001/

17/01/07 11:43:45 INFO mapreduce.Job: Running job: job_1483759378332_0001

17/01/07 11:43:55 INFO mapreduce.Job: Job job_1483759378332_0001 running in uber mode : false

17/01/07 11:43:55 INFO mapreduce.Job:  map 0% reduce 0%

17/01/07 11:44:03 INFO mapreduce.Job:  map 100% reduce 0%

17/01/07 11:44:10 INFO mapreduce.Job:  map 100% reduce 100%

17/01/07 11:44:11 INFO mapreduce.Job: Job job_1483759378332_0001 completed successfully

17/01/07 11:44:11 INFO mapreduce.Job: Counters: 49

        File System Counters

                FILE: Number of bytes read=554

                FILE: Number of bytes written=214131

                FILE: Number of read operations=0

                FILE: Number of large read operations=0

                FILE: Number of write operations=0

                HDFS: Number of bytes read=562

                HDFS: Number of bytes written=396

                HDFS: Number of read operations=6

                HDFS: Number of large read operations=0

                HDFS: Number of write operations=2

        Job Counters 

                Launched map tasks=1

                Launched reduce tasks=1

                Data-local map tasks=1

                Total time spent by all maps in occupied slots (ms)=5413

                Total time spent by all reduces in occupied slots (ms)=4521

                Total time spent by all map tasks (ms)=5413

                Total time spent by all reduce tasks (ms)=4521

                Total vcore-seconds taken by all map tasks=5413

                Total vcore-seconds taken by all reduce tasks=4521

                Total megabyte-seconds taken by all map tasks=5542912

                Total megabyte-seconds taken by all reduce tasks=4629504

        Map-Reduce Framework

                Map input records=11

                Map output records=54

                Map output bytes=588

                Map output materialized bytes=554

                Input split bytes=97

                Combine input records=54

                Combine output records=38

                Reduce input groups=38

                Reduce shuffle bytes=554

                Reduce input records=38

                Reduce output records=38

                Spilled Records=76

                Shuffled Maps =1

                Failed Shuffles=0

                Merged Map outputs=1

                GC time elapsed (ms)=258

                CPU time spent (ms)=3020

                Physical memory (bytes) snapshot=429760512

                Virtual memory (bytes) snapshot=4256686080

                Total committed heap usage (bytes)=301465600

        Shuffle Errors

                BAD_ID=0

                CONNECTION=0

                IO_ERROR=0

                WRONG_LENGTH=0

                WRONG_MAP=0

                WRONG_REDUCE=0

        File Input Format Counters 

                Bytes Read=465

        File Output Format Counters 

                Bytes Written=396

[hdfs@node1 ~]$ hdfs dfs -ls /test/fstab.out

Found 2 items

-rw-r--r--   1 hdfs supergroup          0 2017-01-07 11:44 /test/fstab.out/_SUCCESS

-rw-r--r--   1 hdfs supergroup        396 2017-01-07 11:44 /test/fstab.out/part-r-00000

[hdfs@node1 ~]$ hdfs dfs -cat /test/fstab.out/part-r-00000

#       7

'/dev/disk'     1

/       1

/boot   1

/dev/mapper/centos-root 1

/dev/mapper/centos-swap 1

/etc/fstab      1

0       6

00:27:27        1

2       1

2016    1

Accessible      1

Created 1

Fri     1

See     1

Sep     1

UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6       1

anaconda        1

and/or  1

are     1

blkid(8)        1

by      2

defaults        3

filesystems,    1

findfs(8),      1

for     1

fstab(5),       1

info    1

maintained      1

man     1

more    1

mount(8)        1

on      1

pages   1

reference,      1

swap    2

under   1

xfs     2
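To work with the result outside HDFS, the output partition can be copied to the local filesystem (a minimal example; the local filename is arbitrary):

[hdfs@node1 ~]$ hdfs dfs -get /test/fstab.out/part-r-00000 ./fstab.wc

[hdfs@node1 ~]$ wc -l ./fstab.wc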

02 Hadoop 2 Distributed Cluster

Environment

node1: 192.168.1.130 CentOS Linux release 7.2

node2: 192.168.1.131 CentOS Linux release 7.2

node3: 192.168.1.132 CentOS Linux release 7.2

node4: 192.168.1.133 CentOS Linux release 7.2

[root@node1 ~]# vim /etc/hosts

127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4

::1         localhost localhost.localdomain localhost6 localhost6.localdomain6

192.168.1.130           node1 master

192.168.1.131           node2

192.168.1.132           node3

192.168.1.133           node4

[root@node1 ~]# scp /etc/hosts node2:/etc/

[root@node1 ~]# scp /etc/hosts node3:/etc/

[root@node1 ~]# scp /etc/hosts node4:/etc/

[root@node1 ~]# vim /etc/profile.d/java.sh

export JAVA_HOME=/usr

[root@node1 ~]# yum makecache

[root@node1 ~]# yum install java-1.7.0-openjdk-devel.x86_64 -y

[root@node1 ~]# java -version

openjdk version "1.8.0_101"

OpenJDK Runtime Environment (build 1.8.0_101-b13)

OpenJDK 64-Bit Server VM (build 25.101-b13, mixed mode)

[root@node1 ~]# scp /etc/profile.d/java.sh node2:/etc/profile.d/

[root@node1 ~]# scp /etc/profile.d/java.sh node3:/etc/profile.d/

[root@node1 ~]# scp /etc/profile.d/java.sh node4:/etc/profile.d/

[root@node1 ~]# groupadd hadoop

[root@node1 ~]# useradd -g hadoop hadoop

[root@node1 ~]# echo hadoop:mageedu | chpasswd

[root@node2 ~]# yum install java-1.7.0-openjdk-devel.x86_64 -y

[root@node2 ~]# useradd hadoop

[root@node2 ~]# echo hadoop:mageedu | chpasswd

[root@node3 ~]# yum install java-1.7.0-openjdk-devel.x86_64 -y

[root@node3 ~]# useradd hadoop

[root@node3 ~]# echo hadoop:mageedu | chpasswd

[root@node4 ~]# yum install java-1.7.0-openjdk-devel.x86_64 -y

[root@node4 ~]# useradd hadoop

[root@node4 ~]# echo hadoop:mageedu | chpasswd

[hadoop@node1 ~]$ ssh-keygen -t rsa -P ''

Generating public/private rsa key pair.

Enter file in which to save the key (/home/hadoop/.ssh/id_rsa): 

Created directory '/home/hadoop/.ssh'.

Your identification has been saved in /home/hadoop/.ssh/id_rsa.

Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub.

The key fingerprint is:

2b:fc:62:6d:4e:ff:0d:01:6a:9e:4b:9f:55:a7:af:53 hadoop@node1

The key's randomart image is:

+--[ RSA 2048]----+

|                 |

|                 |

|          .      |

|         . .     |

|        S   . . .|

|     . o o   o oE|

|      o.*   o .. |

|      o=o+ o o.. |

|     . ++ +.. oo.|

+-----------------+

[hadoop@node1 ~]$ for i in 2 3 4;do ssh-copy-id -i .ssh/id_rsa.pub hadoop@node${i};done
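Passwordless login can be verified before continuing (a quick check; each command should print the remote hostname without asking for a password):

[hadoop@node1 ~]$ for i in 2 3 4; do ssh hadoop@node${i} hostname; done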

[hadoop@node1 ~]$ exit

logout

[root@node1 ~]# mkdir -p /bdapps /data/hadoop/hdfs/{nn,snn,dn}

[root@node1 ~]# chown -R hadoop.hadoop /data/hadoop/hdfs/

[root@node1 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/

[root@node1 ~]# cd /bdapps/

[root@node1 bdapps]# ls

hadoop-2.6.2

[root@node1 bdapps]# ln -s hadoop-2.6.2/ hadoop

[root@node1 bdapps]# cd hadoop

[root@node1 hadoop]# mkdir logs

[root@node1 hadoop]# chmod g+w logs

[root@node1 hadoop]# chown -R hadoop.hadoop ./*

[root@node1 hadoop]# ll

total 36

drwxr-xr-x 2 hadoop hadoop  4096 Oct 22  2015 bin

drwxr-xr-x 3 hadoop hadoop    19 Oct 22  2015 etc

drwxr-xr-x 2 hadoop hadoop   101 Oct 22  2015 include

drwxr-xr-x 3 hadoop hadoop    19 Oct 22  2015 lib

drwxr-xr-x 2 hadoop hadoop  4096 Oct 22  2015 libexec

-rw-r--r-- 1 hadoop hadoop 15429 Oct 22  2015 LICENSE.txt

drwxrwxr-x 2 hadoop hadoop     6 Jan  7 14:21 logs

-rw-r--r-- 1 hadoop hadoop   101 Oct 22  2015 NOTICE.txt

-rw-r--r-- 1 hadoop hadoop  1366 Oct 22  2015 README.txt

drwxr-xr-x 2 hadoop hadoop  4096 Oct 22  2015 sbin

drwxr-xr-x 4 hadoop hadoop    29 Oct 22  2015 share

[root@node1 hadoop]# cd etc/hadoop/

[root@node1 hadoop]# vim core-site.xml

Add the following (inside the existing <configuration> element):

<configuration>

<property>

<name>fs.defaultFS</name>

<value>hdfs://master:8020</value>

<final>true</final>

</property>

</configuration>

[root@node1 hadoop]# vim yarn-site.xml 

Add the following (inside the existing <configuration> element):

<configuration>

<property>

<name>yarn.resourcemanager.address</name>

<value>master:8032</value>

</property>

<property>

<name>yarn.resourcemanager.scheduler.address</name>

<value>master:8030</value>

</property>

<property>

<name>yarn.resourcemanager.resource-tracker.address</name>

<value>master:8031</value>

</property>

<property>

<name>yarn.resourcemanager.admin.address</name>

<value>master:8033</value>

</property>

<property>

<name>yarn.resourcemanager.webapp.address</name>

<value>master:8088</value>

</property>

<property>

<name>yarn.nodemanager.aux-services</name>

<value>mapreduce_shuffle</value>

</property>

<property>

<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>

<value>org.apache.hadoop.mapred.ShuffleHandler</value>

</property>

<property>

<name>yarn.resourcemanager.scheduler.class</name>

<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>

</property>


</configuration>

[root@node1 hadoop]# vim hdfs-site.xml 

Add the following (inside the existing <configuration> element):

<configuration>

<property>

<name>dfs.replication</name>

<value>2</value>

</property>

<property>

<name>dfs.namenode.name.dir</name>

<value>file:///data/hadoop/hdfs/nn</value>

</property>

<property>

<name>dfs.datanode.data.dir</name>

<value>file:///data/hadoop/hdfs/dn</value>

</property>

<property>

<name>dfs.checkpoint.dir</name>

<value>file:///data/hadoop/hdfs/snn</value>

</property>

<property>

<name>fs.checkpoint.edits.dir</name>

<value>file:///data/hadoop/hdfs/snn</value>

</property>

</configuration>

[root@node1 hadoop]# vim slaves 

node2

node3

node4

[root@node2 ~]# mkdir -p /bdapps /data/hadoop/hdfs/{nn,snn,dn}

[root@node2 ~]# chown -R hadoop.hadoop /data/hadoop/hdfs/

[root@node3 ~]# mkdir -p /bdapps /data/hadoop/hdfs/{nn,snn,dn}

[root@node3 ~]# chown -R hadoop.hadoop /data/hadoop/hdfs/

[root@node4 ~]# mkdir -p /bdapps /data/hadoop/hdfs/{nn,snn,dn}

[root@node4 ~]# chown -R hadoop.hadoop /data/hadoop/hdfs/

[root@node2 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/

[root@node2 ~]# cd /bdapps/

[root@node2 bdapps]# ln -s hadoop-2.6.2/ hadoop

[root@node2 bdapps]# cd hadoop

[root@node2 hadoop]# mkdir logs

[root@node2 hadoop]# chmod g+w logs/

[root@node2 hadoop]# chown hadoop.hadoop  -R ./*

[root@node3 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/

[root@node3 ~]# cd /bdapps/

[root@node3 bdapps]# ln -s hadoop-2.6.2/ hadoop

[root@node3 bdapps]# cd  hadoop

[root@node3 hadoop]# mkdir logs

[root@node3 hadoop]# chmod g+w logs

[root@node3 hadoop]# chown hadoop.hadoop -R ./*

[root@node4 ~]# tar xf hadoop-2.6.2.tar.gz -C /bdapps/

[root@node4 ~]# cd /bdapps/

[root@node4 bdapps]# ln -s hadoop-2.6.2/ hadoop

[root@node4 bdapps]# cd hadoop

[root@node4 hadoop]# mkdir logs

[root@node4 hadoop]# chmod g+w logs/

[root@node4 hadoop]# chown hadoop.hadoop -R ./*

[root@node1 ~]# su - hadoop

[hadoop@node1 ~]$ scp /bdapps/hadoop/etc/hadoop/* node2:/bdapps/hadoop/etc/hadoop/

[hadoop@node1 ~]$ scp /bdapps/hadoop/etc/hadoop/* node3:/bdapps/hadoop/etc/hadoop/

[hadoop@node1 ~]$ scp /bdapps/hadoop/etc/hadoop/* node4:/bdapps/hadoop/etc/hadoop/

[hadoop@node1 ~]$ exit

logout

[root@node1 ~]# vim /etc/profile.d/hadoop.sh

export HADOOP_PREFIX=/bdapps/hadoop

export PATH=$PATH:${HADOOP_PREFIX}/bin:${HADOOP_PREFIX}/sbin

export HADOOP_COMMON_HOME=${HADOOP_PREFIX}

export HADOOP_YARN_HOME=${HADOOP_PREFIX}

export HADOOP_HDFS_HOME=${HADOOP_PREFIX}

export HADOOP_MAPRED_HOME=${HADOOP_PREFIX}

[root@node1 ~]# scp /etc/profile.d/hadoop.sh node2:/etc/profile.d/

[root@node1 ~]# scp /etc/profile.d/hadoop.sh node3:/etc/profile.d/

[root@node1 ~]# scp /etc/profile.d/hadoop.sh node4:/etc/profile.d/
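The new profile takes effect on the next login; for the current shell on each node it can be sourced directly, exactly as in the pseudo-distributed setup:

[root@node1 ~]# . /etc/profile.d/hadoop.sh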

# Format HDFS

[root@node1 ~]# su - hadoop

[hadoop@node1 ~]$ hdfs namenode -format

[hadoop@node1 ~]$ ls /data/hadoop/hdfs/nn/

current

# Start the services

[hadoop@node1 ~]$ start-dfs.sh 

[hadoop@node1 ~]$ jps

35957 NameNode

36153 SecondaryNameNode

36292 Jps

# To stop the services (shown for reference; the cluster is left running for the steps below)

[hadoop@node1 ~]$ stop-dfs.sh 

[root@node2 hadoop]# su - hadoop

[hadoop@node2 ~]$ jps

35527 DataNode

35639 Jps

[root@node3 hadoop]# su - hadoop

[hadoop@node3 ~]$ jps

35113 DataNode

35241 Jps

[root@node4 hadoop]# su - hadoop

[hadoop@node4 ~]$ jps

35113 DataNode

35242 Jps

# Create a directory

[hadoop@node1 ~]$ hdfs dfs -mkdir /test

# Upload a file to HDFS

[hadoop@node1 ~]$ hdfs dfs -put /etc/fstab /test/fstab

# List the uploaded file

[hadoop@node1 ~]$ hdfs dfs -lsr /test

lsr: DEPRECATED: Please use 'ls -R' instead.

-rw-r--r--   2 hadoop supergroup        465 2017-01-07 15:16 /test/fstab

# View the uploaded file's contents

[hadoop@node1 ~]$ hdfs dfs -cat /test/fstab

#

# /etc/fstab

# Created by anaconda on Fri Sep  2 00:27:27 2016

#

# Accessible filesystems, by reference, are maintained under '/dev/disk'

# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info

#

/dev/mapper/centos-root /                       xfs     defaults        0 0

UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot                   xfs     defaults        0 0

/dev/mapper/centos-swap swap                    swap    defaults        0 0

# Where the uploaded file's blocks are stored on the DataNodes

[hadoop@node2 ~]$ cat /data/hadoop/hdfs/dn/current/BP-1026073846-192.168.1.130-1483772708588/current/finalized/subdir0/subdir0/blk_1073741825

#

# /etc/fstab

# Created by anaconda on Fri Sep  2 00:27:27 2016

#

# Accessible filesystems, by reference, are maintained under '/dev/disk'

# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info

#

/dev/mapper/centos-root /                       xfs     defaults        0 0

UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot                   xfs     defaults        0 0

/dev/mapper/centos-swap swap                    swap    defaults        0 0

[hadoop@node3 ~]$ cat /data/hadoop/hdfs/dn/current/BP-1026073846-192.168.1.130-1483772708588/current/finalized/subdir0/subdir0/blk_1073741825

#

# /etc/fstab

# Created by anaconda on Fri Sep  2 00:27:27 2016

#

# Accessible filesystems, by reference, are maintained under '/dev/disk'

# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) for more info

#

/dev/mapper/centos-root /                       xfs     defaults        0 0

UUID=a2f7c1a7-a991-470a-b5f5-1c4ed4e098b6 /boot                   xfs     defaults        0 0

/dev/mapper/centos-swap swap                    swap    defaults        0 0

[hadoop@node4 ~]$ ls /data/hadoop/hdfs/dn/current/BP-1026073846-192.168.1.130-1483772708588/current/finalized/
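node4's finalized directory is empty because dfs.replication is set to 2, so each block is stored on only two of the three DataNodes. Replica placement can be confirmed from the NameNode with fsck (a quick check; the hostnames in the output depend on which DataNodes were chosen):

[hadoop@node1 ~]$ hdfs fsck /test/fstab -files -blocks -locations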

[hadoop@node1 ~]$ start-yarn.sh

starting yarn daemons

starting resourcemanager, logging to /bdapps/hadoop/logs/yarn-hadoop-resourcemanager-node1.out

node4: starting nodemanager, logging to /bdapps/hadoop/logs/yarn-hadoop-nodemanager-node4.out

node3: starting nodemanager, logging to /bdapps/hadoop/logs/yarn-hadoop-nodemanager-node3.out

node2: starting nodemanager, logging to /bdapps/hadoop/logs/yarn-hadoop-nodemanager-node2.out

[hadoop@node1 ~]$ jps

35957 NameNode

36153 SecondaryNameNode

36980 Jps

36715 ResourceManager

[hadoop@node2 ~]$ jps

35996 Jps

35527 DataNode

35880 NodeManager

[hadoop@node3 ~]$ jps

35113 DataNode

35474 NodeManager

35591 Jps

[hadoop@node4 ~]$ jps

35477 NodeManager

35113 DataNode

35600 Jps

[root@node1 ~]# ss -tnl

State       Recv-Q Send-Q                             Local Address:Port                                            Peer Address:Port              

LISTEN      0      128                                            *:50090                                                      *:*                  

LISTEN      0      128                                192.168.1.130:8020                                                       *:*                  

LISTEN      0      5                                  192.168.122.1:53                                                         *:*                  

LISTEN      0      128                                            *:50070                                                      *:*                  

LISTEN      0      128                                            *:22                                                         *:*                  

LISTEN      0      128                                    127.0.0.1:631                                                        *:*                  

LISTEN      0      100                                    127.0.0.1:25                                                         *:*                  

LISTEN      0      128                                           :::22                                                        :::*                  

LISTEN      0      128                                          ::1:631                                                       :::*                  

LISTEN      0      128                         ::ffff:192.168.1.130:8088                                                      :::*                  

LISTEN      0      100                                          ::1:25                                                        :::*                  

LISTEN      0      128                                           :::8030                                                      :::*                  

LISTEN      0      128                         ::ffff:192.168.1.130:8031                                                      :::*                  

LISTEN      0      128                         ::ffff:192.168.1.130:8032                                                      :::*                  

LISTEN      0      128                         ::ffff:192.168.1.130:8033                                                      :::*          

[hadoop@node1 ~]$ hdfs dfs -put /etc/rc.d/init.d/functions /test

[hadoop@node1 ~]$ hdfs dfs -ls /test

Found 2 items

-rw-r--r--   2 hadoop supergroup        465 2017-01-07 15:16 /test/fstab

-rw-r--r--   2 hadoop supergroup      13948 2017-01-07 15:35 /test/functions

[hadoop@node1 ~]$ hdfs dfs -put hadoop-2.6.2.tar.gz /test/

# Run wordcount on the text files

[hadoop@node1 ~]$ yarn jar /bdapps/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.2.jar wordcount /test/fstab /test/functions /test/wc

[hadoop@node1 ~]$ hdfs dfs -ls /test/wc

[hadoop@node1 ~]$ hdfs dfs -cat /test/wc/part-r-00000

Hadoop (YARN) application web UI:

http://192.168.1.130:8088

Hadoop administration (HDFS NameNode) web UI:

http://192.168.1.130:50070
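Both UIs can also be probed from the command line (a quick check, assuming curl is installed; each should answer with an HTTP status line, typically 200 or a redirect to the overview page):

[hadoop@node1 ~]$ curl -sI http://192.168.1.130:8088 | head -1

[hadoop@node1 ~]$ curl -sI http://192.168.1.130:50070 | head -1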

2. YARN Cluster Management Commands

[hadoop@node1 ~]$ yarn application -list

17/01/07 17:11:25 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8032

Total number of applications (application-types: [] and states: [SUBMITTED, ACCEPTED, RUNNING]):0

                Application-Id      Application-Name        Application-Type          User           Queue                   State             Final-State          Progress                        Tracking-URL

[hadoop@node1 ~]$ yarn application -list -appStates=all

17/01/07 17:12:13 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8032

Total number of applications (application-types: [] and states: [NEW, NEW_SAVING, SUBMITTED, ACCEPTED, RUNNING, FINISHED, FAILED, KILLED]):0

                Application-Id      Application-Name        Application-Type          User           Queue                   State             Final-State          Progress                        Tracking-URL

[hadoop@node1 ~]$ yarn application -status

[hadoop@node1 ~]$ yarn node -list

17/01/07 17:16:47 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8032

Total Nodes:3

         Node-Id             Node-State Node-Http-Address       Number-of-Running-Containers

     node2:40860                RUNNING        node2:8042                                  0

     node3:47737                RUNNING        node3:8042                                  0

     node4:45637                RUNNING        node4:8042                                  0

 

[hadoop@node1 ~]$ yarn node -status node3:47737  

17/01/07 17:20:41 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8032

Node Report : 

        Node-Id : node3:47737

        Rack : /default-rack

        Node-State : RUNNING

        Node-Http-Address : node3:8042

        Last-Health-Update : Sat 07/Jan/17 05:18:54:160CST

        Health-Report : 

        Containers : 0

        Memory-Used : 0MB

        Memory-Capacity : 8192MB

        CPU-Used : 0 vcores

        CPU-Capacity : 8 vcores

        Node-Labels : 

[hadoop@node1 ~]$ yarn logs -applicationId  

[hadoop@node1 ~]$ yarn rmadmin -refreshNodes  

17/01/07 17:27:20 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.1.130:8033
