Wednesday, August 22, 2012

Handout for Hadoop cluster installation and deployment

172.16.229.128    172.16.229.129
+-----------+     +------------+
|Namenode   |     |Secondary   |
|nn01       |     |Namenode &  |
|           |     |JobTracker  |
|           |     |jt01        |
+-----+-----+     +------+-----+            +----NAT(@VM)
      |                  |                  |
------+------------------+------------------+
      |                  |                  |
+-----+-----+     +------+-----+     +------+-----+
|Datanode   |     |Datanode    |     |Datanode    |
|   &       |     |      &     |     |      &     |
|TaskTracker|     |TaskTracker |     |TaskTracker |
|dn01       |     |dn02        |     |dn03        |
+-----------+     +------------+     +------------+
172.16.229.130    172.16.229.131     172.16.229.132

Environment: (@VM)

        host : CentOS-6.0
        kernel: 2.6.32-71.el6.x86_64 #1 SMP
        MemTotal: 7891424 kB(/proc/meminfo)
        CPU(Quad core) : Intel(R) Core(TM) i3-2130 CPU @ 3.40GHz
        HDD: 1TB*2 (LVM ==> 2T)

        guest: CentOS-6.0
        kernel: 2.6.32-71.el6.x86_64 #1 SMP
        MemTotal: 1G
        CPU *1
        HDD: 200GB

@NFS Server(VM host)

yum install -y rpcbind nfs-utils
mkdir /home/nfs
vi /etc/exports
  /home/nfs         172.16.229.0/24(rw,sync,no_root_squash) *(ro,sync,no_root_squash)
chmod -R 777 /home/nfs
#The NFS export options must include 'no_root_squash',
#or you will get an error when formatting the namenode (sudo -u hdfs hadoop namenode -format):
#lsof shows java stuck on a file such as /nfsmount/dfs/nn/current/nfs.......
#because the hdfs user does not have enough permission. (See the root-squash check after the mount test below.)


chkconfig rpcbind on
chkconfig nfs on
chkconfig nfslock on
service rpcbind start
service nfs start
service nfslock start

showmount -e localhost
  Export list for localhost:
  /home/nfs 172.16.229.0/24,localhost.localdomain

mkdir /mnt/test
mount -t nfs -o tcp,soft,intr,timeo=10,retrans=10 172.16.229.1:/home/nfs /mnt/test
umount /mnt/test
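
#While the export is mounted, you can also confirm that root is not being squashed
#(relates to the no_root_squash note above); a minimal sketch reusing /mnt/test:
mount -t nfs -o tcp,soft,intr 172.16.229.1:/home/nfs /mnt/test
touch /mnt/test/root_write_test
ls -l /mnt/test/root_write_test    # owner should be root, not nobody/nfsnobody
rm -f /mnt/test/root_write_test
umount /mnt/test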


@all Hadoop/HDFS nodes (VM guests)

[root@nn01 yum.repos.d]# cat /etc/hosts
172.16.43.248   HT248   # Added by NetworkManager
127.0.0.1       localhost.localdomain   localhost
::1     HT248   localhost6.localdomain6 localhost6
172.16.43.248 HT248
172.16.229.128 hd01 nn01 namenode nn01.hd
172.16.229.129 hd02 jt01 jobtracker  nn02 #secondary namenode
172.16.229.130 hd03 dn01 datanode01
172.16.229.131 hd04 dn02 datanode02
172.16.229.132 hd05 dn03 datanode03

[root@nn01 ~]# scp /etc/hosts jt01:/etc/.
[root@nn01 ~]# scp /etc/hosts dn01:/etc/.
[root@nn01 ~]# scp /etc/hosts dn02:/etc/.
[root@nn01 ~]# scp /etc/hosts dn03:/etc/.


Set up password-less SSH between all hosts (see the sketch below)
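
#a minimal sketch (assumes everything runs as root, as in the commands below;
#ssh-copy-id comes from the openssh-clients package):
ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
for h in nn01 jt01 dn01 dn02 dn03; do ssh-copy-id root@$h; done
ssh jt01 hostname    # should not prompt for a password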


#get the CDH3 GPG key on all hosts
rpm --import http://archive.cloudera.com/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera

#add the CDH3 repository on all hosts
[cloudera-cdh3]
name=Cloudera's Distribution for Hadoop, Version 3
mirrorlist=http://archive.cloudera.com/redhat/6/x86_64/cdh/3/mirrors
gpgkey = http://archive.cloudera.com/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera   
gpgcheck = 1
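
#a minimal sketch for creating that repo file on each host
#(the filename cloudera-cdh3.repo is just a choice):
cat > /etc/yum.repos.d/cloudera-cdh3.repo <<'EOF'
[cloudera-cdh3]
name=Cloudera's Distribution for Hadoop, Version 3
mirrorlist=http://archive.cloudera.com/redhat/6/x86_64/cdh/3/mirrors
gpgkey = http://archive.cloudera.com/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera
gpgcheck = 1
EOF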

#install hadoop and hadoop-native for all hosts
yum install hadoop-0.20 hadoop-0.20-native -y
yum install hadoop-0.20-namenode hadoop-0.20-datanode hadoop-0.20-secondarynamenode hadoop-0.20-jobtracker hadoop-0.20-tasktracker nfs-utils -y


[root@nn01 ~]# yum install java-1.6.0-openjdk java-1.6.0-openjdk-devel -y
[root@nn01 ~]# vi /usr/lib/hadoop-0.20/conf/hadoop-env.sh
# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME.  All others are
# optional.  When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.  Required.
# export JAVA_HOME=/usr/lib/j2sdk1.6-sun
export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64

[root@nn01 ~]# vi /etc/profile
  :
export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64

[root@nn01 ~]# scp /usr/lib/hadoop-0.20/conf/hadoop-env.sh jt01:/usr/lib/hadoop-0.20/conf/.
[root@nn01 ~]# scp /usr/lib/hadoop-0.20/conf/hadoop-env.sh dn01:/usr/lib/hadoop-0.20/conf/.
[root@nn01 ~]# scp /usr/lib/hadoop-0.20/conf/hadoop-env.sh dn02:/usr/lib/hadoop-0.20/conf/.
[root@nn01 ~]# scp /usr/lib/hadoop-0.20/conf/hadoop-env.sh dn03:/usr/lib/hadoop-0.20/conf/.
[root@nn01 ~]# scp /etc/profile jt01:/etc/.
[root@nn01 ~]# scp /etc/profile dn01:/etc/.
[root@nn01 ~]# scp /etc/profile dn02:/etc/.
[root@nn01 ~]# scp /etc/profile dn03:/etc/.

##configure open-file limits (@ all hosts)
vi /etc/security/limits.conf
  :
* - nofile 65536
  :
# or
  :
hdfs - nofile 65536        #Hadoop processes owned by hdfs (on the Hadoop cluster)
hypertable - nofile 65536    #Hypertable processes owned by hypertable

#then log in again, or reboot

[root@nn01 ~]# ulimit -a
  :
open files                      (-n) 65536
  :
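
#to confirm the limit also applies to the service accounts (a quick sketch;
#assumes the hdfs user already exists from the package install):
sudo -u hdfs bash -c 'ulimit -n'    # expect 65536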


#disable all Hadoop services on all hosts until HDFS has been set up successfully.
chkconfig hadoop-0.20-namenode off
chkconfig hadoop-0.20-jobtracker off
chkconfig hadoop-0.20-secondarynamenode off
chkconfig hadoop-0.20-tasktracker off
chkconfig hadoop-0.20-datanode off

#@nn01
chkconfig rpcbind on
chkconfig nfslock on
service rpcbind start
service nfslock start

#mkdir for all hosts
##Create the dfs.name.dir local directories
mkdir -p /data/1/dfs/nn /nfsmount/dfs/nn

##Create the dfs.data.dir local directories (datanode volumes)
mkdir -p /data/1/dfs/dn /data/2/dfs/dn /data/3/dfs/dn /data/4/dfs/dn

##Create the mapred.local.dir local directories
mkdir -p /data/1/mapred/local /data/2/mapred/local /data/3/mapred/local /data/4/mapred/local

##owner and permission config
chown -R hdfs:hadoop /data/1/dfs/nn /nfsmount/dfs/nn /data/1/dfs/dn /data/2/dfs/dn /data/3/dfs/dn /data/4/dfs/dn
chown -R mapred:hadoop /data/1/mapred/local /data/2/mapred/local /data/3/mapred/local /data/4/mapred/local
chmod -R 700 /data/1/dfs/nn /nfsmount/dfs/nn
chmod -R 700 /data/1/dfs/dn /data/2/dfs/dn /data/3/dfs/dn /data/4/dfs/dn
chmod -R 755 /data/1/mapred/local /data/2/mapred/local /data/3/mapred/local /data/4/mapred/local


#mount NFS (CentOS 6) from the VM host (only @nn01)
mount -t nfs -o tcp,soft,intr,timeo=10,retrans=10 172.16.229.1:/home/nfs /nfsmount/dfs/nn
umount /nfsmount/dfs/nn
vi /etc/fstab
  172.16.229.1:/home/nfs    /nfsmount/dfs/nn    nfs tcp,soft,intr,timeo=10,retrans=10 1 1
mount /nfsmount/dfs/nn
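
#before starting the namenode, double-check that the NFS mount is live (a quick sketch):
df -h /nfsmount/dfs/nn
mount | grep /nfsmount/dfs/nn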

#customizing config for all hosts: make a copy of the "empty" configuration
cp -r  /etc/hadoop-0.20/conf.empty /etc/hadoop-0.20/conf.my_cluster

#customizing config for all hosts: give this config a higher priority
alternatives --install /etc/hadoop-0.20/conf hadoop-0.20-conf /etc/hadoop-0.20/conf.my_cluster 50

alternatives --display hadoop-0.20-conf

#customizing config for all hosts: core-site.xml (/etc/hadoop/conf.my_cluster/core-site.xml)
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>fs.default.name</name>
            <value>hdfs://nn01:9000</value>
         </property>
    </configuration>

#customizing config: hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>dfs.name.dir</name>
            <value>/data/1/dfs/nn,/nfsmount/dfs/nn</value>
        </property>
        <property>
            <name>dfs.data.dir</name>
            <value>/data/1/dfs/dn,/data/2/dfs/dn,/data/3/dfs/dn</value>
        </property>
        <property>
            <name>dfs.webhdfs.enabled</name>
            <value>true</value>
        </property>
        <property>
            <name>dfs.datanode.failed.volumes.tolerated</name>
            <value>2</value>
        </property>
        <property>
            <name>dfs.http.address</name>
            <value>nn01:50070</value>
        </property>

    </configuration>

#customizing config: mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>mapred.job.tracker</name>
            <value>jt01:9001</value>
        </property>
        <property>
            <name>mapred.local.dir</name>
            <value>/data/1/mapred/local,/data/2/mapred/local,/data/3/mapred/local</value>
        </property>
        <property>
            <name>mapreduce.jobtracker.restart.recover</name>
            <value>true</value>
        </property>

    </configuration>

#customizing config: masters and slaves
##/etc/hadoop/conf.my_cluster/masters (jt01 is the Secondary Namenode)
nn01
jt01

##/etc/hadoop/conf.my_cluster/slaves
dn01
dn02
dn03

#deploy the config to all nodes
[root@nn01 ~]# scp -r /etc/hadoop/conf.my_cluster jt01:/etc/hadoop/.
[root@nn01 ~]# scp -r /etc/hadoop/conf.my_cluster dn01:/etc/hadoop/.
[root@nn01 ~]# scp -r /etc/hadoop/conf.my_cluster dn02:/etc/hadoop/.
[root@nn01 ~]# scp -r /etc/hadoop/conf.my_cluster dn03:/etc/hadoop/.
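
##the same deployment written as a loop (an equivalent sketch):
[root@nn01 ~]# for h in jt01 dn01 dn02 dn03; do scp -r /etc/hadoop/conf.my_cluster $h:/etc/hadoop/. ; done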


##Check the conf priority on all hosts
alternatives --display hadoop-0.20-conf
### /etc/hadoop-0.20/conf.my_cluster should be at priority 50 (the highest)

#format namenode @nn01
[root@nn01 ~]# sudo -u hdfs hadoop namenode -format
12/08/22 19:05:42 INFO namenode.NameNode: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   host = nn01/172.16.229.128
STARTUP_MSG:   args = [-format]
STARTUP_MSG:   version = 0.20.2-cdh3u5
STARTUP_MSG:   build = file:///data/1/tmp/topdir/BUILD/hadoop-0.20.2-cdh3u5 -r 580d1d26c7ad6a7c6ba72950d8605e2c6fbc96cc; compiled by 'root' on Mon Aug  6 20:22:48 PDT 2012
************************************************************/
Re-format filesystem in /data/1/dfs/nn ? (Y or N) Y
Re-format filesystem in /nfsmount/dfs/nn ? (Y or N) Y
12/08/22 19:08:35 INFO util.GSet: VM type       = 64-bit
12/08/22 19:08:35 INFO util.GSet: 2% max memory = 19.33375 MB
12/08/22 19:08:35 INFO util.GSet: capacity      = 2^21 = 2097152 entries
  :
  :
  :
## An upper-case 'Y' is necessary when re-formatting the namenode.



#####NOTE: if you get an error like
+======================================================================+
|      Error: JAVA_HOME is not set and Java could not be found         |
+----------------------------------------------------------------------+
| Please download the latest Sun JDK from the Sun Java web site        |
|       > http://java.sun.com/javase/downloads/ <                      |
|                                                                      |
| Hadoop requires Java 1.6 or later.                                   |
| NOTE: This script will find Sun Java whether you install using the   |
|       binary or the RPM based installer.                             |
+======================================================================+

##check that JAVA_HOME is set in /usr/lib/hadoop-0.20/conf/hadoop-env.sh
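
##a quick way to verify (a minimal sketch):
grep '^export JAVA_HOME' /usr/lib/hadoop-0.20/conf/hadoop-env.sh
/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/bin/java -version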

#start the HDFS services now and enable auto-start on reboot
## Namenode@nn01
[root@nn01 ~]# service hadoop-0.20-namenode start
[root@nn01 ~]# chkconfig hadoop-0.20-namenode on

## Secondary Namenode @jt01
[root@jt01 ~]# service hadoop-0.20-secondarynamenode start
[root@jt01 ~]# chkconfig hadoop-0.20-secondarynamenode on

## DataNode @dn01~dn03
[root@dn01 ~]# service hadoop-0.20-datanode start
[root@dn02 ~]# service hadoop-0.20-datanode start
[root@dn03 ~]# service hadoop-0.20-datanode start
[root@dn01 ~]# chkconfig hadoop-0.20-datanode on
[root@dn02 ~]# chkconfig hadoop-0.20-datanode on
[root@dn03 ~]# chkconfig hadoop-0.20-datanode on
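
## once the datanodes are up, a quick check that all three have registered (a sketch, run from any node):
[root@nn01 ~]# sudo -u hdfs hadoop dfsadmin -report | grep -E 'Datanodes available|^Name:'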

#Testing from different nodes
[root@nn01 ~]# sudo -u hdfs hadoop fs -mkdir /tmp
[root@jt01 ~]# sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
[root@dn03 ~]# sudo -u hdfs hadoop fs -ls /
Found 1 items
drwxrwxrwt   - hdfs supergroup          0 2012-08-22 23:02 /tmp
##Watch out for file permissions inside HDFS; several users need write access,
##such as: hdfs / root / mapred ...
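
##for example, to give root a writable home directory in HDFS (a hedged sketch; adjust the user as needed):
[root@nn01 ~]# sudo -u hdfs hadoop fs -mkdir /user/root
[root@nn01 ~]# sudo -u hdfs hadoop fs -chown root /user/root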


#start the MapReduce services now and enable auto-start on reboot
## JobTracker @jt01
[root@jt01 ~]# service hadoop-0.20-jobtracker start
[root@jt01 ~]# chkconfig hadoop-0.20-jobtracker on

## TaskTracker @dn01~dn03
[root@dn01 ~]# chkconfig hadoop-0.20-tasktracker on
[root@dn02 ~]# chkconfig hadoop-0.20-tasktracker on
[root@dn03 ~]# chkconfig hadoop-0.20-tasktracker on
[root@dn01 ~]# service hadoop-0.20-tasktracker start
[root@dn02 ~]# service hadoop-0.20-tasktracker start
[root@dn03 ~]# service hadoop-0.20-tasktracker start
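
## optional smoke test (a sketch; the examples jar path may differ in your CDH3 install):
[root@jt01 ~]# sudo -u hdfs hadoop fs -mkdir /user/hdfs        # HDFS home dir for the hdfs user, if missing
[root@jt01 ~]# sudo -u hdfs hadoop jar /usr/lib/hadoop-0.20/hadoop-examples.jar pi 2 100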

#After all hosts have been rebooted

[root@nn01 ~]# netstat -ptlnu | grep java
tcp        0      0 172.16.229.128:9000         0.0.0.0:*                   LISTEN      1486/java          
tcp        0      0 172.16.229.128:50070        0.0.0.0:*                   LISTEN      1486/java          
tcp        0      0 0.0.0.0:60551               0.0.0.0:*                   LISTEN      1486/java          

[root@jt01 ~]# netstat -ptlnu | grep java
tcp        0      0 172.16.229.129:9001         0.0.0.0:*                   LISTEN      1387/java          
tcp        0      0 0.0.0.0:50090               0.0.0.0:*                   LISTEN      1468/java          
tcp        0      0 0.0.0.0:34733               0.0.0.0:*                   LISTEN      1387/java          
tcp        0      0 0.0.0.0:50030               0.0.0.0:*                   LISTEN      1387/java          
tcp        0      0 0.0.0.0:59731               0.0.0.0:*                   LISTEN      1468/java          

[root@dn01 ~]# netstat -ptlnu | grep java
tcp        0      0 0.0.0.0:50060               0.0.0.0:*                   LISTEN      1515/java          
tcp        0      0 0.0.0.0:50010               0.0.0.0:*                   LISTEN      1386/java          
tcp        0      0 0.0.0.0:55162               0.0.0.0:*                   LISTEN      1386/java          
tcp        0      0 0.0.0.0:50075               0.0.0.0:*                   LISTEN      1386/java          
tcp        0      0 0.0.0.0:50020               0.0.0.0:*                   LISTEN      1386/java          
tcp        0      0 127.0.0.1:45221             0.0.0.0:*                   LISTEN      1515/java          

[root@dn02 ~]# netstat -ptlnu | grep java
tcp        0      0 0.0.0.0:50060               0.0.0.0:*                   LISTEN      1528/java          
tcp        0      0 0.0.0.0:47663               0.0.0.0:*                   LISTEN      1399/java          
tcp        0      0 0.0.0.0:50010               0.0.0.0:*                   LISTEN      1399/java          
tcp        0      0 127.0.0.1:55771             0.0.0.0:*                   LISTEN      1528/java          
tcp        0      0 0.0.0.0:50075               0.0.0.0:*                   LISTEN      1399/java          
tcp        0      0 0.0.0.0:50020               0.0.0.0:*                   LISTEN      1399/java          

[FAQ]

(1) namenode start-up error
  PriviledgedActionException as:hdfs (auth:SIMPLE) cause:java.net.BindException:

  check hdfs-site.xml and masters
  if a secondary namenode has been assigned, you should list it in masters

(2) for better performance, modify /etc/sysctl.conf
  vi /etc/sysctl.conf
    vm.swappiness=0
  sysctl -p
  #prefer keeping pages in memory over swapping (not a guarantee of zero swap usage)
(3) Secondary Namenode setting:
  1) hdfs-site.xml on the SecondaryNameNode (or on all hosts) should contain:
        <property>
            <name>dfs.http.address</name>
            <value>nn01:50070</value>
        </property>
      #nn01 is the namenode
  2) masters (on all hosts) should contain:
     nn01
     jt01
     #Namenode and SecondaryNameNode
  3) startup namenode service @nn01
  4) startup secondarynamenode service @jt01
  Note: (refer to https://ccp.cloudera.com/display/CDHDOC/CDH3+Deployment+on+a+Cluster)
The Secondary NameNode performs an HTTP GET request to retrieve the current fsimage (checkpoint) and edits log from the NameNode, and an HTTP PUT request to upload the new checkpoint back to the NameNode. The dfs.http.address property defines the host and port on which the NameNode listens for the Secondary NameNode to connect.
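
  A quick way to confirm the Secondary NameNode can reach that address (a sketch):
  [root@jt01 ~]# curl -sI http://nn01:50070/ | head -n 1    # expect an HTTP response, e.g. 200 OK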
