2012年7月31日 星期二

hadoop with hdfs and kfs on CentOS 6 x86_64 -- single node


(1)Environment: (@vmware)

    host : CentOS-6.0
    kernel: 2.6.32-71.el6.x86_64 #1 SMP
    MemTotal: 7891424 kB(/proc/meminfo)
    CPU(Quad core) : Intel(R) Core(TM) i3-2130 CPU @ 3.40GHz
    HDD: 1TB*2 (LVM ==> 2T)

    guest: CentOS-6.0 *2 (hadoop/hdfs and kfs)
    kernel: 2.6.32-71.el6.x86_64 #1 SMP
    MemTotal: 1G
    CPU *1
    HDD: 100GB
    hostname for kfs : metaserver

    hostname for hdfs/hadoop : nn-01
    /etc/hosts
        192.168.65.142 nn-01 hadoop1 ## single node for  hadoop with hdfs
        192.168.65.135 metaserver ## single node for kfs

(2)kfs installation
    see [KFS](KosmosFS) 安裝方法一

(3) hadoop installation and startup
    useradd -u 1000 hadoop

    su - hadoop
    mkdir ~/.ssh; ssh-keygen -t dsa
    mv ~/.ssh/id_dsa.pub ~/.ssh/authorized_keys ## for ssh password-less
    yum install java-1.6.0-openjdk.x86_64 java-1.6.0-openjdk-devel.x86_64 -y
    wget http://archive.cloudera.com/cdh/3/hadoop-0.20.2-cdh3u3.tar.gz
    tar -zxvf hadoop-0.20.2-cdh3u3.tar.gz
    ln -s hadoop-0.20.2-cdh3u3 hadoop

    ## append the following line into /etc/profile(by root user) , ~/.bashrc(by hadoop user) , /home/hadoop/hadoop/conf/hadoop-env.sh(by hadoop user)

    export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64

    ## configure (by hadoop user)
    vi /home/hadoop/hadoop/conf/core-site.xml
        <?xml version="1.0"?>

        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>fs.default.name</name>
                <value>hdfs://localhost:9000</value>
            </property>
        </configuration>

        The value of fs.default.name could also be hdfs://hadoop1:9000


    vi /home/hadoop/hadoop/conf/hdfs-site.xml
        <?xml version="1.0"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>dfs.replication</name>
                <value>1</value>
            </property>
        </configuration>

    vi /home/hadoop/hadoop/conf/mapred-site.xml
        <?xml version="1.0"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>mapred.job.tracker</name>
                <value>localhost:9001</value>
            </property>
        </configuration>

    value of mapred.job.tracker also could be hadoop1:9001

    vi /home/hadoop/hadoop/conf/masters
        localhost

        "hadoop1" is also fine.

    vi /home/hadoop/hadoop/conf/slaves
        localhost

        "hadoop1" is also fine.

    ## startup hadoop (by hadoop user)
    su - hadoop; cd ~/hadoop
    bin/hadoop namenode -format
    #if you get an error, it might be that your JAVA_HOME is not set correctly.

    bin/start-all.sh

    bin/hadoop fs -ls /

    bin/hadoop fs -put conf/core-site.xml /test.xml
    bin/hadoop fs -ls /

    netstat -tplnu | grep 9000 ## check hdfs listen port

    bin/stop-all.sh   # stop hdfs for changing filesystem to kfs
 
(4)change to kfs
    ## root@metaserver
    cd /usr/local/kfs-0.5
    ant jar       # see build/kfs-0.5.jar
    scp build/kfs-0.5.jar hadoop1:/home/hadoop/hadoop/lib/.
    scp build/lib/* hadoop1:/home/hadoop/hadoop/lib/native/Linux-amd64-64
 
   
 
    ## root@hadoop1
    cd /home/hadoop/hadoop/lib
    mv kfs-0.x.y.jar kfs-0.x.y.jar.bak ## rename old version
    chown hadoop.hadoop kfs-0.5.jar
    chown -R hadoop.hadoop /home/hadoop/hadoop/lib/native/Linux-amd64-64
    vi /etc/profile

            :
            :
        export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64

        export LD_LIBRARY_PATH=/home/hadoop/hadoop/lib/native/Linux-amd64-64

        export HADOOP_PID_DIR=/home/hadoop/hadoop/tmp
        #if you changed the config file path, modify this parameter
        #export HADOOP_CONF_DIR=/home/hadoop/hadoop/conf

    ## hadoop@hadoop1

    vi ~/.bashrc and vi ~/hadoop/conf/hadoop-env.sh
            :

            :
        export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64
        export LD_LIBRARY_PATH=/home/hadoop/hadoop/lib/native/Linux-amd64-64
        export HADOOP_PID_DIR=/home/hadoop/hadoop/tmp
        #if you changed the config file path, modify this parameter
        #export HADOOP_CONF_DIR=/home/hadoop/hadoop/conf

    vi ~/hadoop/conf/core-site.xml
        <?xml version="1.0"?>

        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>fs.default.name</name>
                <value>kfs://metaserver:20000</value>
            </property>
            <property>
                <name>fs.kfs.metaServerHost</name>
                <value>metaserver</value>
                <description>The location of the KFS meta server.</description>
            </property>
            <property>
                <name>fs.kfs.metaServerPort</name>
                <value>20000</value>
                <description>The location of the meta server's port.</description>
            </property>
        </configuration>

    ## testing
    cd ~/hadoop
    bin/hadoop fs -fs kfs://metaserver:20000 -ls /

    ## and then you can start Hadoop as before; the commands are the same as with hadoop/hdfs

reference: http://wangwei3.iteye.com/blog/895867
        http://archive.cloudera.com/cdh/3/hadoop-0.20.2-cdh3u3/single_node_setup.html
        http://sourceforge.net/apps/trac/kosmosfs/wiki/UsingWithHadoop
        http://trac.nchc.org.tw/cloud/wiki/Hadoop_Lab1

沒有留言:

張貼留言

文章分類