Archive for Juli 19th, 2015

Raspberry Pi Hadoop Cluster – to get into distributed computing and Big Data processing

Sonntag, Juli 19th, 2015

raspberrypi_logo.jpg   hadoop_logo   Raspberry Pi Hadoop Cluster is built up by a number of components and Open Source frameworks which makes it quite flexible and modular – before diving deeper into Hadoop it is easier to view it as two main parts – the data storage (HDFS) and the data processing (MapReduce)

Hadoop-HDFS_01

Hadoop-MapReduce-WordCount_01

RaspberryPiHadoop_01
# vi /etc/network/interfaces
iface eth0 inet static
address 192.168.1.201(-204)
netmask 255.255.255.0
gateway 192.168.1.254
# vi /etc/resolv.conf
nameserver 192.168.1.254
# apt-get update
# apt-get upgrade -y
# rpi-update
# vi /etc/dphys-swapfile
CONF_SWAPSIZE=512
# dphys-swapfile setup
# dphys-swapfile swapon
# java -version
# addgroup hadoop
# adduser --ingroup hadoop hduser
# adduser hduser sudo
# su - hduser
# mkdir .ssh
# ssh-keygen -t rsa -C hduser@JSHOHadoop01
# cat .ssh/id_rsa.pub | ssh hduser@JSHOHadoop01 'cat >> .ssh/authorized_keys'
# su - hduser
# ssh JSHOHadoop01
# su - root
# wget http://apache.mirrors.spacedump.net/hadoop/core/hadoop-1.2.1/hadoop-1.2.1.tar.gz
# tar -xvzf hadoop-1.2.1.tar.gz -C /opt/
# cd /opt
# mv hadoop-1.2.1 hadoop
# chown -R hduser:hadoop hadoop
# vi /etc/bash.bashrc
export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
export HADOOP_INSTALL=/opt/hadoop
export PATH=$PATH:$HADOOP_INSTALL/bin
# su - hduser
# hadoop version
# vi /opt/hadoop/conf/hadoop-env.sh
export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
export HADOOP_HEAPSIZE=250
export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS -client"

Single Node

 # vi /opt/hadoop/conf/core-site.xml
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/hdfs/tmp</value>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost:54310</value>
  </property>
</configuration>
# vi /opt/hadoop/conf/mapred-site.xml
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:54311</value>
  </property>
</configuration>
# vi /opt/hadoop/conf/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
# su - root
# mkdir -p /hdfs/tmp
# chown hduser:hadoop /hdfs/tmp
# chmod 750 /hdfs/tmp
# su - hduser

# hadoop namenode -format
# su - hduser

# /opt/hadoop/bin/start-dfs.sh
# /opt/hadoop/bin/start-mapred.sh
# jps
# hadoop jar /opt/hadoop/hadoop-examples-1.2.1.jar wordcount /license.txt /license-out.txt

Multiple Node

# vi /opt/hadoop/conf/masters
JSHOHadoop01
# vi /opt/hadoop/conf/core-site.xml
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/hdfs/tmp</value>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://JSHOHadoop01:54310</value>
  </property>
</configuration>
# vi /opt/hadoop/conf/mapred-site.xml
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>JSHOHadoop01:54311</value>
  </property>
</configuration>
# vi /opt/hadoop/conf/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>4</value>
  </property>
</configuration>
# vi /opt/hadoop/conf/slaves
JSHOHadoop01
JSHOHadoop02
JSHOHadoop03
JSHOHadoop04
# su - hduser
# ssh JSHOHadoop01
# exit
# ssh JSHOHadoop02
# exit
# ssh JSHOHadoop03
# exit
# ssh JSHOHadoop04
# exit
# hadoop namenode -format
# /opt/hadoop/bin/start-dfs.sh
# /opt/hadoop/bin/start-mapred.sh
http://JSHOHadoop01:50030
http://JSHOHadoop01:50070

# /opt/hadoop/bin/stop-mapred.sh
# /opt/hadoop/bin/stop-dfs.sh
# /opt/hadoop/bin/start-all.sh
# /opt/hadoop/bin/stop-all.sh