A Simple Hadoop and Hive Deployment Guide

Download location for the installation packages used below: http://www.chocolee.cn/download/hadoop-hive/


Part 1: Preparation

OS version: Red Hat Enterprise Linux Server release 6.4 (Santiago)

Kernel: 2.6.32-358.el6.x86_64

Hadoop version: 2.6.0

Hive version: 1.0.0

MySQL version: 5.5.32

JDK version: 1.7.0_25

IP address       Hostname    Services to install
192.168.199.123  namenode    hadoop, hive, mysql, jdk
192.168.199.124  datanode    hadoop, jdk

1.1 Create the hadoop user

#namenode
[root@namenode ~]# useradd hadoop
[root@namenode ~]# echo 'hadoop' |passwd hadoop --stdin
Changing password for user hadoop.
passwd: all authentication tokens updated successfully.
#datanode
[root@datanode ~]# useradd hadoop
[root@datanode ~]# echo 'hadoop' |passwd hadoop --stdin
Changing password for user hadoop.
passwd: all authentication tokens updated successfully.

1.2 Edit the hosts file and hostname

#namenode
[root@namenode tmp]# cat /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=namenode
[root@namenode ~]# cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.199.123 namenode
192.168.199.124 datanode
#datanode
[root@datanode ~]# cat /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=datanode
[root@datanode ~]# cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.199.123 namenode
192.168.199.124 datanode
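
Note that on RHEL 6 the HOSTNAME setting in /etc/sysconfig/network only takes effect at the next boot; the hostname can be applied by hand for the current session, and name resolution verified afterwards. The commands below are an optional sketch and are not part of the original transcript:

#Apply the hostname immediately (otherwise a reboot is required)
[root@namenode ~]# hostname namenode
[root@datanode ~]# hostname datanode
#Verify that each host resolves and reaches the other by name
[root@namenode ~]# ping -c 1 datanode
[root@datanode ~]# ping -c 1 namenode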

1.3 Configure passwordless SSH login

#Note: passwordless login must be configured both from namenode to datanode and from namenode to namenode itself
[root@namenode ~]# su - hadoop
[hadoop@namenode ~]$ ssh-keygen -t dsa
Generating public/private dsa key pair.
Enter file in which to save the key (/home/hadoop/.ssh/id_dsa): 
Created directory '/home/hadoop/.ssh'.
Enter passphrase (empty for no passphrase): 
Enter same passphrase again: 
Your identification has been saved in /home/hadoop/.ssh/id_dsa.
Your public key has been saved in /home/hadoop/.ssh/id_dsa.pub.
The key fingerprint is:
a2:6f:80:fc:d7:9e:f2:fa:d7:8a:c9:bf:be:5d:99:2b hadoop@namenode
The key's randomart image is:
+--[ DSA 1024]----+
|                 |
|                 |
|                 |
|                 |
| . .  . S        |
|  o .. .       o |
|   ... .   .  +  |
|    ..+..+..E. . |
|     oo*O+*+...  |
+-----------------+
[hadoop@namenode ~]$ cd .ssh/
[hadoop@namenode .ssh]$ ssh-copy-id -i id_dsa.pub 192.168.199.123
The authenticity of host '192.168.199.123 (192.168.199.123)' can't be established.
RSA key fingerprint is ea:45:01:09:21:ca:dd:e6:77:f5:51:27:c9:ba:16:c9.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '192.168.199.123' (RSA) to the list of known hosts.
hadoop@192.168.199.123's password: 
Now try logging into the machine, with "ssh '192.168.199.123'", and check in:
 
  .ssh/authorized_keys
 
to make sure we haven't added extra keys that you weren't expecting.
 
[hadoop@namenode .ssh]$ ssh-copy-id -i id_dsa.pub 192.168.199.124
The authenticity of host '192.168.199.124 (192.168.199.124)' can't be established.
RSA key fingerprint is ea:45:01:09:21:ca:dd:e6:77:f5:51:27:c9:ba:16:c9.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '192.168.199.124' (RSA) to the list of known hosts.
hadoop@192.168.199.124's password: 
Now try logging into the machine, with "ssh '192.168.199.124'", and check in:
 
  .ssh/authorized_keys
 
to make sure we haven't added extra keys that you weren't expecting.
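
As a quick optional check (not part of the original transcript), both of the following should now log in and print the remote hostname without prompting for a password:

[hadoop@namenode .ssh]$ ssh namenode hostname
[hadoop@namenode .ssh]$ ssh datanode hostname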

1.4 Configure the JDK

#The JDK has been prepared in advance; place the jdk directory under the hadoop user's home directory
[hadoop@namenode ~]$ ll
total 4
drwxr-xr-x. 8 hadoop hadoop 4096 Nov  6 11:23 jdk1.7.0_25
[hadoop@datanode ~]$ ll
total 4
drwxr-xr-x. 8 hadoop hadoop 4096 Nov  6 11:23 jdk1.7.0_25
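
To confirm the bundled JDK runs on both hosts, a version check such as the following can be used (assumed commands; since the PATH is not configured until section 2.1, the full path is given here):

[hadoop@namenode ~]$ ~/jdk1.7.0_25/bin/java -version
[hadoop@datanode ~]$ ~/jdk1.7.0_25/bin/java -version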

Part 2: Deploying Hadoop

2.1 Configure the hadoop user's environment variables

#Hive will be installed only on the namenode, so remove the Hive-related environment variables on the datanode
[hadoop@namenode ~]$ cat .bash_profile 
# .bash_profile
 
# Get the aliases and functions
if [ -f ~/.bashrc ]; then
        . ~/.bashrc
fi
 
# User specific environment and startup programs
 
JAVA_HOME=/home/hadoop/jdk1.7.0_25
JRE_HOME=/home/hadoop/jdk1.7.0_25/jre
HADOOP_HOME=/home/hadoop/hadoop
HIVE_HOME=/home/hadoop/hive
PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$PATH
CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib:
export JAVA_HOME JRE_HOME HADOOP_HOME HIVE_HOME PATH CLASSPATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib:./lib:/lib:/usr/lib:/usr/libexec:/usr/lib64
PATH=$PATH:$HOME/bin
export PATH
[hadoop@namenode ~]$ . .bash_profile 
[hadoop@namenode ~]$ which java
~/jdk1.7.0_25/bin/java

2.2 Configure Hadoop

#Copy the pre-built Hadoop distribution to the hadoop user's home directory
[hadoop@namenode ~]$ ll -d hadoop
drwxr-xr-x. 10 hadoop hadoop 4096 Mar 19 11:30 hadoop
 
#etc/hadoop/hadoop-env.sh
export JAVA_HOME=/home/hadoop/jdk1.7.0_25
export HADOOP_HOME=/home/hadoop/hadoop
export PATH=$PATH:/home/hadoop/hadoop/bin
 
#etc/hadoop/core-site.xml
<configuration>
   <property>
      <name>fs.defaultFS</name>
      <value>hdfs://namenode:9000</value>
   </property>
   <property>
      <name>io.file.buffer.size</name>
      <value>131072</value>
      <description>Size of the read/write buffer used in SequenceFiles. The default value is 131072.</description>
  </property>
</configuration>
 
#etc/hadoop/hdfs-site.xml
<configuration>
    <property>
       <name>dfs.namenode.name.dir</name>
       <value>file:///home/hadoop/hadoopdata/hdfs/namenode</value>
    </property>
    <property>
       <name>dfs.datanode.data.dir</name>
       <value>file:///home/hadoop/hadoopdata/hdfs/datanode</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
 
#etc/hadoop/mapred-site.xml
<configuration>
  <property>
      <name>mapreduce.framework.name</name>
      <value>yarn</value>
      <description>Execution framework set to Hadoop YARN.</description>
  </property>
</configuration>
 
#etc/hadoop/yarn-site.xml
<configuration>
 
<!-- Site specific YARN configuration properties -->
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>Shuffle service that needs to be set for Map Reduce applications.</description>
</property>
<property>
   <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
   <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
   <name>yarn.resourcemanager.address</name>
   <value>namenode:8032</value>
</property>
<property>
   <name>yarn.resourcemanager.scheduler.address</name>
   <value>namenode:8030</value>
</property>
<property>
   <name>yarn.resourcemanager.resource-tracker.address</name>
   <value>namenode:8035</value>
</property>
<property>
   <name>yarn.resourcemanager.admin.address</name>
   <value>namenode:8033</value>
</property>
<property>
   <name>yarn.resourcemanager.webapp.address</name>
   <value>namenode:8088</value>
</property>
</configuration>
 
#etc/hadoop/slaves
datanode
 
After the configuration is complete, make sure the namenode and datanode machines have identical copies of the Hadoop installation.
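
hdfs-site.xml above points NameNode metadata and DataNode block storage at directories under /home/hadoop/hadoopdata. Hadoop normally creates them during formatting and daemon startup, but pre-creating them as the hadoop user avoids permission surprises; an optional sketch (these commands are not part of the original steps):

#On the namenode: directory from dfs.namenode.name.dir
[hadoop@namenode ~]$ mkdir -p /home/hadoop/hadoopdata/hdfs/namenode
#On the datanode: directory from dfs.datanode.data.dir
[hadoop@datanode ~]$ mkdir -p /home/hadoop/hadoopdata/hdfs/datanode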

2.3 Push the Hadoop installation from the namenode to the datanode hadoop user's home directory

#Push the Hadoop installation
[hadoop@namenode ~]$ scp -r hadoop/ datanode:/home/hadoop
#Push the environment variable file
[hadoop@namenode ~]$ scp .bash_profile  datanode:/home/hadoop
.bash_profile                                                             100%  598     0.6KB/s   00:00

2.4 Adjust the hadoop user's environment variables on the datanode

[hadoop@datanode ~]$ cat .bash_profile 
# .bash_profile
 
# Get the aliases and functions
if [ -f ~/.bashrc ]; then
        . ~/.bashrc
fi
 
# User specific environment and startup programs
 
JAVA_HOME=/home/hadoop/jdk1.7.0_25
JRE_HOME=/home/hadoop/jdk1.7.0_25/jre
HADOOP_HOME=/home/hadoop/hadoop
PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$HADOOP_HOME/bin:$PATH
CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib:
export JAVA_HOME JRE_HOME HADOOP_HOME PATH CLASSPATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib:./lib:/lib:/usr/lib:/usr/libexec:/usr/lib64
PATH=$PATH:$HOME/bin
export PATH
[hadoop@datanode ~]$ . .bash_profile

2.5 Format the NameNode and start Hadoop

#Format the NameNode
[hadoop@namenode hadoop]$ ./bin/hadoop namenode -format
...
15/04/01 17:07:17 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
15/04/01 17:07:17 INFO util.ExitUtil: Exiting with status 0
15/04/01 17:07:17 INFO namenode.NameNode: SHUTDOWN_MSG: 
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at namenode/192.168.199.123
************************************************************/
 
#Start Hadoop
[hadoop@namenode hadoop]$ ./sbin/start-all.sh 
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [namenode]
The authenticity of host 'namenode (192.168.199.123)' can't be established.
RSA key fingerprint is ea:45:01:09:21:ca:dd:e6:77:f5:51:27:c9:ba:16:c9.
Are you sure you want to continue connecting (yes/no)? yes
namenode: Warning: Permanently added 'namenode' (RSA) to the list of known hosts.
namenode: starting namenode, logging to /home/hadoop/hadoop/logs/hadoop-hadoop-namenode-namenode.out
datanode: starting datanode, logging to /home/hadoop/hadoop/logs/hadoop-hadoop-datanode-datanode.out
Starting secondary namenodes [0.0.0.0]
The authenticity of host '0.0.0.0 (0.0.0.0)' can't be established.
RSA key fingerprint is ea:45:01:09:21:ca:dd:e6:77:f5:51:27:c9:ba:16:c9.
Are you sure you want to continue connecting (yes/no)? yes
0.0.0.0: Warning: Permanently added '0.0.0.0' (RSA) to the list of known hosts.
0.0.0.0: starting secondarynamenode, logging to /home/hadoop/hadoop/logs/hadoop-hadoop-secondarynamenode-namenode.out
starting yarn daemons
starting resourcemanager, logging to /home/hadoop/hadoop/logs/yarn-hadoop-resourcemanager-namenode.out
datanode: starting nodemanager, logging to /home/hadoop/hadoop/logs/yarn-hadoop-nodemanager-datanode.out

2.6 Check whether Hadoop started correctly

#Check on the namenode
[hadoop@namenode hadoop]$ jps
17189 SecondaryNameNode
18178 Jps
17383 ResourceManager
16971 NameNode
 
#Check on the datanode
[hadoop@datanode ~]$ jps
29371 Jps
29243 NodeManager
29141 DataNode

Web check: open http://192.168.199.123:50070/ in a browser.
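
The same check can also be done from the command line now that the hadoop binaries are on the PATH; an optional sketch (assumed commands). The report should show one live datanode:

#Summarize HDFS capacity and live datanodes
[hadoop@namenode ~]$ hdfs dfsadmin -report
#List the (still empty) HDFS root directory
[hadoop@namenode ~]$ hdfs dfs -ls /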


Part 3: Deploying Hive

3.1 Configure Hive

#hive/conf/hive-site.xml
<property>
  <name>hive.exec.local.scratchdir</name>
  <value>/home/hadoop/hive/iotmp/hadoop</value>
  <description>Local scratch space for Hive jobs</description>
</property>
<property>
  <name>hive.downloaded.resources.dir</name>
  <value>/home/hadoop/hive/iotmp/${hive.session.id}_resources</value>
  <description>Temporary local directory for added resources in the remote file system.</description>
</property>
 
--------
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.199.123:3307/hive?createDatabaseIfNotExist=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
 
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>pgm</value>
    <description>Username to use against metastore database</description>
  </property>
 
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>pgmfetion</value>
    <description>password to use against metastore database</description>
  </property>
 
  <property>
    <name>datanucleus.autoCreateTables</name>
    <value>true</value>
  </property>
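
Because the metastore connects to MySQL over JDBC, the MySQL connector jar must also be on Hive's classpath. This step is not shown in the original transcript; an optional sketch (the jar name is only an example, use whatever connector version you have, assumed to be in the current directory):

#Copy the MySQL JDBC driver into Hive's lib directory
[hadoop@namenode ~]$ cp mysql-connector-java-*-bin.jar /home/hadoop/hive/lib/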

3.2 Create the hive database and grant privileges

#Create the hive database; it must use the latin1 character set
mysql> create database hive character set latin1;
mysql> show create database hive;
+----------+-----------------------------------------------------------------+
| Database | Create Database                                                 |
+----------+-----------------------------------------------------------------+
| hive     | CREATE DATABASE `hive` /*!40100 DEFAULT CHARACTER SET latin1 */ |
+----------+-----------------------------------------------------------------+
mysql> grant all on hive.* to 'pgm'@'192.168.199.%' identified by 'pgmfetion';
mysql> flush privileges;
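
An optional check that the account works before starting Hive (assumed command; the MySQL instance listens on port 3307, matching the JDBC URL in hive-site.xml):

#Should connect to the hive database with the credentials configured above
[hadoop@namenode ~]$ mysql -h 192.168.199.123 -P 3307 -upgm -ppgmfetion hive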

3.3 Log in to Hive and run a test

[hadoop@namenode conf]$ hive
 
Logging initialized using configuration in file:/home/hadoop/hive/conf/hive-log4j.properties
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/hadoop/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/hadoop/hive/lib/hive-jdbc-0.14.0-standalone.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
#Create the test table
hive> create table test(id int, name string) row format delimited FIELDS TERMINATED BY ',';
OK
Time taken: 1.023 seconds
#Load data for a test
[hadoop@namenode rcs]$ cat test.txt 
123,test
 
hive> load data local inpath 'test.txt' overwrite into table test;     
Loading data to table default.test
Table default.test stats: [numFiles=1, numRows=0, totalSize=9, rawDataSize=0]
OK
Time taken: 1.942 seconds
hive> select * from test;
OK
123     test
Time taken: 0.583 seconds, Fetched: 1 row(s)
#Inspect the HDFS file system
[hadoop@namenode rcs]$ hadoop dfs -lsr /
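
The listing output is omitted above; the loaded file should appear under Hive's default warehouse path. An optional sketch using the non-deprecated form of the command:

#Managed tables live under /user/hive/warehouse by default
[hadoop@namenode rcs]$ hdfs dfs -ls -R /user/hive/warehouse/test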

3.4 Preview of the metastore tables automatically created in the hive database

mysql> use hive;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
 
Database changed
mysql> show tables;
+---------------------------+
| Tables_in_hive            |
+---------------------------+
| BUCKETING_COLS            |
| CDS                       |
| COLUMNS_V2                |
| DATABASE_PARAMS           |
| DBS                       |
| FUNCS                     |
| FUNC_RU                   |
| GLOBAL_PRIVS              |
| IDXS                      |
| INDEX_PARAMS              |
| PARTITIONS                |
| PARTITION_KEYS            |
| PARTITION_KEY_VALS        |
| PARTITION_PARAMS          |
| PART_COL_PRIVS            |
| PART_COL_STATS            |
| PART_PRIVS                |
| ROLES                     |
| SDS                       |
| SD_PARAMS                 |
| SEQUENCE_TABLE            |
| SERDES                    |
| SERDE_PARAMS              |
| SKEWED_COL_NAMES          |
| SKEWED_COL_VALUE_LOC_MAP  |
| SKEWED_STRING_LIST        |
| SKEWED_STRING_LIST_VALUES |
| SKEWED_VALUES             |
| SORT_COLS                 |
| TABLE_PARAMS              |
| TAB_COL_STATS             |
| TBLS                      |
| TBL_COL_PRIVS             |
| TBL_PRIVS                 |
| VERSION                   |
+---------------------------+
35 rows in set (0.01 sec)
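
As a quick sanity check of the metastore contents (assumed queries), VERSION records the metastore schema version Hive initialized, and TBLS should now contain the test table created in section 3.3:

mysql> select SCHEMA_VERSION, VERSION_COMMENT from VERSION;
mysql> select TBL_NAME, TBL_TYPE from TBLS;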

