Installing a Hive 2.0.1 Cluster on CentOS 7
1. Preparation
1.1. Install JDK 1.8 and MySQL 5.7.21 (not covered here)
1.2. Install Hadoop 2.6.0 (not covered here)
1.3. Machines
192.168.1.89 node1
192.168.1.149 node2
192.168.1.180 node3
Hadoop 2.6.0 is already installed on node1, node2, and node3.
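The steps below refer to the machines by hostname, so every node must be able to resolve node1, node2, and node3. A minimal sketch, assuming name resolution is not yet configured (run as root on each node):
cat >> /etc/hosts <<'EOF'
192.168.1.89 node1
192.168.1.149 node2
192.168.1.180 node3
EOF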
2. Download and Upload (on all three nodes)
http://archive.apache.org/dist/hive/hive-2.0.1/apache-hive-2.0.1-bin.tar.gz
Upload the tarball to /data/server on each of the three machines and extract it:
tar -zxvf apache-hive-2.0.1-bin.tar.gz
3. Edit the Configuration File (on all three nodes)
cd apache-hive-2.0.1-bin/conf
cp hive-default.xml.template hive-site.xml
Open hive-site.xml with vi and set the following properties. Note that the template copied above already defines most of them, so change the existing values rather than appending duplicates:
<configuration>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://192.168.1.166:3306/hive_db?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>1</value>
</property>
<property>
<name>hive.in.test</name>
<value>true</value>
</property>
<property>
<name>hive.server2.transport.mode</name>
<value>binary</value>
<description>
Expects one of [binary, http].
Transport mode of HiveServer2.
</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>this node's own IP (e.g. 192.168.1.89 on node1)</value>
<description>
Bind host on which to run the HiveServer2 Thrift interface. Can
be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST
</description>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'binary'.</description>
</property>
</configuration>
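The metastore database lives in MySQL on 192.168.1.166 (per javax.jdo.option.ConnectionURL above; createDatabaseIfNotExist=true creates hive_db automatically on first use). The account does need to accept connections from all three nodes, though. A sketch of the grant, run on the MySQL host, assuming the root/123456 credentials from the config and that remote access is not yet allowed:
mysql -u root -p <<'EOF'
GRANT ALL PRIVILEGES ON hive_db.* TO 'root'@'%' IDENTIFIED BY '123456';
FLUSH PRIVILEGES;
EOF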
4. Set Environment Variables (on all three nodes)
vim /etc/profile and append:
export HIVE_HOME=/data/server/apache-hive-2.0.1-bin
export HIVE_CONF_DIR=$HIVE_HOME/conf
export PATH=$HIVE_HOME/bin:$PATH
Apply the changes: source /etc/profile
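A quick check that the variables took effect:
echo $HIVE_HOME    # should print /data/server/apache-hive-2.0.1-bin
hive --version     # should report Hive 2.0.1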
5. Upload the MySQL Driver (on all three nodes)
cd /data/server/apache-hive-2.0.1-bin/lib/
Upload mysql-connector-java-5.1.30.jar into this directory.
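If the jar is already on node1, one way to push it to the other two nodes (assuming SSH access between them):
scp mysql-connector-java-5.1.30.jar node2:/data/server/apache-hive-2.0.1-bin/lib/
scp mysql-connector-java-5.1.30.jar node3:/data/server/apache-hive-2.0.1-bin/lib/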
6. Verify the Installation (on all three nodes)
(1) hive              // start the Hive CLI
(2) show databases;   // list the databases; if this succeeds, the metastore connection works
7. Start the Service in the Background (on all three nodes)
// A standalone metastore service is not needed here (each node talks to MySQL directly): nohup hive --service metastore &
nohup hive --service hiveserver2 &
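To confirm HiveServer2 started and is listening on the Thrift port configured above:
tail nohup.out            # startup log written by nohup
ss -nltp | grep 10000     # or netstat -nltp; 10000 is hive.server2.thrift.port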
8. Basic Usage, Part 1 (on node1; rows cannot be updated or deleted)
Start a client with the beeline command, then connect to Hive's default database by typing !connect jdbc:hive2://node1:10000/default and pressing Enter.
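A sample session (illustrative; since no authentication is configured above, just press Enter at the username and password prompts):
beeline
beeline> !connect jdbc:hive2://node1:10000/default
Enter username for jdbc:hive2://node1:10000/default:
Enter password for jdbc:hive2://node1:10000/default:
0: jdbc:hive2://node1:10000/default> show databases;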
8.1. Create a Database
create database test_db;
8.2. Create a Table
use test_db;
CREATE TABLE t_test1 (a int, b int, c int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
Keyword notes:
ROW FORMAT DELIMITED states that the clauses that follow define the row's column and element delimiters.
FIELDS TERMINATED BY sets the field separator.
8.3. Load Data
Create /data/server/apache-hive-2.0.1-bin/t_test1.txt with vi, separating fields with tabs:
16 2 3
61 12 13
41 2 31
17 21 3
71 2 31
1 12 34
11 2 34
Load it with:
LOAD DATA LOCAL INPATH '/data/server/apache-hive-2.0.1-bin/t_test1.txt' OVERWRITE INTO TABLE t_test1;
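The loaded rows end up in HDFS, which is why every node sees the same data. To check, assuming the default warehouse location:
hadoop fs -ls /user/hive/warehouse/test_db.db/t_test1
hadoop fs -cat /user/hive/warehouse/test_db.db/t_test1/*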
8.4. Insert Data
insert into t_test1(a,b,c) values(12,13,14);
8.5. Query Data (run the same query on the other nodes to confirm they see the data, which verifies the cluster)
select * from t_test1;
8.6. Describe the Table
desc t_test1;
8.7. Drop the Table
drop table t_test1;
9. Basic Usage, Part 2 (on node1; rows can be updated and deleted)
As before, start a client with the beeline command and connect with !connect jdbc:hive2://node1:10000/default.
9.1. Create a Table
use test_db;
create table t_test2(id int, name string) clustered by (id) into 2 buckets stored as orc TBLPROPERTIES('transactional'='true');
Keyword notes:
clustered by (id) into 2 buckets splits the table into 2 buckets by id.
stored as orc stores the table's files in the ORC format.
TBLPROPERTIES('transactional'='true') enables transactional support on the table.
9.2. Load Data
LOAD DATA from a file is not supported for transactional tables; see the staging workaround sketched below.
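A common workaround is to stage the rows in a plain table and copy them over with INSERT ... SELECT. A sketch, run at the beeline prompt (t_test2_stage and its data file are illustrative names, not part of the original setup):
use test_db;
create table t_test2_stage (id int, name string) row format delimited fields terminated by '\t';
load data local inpath '/data/server/apache-hive-2.0.1-bin/t_test2.txt' into table t_test2_stage;
insert into t_test2 select id, name from t_test2_stage;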
9.3. Insert Data
insert into t_test2(id,name) values(1,'你好');
9.4. Update Data
update t_test2 set name = '你不好' where id=1;
9.5. Query Data (again, you can query from the other nodes to verify the cluster)
select * from t_test2;
9.6. Delete Data
delete from t_test2 where id=1;
9.7. Describe the Table
desc t_test2;
9.8. Drop the Table
drop table t_test2;
9.9. Notes
If a lock error appears while creating the ORC table, you can seed the Hive metadata (stored in MySQL) with: INSERT INTO NEXT_LOCK_ID VALUES(1);
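One way to apply that fix from any node, reusing the metastore connection details from hive-site.xml (hive_db is the metastore database configured above):
mysql -h 192.168.1.166 -u root -p hive_db -e "INSERT INTO NEXT_LOCK_ID VALUES(1);"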
10. Calling Hive from Java
10.1. Add the Maven Dependency
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>2.0.1</version>
</dependency>
10.2. Code
package com.x.y.test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveJDBC {

    private static String driverName = "org.apache.hive.jdbc.HiveDriver";
    private static String url = "jdbc:hive2://192.168.1.89:10000/test_db";

    public static void main(String[] args) throws Exception {
        Class.forName(driverName);
        // No authentication is configured on HiveServer2 above, so user/password are null.
        Connection conn = DriverManager.getConnection(url, null, null);
        Statement stmt = conn.createStatement();
        // Assumes the t_test1 table from section 8 still exists (run this before dropping it in 8.7).
        String sql = "select * from t_test1";
        System.out.println("Running: " + sql);
        ResultSet rs = stmt.executeQuery(sql);
        System.out.println("a" + "\t" + "b" + "\t" + "c");
        while (rs.next()) {
            // All three columns are int (see the CREATE TABLE in 8.2).
            System.out.println(rs.getInt(1) + "\t" + rs.getInt(2) + "\t" + rs.getInt(3));
        }
        rs.close();
        stmt.close();
        conn.close();
    }
}