创建数据库
CREATE DATABASE hive;
desc database
在MySQL中查看元数据信息
mysql> select * from dbs\G;
********* 1. row *********
DB_ID: 1
DESC: Default Hive database
DB_LOCATION_URI: hdfs://localhost:9000/user/hive/warehouse
NAME: default
OWNER_NAME: public
OWNER_TYPE: ROLE
********* 2. row *********
DB_ID: 6
DESC: NULL
DB_LOCATION_URI: hdfs://localhost:9000/user/hive/warehouse/hive.db
NAME: hive
OWNER_NAME: hadoop
OWNER_TYPE: USER
********* 3. row *********
DB_ID: 7
DESC: NULL
DB_LOCATION_URI: hdfs://localhost:9000/d6_hive/test
NAME: hive2
OWNER_NAME: hadoop
OWNER_TYPE: USER
********* 4. row *********
DB_ID: 11
DESC: NULL
DB_LOCATION_URI: hdfs://localhost:9000/user/hive/warehouse/testdb.db
NAME: testdb
OWNER_NAME: hadoop
OWNER_TYPE: USER
********* 5. row *********
DB_ID: 12
DESC: NULL
DB_LOCATION_URI: hdfs://localhost:9000/user/hive/warehouse/d6_hive.db
NAME: d6_hive
OWNER_NAME: hadoop
OWNER_TYPE: USER
5 rows in set (0.00 sec)
ERROR:
No query specified
mysql>
修改hive参数的几种形式
1) set hive.metastore.warehouse.dir;
set key 取值
set key=value 设置值
局部 单session
2)hive-site.xml
全局
set xxxx=yyyy
....
set xxxx=zzzz
hive.db
create database hive; hive是db的名称.db
CREATE DATABASE hive2
location '/d6_hive/directory';
row format/ file format
两大分隔符:行与行 字段与字段之间的分隔符
列分隔符:\001
file格式:行式 列式
数值类型: int bigint float double DECIMAL
字符串:string
true/false 0/1
创建表
create table emp(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
加载数据
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename ;
LOCAL: 从本地(linux)加载数据
LOAD DATA LOCAL INPATH '/home/hadoop/data/emp.txt' OVERWRITE INTO TABLE emp ;
拷贝表结构
CREATE TABLE emp2
LIKE emp;
拷贝表结构和数据
create table emp3 as select * from emp;
查看表结构
desc formatted emp3; #推荐使用
desc extended emp3;
desc emp3;
修改表名
ALTER TABLE emp3 RENAME TO new_emp3;
删除表
drop table new_emp3;
内部表/外部表
官网对比
内部表 Table Type: MANAGED_TABLE
hive default: MANAGED_TABLE hive默认的是内部表 EXTERNAL
创建内部表
create table managed_emp(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
在mysql中
mysql> select * from tbls\G;
********* 1. row *********
TBL_ID: 1
CREATE_TIME: 1552944715
DB_ID: 1
LAST_ACCESS_TIME: 0
OWNER: hadoop
RETENTION: 0
SD_ID: 1
TBL_NAME: d6_wc
TBL_TYPE: MANAGED_TABLE
VIEW_EXPANDED_TEXT: NULL
VIEW_ORIGINAL_TEXT: NULL
********* 2. row *********
TBL_ID: 6
CREATE_TIME: 1552961328
DB_ID: 12
LAST_ACCESS_TIME: 0
OWNER: hadoop
RETENTION: 0
SD_ID: 6
TBL_NAME: emp
TBL_TYPE: MANAGED_TABLE
VIEW_EXPANDED_TEXT: NULL
VIEW_ORIGINAL_TEXT: NULL
********* 3. row *********
TBL_ID: 11
CREATE_TIME: 1553271846
DB_ID: 12
LAST_ACCESS_TIME: 0
OWNER: hadoop
RETENTION: 0
SD_ID: 11
TBL_NAME: emp2
TBL_TYPE: MANAGED_TABLE
VIEW_EXPANDED_TEXT: NULL
VIEW_ORIGINAL_TEXT: NULL
********* 4. row *********
TBL_ID: 14
CREATE_TIME: 1553274632
DB_ID: 12
LAST_ACCESS_TIME: 0
OWNER: hadoop
RETENTION: 0
SD_ID: 14
TBL_NAME: managed_emp
TBL_TYPE: MANAGED_TABLE
VIEW_EXPANDED_TEXT: NULL
VIEW_ORIGINAL_TEXT: NULL
4 rows in set (0.00 sec)
ERROR:
No query specified
mysql>
删除内部表:数据+元数据 删除 hive (d6_hive)> drop table managed_emp;
在MySQL中
mysql> select * from tbls\G;
********* 1. row *********
TBL_ID: 1
CREATE_TIME: 1552944715
DB_ID: 1
LAST_ACCESS_TIME: 0
OWNER: hadoop
RETENTION: 0
SD_ID: 1
TBL_NAME: d6_wc
TBL_TYPE: MANAGED_TABLE
VIEW_EXPANDED_TEXT: NULL
VIEW_ORIGINAL_TEXT: NULL
********* 2. row *********
TBL_ID: 6
CREATE_TIME: 1552961328
DB_ID: 12
LAST_ACCESS_TIME: 0
OWNER: hadoop
RETENTION: 0
SD_ID: 6
TBL_NAME: emp
TBL_TYPE: MANAGED_TABLE
VIEW_EXPANDED_TEXT: NULL
VIEW_ORIGINAL_TEXT: NULL
********* 3. row *********
TBL_ID: 11
CREATE_TIME: 1553271846
DB_ID: 12
LAST_ACCESS_TIME: 0
OWNER: hadoop
RETENTION: 0
SD_ID: 11
TBL_NAME: emp2
TBL_TYPE: MANAGED_TABLE
VIEW_EXPANDED_TEXT: NULL
VIEW_ORIGINAL_TEXT: NULL
3 rows in set (0.00 sec)
创建外部表
create EXTERNAL table emp_external(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
location '/d6_hive/external';
删除外部表:数据不删,元数据删
hive (d6_hive)> drop table emp_external;
在MySQL中发现元数据已经删除了
在hive-site.xml文件中添加如下
<!--在hive的cli打印当前库-->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<!--在hive的cli打印当前表的字段名-->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
在hive的官网中的配置文件中有详细说明
内部表和外部的区别以及使用场景
未被external修饰的是内部表(managed table),被external 修饰的外部表(external table);
区别:
内部表数据由Hive自身管理,外部表数据由HDFS管理;
内部表数据存储的位置是hive.metastore.warehouse.dir(默认:/usr/hive/warehouse),外部表数据的存储位置由自己制定。
删除内部表会直接删除元数据(metadata)及存储数据;删除外部表仅仅会删除元数据,HDFS上的文件并不会被删除;
对内部表的修改会将修改直接同步给元数据,而对外部表的表结构和分区进行修改,则需要修复(MSCK REPAIR TABLE table_name;)
使用场景:其他部门共有的,可以保证数据安全