博客 Hive的表操作1

Hive的表操作1

数栈君发表于 2023-07-07 11:12 454 0

一、Hive表操作1-内部表和外部表
1、内部表是私有表，一旦给表加载数据之后，内部表认为这份数据就是它独占的；表一旦删除，表数据文件也会跟着全部删除。如果在应用中，数据是部门内部的，或者个人的，则可以将表设置为内部表，不会对其他人造成影响。

2、内部表创建语法： create table 表

3、外部表是公有表，一旦给表加载数据之后，外部表认为这份数据是大家共享的；表一旦删除，表数据文件不会被删除，只删除表和文件之间的映射关系。如果在应用中，数据是各部门共享的，则可以设置为外部表，你的表只是对文件有访问权。

4、外部表创建语法： create external table 表

-- 1. Create two external tables backed by tab-delimited text.
create external table teacher (
    tid   string,
    tname string
)
row format delimited
    fields terminated by '\t';

create external table student (
    sid    string,
    sname  string,
    sbirth string,
    ssex   string
)
row format delimited
    fields terminated by '\t';

-- Load the local text files into the matching tables.
load data local inpath '/export/data/hivedatas/student.txt'
    into table student;
load data local inpath '/export/data/hivedatas/teacher.txt'
    into table teacher;

-- Inspect what was loaded.
select * from student;
select * from teacher;

-- Drop the external table: only the metadata is removed, the table data is kept.
drop table teacher;

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
外部表可以实现共享一份数据

-- Simulate several tables sharing one copy of the data:
-- covid1 and covid2 are both external tables declared over '/input/covid'.
drop table covid1;

create external table covid1 (
    date_val string,
    country  string,
    state    string,
    code     string,
    cases    int,
    deaths   int
)
row format delimited
    fields terminated by ','
location '/input/covid';

select * from covid1;

create external table covid2 (
    date_val string,
    country  string,
    state    string,
    code     string,
    cases    int,
    deaths   int
)
row format delimited
    fields terminated by ','
location '/input/covid';

select * from covid2;

-- Drop covid1, then read the same location again through covid2.
drop table covid1;
select * from covid2;

-- Drop covid2 as well, then query it once more.
-- NOTE(review): covid2 has just been dropped, so this select presumably exists
-- to show that the table can no longer be queried — confirm the intent.
drop table covid2;
select * from covid2;

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
二、Hive的复杂类型
1.array类型
-- 1、准备数据
zhangsan beijing,shanghai,tianjin,hangzhou
wangwu changchun,chengdu,wuhan,beijing

-- 2. Create the table: fields are split on tab, array elements on comma.
create external table hive_array (
    name           string,
    work_locations array<string>
)
row format delimited
    fields terminated by '\t'
    collection items terminated by ',';

-- 3. Load the local data file.
load data local inpath '/export/data/hivedatas/work_locations.txt'
    into table hive_array;

select * from hive_array;

-- 4. Query the data.
-- All rows.
select * from hive_array;

-- First element of the work_locations array (index 0).
select
    name,
    work_locations[0] location
from hive_array;

-- Number of elements in the work_locations array.
select
    name,
    size(work_locations) location_size
from hive_array;

-- Rows whose work_locations array contains 'tianjin'.
select *
from hive_array
where array_contains(work_locations, 'tianjin');

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
2.map类型
-- 1、准备数据

1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26

-- 2. Create the table.
-- Each input row looks like: id,name,key:value#key:value#...,age
--   * fields are separated by ','
--   * map entries are separated by '#'
--   * a key is separated from its value by ':'
-- Fix: the map column previously had no value type (map<string,>), which is
-- not a valid type declaration; both keys and values are text
-- (e.g. father:xiaoming), so the column is map<string,string>.
create table hive_map (
    id      int,
    name    string,
    members map<string,string>,
    age     int
)
row format delimited
    fields terminated by ','
    collection items terminated by '#'
    map keys terminated by ':';

-- 3. Load the local data file.
load data local inpath '/export/data/hivedatas/hive_map.txt'
    into table hive_map;

select * from hive_map;

-- 4. Query operations.
select * from hive_map;

-- Look up values by key.
select
    id,
    name,
    members['father'] father,
    members['mother'] mother,
    age
from hive_map;

-- All keys of the map.
select
    id,
    name,
    map_keys(members) as relation
from hive_map;

-- All values of the map.
select
    id,
    name,
    map_values(members) as relation
from hive_map;

-- Number of key/value pairs.
select
    id,
    name,
    size(members) num
from hive_map;

-- Rows that have a given key: which family maps contain 'brother'.
select *
from hive_map
where array_contains(map_keys(members), 'brother');

-- Rows that contain the 'brother' key, together with the value stored under it.
select
    id,
    name,
    members['brother'] brother
from hive_map
where array_contains(map_keys(members), 'brother');

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
3.struct类型
class 类名{
String name,
int age ,
double score
}

-- 1、准备数据
192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70

-- 2. Create the table: '#' separates the ip field from the struct,
--    ':' separates the members inside the struct.
create table hive_struct (
    ip   string,
    info struct<name:string, age:int>
)
row format delimited
    fields terminated by '#'
    collection items terminated by ':';

-- 3. Load the local data file into the table.
load data local inpath '/export/data/hivedatas/hive_struct.txt'
    into table hive_struct;

select * from hive_struct;

-- 4. Query the table: individual struct members first, then the whole struct.
select
    ip,
    info.name,
    info.age
from hive_struct;

select
    ip,
    info
from hive_struct;

免责申明：

本文系转载，版权归原作者所有，如若侵权请联系我们进行删除！

《数据治理行业实践白皮书》下载地址：https://fs80.cn/4w2atu

《数栈V6.0产品白皮书》下载地址：https://fs80.cn/cw0iw1

想了解或咨询更多有关袋鼠云大数据产品、行业解决方案、客户案例的朋友，浏览袋鼠云官网：https://www.dtstack.com/?src=bbs

同时，欢迎对大数据开源项目有兴趣的同学加入「袋鼠云开源框架钉钉技术群」，交流最新开源技术信息，群号码：30537511，项目地址：https://github.com/DTStack

常用分布式数据库数据治理大数据数字化转型数据开发大数据开发数据库数据处理数字化治理 hive

0条评论

上一篇：Hive的表操作2

下一篇：Yarn概述

我要提问

分享经验

社区公告

大数据领域最专业的产品&技术交流社区，专注于探讨与分享大数据领域有趣又火热的信息，专业又专注的数据人园地

最新活动更多

Hive的表操作1

我要提问

分享经验

微信扫码获取数字化转型资料

钉钉扫码加入技术交流群