您的位置:首页 > 其它

Hive的HQL的基本操作

2016-05-13 14:35 302 查看
HQL的基本命令(全):

1、查看数据库

show databases; //查看已经存在的数据库

describe database test; //查看某个已经存在的数据库

2、创建数据库

create database test;

create database if not exists test;

create database test2 location '/user/hadoop/temp'; //制定数据库创建的目录

3、删除数据库

drop database if exists test1 cascade;

4、切换当前工作的数据库

use test2;

5、查看数据库中的表

show tables; //查看当前工作的数据库中的表

show tables in test3; //查看数据库test3中的表

6、创建表

create table if not exists test.student(

name string comment 'student name',

age int comment 'student age',

course array<string>,

body map<string,float>,

address struct<street:string,city:string,state:string>

)

comment 'the info of student' //表的备注

row format delimited fields terminated by '\001' //指定列分隔符’\001’

collection items terminated by '\002' //指定集合元素间的分隔符’\002’

map keys terminated by '\003' //指定类型为MAP字段的键值对分割符

lines terminated by '\n' //指定行分割符

stored as textfile //存储的文件格式

location '/user/hive/warehouse/test.db/student';

create table if not exists test.student(

id string comment 'student id',

name string comment 'student name',

sex string comment 'student sex',

age int comment 'student age'

)

comment 'the info of student'

row format delimited fields terminated by '\t'

lines terminated by '\n'

stored as textfile;

create table if not exists student2 like student; //复制已存在的表结构创建表(不复制数据)

Hive中建表默认为管理表(managed table),当表需要被其它工具分析时,需要建立外部表(exter nal table)

create exter nal table -- //建立外部表

建立分区表

create table student_info(

id string,

name string,

age int)

partitioned by (province string,city string);

7、查看表结构信息以及列的注释

desc student;

8、查看表的详细信息

desc formatted student;

9、查看分区表的详细信息

describe extended student_info;

10、删除表

drop table student;

drop table if exists student;

11、表重命名

alter table student rename to student_1;

12、增加、修改、删除分区

alter table student_info add partition (province='fujain',city='fuzhou') location

'/user/hive/warehouse/test/fujian/fuzhou'; //表必须为分区表,且province、city为分区字段

alter table student_info drop partition(province='fujian',city='fuzhou');

13、修改列信息

alter table student_1

change column age sage int

comment 'the student age'

after name;

14、增加列

alter table student_1 add columns (new_col int);

15、装载数据

load data local inpath '/home/hadoop/student_1' into table student; //
从本地加载数据

load data inpath '/home/hadoop/student_1' into table student; //
从HDFS上加载数据

16、通过查询语句向表中插入语句:

insert overwrite table student2 select * from student;

17、导出数据:

insert overwrite directory '/hive_tmp' select * from student2;

insert overwrite local directory '/home/hadoop/hive_tmp' select * from student2;

18、简单查询语句:

select id,name,age from student;

select id,name,age from student limit 3;

19、在查询语句中对表数据进行简单处理:

select id,name,

case

when age ='10' then '10岁'

when age='12' then '12岁'

else age

end

from student;

20、查询语句中添加条件:

select id,name,age from student where age >13;

select id,name,age from student where age >13 or id <1002;

21、Group by 语句:

select age,count(*) from student group by age;

select age,count(*) from student group by age having age>12;

24、Join 语句

select t1.id,t2.id from student t1 join score t2 on t1.id = t2.id; //内连接

select t1.id,t2.id from student t1 left outer join score t2 on t1.id = t2.id; //左连接

select t1.id,t2.id from student t1 right outer join score t2 on t1.id = t2.id; //右连接

select t1.id,t2.id from student t1 full outer join score t2 on t1.id = t2.id; //全外连接

select t1.id,t2.id from student t1 left semi join score t2 on t1.id = t2.id; //左半连接

select /*+mapjoin(t1)*/ t1.id,t2.id from student t1 join score t2 on t1.id = t2.id;//map端连接

25、排序语句:

select id,name,sex,age from student order by age asc,id desc;

26、分桶和抽样:

select * from student tablesample(bucket 1 out of 3 on id);

select * from student tablesample(bucket 1 out of 3 on rand());

27、Union All

select t.id

from(

select t1.id from student t1

union all

select t2.id from score t2) t;
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: