作业:电影评分系统 Hive 实战——用正则表达式 RegexSerDe(仅支持 String 类型的列)解决 Hive 源文件使用多字符分隔符("::")的问题
2016-12-02 21:46
471 查看
root@master:/usr/local/hadoop-2.6.0/sbin# start-dfs.sh
Starting namenodes on [master]
master: starting namenode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-namenode-master.out
worker1: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker1.out
worker6: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker6.out
worker4: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker4.out
worker7: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker7.out
worker3: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker3.out
worker5: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker5.out
worker8: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker8.out
worker2: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker2.out
Starting secondary namenodes [0.0.0.0]
root@master:/usr/local/hadoop-2.6.0/sbin# start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-resourcemanager-master.out
worker4: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker4.out
worker7: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker7.out
worker8: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker8.out
worker2: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker2.out
worker3: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker3.out
worker5: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker5.out
worker6: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker6.out
worker1: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker1.out
电影表
^(.*)::(.*)::(.*)$
说明: .* 表示任意数量的任意字符(贪婪匹配),() 表示捕获分组,每个分组按顺序对应表中的一列
用户表
^(.*)::(.*)::(.*)::(.*)::(.*)$
评分表
^(.*)::(.*)::(.*)::(.*)$
建立电影表
-- Movies table. Each source line has three fields separated by '::'.
--
-- Attempt 1 (abandoned): the delimited row format does not split on the
-- two-character delimiter '::' as intended.
-- CREATE TABLE movies(MovieID BigInt, Title String, Genres String) ROW FORMAT DELIMITED FIELDS TERMINATED BY '::';
-- LOAD DATA LOCAL INPATH '/usr/local/IMF_testdata/movie20161202/movies.dat' INTO TABLE movies;
--
-- Attempt 2 (abandoned): the contrib RegexSerDe only works with String
-- columns, so declaring MovieID as BigInt is rejected.
-- CREATE TABLE movies(MovieID BigInt, Title String, Genres String) ROW FORMAT serde 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' with serdeproperties ('input.regex' = '^(.*)::(.*)::(.*)$' , 'output.format.string' = '%1$s%2$s%3$s') stored as textfile;
--
-- Working version: every column is String and each regex capture group maps
-- to one column, in order.
-- NOTE(review): the contrib SerDe presumably needs the hive-contrib jar on
-- the classpath (ADD JAR ...) -- confirm for this installation.
CREATE TABLE movies (
    MovieID String,
    Title   String,
    Genres  String
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '^(.*)::(.*)::(.*)$',
    -- Keep the '::' separators on output so that rows written through this
    -- SerDe can be parsed again by input.regex.
    'output.format.string' = '%1$s::%2$s::%3$s'
)
STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH '/usr/local/IMF_testdata/movie20161202/movies.dat' INTO TABLE movies;
-- Users table, partitioned by load date (dt). Each source line has five
-- fields separated by '::'; each regex capture group maps to one column, in
-- order. All columns are String because the contrib RegexSerDe only works
-- with String columns.
CREATE TABLE users (
    UserID     String,
    Gender     String,
    Age        String,
    Occupation String,
    Zipcode    String
)
PARTITIONED BY (dt String)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '^(.*)::(.*)::(.*)::(.*)::(.*)$',
    -- Keep the '::' separators on output so that rows written through this
    -- SerDe can be parsed again by input.regex.
    'output.format.string' = '%1$s::%2$s::%3$s::%4$s::%5$s'
)
STORED AS TEXTFILE;

-- The local file is loaded into the dt='20161101' partition.
LOAD DATA LOCAL INPATH '/usr/local/IMF_testdata/movie20161202/users.dat' INTO TABLE users PARTITION (dt = '20161101');
-- Ratings table. Each source line has four fields separated by '::'; each
-- regex capture group maps to one column, in order. All columns are String
-- because the contrib RegexSerDe only works with String columns.
CREATE TABLE ratings (
    UserID      String,
    MovieID     String,
    Rating      String,
    Timestamped String
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '^(.*)::(.*)::(.*)::(.*)$',
    -- Keep the '::' separators on output so that rows written through this
    -- SerDe can be parsed again by input.regex.
    'output.format.string' = '%1$s::%2$s::%3$s::%4$s'
)
STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH '/usr/local/IMF_testdata/movie20161202/ratings.dat' INTO TABLE ratings;
-- Users (id, age, gender) who rated movie 2916.
-- MovieID is a String column, so it is compared with a string literal to
-- avoid an implicit string-to-number conversion on every row.
SELECT
    users.UserID,
    users.Age,
    users.Gender
FROM ratings
INNER JOIN users
    ON ratings.UserID = users.UserID
WHERE ratings.MovieID = '2916';
-- Users who rated movie 2916, together with the movie's title and genres.
-- The MAPJOIN hint asks Hive to load the movies table into memory and perform
-- the join on the map side.
-- NOTE(review): some Hive versions ignore MAPJOIN hints unless
-- hive.ignore.mapjoin.hint is set to false -- confirm for this cluster.
-- MovieID is a String column, so it is compared with a string literal.
SELECT /*+ MAPJOIN(movies) */
    users.UserID,
    users.Age,
    users.Gender,
    movies.Title,
    movies.Genres
FROM ratings
INNER JOIN users
    ON ratings.UserID = users.UserID
INNER JOIN movies
    ON ratings.MovieID = movies.MovieID
WHERE ratings.MovieID = '2916';
Starting namenodes on [master]
master: starting namenode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-namenode-master.out
worker1: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker1.out
worker6: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker6.out
worker4: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker4.out
worker7: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker7.out
worker3: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker3.out
worker5: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker5.out
worker8: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker8.out
worker2: starting datanode, logging to /usr/local/hadoop-2.6.0/logs/hadoop-root-datanode-worker2.out
Starting secondary namenodes [0.0.0.0]
root@master:/usr/local/hadoop-2.6.0/sbin# start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-resourcemanager-master.out
worker4: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker4.out
worker7: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker7.out
worker8: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker8.out
worker2: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker2.out
worker3: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker3.out
worker5: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker5.out
worker6: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker6.out
worker1: starting nodemanager, logging to /usr/local/hadoop-2.6.0/logs/yarn-root-nodemanager-worker1.out
电影表
^(.*)::(.*)::(.*)$
说明: .* 表示任意数量的任意字符(贪婪匹配),() 表示捕获分组,每个分组按顺序对应表中的一列
用户表
^(.*)::(.*)::(.*)::(.*)::(.*)$
评分表
^(.*)::(.*)::(.*)::(.*)$
建立电影表
-- Movies table. Each source line has three fields separated by '::'.
--
-- Attempt 1 (abandoned): the delimited row format does not split on the
-- two-character delimiter '::' as intended.
-- CREATE TABLE movies(MovieID BigInt, Title String, Genres String) ROW FORMAT DELIMITED FIELDS TERMINATED BY '::';
-- LOAD DATA LOCAL INPATH '/usr/local/IMF_testdata/movie20161202/movies.dat' INTO TABLE movies;
--
-- Attempt 2 (abandoned): the contrib RegexSerDe only works with String
-- columns, so declaring MovieID as BigInt is rejected.
-- CREATE TABLE movies(MovieID BigInt, Title String, Genres String) ROW FORMAT serde 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' with serdeproperties ('input.regex' = '^(.*)::(.*)::(.*)$' , 'output.format.string' = '%1$s%2$s%3$s') stored as textfile;
--
-- Working version: every column is String and each regex capture group maps
-- to one column, in order.
-- NOTE(review): the contrib SerDe presumably needs the hive-contrib jar on
-- the classpath (ADD JAR ...) -- confirm for this installation.
CREATE TABLE movies (
    MovieID String,
    Title   String,
    Genres  String
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '^(.*)::(.*)::(.*)$',
    -- Keep the '::' separators on output so that rows written through this
    -- SerDe can be parsed again by input.regex.
    'output.format.string' = '%1$s::%2$s::%3$s'
)
STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH '/usr/local/IMF_testdata/movie20161202/movies.dat' INTO TABLE movies;
-- Users table, partitioned by load date (dt). Each source line has five
-- fields separated by '::'; each regex capture group maps to one column, in
-- order. All columns are String because the contrib RegexSerDe only works
-- with String columns.
CREATE TABLE users (
    UserID     String,
    Gender     String,
    Age        String,
    Occupation String,
    Zipcode    String
)
PARTITIONED BY (dt String)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '^(.*)::(.*)::(.*)::(.*)::(.*)$',
    -- Keep the '::' separators on output so that rows written through this
    -- SerDe can be parsed again by input.regex.
    'output.format.string' = '%1$s::%2$s::%3$s::%4$s::%5$s'
)
STORED AS TEXTFILE;

-- The local file is loaded into the dt='20161101' partition.
LOAD DATA LOCAL INPATH '/usr/local/IMF_testdata/movie20161202/users.dat' INTO TABLE users PARTITION (dt = '20161101');
-- Ratings table. Each source line has four fields separated by '::'; each
-- regex capture group maps to one column, in order. All columns are String
-- because the contrib RegexSerDe only works with String columns.
CREATE TABLE ratings (
    UserID      String,
    MovieID     String,
    Rating      String,
    Timestamped String
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '^(.*)::(.*)::(.*)::(.*)$',
    -- Keep the '::' separators on output so that rows written through this
    -- SerDe can be parsed again by input.regex.
    'output.format.string' = '%1$s::%2$s::%3$s::%4$s'
)
STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH '/usr/local/IMF_testdata/movie20161202/ratings.dat' INTO TABLE ratings;
-- Users (id, age, gender) who rated movie 2916.
-- MovieID is a String column, so it is compared with a string literal to
-- avoid an implicit string-to-number conversion on every row.
SELECT
    users.UserID,
    users.Age,
    users.Gender
FROM ratings
INNER JOIN users
    ON ratings.UserID = users.UserID
WHERE ratings.MovieID = '2916';
-- Users who rated movie 2916, together with the movie's title and genres.
-- The MAPJOIN hint asks Hive to load the movies table into memory and perform
-- the join on the map side.
-- NOTE(review): some Hive versions ignore MAPJOIN hints unless
-- hive.ignore.mapjoin.hint is set to false -- confirm for this cluster.
-- MovieID is a String column, so it is compared with a string literal.
SELECT /*+ MAPJOIN(movies) */
    users.UserID,
    users.Age,
    users.Gender,
    movies.Title,
    movies.Genres
FROM ratings
INNER JOIN users
    ON ratings.UserID = users.UserID
INNER JOIN movies
    ON ratings.MovieID = movies.MovieID
WHERE ratings.MovieID = '2916';
相关文章推荐
- UNIX系统平台下文件上传慢问题解决
- 解决Window系统默认打开文件操作的错误绑定的问题
- 解决Vista进入桌面前“文件缺失”提示不能进入系统的问题
- 解决系统hal.dll文件丢失问题
- 解决IIS cpu100%的一个问题,有瑕疵的正则表达式可能会引起.net的死循环
- 文件系统不同步问题resource is out of sync with the file system的解决办法
- 巧夺天工:采用正则表达式解决树匹配问题
- UNIX系统平台下文件上传慢问题解决
- 解决linux下挂载windows的FAT32格式文件系统乱码问题
- Ocfs2文件系统常见问题解决方法
- 解决 Ubuntu 文件系统(磁盘)强制检查的问题
- windows2003系统的iis不能下载exe文件问题的解决
- 解决windows下文件在unix系统下出现^M的问题 及 Vi高级命令
- 解决“GStreamer 遇到了常规流错误“& “播放电影文件时显示的文件名是乱码”问题
- 修改系统文件后解决系统自动恢复的问题
- 在次记一下日期验证的正则表达式,已解决闰月问题
- 巧用多个正则表达式解决取反替换问题(解决不匹配则替换问题)——用sed和perl的正则表达式
- 【Vegas原创】用正则表达式解决FCKEditor图片路径问题
- 待解决的问题--用DOS命令删除远程主机系统文件的方法
- 如何解决电影文件.avi一播放就出错的问题