hive的练习

2017-10-20  本文已影响10人  薛落花随泪绽放
#cd
#vi relations
Zhangsan Wangwu 01:01:01
Zhangsan Zhaoliu 00:11:21
Zhangsan Yuqi 00:19:01
Zhangsan Jingba 00:21:01
Zhangsan Wuxi 01:31:17
Wangwu Zhaoliu 00:51:01
Wangwu Zhaoliu 01:11:19
Wangwu Yuqi 00:00:21
Wangwu Yuqi 00:37:21
Yuqi Zhaoliu 00:23:01
Yuqi Wuxi 00:18:00
Jingba Wangwu 00:01:01
Jingba Wangwu 00:00:06
Jingba Wangwu 00:02:03
Jingba Wangwu 00:02:54
Wangwu Yuqi 01:00:19
#hadoop fs -put relations /
#vi test_mapper.py
import sys
for line in sys.stdin:
        line = line.strip()
        fromstr,tostr,time =line.split('\t')
        hours,minutes,secondes = time.split(':')
        newtime = int(hours)*60*60+int(minutes)*60+int(secondes)
        if cmp(fromstr,tostr) == -1:
          #print(cmp(fromstr,tostr))
          fromstr,tostr = tostr,fromstr
        print ' ' .join([fromstr+tostr,str(newtime)])

#vi test.hive
CREATE TABLE relations_new (
fromtostr STRING,
   duration INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ' ';

add FILE test_mapper.py;

INSERT OVERWRITE TABLE relations_new
SELECT
  TRANSFORM (fromstr,tostr,duration)
  USING 'python test_mapper.py'
  AS (fromtostr,duration)
FROM relations;
#hive
create table relations (
fromstr string,
tostr string,
duration string)
row format delimited fields terminated by ' ';

load data inpath '/relations' overwrite into table relations;
exit;
#hive -f test.hive
#hive

select 
    fromtostr,
    duration,
    rank() over(order by duration desc) rank,
dense_rank() over(order by duration) dense_rank,
row_number() over(order by duration) row_number
from relations_new;

OK
WangwuJingba    6   1   1   1
YuqiWangwu  21  2   2   2
WangwuJingba    61  3   3   3
WangwuJingba    123 4   4   4
WangwuJingba    174 5   5   5
ZhaoliuZhangsan 681 6   6   6
YuqiWuxi    1080    7   7   7
YuqiWangwu  1109    8   8   8
ZhangsanYuqi    1141    9   9   9
ZhangsanJingba  1261    10  10  10
ZhaoliuYuqi 1381    11  11  11
YuqiWangwu  2241    12  12  12
ZhaoliuWangwu   3061    13  13  13
YuqiWangwu  3619    14  14  14
ZhangsanWangwu  3661    15  15  15
ZhaoliuWangwu   4279    16  16  16
ZhangsanWangwu  4713    17  17  17
ZhangsanWuxi    5477    18  18  18
Time taken: 27.345 seconds, Fetched: 18 row(s)



上一篇 下一篇

猜你喜欢

热点阅读