SQL
2020-04-24 本文已影响0人
渡猫
碎片文件过多
sqly语句最后加上
distribute by ss_sold_date_sk, cast(rand() * 5 as int);
解析json文件并处理换行符
regexp_replace(get_json_object(json_content, '$.body'), '[\\n\\r]', '') as content
避免mapreduce
set hive.exce.mode.local.auto = true;
避免float错误
0.2对于float 类型是0.2000001,而对于double类型是0.200000000001
>cast(0.2 as float)
扫描一次数据
FROM history
INSERT OVERWRITE sales SELECT * WHERE action='purchased'
INSERT OVERWRITE credits SELECT * WHERE action='returned';
presto与hive的行列互转
concat_ws(',', collect_set(order_id))
array_join(array_distinct(array_agg(order_id)), ',')
lateral view explode(split(order_ids, ',')) b as order_id
cross join unnest(split(order_ids, ',')) as b(order_id)
插入总行数
sum(1) over()