1 Star 2 Fork 1

小明/gmalll

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
DWS.sql 17.45 KB
一键复制 编辑 原始数据 按行查看 历史
--列式存储,snappy压缩
--命名规范 dws_数据域_统计粒度_业务过程_统计周期(1d/nd/td)
--1d表示近1日,nd表示最近n日,td表示历史至今
--用于将DIM,DWD的数据进行提前统计,将统计结果保存到当前表中
--所以当前表不是最终统计结果表
--数据量就可能有点多,表中应该添加分区
--当前表需要进一步的聚合处理,所以表中应该是列式存储,并且采用snappy压缩
--表的分类
--1d:1天的数据统计
--数据来源为DIM,DWD
--nd:N天的数据统计
-- 数据来源必须为1d表
--td:所有的数据统计
-- 数据来源可以为1d也可以是nd
--表的设计
--参考ADS层设计
--指标体系:原子指标,派生指标,衍生指标
--原子指标(拆分指标)
--行为,统计字段,统计逻辑
--派生指标(增加条件)
--统计周期(范围)+业务限定(筛选条件)+统计粒度(分组维度)
--衍生指标(比例关系)
--交易域用户商品粒度订单最近1日汇总表
--交易域
--商品用户粒度
--user+sku
--订单
--下单:
--最近一日
--数据范围
DROP TABLE IF EXISTS dws_trade_user_sku_order_1d;
CREATE EXTERNAL TABLE dws_trade_user_sku_order_1d
(
`user_id` STRING COMMENT '用户ID',
`sku_id` STRING COMMENT 'SKU_ID',
`sku_name` STRING COMMENT 'SKU名称',
`category1_id` STRING COMMENT '一级品类ID',
`category1_name` STRING COMMENT '一级品类名称',
`category2_id` STRING COMMENT '二级品类ID',
`category2_name` STRING COMMENT '二级品类名称',
`category3_id` STRING COMMENT '三级品类ID',
`category3_name` STRING COMMENT '三级品类名称',
`tm_id` STRING COMMENT '品牌ID',
`tm_name` STRING COMMENT '品牌名称',
`order_count_1d` BIGINT COMMENT '最近1日下单次数',
`order_num_1d` BIGINT COMMENT '最近1日下单件数',
`order_original_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
`activity_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日活动优惠金额',
`coupon_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日优惠券优惠金额',
`order_total_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单最终金额'
) COMMENT '交易域用户商品粒度订单最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_trade_user_sku_order_1d'
TBLPROPERTIES ('orc.compress' = 'snappy');
--数据装载
-- --首日包含历史数据
select count(*)
from ods_log_inc;
set hive.exec.dynamic.partition.mode=nonstrict;
-- Hive的bug:对某些类型数据的处理可能会导致报错,关闭矢量化查询优化解决
set hive.vectorized.execution.enabled = false;
insert overwrite table dws_trade_user_sku_order_1d partition (dt)
select
`user_id` ,-- STRING COMMENT '用户ID',
`sku_id` ,-- STRING COMMENT 'SKU_ID',
`sku_name` ,-- STRING COMMENT 'SKU名称',
`category1_id` ,-- STRING COMMENT '一级品类ID',
`category1_name` ,-- STRING COMMENT '一级品类名称',
`category2_id` ,-- STRING COMMENT '二级品类ID',
`category2_name` ,-- STRING COMMENT '二级品类名称',
`category3_id` ,-- STRING COMMENT '三级品类ID',
`category3_name` ,-- STRING COMMENT '三级品类名称',
`tm_id` ,-- STRING COMMENT '品牌ID',
`tm_name` ,-- STRING COMMENT '品牌名称',
count(distinct order_id) `order_count_1d` ,-- BIGINT COMMENT '最近1日下单次数',
sum(sku_num) `order_num_1d` ,-- BIGINT COMMENT '最近1日下单件数',
sum(split_original_amount)`order_original_amount_1d` ,-- DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
sum(split_activity_amount)`activity_reduce_amount_1d`,-- DECIMAL(16, 2) COMMENT '最近1日活动优惠金额',
sum(split_coupon_amount)`coupon_reduce_amount_1d` ,-- DECIMAL(16, 2) COMMENT '最近1日优惠券优惠金额',
sum(split_total_amount)`order_total_amount_1d` , -- DECIMAL(16, 2) COMMENT '最近1日下单最终金额'
dt
from (
select
user_id,
sku_id,
order_id,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount,
dt
from dwd_trade_order_detail_inc
)od left join (
select
`sku_name` ,-- STRING COMMENT 'SKU名称',
`category1_id` ,-- STRING COMMENT '一级品类ID',
`category1_name` ,-- STRING COMMENT '一级品类名称',
`category2_id` ,-- STRING COMMENT '二级品类ID',
`category2_name` ,-- STRING COMMENT '二级品类名称',
`category3_id` ,-- STRING COMMENT '三级品类ID',
`category3_name` ,-- STRING COMMENT '三级品类名称',
`tm_id` ,-- STRING COMMENT '品牌ID',
`tm_name` -- STRING COMMENT '品牌名称',
from dim_sku_full
where dt="2022-06-08"
)tm on od.sku_id=tm.tm_id
group by `user_id`, `sku_id`, `sku_name`, `category1_id`, `category1_name`, `category2_id`, `category2_name`, `category3_id`, `category3_name`, `tm_id`, `tm_name`,dt;
--每日,不包含历史数据
insert overwrite table dws_trade_user_sku_order_1d partition (dt="2022-06-09")
select
`user_id` ,-- STRING COMMENT '用户ID',
`sku_id` ,-- STRING COMMENT 'SKU_ID',
`sku_name` ,-- STRING COMMENT 'SKU名称',
`category1_id` ,-- STRING COMMENT '一级品类ID',
`category1_name` ,-- STRING COMMENT '一级品类名称',
`category2_id` ,-- STRING COMMENT '二级品类ID',
`category2_name` ,-- STRING COMMENT '二级品类名称',
`category3_id` ,-- STRING COMMENT '三级品类ID',
`category3_name` ,-- STRING COMMENT '三级品类名称',
`tm_id` ,-- STRING COMMENT '品牌ID',
`tm_name` ,-- STRING COMMENT '品牌名称',
count(distinct order_id) `order_count_1d` ,-- BIGINT COMMENT '最近1日下单次数',
sum(sku_num) `order_num_1d` ,-- BIGINT COMMENT '最近1日下单件数',
sum(split_original_amount)`order_original_amount_1d` ,-- DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
sum(split_activity_amount)`activity_reduce_amount_1d`,-- DECIMAL(16, 2) COMMENT '最近1日活动优惠金额',
sum(split_coupon_amount)`coupon_reduce_amount_1d` ,-- DECIMAL(16, 2) COMMENT '最近1日优惠券优惠金额',
sum(split_total_amount)`order_total_amount_1d` -- DECIMAL(16, 2) COMMENT '最近1日下单最终金额'
from (
select
user_id,
sku_id,
order_id,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount,
dt
from dwd_trade_order_detail_inc
where dt="2022-06-09"
)od left join (
select
`sku_name` ,-- STRING COMMENT 'SKU名称',
`category1_id` ,-- STRING COMMENT '一级品类ID',
`category1_name` ,-- STRING COMMENT '一级品类名称',
`category2_id` ,-- STRING COMMENT '二级品类ID',
`category2_name` ,-- STRING COMMENT '二级品类名称',
`category3_id` ,-- STRING COMMENT '三级品类ID',
`category3_name` ,-- STRING COMMENT '三级品类名称',
`tm_id` ,-- STRING COMMENT '品牌ID',
`tm_name` -- STRING COMMENT '品牌名称',
from dim_sku_full
where dt="2022-06-09"
)tm on od.sku_id=tm.tm_id
group by `user_id`, `sku_id`, `sku_name`, `category1_id`, `category1_name`, `category2_id`, `category2_name`, `category3_id`, `category3_name`, `tm_id`, `tm_name`;
--用户订单近n日汇总表
--设计:参考1d表
--数据来源1d表
DROP TABLE IF EXISTS dws_trade_user_sku_order_30d;
CREATE EXTERNAL TABLE dws_trade_user_sku_order_30d
(
`user_id` STRING COMMENT '用户ID',
`sku_id` STRING COMMENT 'SKU_ID',
`sku_name` STRING COMMENT 'SKU名称',
`category1_id` STRING COMMENT '一级品类ID',
`category1_name` STRING COMMENT '一级品类名称',
`category2_id` STRING COMMENT '二级品类ID',
`category2_name` STRING COMMENT '二级品类名称',
`category3_id` STRING COMMENT '三级品类ID',
`category3_name` STRING COMMENT '三级品类名称',
`tm_id` STRING COMMENT '品牌ID',
`tm_name` STRING COMMENT '品牌名称',
`order_count_7d` BIGINT COMMENT '最近7日下单次数',
`order_num_7d` BIGINT COMMENT '最近7日下单件数',
`order_original_amount_7d` DECIMAL(16, 2) COMMENT '最近7日下单原始金额',
`activity_reduce_amount_7d` DECIMAL(16, 2) COMMENT '最近7日活动优惠金额',
`coupon_reduce_amount_7d` DECIMAL(16, 2) COMMENT '最近7日优惠券优惠金额',
`order_total_amount_7d` DECIMAL(16, 2) COMMENT '最近7日下单最终金额',
`order_count_30d` BIGINT COMMENT '最近30日下单次数',
`order_num_30d` BIGINT COMMENT '最近30日下单件数',
`order_original_amount_30d` DECIMAL(16, 2) COMMENT '最近30日下单原始金额',
`activity_reduce_amount_30d`DECIMAL(16, 2) COMMENT '最近30日活动优惠金额',
`coupon_reduce_amount_30d` DECIMAL(16, 2) COMMENT '最近30日优惠券优惠金额',
`order_total_amount_30d` DECIMAL(16, 2) COMMENT '最近30天下单最终金额'
) COMMENT '交易域用户商品粒度订单最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_trade_user_sku_order_30d'
TBLPROPERTIES ('orc.compress' = 'snappy');
insert overwrite table dws_trade_user_sku_order_30d partition (dt="2022-06-08")
select
`user_id` ,
`sku_id` ,
`sku_name` ,
`category1_id` ,
`category1_name` ,
`category2_id` ,
`category2_name` ,
`category3_id` ,
`category3_name` ,
`tm_id` ,
`tm_name` ,
sum(if (dt>= date_sub("2022-06-08",6),order_count_1d,0) ),
sum(if (dt>= date_sub("2022-06-08",6),order_num_1d,0)),
sum(if (dt>= date_sub("2022-06-08",6),order_original_amount_1d,0)),
sum(if (dt>= date_sub("2022-06-08",6),activity_reduce_amount_1d,0)),
sum(if (dt>= date_sub("2022-06-08",6),coupon_reduce_amount_1d,0)),
sum(if (dt>= date_sub("2022-06-08",6),coupon_reduce_amount_1d,0)),
sum(order_count_1d),
sum(order_num_1d),
sum(order_original_amount_1d),
sum(activity_reduce_amount_1d),
sum(coupon_reduce_amount_1d),
sum(coupon_reduce_amount_1d)
from dws_trade_user_sku_order_1d
where dt>=date_sub("2022-06-08",29) and dt<="2022-06-08"
group by `user_id`, `sku_id`, `sku_name`, `category1_id`, `category1_name`, `category2_id`, `category2_name`, `category3_id`, `category3_name`, `tm_id`, `tm_name` ;
--1d 交易域用户粒度订单最近1日汇总表
--交易域
--用户粒度
--user
--订单
--下单
--最近1日汇总表
DROP TABLE IF EXISTS dws_trade_user_order_1d;
CREATE EXTERNAL TABLE dws_trade_user_order_1d
(
`user_id` STRING COMMENT '用户ID',
`order_count_1d` BIGINT COMMENT '最近1日下单次数',
`order_num_1d` BIGINT COMMENT '最近1日下单商品件数',
`order_original_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
`activity_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单活动优惠金额',
`coupon_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单优惠券优惠金额',
`order_total_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单最终金额'
) COMMENT '交易域用户粒度订单最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_trade_user_order_1d'
TBLPROPERTIES ('orc.compress' = 'snappy');
--首日数据装载
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_trade_user_order_1d partition(dt)
select
user_id,
count(distinct order_id),
sum(sku_num),
sum(split_original_amount),
sum(split_activity_amount),
sum(split_coupon_amount),
sum(split_total_amount),
dt
from dwd_trade_order_detail_inc
group by dt,user_id;
--每日数据装载
insert overwrite table dws_trade_user_order_1d partition(dt="2022-06-09")
select
user_id,
count(distinct order_id),
sum(sku_num),
sum(split_original_amount),
sum(split_activity_amount),
sum(split_coupon_amount),
sum(split_total_amount)
from dwd_trade_order_detail_inc
where dt="2022-06-09"
group by user_id;
--30d表
DROP TABLE IF EXISTS dws_trade_user_order_30d;
CREATE EXTERNAL TABLE dws_trade_user_order_30d
(
`user_id` STRING COMMENT '用户ID',
`order_count_30d` BIGINT COMMENT '最近1日下单次数',
`order_num_30d` BIGINT COMMENT '最近1日下单商品件数',
`order_original_amount_30d` DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
`activity_reduce_amount_30d` DECIMAL(16, 2) COMMENT '最近1日下单活动优惠金额',
`coupon_reduce_amount_30d` DECIMAL(16, 2) COMMENT '最近1日下单优惠券优惠金额',
`order_total_amount_30d` DECIMAL(16, 2) COMMENT '最近1日下单最终金额'
) COMMENT '交易域用户粒度订单最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_trade_user_order_30d'
TBLPROPERTIES ('orc.compress' = 'snappy');
--交易域用户粒度支付最近1日汇总表
--订单
--交易行为
--建表语句
--未执行
DROP TABLE IF EXISTS dws_trade_user_order_td;
CREATE EXTERNAL TABLE dws_trade_user_order_td
(
`user_id` STRING COMMENT '用户ID',
`order_date_first` STRING COMMENT '历史至今首次下单日期',
`order_date_last` STRING COMMENT '历史至今末次下单日期',
`order_count_td` BIGINT COMMENT '历史至今下单次数',
`order_num_td` BIGINT COMMENT '历史至今购买商品件数',
`original_amount_td` DECIMAL(16, 2) COMMENT '历史至今下单原始金额',
`activity_reduce_amount_td` DECIMAL(16, 2) COMMENT '历史至今下单活动优惠金额',
`coupon_reduce_amount_td` DECIMAL(16, 2) COMMENT '历史至今下单优惠券优惠金额',
`total_amount_td` DECIMAL(16, 2) COMMENT '历史至今下单最终金额'
) COMMENT '交易域用户粒度订单历史至今汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_trade_user_order_td'
TBLPROPERTIES ('orc.compress' = 'snappy');
--数据装
insert overwrite table dws_trade_user_order_td partition (dt="2022-06-08")
select
user_id,
min(dt) order_date_first,
max(dt) order_date_last,
sum(order_count_1d),
sum(order_num_1d),
sum(order_original_amount_1d),
sum(activity_reduce_amount_1d),
sum(coupon_reduce_amount_1d),
sum(order_total_amount_1d)
from dws_trade_user_order_1d
group by user_id ;
--每日数据加载
--直比前一天的数据多了一天的数据,而前一天的数据已经统计过了
--获取昨天的统计结果+今天的新数据
insert overwrite table dws_trade_user_order_td partition(dt='2022-06-09')
select user_id,
min(order_date_first) as order_date_first,
max(order_date_last) as order_date_last,
sum(order_count_td) as order_count_td,
sum(order_num_td) as order_num_td,
sum(original_amount_td) as original_amount_td,
sum(activity_reduce_amount_td) as activity_reduce_amount_td,
sum(coupon_reduce_amount_td) as coupon_reduce_amount_td,
sum(total_amount_td) as total_amount_td
from (
select user_id,
order_date_first,
order_date_last,
order_count_td,
order_num_td,
original_amount_td,
activity_reduce_amount_td,
coupon_reduce_amount_td,
total_amount_td
from dws_trade_user_order_td
where dt = date_add('2022-06-09', -1)
union all
select user_id,
'2022-06-09' order_date_first,
'2022-06-09' order_date_last,
order_count_1d,
order_num_1d,
order_original_amount_1d,
activity_reduce_amount_1d,
coupon_reduce_amount_1d,
order_total_amount_1d
from dws_trade_user_order_1d
where dt = '2022-06-09') t1
group by user_id;
select count(*)
from dws_trade_user_order_td;
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
SQL
1
https://gitee.com/xiaoming12139/gmalll.git
git@gitee.com:xiaoming12139/gmalll.git
xiaoming12139
gmalll
gmalll
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385