1 Star 2 Fork 1

小明/gmalll

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
DWS-1.sql 19.11 KB
一键复制 编辑 原始数据 按行查看 历史
小明 提交于 2024-06-02 18:59 . 优惠券使用统计
--交易域用户粒度加购最近1日汇总表
-- 建表语句
DROP TABLE IF EXISTS dws_trade_user_cart_add_1d;
CREATE EXTERNAL TABLE dws_trade_user_cart_add_1d
(
`user_id` STRING COMMENT '用户ID',
`cart_add_count_1d` BIGINT COMMENT '最近1日加购次数',
`cart_add_num_1d` BIGINT COMMENT '最近1日加购商品件数'
) COMMENT '交易域用户粒度加购最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_trade_user_cart_add_1d'
TBLPROPERTIES ('orc.compress' = 'snappy');
--1d表的首日包含历史数据
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.spark.client.server.connect.timeout=300000;
insert overwrite table dws_trade_user_cart_add_1d partition (dt)
select
user_id,
count(*),
sum(sku_num),
dt
from dwd_trade_cart_add_inc
group by user_id, dt ;
--每日数据
insert overwrite table dws_trade_user_cart_add_1d partition (dt="2022-06-09")
select
user_id,
count(*),
sum(sku_num)
from dwd_trade_cart_add_inc
where dt="2022-6-09"
group by user_id, dt ;
--交易域用户粒度支付最近1日汇总表
DROP TABLE IF EXISTS dws_trade_user_payment_1d;
CREATE EXTERNAL TABLE dws_trade_user_payment_1d
(
`user_id` STRING COMMENT '用户ID',
`payment_count_1d` BIGINT COMMENT '最近1日支付次数',
`payment_num_1d` BIGINT COMMENT '最近1日支付商品件数',
`payment_amount_1d` DECIMAL(16, 2) COMMENT '最近1日支付金额'
) COMMENT '交易域用户粒度支付最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_trade_user_payment_1d'
TBLPROPERTIES ('orc.compress' = 'snappy');
insert overwrite table dws_trade_user_payment_1d partition (dt)
select
user_id,
count(distinct order_id),
sum(sku_num),
sum(split_payment_amount),
dt
from dwd_trade_pay_detail_suc_inc
group by user_id,dt;
insert overwrite table dws_trade_user_payment_1d partition (dt="2022-06-09")
select
user_id,
count(distinct order_id),
sum(sku_num),
sum(split_payment_amount)
from dwd_trade_pay_detail_suc_inc
where dt="2022-06-09"
group by user_id,dt;
---交易域省份粒度订单最近1日汇总表
DROP TABLE IF EXISTS dws_trade_province_order_1d;
CREATE EXTERNAL TABLE dws_trade_province_order_1d
(
`province_id` STRING COMMENT '省份ID',
`province_name` STRING COMMENT '省份名称',
`area_code` STRING COMMENT '地区编码',
`iso_code` STRING COMMENT '旧版国际标准地区编码',
`iso_3166_2` STRING COMMENT '新版国际标准地区编码',
`order_count_1d` BIGINT COMMENT '最近1日下单次数',
`order_original_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
`activity_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单活动优惠金额',
`coupon_reduce_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单优惠券优惠金额',
`order_total_amount_1d` DECIMAL(16, 2) COMMENT '最近1日下单最终金额'
) COMMENT '交易域省份粒度订单最近1日汇总表'
PARTITIONED BY (`dt` STRING);
--首日数据装载
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_trade_province_order_1d partition (dt)
select
`province_id` ,--STRING COMMENT '省份ID',
`province_name` ,--STRING COMMENT '省份名称',
`area_code` ,--STRING COMMENT '地区编码',
`iso_code` ,--STRING COMMENT '旧版国际标准地区编码',
`iso_3166_2` ,--STRING COMMENT '新版国际标准地区编码',
`order_count_1d` ,--BIGINT COMMENT '最近1日下单次数',
`order_original_amount_1d` ,--DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
`activity_reduce_amount_1d`,--DECIMAL(16, 2) COMMENT '最近1日下单活动优惠金额',
`coupon_reduce_amount_1d` ,--DECIMAL(16, 2) COMMENT '最近1日下单优惠券优惠金额',
`order_total_amount_1d` , --DECIMAL(16, 2) COMMENT '最近1日下单最终金额'
dt
from (
select
province_id,
count(distinct order_id) order_count_1d,
sum(split_original_amount) order_original_amount_1d,
sum(split_activity_amount) activity_reduce_amount_1d,
sum(split_coupon_amount) coupon_reduce_amount_1d,
sum(split_total_amount)order_total_amount_1d,
dt
from dwd_trade_order_detail_inc
group by province_id,dt
)od left join (select
id,
province_name,
area_code,
iso_code,
iso_3166_2
from dim_province_full
)pr on od.province_id=pr.id;
--每日数据
insert overwrite table dws_trade_province_order_1d partition (dt="2022-06-09")
select
`province_id` ,--STRING COMMENT '省份ID',
`province_name` ,--STRING COMMENT '省份名称',
`area_code` ,--STRING COMMENT '地区编码',
`iso_code` ,--STRING COMMENT '旧版国际标准地区编码',
`iso_3166_2` ,--STRING COMMENT '新版国际标准地区编码',
`order_count_1d` ,--BIGINT COMMENT '最近1日下单次数',
`order_original_amount_1d` ,--DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
`activity_reduce_amount_1d`,--DECIMAL(16, 2) COMMENT '最近1日下单活动优惠金额',
`coupon_reduce_amount_1d` ,--DECIMAL(16, 2) COMMENT '最近1日下单优惠券优惠金额',
`order_total_amount_1d` --DECIMAL(16, 2) COMMENT '最近1日下单最终金额'
from (
select
province_id,
count(distinct order_id) order_count_1d,
sum(split_original_amount) order_original_amount_1d,
sum(split_activity_amount) activity_reduce_amount_1d,
sum(split_coupon_amount) coupon_reduce_amount_1d,
sum(split_total_amount)order_total_amount_1d
from dwd_trade_order_detail_inc
where dt="2022-06-09"
group by province_id
)od left join (select
id,
province_name,
area_code,
iso_code,
iso_3166_2
from dim_province_full
where dt="2022-06-09"
)pr on od.province_id=pr.id;
--交易域省份粒度订单最近n日汇总表
DROP TABLE IF EXISTS dws_trade_province_order_nd;
CREATE EXTERNAL TABLE dws_trade_province_order_nd
(
`province_id` STRING COMMENT '省份ID',
`province_name` STRING COMMENT '省份名称',
`area_code` STRING COMMENT '地区编码',
`iso_code` STRING COMMENT '旧版国际标准地区编码',
`iso_3166_2` STRING COMMENT '新版国际标准地区编码',
`order_count_7d` BIGINT COMMENT '最近7日下单次数',
`order_original_amount_7d` DECIMAL(16, 2) COMMENT '最近7日下单原始金额',
`activity_reduce_amount_7d` DECIMAL(16, 2) COMMENT '最近7日下单活动优惠金额',
`coupon_reduce_amount_7d` DECIMAL(16, 2) COMMENT '最近7日下单优惠券优惠金额',
`order_total_amount_7d` DECIMAL(16, 2) COMMENT '最近7日下单最终金额',
`order_count_30d` BIGINT COMMENT '最近30日下单次数',
`order_original_amount_30d` DECIMAL(16, 2) COMMENT '最近30日下单原始金额',
`activity_reduce_amount_30d` DECIMAL(16, 2) COMMENT '最近30日下单活动优惠金额',
`coupon_reduce_amount_30d` DECIMAL(16, 2) COMMENT '最近30日下单优惠券优惠金额',
`order_total_amount_30d` DECIMAL(16, 2) COMMENT '最近30日下单最终金额'
) COMMENT '交易域省份粒度订单最近n日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_trade_province_order_nd'
TBLPROPERTIES ('orc.compress' = 'snappy');
insert overwrite table dws_trade_province_order_nd partition (dt="2022-06-08")
select
`province_id` ,-- STRING COMMENT '省份ID',
`province_name` ,-- STRING COMMENT '省份名称',
`area_code` ,-- STRING COMMENT '地区编码',
`iso_code` ,-- STRING COMMENT '旧版国际标准地区编码',
`iso_3166_2` , -- STRING COMMENT '新版国际标准地区编码',
sum(if (dt>date_sub("2022-06-08",6),order_count_1d,0)),
sum(if (dt>date_sub("2022-06-08",6),order_original_amount_1d,0)),
sum(if (dt>date_sub("2022-06-08",6),activity_reduce_amount_1d,0)),
sum(if (dt>date_sub("2022-06-08",6),coupon_reduce_amount_1d,0)),
sum(if (dt>date_sub("2022-06-08",6),order_total_amount_1d,0)),
sum(`order_count_1d`) ,--BIGINT COMMENT '最近1日下单次数',
sum(order_original_amount_1d) ,--DECIMAL(16, 2) COMMENT '最近1日下单原始金额',
sum(activity_reduce_amount_1d),--DECIMAL(16, 2) COMMENT '最近1日下单活动优惠金额',
sum(coupon_reduce_amount_1d) ,--DECIMAL(16, 2) COMMENT '最近1日下单优惠券优惠金额',
sum(order_total_amount_1d)
from dws_trade_province_order_1d
where dt>=date_sub("2022-06-09",29)and dt<="2022-06-08"
group by `province_id`, `province_name`, `area_code`, `iso_code`, `iso_3166_2` ;
--工具域用户优惠券粒度优惠券使用(支付)最近1日汇总表
DROP TABLE IF EXISTS dws_tool_user_coupon_coupon_used_1d;
CREATE EXTERNAL TABLE dws_tool_user_coupon_coupon_used_1d
(
`user_id` STRING COMMENT '用户ID',
`coupon_id` STRING COMMENT '优惠券ID',
`coupon_name` STRING COMMENT '优惠券名称',
`coupon_type_code` STRING COMMENT '优惠券类型编码',
`coupon_type_name` STRING COMMENT '优惠券类型名称',
`benefit_rule` STRING COMMENT '优惠规则',
`used_count_1d` STRING COMMENT '使用(支付)次数'
) COMMENT '工具域用户优惠券粒度优惠券使用(支付)最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_tool_user_coupon_coupon_used_1d'
TBLPROPERTIES ('orc.compress' = 'snappy');
--数据装载
insert overwrite table dws_tool_user_coupon_coupon_used_1d partition (dt)
select
`user_id` ,--STRING COMMENT '用户ID',
`coupon_id` ,--STRING COMMENT '优惠券ID',
`coupon_name` ,--STRING COMMENT '优惠券名称',
`coupon_type_code` ,--STRING COMMENT '优惠券类型编码',
`coupon_type_name` ,--STRING COMMENT '优惠券类型名称',
`benefit_rule` ,--STRING COMMENT '优惠规则',
`used_count_1d` ,--STRING COMMENT '使用(支付)次数'
dt
from (
select
user_id,
coupon_id,
count(*) used_count_1d,
dt
from dwd_tool_coupon_used_inc
group by user_id,coupon_id,dt
)cu
left join(
select
id,
coupon_name,
coupon_type_code,
coupon_type_name,
benefit_rule
from dim_coupon_full
where dt="2022-06-08"
)cp on cu.coupon_id=cp.id;
--每日装载
insert overwrite table dws_tool_user_coupon_coupon_used_1d partition (dt="2022-06-09")
select
`user_id` ,--STRING COMMENT '用户ID',
`coupon_id` ,--STRING COMMENT '优惠券ID',
`coupon_name` ,--STRING COMMENT '优惠券名称',
`coupon_type_code` ,--STRING COMMENT '优惠券类型编码',
`coupon_type_name` ,--STRING COMMENT '优惠券类型名称',
`benefit_rule` ,--STRING COMMENT '优惠规则',
`used_count_1d` --STRING COMMENT '使用(支付)次数'
from (
select
user_id,
coupon_id,
count(*) used_count_1d
from dwd_tool_coupon_used_inc
where dt="2022-06-09"
group by user_id,coupon_id
)cu
left join(
select
id,
coupon_name,
coupon_type_code,
coupon_type_name,
benefit_rule
from dim_coupon_full
where dt="2022-06-09"
)cp on cu.coupon_id=cp.id;
--互动域商品粒度收藏商品最近1日汇总表
--建表语句
DROP TABLE IF EXISTS dws_interaction_sku_favor_add_1d;
CREATE EXTERNAL TABLE dws_interaction_sku_favor_add_1d
(
`sku_id` STRING COMMENT 'SKU_ID',
`sku_name` STRING COMMENT 'SKU名称',
`category1_id` STRING COMMENT '一级品类ID',
`category1_name` STRING COMMENT '一级品类名称',
`category2_id` STRING COMMENT '二级品类ID',
`category2_name` STRING COMMENT '二级品类名称',
`category3_id` STRING COMMENT '三级品类ID',
`category3_name` STRING COMMENT '三级品类名称',
`tm_id` STRING COMMENT '品牌ID',
`tm_name` STRING COMMENT '品牌名称',
`favor_add_count_1d` BIGINT COMMENT '商品被收藏次数'
) COMMENT '互动域商品粒度收藏商品最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_interaction_sku_favor_add_1d'
TBLPROPERTIES ('orc.compress' = 'snappy');
--数据装载
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_interaction_sku_favor_add_1d partition(dt)
select
sku_id,
sku_name,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
tm_id,
tm_name,
favor_add_count,
dt
from
(
select
dt,
sku_id,
count(*) favor_add_count
from dwd_interaction_favor_add_inc
group by dt,sku_id
)favor
left join
(
select
id,
sku_name,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
tm_id,
tm_name
from dim_sku_full
where dt='2022-06-08'
)sku
on favor.sku_id=sku.id;
--每日装载
insert overwrite table dws_interaction_sku_favor_add_1d partition(dt="2022-06-09")
select
sku_id,
sku_name,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
tm_id,
tm_name,
favor_add_count
from
(
select
sku_id,
count(*) favor_add_count
from dwd_interaction_favor_add_inc
where dt="2022-06-09"
group by sku_id
)favor
left join
(
select
id,
sku_name,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
tm_id,
tm_name
from dim_sku_full
where dt='2022-06-09'
)sku
on favor.sku_id=sku.id;
--流量域会话粒度页面浏览最近1日汇总表
DROP TABLE IF EXISTS dws_traffic_session_page_view_1d;
CREATE EXTERNAL TABLE dws_traffic_session_page_view_1d
(
`session_id` STRING COMMENT '会话ID',
`mid_id` string comment '设备ID',
`brand` string comment '手机品牌',
`model` string comment '手机型号',
`operate_system` string comment '操作系统',
`version_code` string comment 'APP版本号',
`channel` string comment '渠道',
`during_time_1d` BIGINT COMMENT '最近1日浏览时长',
`page_count_1d` BIGINT COMMENT '最近1日浏览页面数'
) COMMENT '流量域会话粒度页面浏览最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_traffic_session_page_view_1d'
TBLPROPERTIES ('orc.compress' = 'snappy');
--数据装载
insert overwrite table dws_traffic_session_page_view_1d partition (dt="2022-06-08")
select
session_id,
mid_id,
brand,
model,
operate_system,
version_code,
channel,
sum(during_time),
count(page_id)
from dwd_traffic_page_view_inc
where dt="2022-06-08"
group by session_id, mid_id, brand, model, operate_system, version_code, channel ;
--流量域访客页面粒度页面浏览最近1日汇总表
DROP TABLE IF EXISTS dws_traffic_page_visitor_page_view_1d;
CREATE EXTERNAL TABLE dws_traffic_page_visitor_page_view_1d
(
`mid_id` STRING COMMENT '访客ID',
`brand` string comment '手机品牌',
`model` string comment '手机型号',
`operate_system` string comment '操作系统',
`page_id` STRING COMMENT '页面ID',
`during_time_1d` BIGINT COMMENT '最近1日浏览时长',
`view_count_1d` BIGINT COMMENT '最近1日访问次数'
) COMMENT '流量域访客页面粒度页面浏览最近1日汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_traffic_page_visitor_page_view_1d'
TBLPROPERTIES ('orc.compress' = 'snappy');
--数据装载
insert overwrite table dws_traffic_page_visitor_page_view_1d partition(dt='2022-06-08')
select
mid_id,
brand,
model,
operate_system,
page_id,
sum(during_time),
count(*)
from dwd_traffic_page_view_inc
where dt='2022-06-08'
group by mid_id,brand,model,operate_system,page_id;
--用户域用户粒度登录历史至今汇总表
DROP TABLE IF EXISTS dws_user_user_login_td;
CREATE EXTERNAL TABLE dws_user_user_login_td
(
`user_id` STRING COMMENT '用户ID',
`login_date_last` STRING COMMENT '历史至今末次登录日期',
`login_date_first` STRING COMMENT '历史至今首次登录日期',
`login_count_td` BIGINT COMMENT '历史至今累计登录次数'
) COMMENT '用户域用户粒度登录历史至今汇总表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dws/dws_user_user_login_td'
TBLPROPERTIES ('orc.compress' = 'snappy');
--数据装载
--首日
insert overwrite table dws_user_user_login_td partition (dt="2022-06-08")
select
user_id,
max(dt) login_date_last,
min(dt)login_date_first,
count(*)
from dwd_user_login_inc
group by user_id;
insert overwrite table dws_user_user_login_td partition (dt="2022-06-09")
select
user_id,
max(login_date_last),
min(login_date_first),
sum(login_count_td)
from (
select
user_id,
login_date_last,
login_date_first,
login_count_td
from dws_user_user_login_td
where dt=date_sub("2022-06-09",1)
union all
select
user_id,
"2022-06-09",
"2022-06-09",
count(*)
from dws_user_user_login_td
where dt="2022-06-09"
group by user_id
)t
group by user_id ;
--登录信息来自于日志表,但是日志表的数据只有8号以及其以后的数据,没有历史数据
--mysql数据库种不会保存行为数据,也就是说不会保存登录信息
--折中认为用户注册的时间就是用户登陆时间
insert overwrite table dws_user_user_login_td partition (dt="2022-06-08")
select
user_id,
max(login_date_first),
min(login_date_last),
sum(login_count_td)
from (
--历史(假)
select
id user_id,
date_format(create_time,"yyyy-MM-dd") login_date_first,
date_format(create_time,"yyyy-MM-dd") login_date_last,
1 login_count_td
from dim_user_zip
where dt="9999-12-31" and date_format(create_time,"yyyy-MM-dd")!="2022-06-08"
union all
select
user_id,
"2022-06-08",
"2022-06-08",
count(*)
from dwd_user_login_inc
where dt="2022-06-08"
group by user_id
)td
group by user_id;
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
SQL
1
https://gitee.com/xiaoming12139/gmalll.git
git@gitee.com:xiaoming12139/gmalll.git
xiaoming12139
gmalll
gmalll
master

搜索帮助