1 Star 2 Fork 1

小明/gmalll

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
ADS-1.sql 14.38 KB
一键复制 编辑 原始数据 按行查看 历史
小明 提交于 2024-06-02 18:59 . 优惠券使用统计
--ADS层
--数据量较少,不需要分区
--统计结果为最终结果,勿需进一步分析,所以不用snappy
--给sql所以一般tsv
--字段不用很多
--各渠道流量统计
DROP TABLE IF EXISTS ads_traffic_stats_by_channel;
CREATE EXTERNAL TABLE ads_traffic_stats_by_channel
(
`dt` STRING COMMENT '统计日期',
`recent_days` BIGINT COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
`channel` STRING COMMENT '渠道',
`uv_count` BIGINT COMMENT '访客人数',
`avg_duration_sec` BIGINT COMMENT '会话平均停留时长,单位为秒',
`avg_page_count` BIGINT COMMENT '会话平均浏览页面数',
`sv_count` BIGINT COMMENT '会话数',
`bounce_rate` DECIMAL(16, 2) COMMENT '跳出率'
) COMMENT '各渠道流量统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_traffic_stats_by_channel/';
--数据源 dws_traffic_session_page_view_1d
insert overwrite table ads_traffic_stats_by_channel
select *
from ads_traffic_stats_by_channel
union
select *
from (
--最近一天
select "2022-06-08",
1,
channel,
count(distinct mid_id),
avg(during_time_1d / 1000),
avg(page_count_1d),
count(session_id),
count(`if`(page_count_1d = 1, session_id, null)) / count(session_id)
from dws_traffic_session_page_view_1d
where dt = "2022-06-08"
group by channel
union all
--最近7天
select "2022-06-08",
7,
channel,
count(distinct mid_id),
avg(during_time_1d / 1000),
avg(page_count_1d),
count(session_id),
count(`if`(page_count_1d = 1, session_id, null)) / count(session_id)
from dws_traffic_session_page_view_1d
where dt >= date_sub("2022-06-08", 6)
and dt <= "2022-06-08"
group by channel
union all
--最近30天
select "2022-06-08",
30,
channel,
count(distinct mid_id),
avg(during_time_1d / 1000),
avg(page_count_1d),
count(session_id),
count(`if`(page_count_1d = 1, session_id, null)) / count(session_id)
from dws_traffic_session_page_view_1d
where dt >= date_sub("2022-06-08", 29)
and dt <= "2022-06-08"
group by channel)tmp;
--使用炸裂函数实现
--1.将最大的范围数据获取
--2.将数据炸裂成多条数据,具体的条数取决u有应用场景,增加不同场景的标记
--3.根据条件筛选
--4.对数据增加的标记对数据分组聚合
insert overwrite table ads_traffic_stats_by_channel
select *
from ads_traffic_stats_by_channel
union
select *
from (select "2022-06-08",
days,
channel,
count(distinct mid_id),
avg(during_time_1d / 1000),
avg(page_count_1d),
count(session_id),
count(`if`(page_count_1d = 1, session_id, null)) / count(session_id)
from dws_traffic_session_page_view_1d lateral view explode(array(1, 7, 30)) tmp as days
where dt >= date_sub("2022-06-08", days - 1)
and dt <= "2022-06-08"
group by days, channel) tm;
--路径分析
--数据源:dwd_traffic_page_view_inc
DROP TABLE IF EXISTS ads_page_path;
CREATE EXTERNAL TABLE ads_page_path
(
`dt` STRING COMMENT '统计日期',
`source` STRING COMMENT '跳转起始页面ID',
`target` STRING COMMENT '跳转终到页面ID',
`path_count` BIGINT COMMENT '跳转次数'
) COMMENT '页面浏览路径分析'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_page_path/';
--数据装载
insert overwrite table ads_page_path
select *from ads_page_path
union
select
"2022-06-08",
last_page_id source,
page_id target,
count(*)
from dwd_traffic_page_view_inc
where dt="2022-06-08"
group by last_page_id, page_id;
--在页面跳转中
insert overwrite table ads_page_path
select *from ads_page_path
union
select
"2022-06-08" dt,
source,
nvl(target,"null"),
count(*)path_count
from (
select
concat("step-",rn+source) source,
concat("step-",(rn+1),target) target
from(
select
page_id source,
lead(page_id,1,"out") over (partition by session_id order by view_time)target,
row_number() over (partition by session_id order by view_time)rn
from dwd_traffic_page_view_inc
where dt="2022-06-08"
)tmp
)t2
group by source,target;
--用户变动统计(流失用户and回流用户)
--流失用户
--统计7日前活跃,但是近7日未活跃
--只在流失当天统计
--今天登录and上次登录时间为7天前
--上次登录就是昨天统计的末次登录
--回流用户
--一段时间未活跃,一段时间未活跃,今天又登录了
DROP TABLE IF EXISTS ads_user_change;
CREATE EXTERNAL TABLE ads_user_change
(
`dt` STRING COMMENT '统计日期',
`user_churn_count` BIGINT COMMENT '流失用户数',
`user_back_count` BIGINT COMMENT '回流用户数'
) COMMENT '用户变动统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_user_change/';
--流失用户
select
count(*)user_churn_count
from dws_user_user_login_td
where dt="2022-06-08"
and login_count_td=date_sub("2022-06-08",7);
--------------回流用户
select
count(*)user_back_count
from (
select
user_id,
login_date_last
from dws_user_user_login_td
where dt="2022-06-08"
and login_date_last="2022-06-08"
)tmp join (select user_id,
login_date_last
from dws_user_user_login_td
where dt = "2022-06-07")td on tmp.user_id=td.user_id
where datediff(tmp.login_date_last,td.login_date_last)>=8;
--用户变动统计装载
insert overwrite table ads_user_change
select *from ads_user_change
union
select
churn.dt,
user_churn_count,
user_back_count
from (
select
"2022-06-08" dt,
count(*)user_churn_count
from dws_user_user_login_td
where dt="2022-06-08"
and login_count_td=date_sub("2022-06-08",7)
)churn join (
select
"20220-06-08"dt,
count(*)user_back_count
from (
select
user_id,
login_date_last
from dws_user_user_login_td
where dt="2022-06-08"
and login_date_last="2022-06-08"
)tmp join (select user_id,
login_date_last
from dws_user_user_login_td
where dt = date_sub("2022-06-08",1))td on tmp.user_id=td.user_id
where datediff(tmp.login_date_last,td.login_date_last)>=8
)back on churn.dt=back.dt;
--用户留存率
--新增留存
--留存用户与新增用户的比率
--获取1天前的注册用户数+当天登录用户数量
--以此类推获取n天前的注册用户数+当天的登陆数量
DROP TABLE IF EXISTS ads_user_retention;
CREATE EXTERNAL TABLE ads_user_retention
(
`dt` STRING COMMENT '统计日期',
`create_date` STRING COMMENT '用户新增日期',
`retention_day` INT COMMENT '截至当前日期留存天数',
`retention_count` BIGINT COMMENT '留存用户数量',
`new_user_count` BIGINT COMMENT '新增用户数量',
`retention_rate` DECIMAL(16, 2) COMMENT '留存率'
) COMMENT '用户留存率'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_user_retention/';
--
select
"2022-06-08",
date_sub("2022-06-08",1),
1,
sum(`if`(login_date_last="2022-06-08"),1,0) retention_count,
count(*)new_user_count,
sum(`if`(login_date_last="2022-06-08"),1,0)/count(*) retention_rate
from dws_user_user_login_td
where dt="2022-06-08"
and login_date_first=date_sub("2022-06-08",1);
--7日留存
insert overwrite table ads_user_retention
select * from ads_user_retention
union
select
"2022-06-08",
login_date_first,
datediff("2022-06-08",login_date_first),
sum(`if`(login_date_last="2022-06-08",1,0))retention_count,
count(*)new_user_count,
sum(`if`(login_date_last="2022-06-08",1,0))/count(*) * 100
from dws_user_user_login_td
where dt="2022-06-08" and login_date_first>=date_sub("2022-06-08",7)and login_date_first<"2022-06-08"
group by login_date_first;
--用户新增活跃统计
DROP TABLE IF EXISTS ads_user_stats;
CREATE EXTERNAL TABLE ads_user_stats
(
`dt` STRING COMMENT '统计日期',
`recent_days` BIGINT COMMENT '最近n日,1:最近1日,7:最近7日,30:最近30日',
`new_user_count` BIGINT COMMENT '新增用户数',
`active_user_count` BIGINT COMMENT '活跃用户数'
) COMMENT '用户新增活跃统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_user_stats/';
---最近1日
select
"2022-06-08",
1,
sum(`if`(login_date_first=date_sub("2022-06-08",1),1,0))new_user_count,
count(*) as active_user_count
from dws_user_user_login_td
where dt="2022-06-08"
and login_date_last=date_sub("2022-06-08",1);
--近7日
select
"2022-06-08",
7,
sum(`if`(login_date_last>=date_sub("2022-06-08",6),1,0))new_user_count,
count(*) as active_user_count
from dws_user_user_login_td
where dt="2022-06-08"
and login_date_last>=date_sub("2022-06-08",6) and login_date_first<="2022-06-08";
--近30日
select
"2022-06-08",
30,
sum(`if`(login_date_last>=date_sub("2022-06-08",29),1,0))new_user_count,
count(*) as active_user_count
from dws_user_user_login_td
where dt="2022-06-08"
and login_date_last>=date_sub("2022-06-08",29) and login_date_first<="2022-06-08";
--融合
insert overwrite table ads_user_stats
select * from ads_user_stats
union
select
"2022-06-08",
day,
sum(`if`(login_date_last>=date_sub("2022-06-08",day-1),1,0))new_user_count,
count(*) as active_user_count
from dws_user_user_login_td lateral view explode(array(1,7,30))tmp as day
where dt="2022-06-08"
and login_date_last>=date_sub("2022-06-08",day-1) and login_date_first<="2022-06-08"
group by day ;
--用户行为漏斗分析
DROP TABLE IF EXISTS ads_user_action;
CREATE EXTERNAL TABLE ads_user_action
(
`dt` STRING COMMENT '统计日期',
`home_count` BIGINT COMMENT '浏览首页人数',
`good_detail_count` BIGINT COMMENT '浏览商品详情页人数',
`cart_count` BIGINT COMMENT '加购人数',
`order_count` BIGINT COMMENT '下单人数',
`payment_count` BIGINT COMMENT '支付人数'
) COMMENT '用户行为漏斗分析'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_user_action/';
insert overwrite table ads_user_action
select * from ads_user_action
union
select
"2022-06-08" ,-- STRING COMMENT '统计日期',
`home_count` ,--BIGINT COMMENT '浏览首页人数',
`good_detail_count` ,--BIGINT COMMENT '浏览商品详情页人数',
`cart_count` ,--BIGINT COMMENT '加购人数',
`order_count` ,--BIGINT COMMENT '下单人数',
`payment_count` --BIGINT COMMENT '支付人数'
from(
select
dt,
sum(`if`(page_id="home",1,0))home_count,
sum(`if`(page_id="good_detail",1,0))good_detail_count
from dws_traffic_page_visitor_page_view_1d
where dt="2022-06-08"
and (page_id="home"or page_id="good_detail")
group by dt
)page_view join (
select
dt,
count(user_id)cart_count
from dws_trade_user_cart_add_1d
where dt="2022-06-08"
group by dt
)cart_add on page_view.dt=cart_add.dt
join (
select
dt,
count(user_id)order_count
from gmall.dws_trade_user_order_1d
where dt="2022-06-08"
group by dt
)user_order on cart_add.dt=user_order.dt
join (
select
dt,
count(user_id)payment_count
from dws_trade_user_payment_1d
group by dt
)pay on user_order.dt=pay.dt;
--新增下单用户统计
DROP TABLE IF EXISTS ads_new_order_user_stats;
CREATE EXTERNAL TABLE ads_new_order_user_stats
(
`dt` STRING COMMENT '统计日期',
`recent_days` BIGINT COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
`new_order_user_count` BIGINT COMMENT '新增下单人数'
) COMMENT '新增下单用户统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_new_order_user_stats/';
select
"2022-06-08",
1,
count(*)new_order_user_count
from dws_trade_user_order_td
where order_date_first="2022-06-08";
select
"2022-06-08",
7,
count(*)new_order_user_count
from dws_trade_user_order_td
where order_date_first>=date_sub("2022-06-08",6)and order_date_first<="2022-06-08";
select
"2022-06-08",
30,
count(*)new_order_user_count
from dws_trade_user_order_td
where order_date_first>=date_sub("2022-06-08",29)and order_date_first<="2022-06-08";
insert overwrite table ads_new_order_user_stats
select * from ads_new_order_user_stats
union
select
"2022-06-08",
day,
count(*)new_order_user_count
from dws_trade_user_order_td lateral view explode(array(1,7,30))tmp as day
where dt="2022-06-08"
and order_date_first>=date_sub("2022-06-08",day-1)and order_date_first<="2022-06-08"
group by day ;
--最近7日内连续三日下单用户数
--数据源 dws_trade_user_order_1d
DROP TABLE IF EXISTS ads_order_continuously_user_count;
CREATE EXTERNAL TABLE ads_order_continuously_user_count
(
`dt` STRING COMMENT '统计日期',
`recent_days` BIGINT COMMENT '最近天数,7:最近7天',
`order_continuously_user_count` BIGINT COMMENT '连续3日下单用户数'
) COMMENT '最近7日内连续3日下单用户数统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_order_continuously_user_count/';
insert overwrite table ads_order_continuously_user_count
select * from ads_order_continuously_user_count
union
select
"2022-06-08",
7,
count(distinct user_id)
from(
select
user_id,
datediff(lead(dt,2,"9999-12-31") over (partition by user_id),dt) diff
from dws_trade_user_order_td
where dt>=date_sub("2022-06-08",6)and dt<="2022-06-08"
)t where diff=2
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
SQL
1
https://gitee.com/xiaoming12139/gmalll.git
git@gitee.com:xiaoming12139/gmalll.git
xiaoming12139
gmalll
gmalll
master

搜索帮助