列表

详情


SQL164. 2021年11月每天新用户的次日留存率

描述

用户行为日志表tb_user_log

id uid artical_id in_time out_time sign_cin
1 101 0 2021-11-01 10:00:00 2021-11-01 10:00:42 1
2 102 9001
2021-11-01 10:00:00 2021-11-01 10:00:09 0
3 103 9001
2021-11-01 10:00:01 2021-11-01 10:01:50 0
4 101 9002 2021-11-02 10:00:09 2021-11-02 10:00:28 0
5 103 9002
2021-11-02 10:00:51 2021-11-02 10:00:59
0
6
104 9001 2021-11-02 11:00:28
2021-11-02 11:01:24
0
7 101 9003 2021-11-03 11:00:55
2021-11-03 11:01:24
0
8
104 9003
2021-11-03 11:00:45
2021-11-03 11:00:55
0
9 105 9003
2021-11-03 11:00:53
2021-11-03 11:00:59
0
10 101 9002
2021-11-04 11:00:55
2021-11-04 11:00:59
0

(uid-用户ID, artical_id-文章ID, in_time-进入时间, out_time-离开时间, sign_in-是否签到)

问题:统计2021年11月每天新用户的次日留存率(保留2位小数)

  • 次日留存率为当天新增的用户数中第二天又活跃了的用户数占比。
  • 如果in_time-进入时间out_time-离开时间跨天了,在两天里都记为该用户活跃过,结果按日期升序。

输出示例
示例数据的输出结果如下

dt uv_left_rate
2021-11-01 0.67
2021-11-02
1.00
2021-11-03
0.00

解释:
11.01有3个用户活跃101、102、103,均为新用户,在11.02只有101、103两个又活跃了,因此11.01的次日留存率为0.67;
11.02有104一位新用户,在11.03又活跃了,因此11.02的次日留存率为1.00;
11.03有105一位新用户,在11.04未活跃,因此11.03的次日留存率为0.00;
11.04没有新用户,不输出。

示例1

输入:

DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
    id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
    uid INT NOT NULL COMMENT '用户ID',
    artical_id INT NOT NULL COMMENT '视频ID',
    in_time datetime COMMENT '进入时间',
    out_time datetime COMMENT '离开时间',
    sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;

INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
  (101, 0, '2021-11-01 10:00:00', '2021-11-01 10:00:42', 1),
  (102, 9001, '2021-11-01 10:00:00', '2021-11-01 10:00:09', 0),
  (103, 9001, '2021-11-01 10:00:01', '2021-11-01 10:01:50', 0),
  (101, 9002, '2021-11-02 10:00:09', '2021-11-02 10:00:28', 0),
  (103, 9002, '2021-11-02 10:00:51', '2021-11-02 10:00:59', 0),
  (104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
  (101, 9003, '2021-11-03 11:00:55', '2021-11-03 11:01:24', 0),
  (104, 9003, '2021-11-03 11:00:45', '2021-11-03 11:00:55', 0),
  (105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0),
  (101, 9002, '2021-11-04 11:00:55', '2021-11-04 11:00:59', 0);

输出:

2021-11-01|0.67
2021-11-02|1.00
2021-11-03|0.00

原站题解

上次编辑到这里,代码来自缓存 点击恢复默认模板

Mysql 解法, 执行用时: 36ms, 内存消耗: 6412KB, 提交时间: 2022-01-27

WITH 11_new_user AS (
    -- 2021年11月新用户
    SELECT
        uid 11_new_user,
        first_log
    FROM(
        -- 所有用户第一次登录时间
        SELECT
            uid,
            DATE(MIN(in_time)) first_log
        FROM tb_user_log
    GROUP BY uid
    ) t1
    WHERE DATE_FORMAT(first_log,'%Y%m') = '202111'
),all_user AS (
    -- 所有用户登录日期
    SELECT
        DISTINCT uid, DATE(in_time) log_time
    FROM tb_user_log
    UNION
    SELECT
        DISTINCT uid, DATE(out_time)
    FROM tb_user_log
)
-- 关联新用户和所有用户
SELECT
    n.first_log dt,
    -- count(DISTINCT 11_new_user) cnt_new_log,
    -- count(DISTINCT if(datediff(log_time,first_log) = 1,uid,null)) cnt_new_log
    round(count(DISTINCT if(datediff(log_time,first_log) = 1,uid,null)) /
        count(DISTINCT 11_new_user),2) uv_left_rate
FROM 11_new_user n,all_user a
WHERE uid = 11_new_user
GROUP BY n.first_log
ORDER BY n.first_log;

Mysql 解法, 执行用时: 36ms, 内存消耗: 6516KB, 提交时间: 2022-01-23

select a.first_day, round(count(distinct b.uid)/count(distinct a.uid),2) as rate
from (
    select uid, date(min(in_time)) as first_day
    from tb_user_log 
    -- where date_format(in_time, '%Y-%m') = '2021-11'
    group by uid
    having date_format(min(in_time), '%Y-%m') = '2021-11') a 
left join (
    select uid, date(in_time) as dt 
    from tb_user_log
    union 
    select uid, date(out_time) as dt
    from tb_user_log) b 
on a.uid = b.uid and datediff(b.dt, a.first_day) = 1
group by a.first_day
order by a.first_day

Mysql 解法, 执行用时: 36ms, 内存消耗: 6528KB, 提交时间: 2022-01-27

#新用户首次登录
with t1 as (
    select uid,dt,rank()over(partition by uid order by dt asc) as rk
    from (
select uid,date_format(in_time,'%Y-%m-%d') as dt
    from tb_user_log
    union
    select uid,date_format(out_time,'%Y-%m-%d') as dt
        from tb_user_log
        )temp
),
t2 as (
select a.uid,a.dt as dt1,a.rk as rk1,b.dt as dt2,b.rk as rk2
from t1 a 
join t1 b on a.uid=b.uid
where a.rk=1
and timestampdiff(day, a.dt,b.dt)=1
    and a.dt like '2021-11%'
    )
select t1.dt,round(ifnull(count(distinct t2.uid)/count(distinct t1.uid),0),2) as rate
from t1
left join t2 on t1.uid=t2.uid and t1.dt=t2.dt1
where t1.dt like '2021-11%'
and t1.rk=1
group by t1.dt
order by dt

Mysql 解法, 执行用时: 37ms, 内存消耗: 6296KB, 提交时间: 2021-12-01

select
date(t.in_time) dt
,round(sum(case when t.b_in_time is not null then 1 else 0 end)/count(*),2) uv_left_rate
from
(
    select
    a.uid
    ,a.in_time
    ,a.out_time
    ,b.in_time b_in_time
    ,b.out_time b_out_time
    ,row_number()over(partition by a.uid order by b.in_time) t_rank
    from
    tb_user_log a
    left join
    tb_user_log b
    on a.uid=b.uid and date_add(date(b.out_time),interval -1 day)=date(a.in_time)
    where
    (a.uid,date(a.out_time)) in (select uid,min(date(out_time)) from tb_user_log group by uid)
    and year(a.in_time)='2021' and month(a.in_time)='11'
) t
where 
t.t_rank=1
group by
date(t.in_time)
order by
dt

Mysql 解法, 执行用时: 37ms, 内存消耗: 6308KB, 提交时间: 2021-12-16

select a.dt, 
    round(count(distinct b.uid)/count(distinct a.uid),2) uv_left_rate
from (
    select uid , min(date(in_time)) dt
    from tb_user_log 
    group by uid ) a
left join 
     (select uid,date(in_time) dt
    from tb_user_log
    union 
    select uid,date(out_time) dt
    from tb_user_log) b
on DATE_ADD(a.dt,INTERVAL 1 DAY) = b.dt and a.uid=b.uid
where a.dt like '2021-11%'
group by a.dt 
order by a.dt