列表

详情


SQL144. 每月及截止当月的答题情况

描述

现有试卷作答记录表exam_record(uid用户ID, exam_id试卷ID, start_time开始作答时间, submit_time交卷时间, score得分):
id uid exam_id start_time submit_time score
1 1001 9001
2020-01-01 09:01:01
2020-01-01 09:21:59 90
2 1002
9001
2020-01-20 10:01:01
2020-01-20 10:10:01 89
3 1002
9001
2020-02-01 12:11:01
2020-02-01 12:31:01 83
4 1003
9001
2020-03-01 19:01:01
2020-03-01 19:30:01 75
5 1004 9001
2020-03-01 12:01:01
2020-03-01 12:11:01
60
6
1003 9001
2020-03-01 12:01:01
2020-03-01 12:41:01
90
7
1002 9001
2020-05-02 19:01:01
2020-05-02 19:32:00
90
8 1001
9002
2020-01-02 19:01:01
2020-01-02 19:59:01
69
9 1004 9002
2020-02-02 12:01:01
2020-02-02 12:20:01
99
10 1003
9002
2020-02-02 12:01:01 2020-02-02 12:31:01
68
11 1001
9002
2020-01-02 19:01:01
2020-02-02 12:43:01
81
12 1001
9002
2020-03-02 12:11:01 (NULL) (NULL)

请输出自从有用户作答记录以来,每月的试卷作答记录中月活用户数、新增用户数、截止当月的单月最大新增用户数、截止当月的累积用户数。结果按月份升序输出。
由示例数据结果输出如下:
start_month mau month_add_uv max_month_add_uv
cum_sum_uv
202001 2 2 2 2
202002
4
2 2 4
202003
3
0 2 4
202005 1
0 2 4

month 1001 1002 1003
1004
202001 1 1

202002
1
1 1 1
202003
1
1 1
202005
1

由上述矩阵可以看出,2020年1月有2个用户活跃(mau=2),当月新增用户数为2;
2020年2月有4个用户活跃,当月新增用户数为2,最大单月新增用户数为2,当前累积用户数为4。

示例1

输入:

drop table if exists exam_record;
CREATE TABLE exam_record (
    id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
    uid int NOT NULL COMMENT '用户ID',
    exam_id int NOT NULL COMMENT '试卷ID',
    start_time datetime NOT NULL COMMENT '开始时间',
    submit_time datetime COMMENT '提交时间',
    score tinyint COMMENT '得分'
)CHARACTER SET utf8 COLLATE utf8_general_ci;

INSERT INTO exam_record(uid,exam_id,start_time,submit_time,score) VALUES
(1001, 9001, '2020-01-01 09:01:01', '2020-01-01 09:21:59', 90),
(1002, 9001, '2020-01-20 10:01:01', '2020-01-20 10:10:01', 89),
(1002, 9001, '2020-02-01 12:11:01', '2020-02-01 12:31:01', 83),
(1003, 9001, '2020-03-01 19:01:01', '2020-03-01 19:30:01', 75),
(1004, 9001, '2020-03-01 12:01:01', '2020-03-01 12:11:01', 60),
(1003, 9001, '2020-03-01 12:01:01', '2020-03-01 12:41:01', 90),
(1002, 9001, '2020-05-02 19:01:01', '2020-05-02 19:32:00', 90),
(1001, 9002, '2020-01-02 19:01:01', '2020-01-02 19:59:01', 69),
(1004, 9002, '2020-02-02 12:01:01', '2020-02-02 12:20:01', 99),
(1003, 9002, '2020-02-02 12:01:01', '2020-02-02 12:31:01', 68),
(1001, 9002, '2020-02-02 12:01:01', '2020-02-02 12:43:01', 81),
(1001, 9002, '2020-03-02 12:11:01', null, null);

输出:

202001|2|2|2|2
202002|4|2|2|4
202003|3|0|2|4
202005|1|0|2|4

原站题解

上次编辑到这里,代码来自缓存 点击恢复默认模板

Mysql 解法, 执行用时: 36ms, 内存消耗: 6436KB, 提交时间: 2021-12-06

SELECT stime,mau,if(month_add is not null,month_add,0) as month_add_uv,
max(if(month_add is not null, month_add,0)) over (order by stime) as max_month_add_uv,
sum(if(month_add is not null, month_add,0)) over (order by stime) as cum_sum_uv
FROM
      (SELECT stime, count(distinct uid) as mau FROM 
         (SELECT uid,DATE_FORMAT(start_time,'%Y%m') as stime FROM exam_record) a 
         GROUP BY stime) c 
      LEFT JOIN 
      (SELECT ntime, count(distinct uid) as month_add FROM
         (SELECT uid,min(DATE_FORMAT(start_time,'%Y%m')) as ntime from exam_record GROUP BY uid) b 
          GROUP BY ntime) d 
      ON c.stime=d.ntime

Mysql 解法, 执行用时: 36ms, 内存消耗: 6524KB, 提交时间: 2021-12-16

# mau
# (select date_format(start_time,'%Y%m') start_month, count(distinct uid) mau
# from exam_record
# group by start_month)

-- new customer
# select start_month, count(uid) month_add_uv
# from 
# (
#     select uid, min(date_format(start_time,'%Y%m')) start_month
#     from exam_record
#     group by uid
# ) A
# group by start_month

select A.start_month, mau, ifnull(month_add_uv,0), 
max(month_add_uv) over(order by start_month) max_month_add_uv,
sum(month_add_uv) over(order by start_month) cum_sum_uv
from(
    (select date_format(start_time,'%Y%m') start_month, count(distinct uid) mau
    from exam_record
    group by start_month) A
left join
    (select start_month, count(uid) month_add_uv
        from 
        (
            select uid, min(date_format(start_time,'%Y%m')) start_month
            from exam_record
            group by uid
        ) X
    group by start_month) B
on A.start_month = B.start_month


)

Mysql 解法, 执行用时: 36ms, 内存消耗: 6580KB, 提交时间: 2022-01-01

SELECT start_month, COUNT(DISTINCT uid),
COUNT(DISTINCT IF(start_month=last_month, uid, NULL)),
MAX(COUNT(DISTINCT IF(start_month=last_month, uid, NULL))) OVER (ORDER BY start_month) ,
SUM(COUNT(DISTINCT IF(start_month=last_month, uid, NULL))) OVER (ORDER BY start_month)
FROM
(SELECT DATE_FORMAT(start_time, '%Y%m') AS start_month, uid,
MIN(DATE_FORMAT(start_time, '%Y%m')) OVER (PARTITION BY uid ORDER BY DATE_FORMAT(start_time, '%Y%m')) AS last_month
FROM exam_record) t
GROUP BY start_month

Mysql 解法, 执行用时: 37ms, 内存消耗: 6380KB, 提交时间: 2021-12-21

#难点在于月新增:对比月份和最早的月份

#第三步,在月活和每月新增的基础上计算最大月活新增,和累计用户数
SELECT start_month, mau, month_add_uv,
    MAX(month_add_uv) OVER(ORDER BY start_month) AS max_month_add_uv,
    SUM(month_add_uv) OVER(ORDER BY start_month) AS cum_sum_uv
FROM(
#第二步,计算月活,每月新增
SELECT
    start_month,
    COUNT(DISTINCT uid) AS mau,
    COUNT(DISTINCT CASE WHEN start_month = first_month THEN uid ELSE NULL END) AS month_add_uv
FROM (
#第一步,找出月份及最小月份;窗口函数实现
    SELECT
        *,
        DATE_FORMAT(start_time, '%Y%m') AS start_month,
        MIN(DATE_FORMAT(start_time, '%Y%m')) OVER(PARTITION BY uid) AS first_month
    FROM exam_record) er1
GROUP BY start_month) er2
ORDER BY start_month

Mysql 解法, 执行用时: 37ms, 内存消耗: 6384KB, 提交时间: 2021-12-21

with new_tab as (select uid,exam_id,date_format(start_time,'%Y%m') as month
                 ,row_number()over(partition by uid order by date_format(start_time,'%Y%m')) as rank2
                from exam_record) # 1、建立一张纸含有用户id,标准月份格式,每个用户按照月份排序的编号方便计算每月的新用户

select t.month
    ,mau
    ,if(month_add_uv is null,0,month_add_uv) as month_add_uv
    ,max(month_add_uv)over(order by month) as max_month_add_uv # 5.利用max聚合窗口函数计算最大单月新增用户数
    ,cum_sum_uv
from # 2、建立一张有月份和各月活跃用户数的临时表
    (select month,count(distinct uid) as mau from new_tab group by month) t
left join # 3.外连接一张记录每月新增用户数的临时表,取每月rank2为1的即为新增用户
    (select month,count( uid) as month_add_uv from new_tab 
     where rank2=1 group by month
    ) t2 on t.month=t2.month 
left join  # 4、外连接一张记录累积用户数的临时表,只要记录新增用户即可,即记录rank2=1的用户数
    (select distinct month,sum(cum)over(order by month) as cum_sum_uv 
    from (select month,if(rank2=1,1,0) as cum from new_tab) t3
    ) t4 on t.month=t4.month
order by month