列表

详情


SQL140. 未完成率较高的50%用户近三个月答卷情况

描述

现有用户信息表user_info(uid用户ID,nick_name昵称, achievement成就值, level等级, job职业方向, register_time注册时间):
id uid nick_name achievement level job register_time
1 1001
牛客1号
3200
7 算法
2020-01-01 10:00:00
2 1002
牛客2号
2500
6 算法
2020-01-01 10:00:00
3 1003
牛客3号♂
2200
5 算法
2020-01-01 10:00:00

试卷信息表examination_info(exam_id试卷ID, tag试卷类别, difficulty试卷难度, duration考试时长, release_time发布时间)
id exam_id tag difficulty duration
release_time
1 9001 SQL hard 60 2020-01-01 10:00:00
2 9002 SQL hard
80
2020-01-01 10:00:00
3 9003
算法
hard
80
2020-01-01 10:00:00
4 9004
PYTHON medium
70 2020-01-01 10:00:00

试卷作答记录表exam_record(uid用户ID, exam_id试卷ID, start_time开始作答时间, submit_time交卷时间, score得分)
id uid exam_id start_time submit_time score
1 1001 9001 2020-01-01 09:01:01 2020-01-01 09:21:59
90
15 1002 9001
2020-01-01 18:01:01
2020-01-01 18:59:02
90
13 1001 9001
2020-01-02 10:01:01
2020-01-02 10:31:01
89
2 1002 9001
2020-01-20 10:01:01


3 1002 9001
2020-02-01 12:11:01


5 1001 9001
2020-03-01 12:01:01


6 1002 9001
2020-03-01 12:01:01
2020-03-01 12:41:01
90
4 1003 9001
2020-03-01 19:01:01


7 1002 9001
2020-05-02 19:01:01
2020-05-02 19:32:00
90
14 1001 9002
2020-01-01 12:11:01


8 1001 9002
2020-01-02 19:01:01
2020-01-02 19:59:01
69
9 1001 9002
2020-02-02 12:01:01 2020-02-02 12:20:01
99
10 1002 9002
2020-02-02 12:01:01

11 1002
9002
2020-02-02 12:01:01
2020-02-02 12:43:01
81
12 1002
9002
2020-03-02 12:11:01


17 1001 9002
2020-05-05 18:01:01


16 1002
9003
2020-05-06 12:01:01


请统计SQL试卷上未完成率较高的50%用户中,6级和7级用户在有试卷作答记录的近三个月中,每个月的答卷数目和完成数目。按用户ID、月份升序排序。
由示例数据结果输出如下:
uid start_month total_cnt complete_cnt
1002
202002 3 1
1002
202003
2
1
1002
202005
2 1
解释:各个用户对SQL试卷的未完成数、作答总数、未完成率如下:
uid incomplete_cnt total_cnt incomplete_rate
1001
3 7 0.4286
1002
4
8
0.5000
1003
1
1 1.0000
1001、1002、1003分别排在1.0、0.5、0.0的位置,因此较高的50%用户(排位<=0.5)为1002、1003;
1003不是6级或7级;
有试卷作答记录的近三个月为202005、202003、202002;
这三个月里1002的作答题数分别为3、2、2,完成数目分别为1、1、1。

示例1

输入:

drop table if exists examination_info,user_info,exam_record;
CREATE TABLE examination_info (
    id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
    exam_id int UNIQUE NOT NULL COMMENT '试卷ID',
    tag varchar(32) COMMENT '类别标签',
    difficulty varchar(8) COMMENT '难度',
    duration int NOT NULL COMMENT '时长',
    release_time datetime COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_general_ci;

CREATE TABLE user_info (
    id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
    uid int UNIQUE NOT NULL COMMENT '用户ID',
    `nick_name` varchar(64) COMMENT '昵称',
    achievement int COMMENT '成就值',
    level int COMMENT '用户等级',
    job varchar(32) COMMENT '职业方向',
    register_time datetime COMMENT '注册时间'
)CHARACTER SET utf8 COLLATE utf8_general_ci;

CREATE TABLE exam_record (
    id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
    uid int NOT NULL COMMENT '用户ID',
    exam_id int NOT NULL COMMENT '试卷ID',
    start_time datetime NOT NULL COMMENT '开始时间',
    submit_time datetime COMMENT '提交时间',
    score tinyint COMMENT '得分'
)CHARACTER SET utf8 COLLATE utf8_general_ci;

INSERT INTO user_info(uid,`nick_name`,achievement,`level`,job,register_time) VALUES
  (1001, '牛客1', 3200, 7, '算法', '2020-01-01 10:00:00'),
  (1002, '牛客2号', 2500, 6, '算法', '2020-01-01 10:00:00'),
  (1003, '牛客3号♂', 2200, 5, '算法', '2020-01-01 10:00:00');

INSERT INTO examination_info(exam_id,tag,difficulty,duration,release_time) VALUES
  (9001, 'SQL', 'hard', 60, '2020-01-01 10:00:00'),
  (9002, 'SQL', 'hard', 80, '2020-01-01 10:00:00'),
  (9003, '算法', 'hard', 80, '2020-01-01 10:00:00'),
  (9004, 'PYTHON', 'medium', 70, '2020-01-01 10:00:00');
	
INSERT INTO exam_record(uid,exam_id,start_time,submit_time,score) VALUES
(1001, 9001, '2020-01-01 09:01:01', '2020-01-01 09:21:59', 90),
(1002, 9001, '2020-01-20 10:01:01', null, null),
(1002, 9001, '2020-02-01 12:11:01', null, null),
(1003, 9001, '2020-03-01 19:01:01', null, null),
(1001, 9001, '2020-03-01 12:01:01', null, null),
(1002, 9001, '2020-03-01 12:01:01', '2020-03-01 12:41:01', 90),
(1002, 9001, '2020-05-02 19:01:01', '2020-05-02 19:32:00', 90),
(1001, 9002, '2020-01-02 19:01:01', '2020-01-02 19:59:01', 69),
(1001, 9002, '2020-02-02 12:01:01', '2020-02-02 12:20:01', 99),
(1002, 9002, '2020-02-02 12:01:01', null, null),
(1002, 9002, '2020-02-02 12:01:01', '2020-02-02 12:43:01', 81),
(1002, 9002, '2020-03-02 12:11:01', null, null),
(1001, 9001, '2020-01-02 10:01:01', '2020-01-02 10:31:01', 89),
(1001, 9002, '2020-01-01 12:11:01', null, null),
(1002, 9001, '2020-01-01 18:01:01', '2020-01-01 18:59:02', 90),
(1002, 9003, '2020-05-06 12:01:01', null, null),
(1001, 9002, '2020-05-05 18:01:01', null, null);

输出:

1002|202002|3|1
1002|202003|2|1
1002|202005|2|1

原站题解

上次编辑到这里,代码来自缓存 点击恢复默认模板

Mysql 解法, 执行用时: 37ms, 内存消耗: 6400KB, 提交时间: 2021-12-06

# 第一步,先找出未完成率前50%高的用户ID,注意这里需要的sql试卷
with rote_tab as 
(select t.uid,t.f_rote,row_number()over(order by t.f_rote desc,uid) as rank2
,count(t.uid)over(partition by t.tag)as cnt
from (select er.uid,ef.tag,(sum(if(submit_time is null,1,0))/count(start_time)) as f_rote
from exam_record er left join examination_info ef on ef.exam_id=er.exam_id where tag='SQL' group by uid ) t)

select  #第四步,分用户和月份进行数据统计;同时需要注意,统计的试卷数是所有类型的,不是之前仅有SQL类型
    uid
    ,start_month
    ,count(start_time) as total_cnt
    ,count(submit_time) as complete_cnt
from 
(
select # 第三步,利用窗口函数对每个用户的月份进行降序排序,以便找出最近的三个月;
    uid
    ,start_time
    ,submit_time
    ,date_format(start_time,'%Y%m') as start_month
    ,dense_rank()over(partition by uid order by date_format(start_time,'%Y%m') desc) as rank3
from exam_record 
where uid in 
    (select distinct er.uid
    from exam_record er left join user_info uf on uf.uid=er.uid
    where er.uid in (select uid from rote_tab where rank2<=round(cnt/2,0))
    and uf.level in (6,7))  # 第二步,进一步找出满足等级为6或7的用户ID
) t2
where rank3<=3
group by uid,start_month
order by uid,start_month

Mysql 解法, 执行用时: 37ms, 内存消耗: 6404KB, 提交时间: 2021-12-16

with t as(SELECT uid from(
    select uid,row_number() over(order by uncomplete_rate desc)as ranking,
    count(1) over() as num
    from(
    select uid,((count(uid) - count(submit_time))/count(uid)) as uncomplete_rate
    from exam_record join examination_info using(exam_id) where tag ="SQL"
    group by uid)a)b
    join user_info using(uid) 
    where(ranking<=(num+1)/2) and level >=6
)
    
select uid,dtime as month,count(uid),count(score) as complete_cnt from (
    select uid,
    DATE_FORMAT(start_time,"%Y%m") as dtime,
    DENSE_RANK()over(partition by uid order by DATE_FORMAT(start_time,"%y%m") desc) as ranking,
    score from exam_record)a
inner join t using(uid)
where ranking <=3
group by a.uid,month
order by a.uid,month

Mysql 解法, 执行用时: 37ms, 内存消耗: 6408KB, 提交时间: 2021-12-27

with t as
(
    select uid
    from
    (select *,row_number()over(order by incomplete_rate desc) as rn,count(1)over() as num
    from
    (select uid,count(1)-count(submit_time) as incomplete_cnt,count(1) as total_cnt,(count(1)-count(submit_time))/count(1) as incomplete_rate
    from exam_record
    where exam_id in (select exam_id from examination_info where tag = 'SQL')
    group by uid) t1) t2
    where rn<=(num+1)/2 and uid in (select uid from user_info where level in ('6','7') )
)
select uid,dtime as start_month,count(1) as total_cnt,count(score) as complete_cnt
from
(select uid,DATE_FORMAT(start_time,"%Y%m") as dtime,DENSE_RANK()over(partition by uid order by DATE_FORMAT(start_time,"%y%m") desc) as rn,score
from exam_record
where uid in (select uid from t)) t1
where rn<=3
group by uid,start_month
order by uid,start_month

Mysql 解法, 执行用时: 37ms, 内存消耗: 6420KB, 提交时间: 2022-01-22

select uid,
       date_format(start_time, '%Y%m') as start_month,
       count(1) as total_cnt,
       count(submit_time) as complete_cnt
from
(select *, dense_rank() over (partition by uid order by date_format(start_time, '%Y%m') desc) as rn
from exam_record
) tb
join
(select uid
from
(select uid,
       percent_rank() over (order by incomp_rate desc) as rnk
from
(select uid, 1-count(submit_time) / count(1) as incomp_rate
from exam_record
join examination_info using (exam_id)
where tag = 'SQL'
group by uid) t ) t1
join user_info using (uid)
where rnk <= 0.5 and level >= 6 ) t2
 using (uid)
 where rn <=3
 group by uid, date_format(start_time, '%Y%m')
 order by uid, start_month

Mysql 解法, 执行用时: 37ms, 内存消耗: 6420KB, 提交时间: 2021-12-12

SELECT uid, DATE_FORMAT(start_time,'%Y%m') start_month, COUNT(start_time), COUNT(submit_time) 
FROM (SELECT uid, start_time, submit_time,
      dense_rank() over (PARTITION BY uid ORDER BY DATE_FORMAT(start_time,'%Y%m') DESC) rk
      FROM exam_record
      WHERE uid IN (SELECT uid FROM
                    (SELECT uid, row_number() over (ORDER BY count(submit_time)/count(start_time)) rnrk
                      FROM exam_record
                      WHERE exam_id IN (SELECT exam_id FROM examination_info WHERE tag = 'SQL')
                      GROUP BY uid) t2
                    WHERE t2.rnrk<=ROUND((SELECT COUNT(DISTINCT uid) FROM exam_record)/2,0)
                   )
      AND uid IN (SELECT uid FROM user_info WHERE level=7 OR level =6)
     ) t3
WHERE rk<=3
GROUP BY uid, start_month
ORDER BY uid, start_month