列表

详情


SQL142. 对试卷得分做min-max归一化

描述

现有试卷信息表examination_info(exam_id试卷ID, tag试卷类别, difficulty试卷难度, duration考试时长, release_time发布时间):

id exam_id tag difficulty duration
release_time
1 9001 SQL hard 60 2020-01-01 10:00:00
2 9002 C++ hard
80
2020-01-01 10:00:00
3 9003
算法
hard
80
2020-01-01 10:00:00
4 9004
PYTHON medium
70 2020-01-01 10:00:00

试卷作答记录表exam_record(uid用户ID, exam_id试卷ID, start_time开始作答时间, submit_time交卷时间, score得分)
id uid exam_id start_time submit_time score
6
1003
9001
2020-01-02 12:01:01
2020-01-02 12:31:01 68
9
1001
9001
2020-01-02 10:01:01 2020-01-02 10:31:01
89
1
1001
9001
2020-01-01 09:01:01 2020-01-01 09:21:59
90
12
1002
9002
2021-05-05 18:01:01
(NULL)
(NULL)
3
1004
9002
2020-01-01 12:01:01
2020-01-01 12:11:01
60
2
1003
9002
2020-01-01 19:01:01
2020-01-01 19:30:01 75
7
1001
9002
2020-01-02 12:01:01
2020-01-02 12:43:01 81
10
1002 9002
2020-01-01 12:11:01 2020-01-01 12:31:01 83
4
1003
9002
2020-01-01 12:01:01 2020-01-01 12:41:01
90
5
1002
9002
2020-01-02 19:01:01
2020-01-02 19:32:00
90
11
1002
9004
2021-09-06 12:01:01
(NULL)
(NULL)
8
1001
9005 2020-01-02 12:11:01
(NULL)
(NULL)

在物理学及统计学数据计算时,有个概念叫min-max标准化,也被称为离差标准化,是对原始数据的线性变换,使结果值映射到[0 - 1]之间。

转换函数为:

请你将用户作答高难度试卷的得分在每份试卷作答记录内执行min-max归一化后缩放到[0,100]区间,并输出用户ID、试卷ID、归一化后分数平均值;最后按照试卷ID升序、归一化分数降序输出。(注:得分区间默认为[0,100],如果某个试卷作答记录中只有一个得分,那么无需使用公式,归一化并缩放后分数仍为原分数)。
由示例数据结果输出如下:
uid exam_id avg_new_score
1001 9001 98
1003
9001
0
1002
9002
88
1003 9002
75
1001
9002
70
1004
9002
0
解释:高难度试卷有9001、9002、9003;
作答了9001的记录有3条,分数分别为68、89、90,按给定公式归一化后分数为:0、95、100,而后两个得分都是用户1001作答的,因此用户1001对试卷9001的新得分为(95+100)/2≈98(只保留整数部分),用户1003对于试卷9001的新得分为0。最后结果按照试卷ID升序、归一化分数降序输出。

示例1

输入:

drop table if exists examination_info,exam_record;
CREATE TABLE examination_info (
    id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
    exam_id int UNIQUE NOT NULL COMMENT '试卷ID',
    tag varchar(32) COMMENT '类别标签',
    difficulty varchar(8) COMMENT '难度',
    duration int NOT NULL COMMENT '时长',
    release_time datetime COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;

CREATE TABLE exam_record (
    id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
    uid int NOT NULL COMMENT '用户ID',
    exam_id int NOT NULL COMMENT '试卷ID',
    start_time datetime NOT NULL COMMENT '开始时间',
    submit_time datetime COMMENT '提交时间',
    score tinyint COMMENT '得分'
)CHARACTER SET utf8 COLLATE utf8_general_ci;

INSERT INTO examination_info(exam_id,tag,difficulty,duration,release_time) VALUES
  (9001, 'SQL', 'hard', 60, '2020-01-01 10:00:00'),
  (9002, 'C++', 'hard', 80, '2020-01-01 10:00:00'),
  (9003, '算法', 'hard', 80, '2020-01-01 10:00:00'),
  (9004, 'PYTHON', 'medium', 70, '2020-01-01 10:00:00'),
  (9005, 'WEB', 'hard', 80, '2020-01-01 10:00:00'),
  (9006, 'PYTHON', 'hard', 80, '2020-01-01 10:00:00'),
  (9007, 'web', 'hard', 80, '2020-01-01 10:00:00'),
  (9008, 'Web', 'medium', 70, '2020-01-01 10:00:00'),
  (9009, 'WEB', 'medium', 70, '2020-01-01 10:00:00'),
  (9010, 'SQL', 'medium', 70, '2020-01-01 10:00:00');
	
INSERT INTO exam_record(uid,exam_id,start_time,submit_time,score) VALUES
(1001, 9001, '2020-01-01 09:01:01', '2020-01-01 09:21:59', 90),
(1003, 9002, '2020-01-01 19:01:01', '2020-01-01 19:30:01', 75),
(1004, 9002, '2020-01-01 12:01:01', '2020-01-01 12:11:01', 60),
(1003, 9002, '2020-01-01 12:01:01', '2020-01-01 12:41:01', 90),
(1002, 9002, '2020-01-02 19:01:01', '2020-01-02 19:32:00', 90),
(1003, 9001, '2020-01-02 12:01:01', '2020-01-02 12:31:01', 68),
(1001, 9002, '2020-01-02 12:01:01', '2020-01-02 12:43:01', 81),
(1001, 9005, '2020-01-02 12:11:01', null, null),
(1001, 9001, '2020-01-02 10:01:01', '2020-01-02 10:31:01', 89),
(1002, 9002, '2020-01-01 12:11:01', '2020-01-01 12:31:01', 83),
(1002, 9004, '2021-09-06 12:01:01', null, null),
(1002, 9002, '2021-05-05 18:01:01', null, null);

输出:

1001|9001|98
1003|9001|0
1002|9002|88
1003|9002|75
1001|9002|70
1004|9002|0

原站题解

上次编辑到这里,代码来自缓存 点击恢复默认模板

Mysql 解法, 执行用时: 36ms, 内存消耗: 6376KB, 提交时间: 2021-11-30

select
    uid,
    exam_id,
    round(avg(newscore),0) avg_new_score
from
    (SELECT
         uid,
         exam_id,
         if(maxs != mins,(score - mins) * 100 / (maxs - mins),maxs) newscore
     FROM
         (select
              uid,
              exam_record.exam_id,
              score,
              max(score) over(
                  partition by exam_record.exam_id
                  ) maxs,
              min(score) over(
                  partition by exam_record.exam_id
                  ) mins
          from
              exam_record
                  right JOIN
              examination_info
              ON
                      exam_record.exam_id = examination_info.exam_id
          WHERE
              score is not null
            and difficulty = 'hard'
         ) mai)new
GROUP BY
    uid,exam_id
order by
    exam_id,avg_new_score DESC
    
    

Mysql 解法, 执行用时: 36ms, 内存消耗: 6528KB, 提交时间: 2022-02-08

select uid,exam_id,round(avg((score-min_score)/(max_score-min_score)*100)) sc1
from(
SELECT ex.uid,ex.exam_id,score,
max(score) over(partition by ex.exam_id ) max_score,
min(score) over(partition by ex.exam_id ) min_score
from exam_record ex join examination_info e on ex.exam_id=e.exam_id
where difficulty='hard' and score is not null) a
where max_score!=min_score group by uid,exam_id
union 
select uid,exam_id,score sc1
from(
SELECT ex.uid,ex.exam_id,score,
max(score) over(partition by ex.exam_id ) max_score,
min(score) over(partition by ex.exam_id ) min_score
from exam_record ex join examination_info e on ex.exam_id=e.exam_id
where difficulty='hard' and score is not null) b
where max_score=min_score
order by exam_id,sc1 desc

Mysql 解法, 执行用时: 36ms, 内存消耗: 6528KB, 提交时间: 2021-11-30

with t as 
(
    select uid,exam_id,
    if(round((score-min(score) over(partition by exam_id))/(max(score) over(partition by exam_id)-min(score) over(partition by exam_id))*100,1) is not null,
       round((score-min(score) over(partition by exam_id))/(max(score) over(partition by exam_id)-min(score) over(partition by exam_id))*100,1),
       score
      ) as new_score
    from exam_record
    where exam_id in (select exam_id
                      from examination_info
                      where difficulty='hard')
    and score is not null
)
select uid,exam_id,round(avg(new_score),0) as avg_new_score
from t
group by uid,exam_id
order by exam_id asc,avg_new_score desc;

Mysql 解法, 执行用时: 37ms, 内存消耗: 6380KB, 提交时间: 2022-01-25

select 
    uid,
    exam_id,
    round(avg(new_score),0) avg_new_score
from
    (select 
        *,
        (case when min_score<>max_score then (score-min_score)/(max_score-min_score)*100 else score end) new_score
    from
        (select 
            a.*,
            max(score)over(partition by a.exam_id) max_score,
            min(score)over(partition by a.exam_id) min_score
        from
            exam_record a 
        left join 
            examination_info b 
        on 
            a.exam_id = b.exam_id 
        where 
            difficulty = 'hard'
        and 
            submit_time is not null)a)b
group by 
    uid,
    exam_id
order by 
    exam_id,
    avg_new_score desc

Mysql 解法, 执行用时: 37ms, 内存消耗: 6384KB, 提交时间: 2022-02-23

SELECT
uid,
exam_id,
round(avg(max_min),0) avg_new_score
FROM
(
select
    uid,
    exam_id,
    if(min_score = max_score,score,(score-min_score)/(max_score-min_score)*100) max_min
FROM
    (
    SELECT
        uid,
        exam_id,
        score,
        min(score) over(partition by exam_id) min_score,
        max(score) over(partition by exam_id) max_score
            #     max(score)-min(score) max_min_difference
        from exam_record inner join examination_info using(exam_id)
        where difficulty = 'hard' and score is not null
    ) t1
) t2
group by uid,exam_id
order by exam_id,avg_new_score desc