# Write your MySQL query statement below
550. 游戏玩法分析 IV
Table: Activity
+--------------+---------+ | Column Name | Type | +--------------+---------+ | player_id | int | | device_id | int | | event_date | date | | games_played | int | +--------------+---------+ (player_id,event_date)是此表的主键。 这张表显示了某些游戏的玩家的活动情况。 每一行是一个玩家的记录,他在某一天使用某个设备注销之前登录并玩了很多游戏(可能是 0)。
编写一个 SQL 查询,报告在首次登录的第二天再次登录的玩家的比率,四舍五入到小数点后两位。换句话说,您需要计算从首次登录日期开始至少连续两天登录的玩家的数量,然后除以玩家总数。
查询结果格式如下所示:
Activity table: +-----------+-----------+------------+--------------+ | player_id | device_id | event_date | games_played | +-----------+-----------+------------+--------------+ | 1 | 2 | 2016-03-01 | 5 | | 1 | 2 | 2016-03-02 | 6 | | 2 | 3 | 2017-06-25 | 1 | | 3 | 1 | 2016-03-02 | 0 | | 3 | 4 | 2018-07-03 | 5 | +-----------+-----------+------------+--------------+ Result table: +-----------+ | fraction | +-----------+ | 0.33 | +-----------+ 只有 ID 为 1 的玩家在第一天登录后才重新登录,所以答案是 1/3 = 0.33
原站题解
pythondata 解法, 执行用时: 781 ms, 内存消耗: 68.5 MB, 提交时间: 2024-05-27 11:26:34
# 使用merge import pandas as pd def gameplay_analysis2(activity: pd.DataFrame) -> pd.DataFrame: # 根据玩家分组,找到每位玩家的第一次登录日期 first_login = activity.groupby('player_id')['event_date'].min() # 计算每位玩家的第二次登录日期(第一次登录日期 + 1天) second_login = first_login + pd.DateOffset(days=1) # 将第二次登录日期与原数据表合并 merged_table = pd.merge(activity, second_login, on='player_id', how='left') # 计算满足条件的行数:第二次登录日期等于实际登录日期的行数 matching_rows_count = len(merged_table.query('event_date_x == event_date_y')) # 计算满足条件的玩家比例:满足条件的玩家数量 / 所有玩家数量 fraction = round(matching_rows_count / activity['player_id'].nunique(), 2) # 创建结果 DataFrame 并返回 result_df = pd.DataFrame({'fraction': [fraction]}) return result_df # 使用transform def gameplay_analysis(activity: pd.DataFrame) -> pd.DataFrame: # 根据玩家分组,计算每位玩家的第二次登录日期(第一次登录日期 + 1天) second_login = activity.groupby('player_id')['event_date'].transform(lambda x: x.min() + pd.DateOffset(days=1)) # 计算满足条件的行数:第二次登录日期等于实际登录日期的行数 matching_rows_count = (second_login == activity['event_date']).sum() # 计算满足条件的玩家比例:满足条件的玩家数量 / 所有玩家数量 fraction = round(matching_rows_count / activity['player_id'].nunique(), 2) # 创建结果 DataFrame 并返回 result_df = pd.DataFrame({'fraction': [fraction]}) return result_df
mysql 解法, 执行用时: 344 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:14:43
# Write your MySQL query statement below select ifnull( round( count(distinct player_id)/ (select count(distinct player_id) from activity) ,2) ,0) as fraction from activity where (player_id, event_date) in ( select player_id, date(min(event_date) + 1) from activity group by player_id )
mysql 解法, 执行用时: 355 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:14:29
# Write your MySQL query statement below # Write your MySQL query statement below select round(avg(a.event_date is not null), 2) fraction from (select player_id, min(event_date) as login from activity group by player_id) p left join activity a on p.player_id=a.player_id and datediff(a.event_date, p.login)=1
mysql 解法, 执行用时: 370 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:14:16
# Write your MySQL query statement below select round(avg(event_date is not null), 2) as fraction from ( select player_id, min(event_date) as first_login from activity group by player_id ) temp left join activity on temp.player_id = activity.player_id and datediff(event_date, first_login) = 1;
mysql 解法, 执行用时: 353 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:13:57
# Write your MySQL query statement below with temp as ( select player_id, datediff(event_date, min(event_date) over(partition by player_id)) as diff from activity ) select round( sum(case diff when 1 then 1 else 0 end) / count(distinct player_id), 2) as fraction from temp;
mysql 解法, 执行用时: 363 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:13:46
# Write your MySQL query statement below select round(( (select count(player_id) from ( select player_id, datediff(event_date, min(event_date) over(partition by player_id)) as diff from activity ) as temp where diff = 1) / (select count(distinct player_id) from activity) ), 2) as fraction;
mysql 解法, 执行用时: 516 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:13:11
# Write your MySQL query statement below select round((select count(player_id) from ( select player_id, event_date, lag(event_date) over(partition by player_id order by event_date) as next_date, rank() over(partition by player_id order by event_date) as login_times from activity ) as temp where datediff(event_date, next_date) = 1 and login_times = 2) / (select count(player_id) from ( select player_id from activity group by player_id ) as temp), 2) as fraction;