# Write your MySQL query statement below
1341. 电影评分
表:Movies
+---------------+---------+
| Column Name   | Type    |
+---------------+---------+
| movie_id      | int     |
| title         | varchar |
+---------------+---------+
movie_id 是这个表的主键。
title 是电影的名字。
表:Users
+---------------+---------+
| Column Name   | Type    |
+---------------+---------+
| user_id       | int     |
| name          | varchar |
+---------------+---------+
user_id 是表的主键。
表:MovieRating
+---------------+---------+
| Column Name   | Type    |
+---------------+---------+
| movie_id      | int     |
| user_id       | int     |
| rating        | int     |
| created_at    | date    |
+---------------+---------+
(movie_id, user_id) 是这个表的主键。
这个表包含用户在其评论中对电影的评分 rating 。
created_at 是用户的点评日期。
请你编写一组 SQL 查询:
1. 查找评论电影数量最多的用户名。如果出现平局,返回字典序较小的用户名。
2. 查找在 February 2020 平均评分最高的电影名称。如果出现平局,返回字典序较小的电影名称。
字典序,即按字母在字典中出现顺序对字符串排序,字典序较小则意味着排序靠前。
查询结果格式如下例所示。
示例:
输入:
Movies 表:
+-------------+--------------+
| movie_id    | title        |
+-------------+--------------+
| 1           | Avengers     |
| 2           | Frozen 2     |
| 3           | Joker        |
+-------------+--------------+
Users 表:
+-------------+--------------+
| user_id     | name         |
+-------------+--------------+
| 1           | Daniel       |
| 2           | Monica       |
| 3           | Maria        |
| 4           | James        |
+-------------+--------------+
MovieRating 表:
+-------------+--------------+--------------+-------------+
| movie_id    | user_id      | rating       | created_at  |
+-------------+--------------+--------------+-------------+
| 1           | 1            | 3            | 2020-01-12  |
| 1           | 2            | 4            | 2020-02-11  |
| 1           | 3            | 2            | 2020-02-12  |
| 1           | 4            | 1            | 2020-01-01  |
| 2           | 1            | 5            | 2020-02-17  |
| 2           | 2            | 2            | 2020-02-01  |
| 2           | 3            | 2            | 2020-03-01  |
| 3           | 1            | 3            | 2020-02-22  |
| 3           | 2            | 4            | 2020-02-25  |
+-------------+--------------+--------------+-------------+
输出:
Result 表:
+--------------+
| results      |
+--------------+
| Daniel       |
| Frozen 2     |
+--------------+
解释:
Daniel 和 Monica 都点评了 3 部电影("Avengers", "Frozen 2" 和 "Joker"),但是 Daniel 字典序比较小。
Frozen 2 和 Joker 在 2 月的评分都是 3.5,但是 Frozen 2 的字典序比较小。
原站题解
pythondata 解法, 执行用时: 572 ms, 内存消耗: 67.7 MB, 提交时间: 2024-05-27 11:20:51
import pandas as pd


def _in_february_2020(dates: pd.Series) -> pd.Series:
    """Return a boolean mask selecting the dates that fall in February 2020.

    The values are passed through ``pd.to_datetime`` first, so both datetime
    columns and ISO-formatted date strings are accepted.
    """
    parsed = pd.to_datetime(dates)
    # Half-open range: includes 2020-02-01, excludes 2020-03-01.
    return (parsed >= pd.Timestamp('2020-02-01')) & (parsed < pd.Timestamp('2020-03-01'))


def movie_rating(movies: pd.DataFrame, users: pd.DataFrame, movie_rating: pd.DataFrame) -> pd.DataFrame:
    """Find the most active reviewer and the best-rated movie of February 2020.

    Args:
        movies: frame with columns ``movie_id`` and ``title``.
        users: frame with columns ``user_id`` and ``name``.
        movie_rating: frame with columns ``movie_id``, ``user_id``, ``rating``
            and ``created_at``.

    Returns:
        A single-column frame ``results`` holding, in order, the name of the
        user with the most ratings and the title of the movie with the highest
        February 2020 average rating. Ties are broken by the lexicographically
        smaller name/title. A part with no matching rows is left out.
    """
    ans = []

    # 1. User who rated the most movies. The count column is named explicitly
    #    so the lookup below does not depend on column position or on how
    #    value_counts() labels its output.
    rating_counts = movie_rating.groupby('user_id').size().reset_index(name='cnt')
    user_comments = rating_counts.merge(users, how='left', on='user_id').sort_values(
        by=['cnt', 'name'], ascending=[False, True])
    if not user_comments.empty:
        ans.append(user_comments.iloc[0]['name'])

    # 2. Movie with the highest average rating in February 2020.
    february = movie_rating[_in_february_2020(movie_rating['created_at'])]
    mean_rating = february.groupby('movie_id')['rating'].mean().reset_index(name='avg_rating')
    rating_movies = mean_rating.merge(movies, how='left', on='movie_id').sort_values(
        by=['avg_rating', 'title'], ascending=[False, True])
    if not rating_movies.empty:
        ans.append(rating_movies.iloc[0]['title'])

    return pd.DataFrame(ans, columns=['results'], dtype=object)


def movie_rating2(movies: pd.DataFrame, users: pd.DataFrame, movie_rating: pd.DataFrame) -> pd.DataFrame:
    """Alternative solution: join the three tables once, then aggregate twice.

    Takes the same arguments and returns the same ``results`` frame as
    ``movie_rating``. Because the joins are inner joins, ratings that reference
    an unknown user or movie are ignored.
    """
    # One wide table providing both the grouping keys and the output columns.
    mtb = movie_rating.merge(users, on='user_id', how='inner').merge(movies, on='movie_id', how='inner')
    results = []

    # User with the most ratings (ties: smaller name first).
    user_rank = (
        mtb.groupby(['user_id', 'name'])
        .size()
        .reset_index(name='cnt')
        .sort_values(by=['cnt', 'name'], ascending=[False, True])
    )
    if not user_rank.empty:
        results.append(user_rank.iloc[0]['name'])

    # Movie with the highest February 2020 average (ties: smaller title first).
    movie_rank = (
        mtb[_in_february_2020(mtb['created_at'])]
        .groupby(['movie_id', 'title'])['rating']
        .mean()
        .reset_index(name='avg')
        .sort_values(by=['avg', 'title'], ascending=[False, True])
    )
    if not movie_rank.empty:
        results.append(movie_rank.iloc[0]['title'])

    return pd.DataFrame({'results': results})
mysql 解法, 执行用时: 634 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:03:50
# Write your MySQL query statement below
# Row 1: the user with the most ratings (ties: alphabetically smaller name).
# Row 2: the movie with the highest average rating in February 2020
#        (ties: alphabetically smaller title).
# UNION ALL keeps both rows even when the user name and the movie title are
# the same string; plain UNION would collapse them into a single row.
(
    SELECT dd1.`name` AS results
    FROM (
        # Number of ratings per user.
        SELECT
            u.`name`,
            COUNT(*) AS n
        FROM Users AS u
        INNER JOIN MovieRating AS mr
            ON mr.user_id = u.user_id
        GROUP BY u.user_id, u.`name`
    ) AS dd1
    ORDER BY dd1.n DESC, dd1.`name` ASC
    LIMIT 1
)
UNION ALL
(
    SELECT dd2.title AS results
    FROM (
        # Average rating per movie, February 2020 only (half-open date range).
        SELECT
            m.title,
            AVG(mr.rating) AS n
        FROM MovieRating AS mr
        INNER JOIN Movies AS m
            ON m.movie_id = mr.movie_id
        WHERE mr.created_at >= '2020-02-01'
          AND mr.created_at < '2020-03-01'
        GROUP BY m.movie_id, m.title
    ) AS dd2
    ORDER BY dd2.n DESC, dd2.title ASC
    LIMIT 1
);
mysql 解法, 执行用时: 706 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:01:41
# Two single-row derived tables stacked with UNION ALL.
# T: the user with the most ratings (ties: alphabetically smaller name).
SELECT T.results
FROM (
    SELECT U.name AS results
    FROM Users AS U
    INNER JOIN MovieRating AS MR
        ON MR.user_id = U.user_id
    GROUP BY MR.user_id, U.name
    ORDER BY COUNT(*) DESC, U.name ASC
    LIMIT 1
) AS T
UNION ALL
# Q: the movie with the highest February 2020 average rating
#    (ties: alphabetically smaller title).
SELECT Q.results
FROM (
    SELECT M.title AS results
    FROM Movies AS M
    INNER JOIN MovieRating AS MM
        ON MM.movie_id = M.movie_id
    # Half-open date range instead of a string LIKE on a DATE column.
    WHERE MM.created_at >= '2020-02-01'
      AND MM.created_at < '2020-03-01'
    # Group on the key as well as the title so that two different movies
    # sharing a title are averaged separately.
    GROUP BY M.movie_id, M.title
    ORDER BY AVG(MM.rating) DESC, M.title ASC
    LIMIT 1
) AS Q
mysql 解法, 执行用时: 812 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:01:19
# Write your MySQL query statement below
# User who rated the most movies; ties go to the alphabetically smaller name.
(
    SELECT Users.name AS results
    FROM MovieRating
    INNER JOIN Users
        ON MovieRating.user_id = Users.user_id
    GROUP BY MovieRating.user_id, Users.name
    ORDER BY COUNT(MovieRating.user_id) DESC, Users.name ASC
    LIMIT 1
)
# UNION ALL, not UNION: the two rows must both survive even when the user
# name and the movie title happen to be identical strings.
UNION ALL
(
    # Movie with the highest average rating in February 2020; ties go to the
    # alphabetically smaller title.
    SELECT Movies.title AS results
    FROM MovieRating
    INNER JOIN Movies
        ON MovieRating.movie_id = Movies.movie_id
    WHERE MovieRating.created_at >= '2020-02-01'
      AND MovieRating.created_at < '2020-03-01'
    GROUP BY MovieRating.movie_id, Movies.title
    ORDER BY AVG(MovieRating.rating) DESC, Movies.title ASC
    LIMIT 1
)
mysql 解法, 执行用时: 669 ms, 内存消耗: 0 B, 提交时间: 2023-04-02 12:01:04
# Write your MySQL query statement below
# Number of ratings submitted by each user.
WITH User_Rating AS (
    SELECT
        user_id,
        COUNT(rating) AS rating_count
    FROM MovieRating
    GROUP BY user_id
),
# Average rating of each movie, restricted to February 2020.
Avg_Rating AS (
    SELECT
        movie_id,
        AVG(rating) AS avg_rating
    FROM MovieRating
    # Half-open range on the bare column rather than DATE_FORMAT(created_at).
    WHERE created_at >= '2020-02-01'
      AND created_at < '2020-03-01'
    GROUP BY movie_id
)
# Among the users tied for the highest rating count, MIN() picks the
# alphabetically smallest name.
SELECT MIN(a.name) AS results
FROM Users AS a
INNER JOIN User_Rating AS b
    ON a.user_id = b.user_id
WHERE b.rating_count = (
    SELECT MAX(rating_count)
    FROM User_Rating
)
UNION ALL
# Same tie-break for movies tied on the highest February 2020 average.
# NOTE: MIN() without GROUP BY still yields one row (NULL) when nothing matches.
SELECT MIN(a.title) AS results
FROM Movies AS a
INNER JOIN Avg_Rating AS b
    ON a.movie_id = b.movie_id
WHERE b.avg_rating = (
    SELECT MAX(avg_rating)
    FROM Avg_Rating
)