DA47. 筛选某店铺最有价值用户中消费最多前5名
描述
现有某店铺会员消费情况sales.csv。包含以下字段:
请你统计最有价值的用户中消费金额最多的前5名用户。
请你先对每个用户销售情况的每个特征进行评分,分值为1-4分。再将所有评分拼接到一起形成新的列RFMClass。
评分规则如下: 对于recency特征,值越小越好。对于frequency和monetary值越大越好。如对于recency:
对于frequency和monetary则方法刚好相反。
请你输出评分后的数据的前5行并输出最有价值的用户(评分为“444”)中销售总金额最高的前5位(索引从0开始),以上数据集的输出如下图所示(两次输出之间有一个空行)。
Python 3 解法, 执行用时: 798ms, 内存消耗: 524288KB, 提交时间: 2022-07-18
import pandas as pd sales = pd.read_csv('sales.csv') sales["monetary"] = sales["monetary"].astype("float") sales["R_Quartile"] = pd.qcut(sales["recency"], [0, 0.25, 0.5, 0.75, 1], labels=["4", "3", "2", "1"]).astype("str") sales["F_Quartile"] = pd.qcut(sales["frequency"], [0, 0.25, 0.5, 0.75, 1], labels=["1", "2", "3", "4"]).astype("str") sales["M_Quartile"] = pd.qcut(sales["monetary"], [0, 0.25, 0.5, 0.75, 1], labels=["1", "2", "3", "4"]).astype("str") sales['RFMClass']=sales['R_Quartile']+sales['F_Quartile']+sales['M_Quartile'] sales1=sales[['user_id','recency','frequency','monetary','RFMClass']] data1=sales1.head(5) print(data1) data2=sales1[sales1['RFMClass']=='444'].sort_values(by='monetary',ascending=False).head(5).reset_index(drop=True) print('\n') print(data2)
Python 3 解法, 执行用时: 803ms, 内存消耗: 524288KB, 提交时间: 2022-08-06
import pandas as pd sales = pd.read_csv('sales.csv') # # 按照结果要求转换类型 # sales[['monetary']] = sales[['monetary']].astype('float32') # # 求百分位 # des = sales[['recency', 'frequency', 'monetary']].describe().loc['25%':'75%'] # # 计算RFM # R = sales['recency'].apply(lambda x: 4 if x <= des.iloc[0,0] else (3 if x <= des.iloc[1,0] else (2 if x <= des.iloc[2,0] else 1))).astype('str') # F = sales['frequency'].apply(lambda x: 1 if x <= des.iloc[0,1] else (2 if x <= des.iloc[1,1] else (3 if x <= des.iloc[2,1] else 4))).astype('str') # M = sales['monetary'].apply(lambda x: 1 if x <= des.iloc[0,2] else (2 if x <= des.iloc[1,2] else (3 if x <= des.iloc[2,2] else 4))).astype('str') # # 合并RFM # sales['RFMClass'] = R+F+M # # # print(sales.head()) # # 筛选444用户 # sales1 = sales[sales['RFMClass'] == '444'].sort_values(by='monetary', ascending=False).reset_index(drop=True) # # # print(sales1.head()) sales['monetary']=sales['monetary'].astype('float') R=pd.qcut(sales['recency'],[0,0.25,0.5,0.75,1],['4','3','2','1']).astype('str') F=pd.qcut(sales['frequency'],[0,0.25,0.5,0.75,1],['1','2','3','4']).astype('str') M=pd.qcut(sales['monetary'],[0,0.25,0.5,0.75,1],['1','2','3','4']).astype('str') sales['RFMClass']=R+F+M print(sales.head(5)) print('\n') sales1=sales[sales['RFMClass']=='444'].sort_values('monetary',ascending=False).reset_index(drop=True) print(sales1.head(5))
Python 3 解法, 执行用时: 806ms, 内存消耗: 524288KB, 提交时间: 2022-07-03
import pandas as pd sales = pd.read_csv('sales.csv') sales_quantile=sales.quantile([0.25,0.5,0.75]) #获取分位数 dict_list=sales_quantile.to_dict('records') # 转化为内容为字典的列表 def deal(x,name): if name == 'recency': if x<=dict_list[0][name]: return '4' elif x>dict_list[0][name] and x<=dict_list[1][name]: return '3' elif x>dict_list[1][name] and x<=dict_list[2][name]: return '2' else: return '1' else: if x<=dict_list[0][name]: return '1' elif x>dict_list[0][name] and x<=dict_list[1][name]: return '2' elif x>dict_list[1][name] and x<=dict_list[2][name]: return '3' else: return '4' sales['RFMClass'] = sales['recency'].apply(lambda x: deal(x,'recency'))+sales['frequency'].apply(lambda x: deal(x,'frequency'))+sales['monetary'].apply(lambda x: deal(x,'monetary')) #字符串的+可以拼接 print(sales.head(5),'\n') print(sales[sales['RFMClass']=='444'].sort_values(by='monetary',ascending='False').reset_index(drop-True).head(5))
Python 3 解法, 执行用时: 810ms, 内存消耗: 524288KB, 提交时间: 2022-06-28
import pandas as pd sales = pd.read_csv('sales.csv') def deal(x,name): if name == 'recency': if x <= dict_list[0][name]: return '4' elif x > dict_list[0][name] and x <= dict_list[1][name]: return '3' elif x > dict_list[1][name] and x <= dict_list[2][name]: return '2' else: return '1' else: if x <= dict_list[0][name]: return '1' elif x > dict_list[0][name] and x <= dict_list[1][name]: return '2' elif x > dict_list[1][name] and x <= dict_list[2][name]: return '3' else: return '4' sales_quantile = sales.quantile([.25,.50,.75]) dict_list = sales_quantile.to_dict('records') sales['RFMClass'] = sales['recency'].apply(lambda x: deal(x,'recency'))+sales['frequency'].apply(lambda x: deal(x,'frequency'))+sales['monetary'].apply(lambda x: deal(x,'monetary')) print(sales.head(5),'\n') print(sales[sales['RFMClass'] == '444'].sort_values(by= 'monetary',ascending = False).reset_index(drop = True).head(5))
Python 3 解法, 执行用时: 811ms, 内存消耗: 524288KB, 提交时间: 2022-07-23
import pandas as pd sales = pd.read_csv('sales.csv') sales["monetary"] = sales["monetary"].astype("float") sales["R_Quartile"] = pd.qcut(sales["recency"], [0, 0.25, 0.5, 0.75, 1], labels=["4", "3", "2", "1"]).astype("str") sales["F_Quartile"] = pd.qcut(sales["frequency"], [0, 0.25, 0.5, 0.75, 1], labels=["1", "2", "3", "4"]).astype("str") sales["M_Quartile"] = pd.qcut(sales["monetary"], [0, 0.25, 0.5, 0.75, 1], labels=["1", "2", "3", "4"]).astype("str") sales['RFMClass']=sales['R_Quartile']+sales['F_Quartile']+sales['M_Quartile'] sales1 = sales[['user_id', 'recency', 'frequency', 'monetary', 'RFMClass']] print(sales1.head(5)) print('\n') df = sales1[sales1['RFMClass'] == '444'].sort_values(by='monetary', ascending=False) print(df.head(5).reset_index(drop=True))