# hotproduct_analyzer_service.py
"""
SKU hot-product (爆款) coefficient analysis service.

Implements the algorithm described in
《2025-11-23:SKU 爆款系数算法逻辑总结(V2.0)》.
"""
from datetime import datetime
from typing import Any, Dict

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
  9. def analyze_hotproduct(df, filename):
  10. """
  11. 分析SKU爆款系数
  12. Args:
  13. df: 清洗后的DataFrame,需包含test2.csv相关字段
  14. filename: 文件名(用于日志)
  15. Returns:
  16. dict: 爆款分析结果
  17. """
  18. print(f"🔥 开始爆款系数分析: {filename}")
  19. # 1. 数据准备与字段映射
  20. df_work = df.copy()
  21. # 字段映射(适配系统现有字段)
  22. required_fields = {
  23. '订单付款时间': '下单时间',
  24. 'SKU': '商品ID',
  25. '购买数量': '数量',
  26. '价格': '价格',
  27. '买家实际支付金额': '商家实收',
  28. '订单状态': '订单状态'
  29. }
  30. # 重命名字段以匹配算法文档
  31. for sys_field, doc_field in required_fields.items():
  32. if sys_field in df_work.columns:
  33. df_work[doc_field] = df_work[sys_field]
  34. # 添加小时字段
  35. df_work['小时'] = pd.to_datetime(df_work['下单时间']).dt.hour
  36. # 添加用户ID(如果没有则用订单号模拟)
  37. if '买家会员名' in df_work.columns:
  38. df_work['用户ID'] = df_work['买家会员名']
  39. elif '订单编号' in df_work.columns:
  40. df_work['用户ID'] = df_work['订单编号']
  41. else:
  42. # 生成模拟用户ID
  43. df_work['用户ID'] = range(len(df_work))
  44. # 2. 筛选交易成功订单
  45. valid = df_work[df_work['订单状态'] == '交易成功'].copy()
  46. print(f"✅ 有效订单数: {len(valid)}")
  47. if len(valid) == 0:
  48. return {
  49. 'success': False,
  50. 'message': '没有交易成功的订单',
  51. 'data': {}
  52. }
  53. # 3. 聚合到SKU级别
  54. sku_agg = valid.groupby('商品ID').agg(
  55. 总销量=('数量', 'sum'),
  56. 总实收=('商家实收', 'sum'),
  57. 标价=('价格', 'mean'),
  58. 首单=('下单时间', 'min'),
  59. 末单=('下单时间', 'max'),
  60. 总UID=('用户ID', 'nunique')
  61. ).reset_index()
  62. # 计算天数跨度
  63. sku_agg['天数跨度'] = (sku_agg['末单'] - sku_agg['首单']).dt.days + 1
  64. # 避免除零
  65. sku_agg['天数跨度'] = sku_agg['天数跨度'].apply(lambda x: max(x, 1))
  66. # 计算单位时间销量
  67. sku_agg['单位时间销量'] = sku_agg['总销量'] / sku_agg['天数跨度']
  68. # 计算实收率(价格接受度)
  69. sku_agg['实收率'] = sku_agg['总实收'] / (sku_agg['标价'] * sku_agg['总销量'])
  70. # 限制实收率范围在0-1之间(可能存在折扣或优惠券导致>1的情况)
  71. sku_agg['实收率'] = sku_agg['实收率'].clip(0, 1)
  72. # 4. 计算退款率
  73. if '退款状态' in df_work.columns:
  74. refund_count = df_work[df_work['退款状态'] == '退款成功'].groupby('商品ID').size()
  75. total_count = df_work.groupby('商品ID').size()
  76. refund_rate = refund_count / total_count
  77. else:
  78. # 如果没有退款状态字段,假设退款率为0
  79. refund_rate = pd.Series(0, index=sku_agg['商品ID'])
  80. sku_agg['退款率'] = sku_agg['商品ID'].map(refund_rate).fillna(0)
  81. sku_agg['稳定性得分'] = 1 - sku_agg['退款率']
  82. # 5. 计算复购率
  83. uid_buy = valid.groupby(['商品ID', '用户ID']).size().reset_index(name='购买次数')
  84. rep_uid = uid_buy[uid_buy['购买次数'] > 1].groupby('商品ID').size()
  85. sku_agg['复购UID数'] = sku_agg['商品ID'].map(rep_uid).fillna(0)
  86. sku_agg['复购率'] = (sku_agg['复购UID数'] / sku_agg['总UID']).fillna(0)
  87. # 6. 计算夜间占比(0-6点)
  88. night_orders = valid[valid['小时'].between(0, 6)]
  89. night_count = night_orders.groupby('商品ID').size()
  90. total_orders = valid.groupby('商品ID').size()
  91. night_ratio = night_count / total_orders
  92. sku_agg['夜间占比'] = sku_agg['商品ID'].map(night_ratio).fillna(0)
  93. # 7. 归一化处理(MinMaxScaler)
  94. metrics_cols = ['单位时间销量', '实收率', '稳定性得分', '复购率', '夜间占比']
  95. scaler = MinMaxScaler()
  96. sku_agg[metrics_cols] = scaler.fit_transform(sku_agg[metrics_cols])
  97. # 8. 计算爆款系数(按权重加权)
  98. weights = {
  99. '单位时间销量': 0.4, # 销售热度 40%
  100. '实收率': 0.3, # 价格接受度 30%
  101. '稳定性得分': 0.1, # 退款稳定性 10%
  102. '复购率': 0.1, # 复购热度 10%
  103. '夜间占比': 0.1 # 夜间爆发力 10%
  104. }
  105. sku_agg['爆款系数'] = (
  106. weights['单位时间销量'] * sku_agg['单位时间销量'] +
  107. weights['实收率'] * sku_agg['实收率'] +
  108. weights['稳定性得分'] * sku_agg['稳定性得分'] +
  109. weights['复购率'] * sku_agg['复购率'] +
  110. weights['夜间占比'] * sku_agg['夜间占比']
  111. )
  112. # 9. 分级标记
  113. def classify_level(score):
  114. if score >= 0.80:
  115. return '超级爆款'
  116. elif score >= 0.60:
  117. return '潜力爆款'
  118. elif score >= 0.40:
  119. return '常规款'
  120. else:
  121. return '清货款'
  122. sku_agg['爆款等级'] = sku_agg['爆款系数'].apply(classify_level)
  123. # 10. 获取商品标题
  124. title_map = valid.groupby('商品ID')['商品标题'].first() if '商品标题' in valid.columns else {}
  125. sku_agg['商品标题'] = sku_agg['商品ID'].map(title_map).fillna('未知商品')
  126. # 11. 格式化结果
  127. results = {}
  128. for _, row in sku_agg.iterrows():
  129. sku_id = row['商品ID']
  130. results[sku_id] = {
  131. 'sku_id': sku_id,
  132. 'product_title': row['商品标题'],
  133. 'hotproduct_score': round(float(row['爆款系数']), 4),
  134. 'hotproduct_level': row['爆款等级'],
  135. 'metrics': {
  136. 'sales_heat': round(float(row['单位时间销量']), 4), # 销售热度
  137. 'price_acceptance': round(float(row['实收率']), 4), # 价格接受度
  138. 'refund_stability': round(float(row['稳定性得分']), 4), # 退款稳定性
  139. 'repurchase_rate': round(float(row['复购率']), 4), # 复购热度
  140. 'night_burst': round(float(row['夜间占比']), 4) # 夜间爆发力
  141. },
  142. 'raw_data': {
  143. 'total_quantity': int(row['总销量']),
  144. 'total_revenue': round(float(row['总实收']), 2),
  145. 'avg_price': round(float(row['标价']), 2),
  146. 'days_span': int(row['天数跨度']),
  147. 'daily_sales': round(float(row['总销量'] / row['天数跨度']), 2),
  148. 'unique_buyers': int(row['总UID']),
  149. 'repurchase_buyers': int(row['复购UID数']),
  150. 'refund_rate': round(float(row['退款率']), 4)
  151. }
  152. }
  153. # 12. 统计概览
  154. level_dist = sku_agg['爆款等级'].value_counts().to_dict()
  155. summary = {
  156. 'total_sku_count': len(sku_agg),
  157. 'level_distribution': level_dist,
  158. 'avg_score': round(float(sku_agg['爆款系数'].mean()), 4),
  159. 'max_score': round(float(sku_agg['爆款系数'].max()), 4),
  160. 'min_score': round(float(sku_agg['爆款系数'].min()), 4),
  161. 'top_5_skus': sku_agg.nlargest(5, '爆款系数')['商品ID'].tolist()
  162. }
  163. print(f"✅ 爆款分析完成: {len(results)} 个SKU")
  164. print(f"📊 等级分布: {level_dist}")
  165. return {
  166. 'success': True,
  167. 'message': '爆款系数分析完成',
  168. 'data': {
  169. 'sku_results': results,
  170. 'summary': summary,
  171. 'weights': weights
  172. }
  173. }