sale_effect_service.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 促销效果分析服务
  5. """
  6. import pandas as pd
  7. import numpy as np
  8. from datetime import datetime
  9. from collections import defaultdict
  10. import json
  11. def analyze_promotion_effect(df, filename):
  12. """
  13. 分析促销效果
  14. Args:
  15. df: 销售数据DataFrame
  16. filename: 文件名
  17. Returns:
  18. dict: 分析结果
  19. """
  20. try:
  21. print(f"开始分析促销效果: {filename}")
  22. print(f"数据形状: {df.shape}")
  23. # 1. 数据预处理
  24. df = preprocess_data(df)
  25. # 2. 计算总体指标
  26. summary = calculate_summary(df)
  27. # 3. 按促销类型分析
  28. promotion_types = analyze_promotion_types(df)
  29. # 4. 按时间分析促销效果
  30. time_analysis = analyze_time_effect(df)
  31. # 5. 按品类分析促销效果
  32. category_analysis = analyze_category_effect(df)
  33. # 6. 按SKU分析促销效果
  34. sku_analysis = analyze_sku_effect(df)
  35. # 7. 促销效果评估
  36. effect_evaluation = evaluate_promotion_effect(df)
  37. # 8. 构建结果
  38. results = {
  39. 'summary': summary,
  40. 'promotion_types': promotion_types,
  41. 'time_analysis': time_analysis,
  42. 'category_analysis': category_analysis,
  43. 'sku_analysis': sku_analysis,
  44. 'effect_evaluation': effect_evaluation
  45. }
  46. # 转换所有 numpy 类型为 Python 原生类型
  47. results = convert_numpy_types(results)
  48. print("促销效果分析完成")
  49. return results
  50. except Exception as e:
  51. print(f"分析促销效果时出错: {str(e)}")
  52. raise
  53. def preprocess_data(df):
  54. """
  55. 预处理数据
  56. """
  57. # 复制数据以避免修改原始数据
  58. df = df.copy()
  59. # 处理日期列
  60. if '订单创建时间' in df.columns:
  61. df['订单创建时间'] = pd.to_datetime(df['订单创建时间'], errors='coerce')
  62. # 提取日期部分
  63. df['日期'] = df['订单创建时间'].dt.date
  64. # 处理数值列
  65. numeric_columns = ['价格', '购买数量', '买家应付货款', '买家实际支付金额', '退款金额']
  66. for col in numeric_columns:
  67. if col in df.columns:
  68. df[col] = pd.to_numeric(df[col], errors='coerce')
  69. # 填充缺失值
  70. df = df.fillna({
  71. '价格': 0,
  72. '购买数量': 0,
  73. '买家应付货款': 0,
  74. '买家实际支付金额': 0,
  75. '退款金额': 0
  76. })
  77. # 确保SKU列存在
  78. if '商家编码' in df.columns:
  79. df['SKU'] = df['商家编码']
  80. elif '外部系统编号' in df.columns:
  81. df['SKU'] = df['外部系统编号']
  82. else:
  83. # 如果没有SKU列,使用商品名称作为SKU
  84. df['SKU'] = df['商品名称']
  85. # 确保品类列存在
  86. if '品类' not in df.columns:
  87. # 简单品类划分:基于商品名称
  88. df['品类'] = df['商品名称'].apply(lambda x: categorize_product(x))
  89. # 计算促销力度
  90. df['促销力度'] = 1 - (df['买家实际支付金额'] / df['买家应付货款'])
  91. df['促销力度'] = df['促销力度'].fillna(0)
  92. # 标记是否为促销商品
  93. df['是否促销'] = df['促销力度'] > 0
  94. return df
  95. def categorize_product(product_name):
  96. """
  97. 根据商品名称简单分类
  98. """
  99. product_name = str(product_name).lower()
  100. if '腰垫' in product_name or '靠垫' in product_name:
  101. return '家居用品'
  102. elif '手机' in product_name or '电脑' in product_name:
  103. return '电子产品'
  104. elif '服装' in product_name or '鞋' in product_name:
  105. return '服装鞋包'
  106. else:
  107. return '其他'
  108. def calculate_summary(df):
  109. """
  110. 计算总体指标
  111. """
  112. # 计算促销商品数量
  113. promotional_products = df[df['是否促销']]
  114. non_promotional_products = df[~df['是否促销']]
  115. # 促销商品指标
  116. promo_quantity = promotional_products['购买数量'].sum()
  117. promo_revenue = promotional_products['买家实际支付金额'].sum()
  118. promo_avg_price = promo_revenue / promo_quantity if promo_quantity > 0 else 0
  119. promo_avg_promotion = promotional_products['促销力度'].mean() * 100
  120. # 非促销商品指标
  121. non_promo_quantity = non_promotional_products['购买数量'].sum()
  122. non_promo_revenue = non_promotional_products['买家实际支付金额'].sum()
  123. non_promo_avg_price = non_promo_revenue / non_promo_quantity if non_promo_quantity > 0 else 0
  124. # 计算促销效果
  125. quantity_effect = (promo_quantity - non_promo_quantity) / non_promo_quantity * 100 if non_promo_quantity > 0 else 0
  126. revenue_effect = (promo_revenue - non_promo_revenue) / non_promo_revenue * 100 if non_promo_revenue > 0 else 0
  127. summary = {
  128. 'total_orders': len(df),
  129. 'promotional_orders': len(promotional_products),
  130. 'non_promotional_orders': len(non_promotional_products),
  131. 'promotional_ratio': len(promotional_products) / len(df) * 100 if len(df) > 0 else 0,
  132. 'promo_quantity': int(promo_quantity),
  133. 'non_promo_quantity': int(non_promo_quantity),
  134. 'promo_revenue': round(promo_revenue, 2),
  135. 'non_promo_revenue': round(non_promo_revenue, 2),
  136. 'promo_avg_price': round(promo_avg_price, 2),
  137. 'non_promo_avg_price': round(non_promo_avg_price, 2),
  138. 'avg_promotion': round(promo_avg_promotion, 2),
  139. 'quantity_effect': round(quantity_effect, 2),
  140. 'revenue_effect': round(revenue_effect, 2)
  141. }
  142. # 转换所有 numpy 类型为 Python 原生类型
  143. summary = convert_numpy_types(summary)
  144. return summary
  145. def analyze_promotion_types(df):
  146. """
  147. 按促销类型分析
  148. """
  149. # 基于促销力度划分促销类型
  150. def get_promotion_type(promotion):
  151. if promotion == 0:
  152. return '无促销'
  153. elif promotion < 0.1:
  154. return '小幅促销'
  155. elif promotion < 0.3:
  156. return '中幅促销'
  157. else:
  158. return '大幅促销'
  159. df['促销类型'] = df['促销力度'].apply(get_promotion_type)
  160. # 按促销类型分组
  161. grouped = df.groupby('促销类型')
  162. promotion_types = {}
  163. type_list = []
  164. for promotion_type, group in grouped:
  165. type_list.append(promotion_type)
  166. quantity = group['购买数量'].sum()
  167. revenue = group['买家实际支付金额'].sum()
  168. avg_price = revenue / quantity if quantity > 0 else 0
  169. avg_promotion = group['促销力度'].mean() * 100
  170. order_count = len(group)
  171. promotion_types[promotion_type] = {
  172. 'quantity': int(quantity),
  173. 'revenue': round(revenue, 2),
  174. 'avg_price': round(avg_price, 2),
  175. 'avg_promotion': round(avg_promotion, 2),
  176. 'order_count': order_count
  177. }
  178. return {
  179. 'types': promotion_types,
  180. 'type_list': type_list
  181. }
  182. def analyze_time_effect(df):
  183. """
  184. 按时间分析促销效果
  185. """
  186. # 按日期分组
  187. date_grouped = df.groupby('日期')
  188. date_series = []
  189. promo_quantity_series = []
  190. non_promo_quantity_series = []
  191. promo_revenue_series = []
  192. non_promo_revenue_series = []
  193. avg_promotion_series = []
  194. for date in sorted(date_grouped.groups.keys()):
  195. date_series.append(str(date))
  196. date_data = date_grouped.get_group(date)
  197. # 促销商品
  198. promo_data = date_data[date_data['是否促销']]
  199. promo_quantity = promo_data['购买数量'].sum()
  200. promo_revenue = promo_data['买家实际支付金额'].sum()
  201. # 非促销商品
  202. non_promo_data = date_data[~date_data['是否促销']]
  203. non_promo_quantity = non_promo_data['购买数量'].sum()
  204. non_promo_revenue = non_promo_data['买家实际支付金额'].sum()
  205. # 平均促销力度
  206. avg_promotion = promo_data['促销力度'].mean() * 100 if len(promo_data) > 0 else 0
  207. promo_quantity_series.append(int(promo_quantity))
  208. non_promo_quantity_series.append(int(non_promo_quantity))
  209. promo_revenue_series.append(round(promo_revenue, 2))
  210. non_promo_revenue_series.append(round(non_promo_revenue, 2))
  211. avg_promotion_series.append(round(avg_promotion, 2))
  212. return {
  213. 'date_series': date_series,
  214. 'promo_quantity_series': promo_quantity_series,
  215. 'non_promo_quantity_series': non_promo_quantity_series,
  216. 'promo_revenue_series': promo_revenue_series,
  217. 'non_promo_revenue_series': non_promo_revenue_series,
  218. 'avg_promotion_series': avg_promotion_series
  219. }
  220. def analyze_category_effect(df):
  221. """
  222. 按品类分析促销效果
  223. """
  224. # 按品类分组
  225. grouped = df.groupby('品类')
  226. category_effects = {}
  227. category_list = []
  228. for category, group in grouped:
  229. category_list.append(category)
  230. # 促销商品
  231. promo_data = group[group['是否促销']]
  232. promo_quantity = promo_data['购买数量'].sum()
  233. promo_revenue = promo_data['买家实际支付金额'].sum()
  234. # 非促销商品
  235. non_promo_data = group[~group['是否促销']]
  236. non_promo_quantity = non_promo_data['购买数量'].sum()
  237. non_promo_revenue = non_promo_data['买家实际支付金额'].sum()
  238. # 计算促销效果
  239. quantity_effect = (promo_quantity - non_promo_quantity) / non_promo_quantity * 100 if non_promo_quantity > 0 else 0
  240. revenue_effect = (promo_revenue - non_promo_revenue) / non_promo_revenue * 100 if non_promo_revenue > 0 else 0
  241. # 平均促销力度
  242. avg_promotion = promo_data['促销力度'].mean() * 100 if len(promo_data) > 0 else 0
  243. category_effects[category] = {
  244. 'promo_quantity': int(promo_quantity),
  245. 'non_promo_quantity': int(non_promo_quantity),
  246. 'promo_revenue': round(promo_revenue, 2),
  247. 'non_promo_revenue': round(non_promo_revenue, 2),
  248. 'quantity_effect': round(quantity_effect, 2),
  249. 'revenue_effect': round(revenue_effect, 2),
  250. 'avg_promotion': round(avg_promotion, 2),
  251. 'promo_order_count': len(promo_data),
  252. 'non_promo_order_count': len(non_promo_data)
  253. }
  254. return {
  255. 'category_effects': category_effects,
  256. 'category_list': category_list
  257. }
  258. def analyze_sku_effect(df):
  259. """
  260. 按SKU分析促销效果
  261. """
  262. # 按SKU分组
  263. grouped = df.groupby('SKU')
  264. sku_effects = {}
  265. sku_list = []
  266. for sku, group in grouped:
  267. sku_list.append(sku)
  268. # 促销商品
  269. promo_data = group[group['是否促销']]
  270. promo_quantity = promo_data['购买数量'].sum()
  271. promo_revenue = promo_data['买家实际支付金额'].sum()
  272. # 非促销商品
  273. non_promo_data = group[~group['是否促销']]
  274. non_promo_quantity = non_promo_data['购买数量'].sum()
  275. non_promo_revenue = non_promo_data['买家实际支付金额'].sum()
  276. # 计算促销效果
  277. quantity_effect = (promo_quantity - non_promo_quantity) / non_promo_quantity * 100 if non_promo_quantity > 0 else 0
  278. revenue_effect = (promo_revenue - non_promo_revenue) / non_promo_revenue * 100 if non_promo_revenue > 0 else 0
  279. # 平均促销力度
  280. avg_promotion = promo_data['促销力度'].mean() * 100 if len(promo_data) > 0 else 0
  281. sku_effects[sku] = {
  282. 'promo_quantity': int(promo_quantity),
  283. 'non_promo_quantity': int(non_promo_quantity),
  284. 'promo_revenue': round(promo_revenue, 2),
  285. 'non_promo_revenue': round(non_promo_revenue, 2),
  286. 'quantity_effect': round(quantity_effect, 2),
  287. 'revenue_effect': round(revenue_effect, 2),
  288. 'avg_promotion': round(avg_promotion, 2),
  289. 'promo_order_count': len(promo_data),
  290. 'non_promo_order_count': len(non_promo_data)
  291. }
  292. return {
  293. 'sku_effects': sku_effects,
  294. 'sku_list': sku_list
  295. }
  296. def evaluate_promotion_effect(df):
  297. """
  298. 评估促销效果
  299. """
  300. # 促销商品
  301. promo_data = df[df['是否促销']]
  302. # 非促销商品
  303. non_promo_data = df[~df['是否促销']]
  304. # 计算各项指标
  305. # 1. 销量提升率
  306. promo_quantity = promo_data['购买数量'].sum()
  307. non_promo_quantity = non_promo_data['购买数量'].sum()
  308. quantity_lift = (promo_quantity - non_promo_quantity) / non_promo_quantity * 100 if non_promo_quantity > 0 else 0
  309. # 2. revenue提升率
  310. promo_revenue = promo_data['买家实际支付金额'].sum()
  311. non_promo_revenue = non_promo_data['买家实际支付金额'].sum()
  312. revenue_lift = (promo_revenue - non_promo_revenue) / non_promo_revenue * 100 if non_promo_revenue > 0 else 0
  313. # 3. 促销商品占比
  314. promo_ratio = len(promo_data) / len(df) * 100 if len(df) > 0 else 0
  315. # 4. 平均促销力度
  316. avg_promotion = promo_data['促销力度'].mean() * 100 if len(promo_data) > 0 else 0
  317. # 5. 促销效果评分
  318. # 基于销量提升率、revenue提升率和促销力度计算综合评分
  319. score = 0
  320. if quantity_lift > 0:
  321. score += quantity_lift * 0.4
  322. if revenue_lift > 0:
  323. score += revenue_lift * 0.4
  324. score += (100 - avg_promotion) * 0.2 # 促销力度越小,得分越高
  325. score = min(100, max(0, score))
  326. # 6. 效果等级
  327. if score >= 80:
  328. level = '优秀'
  329. elif score >= 60:
  330. level = '良好'
  331. elif score >= 40:
  332. level = '一般'
  333. else:
  334. level = '需改进'
  335. return {
  336. 'quantity_lift': round(quantity_lift, 2),
  337. 'revenue_lift': round(revenue_lift, 2),
  338. 'promo_ratio': round(promo_ratio, 2),
  339. 'avg_promotion': round(avg_promotion, 2),
  340. 'score': round(score, 2),
  341. 'level': level
  342. }
  343. def convert_numpy_types(obj):
  344. """
  345. 递归转换所有 numpy 类型为 Python 原生类型
  346. Args:
  347. obj: 要转换的对象
  348. Returns:
  349. 转换后的对象
  350. """
  351. if isinstance(obj, dict):
  352. return {key: convert_numpy_types(value) for key, value in obj.items()}
  353. elif isinstance(obj, list):
  354. return [convert_numpy_types(item) for item in obj]
  355. elif isinstance(obj, np.integer):
  356. return int(obj)
  357. elif isinstance(obj, np.floating):
  358. return float(obj)
  359. elif isinstance(obj, np.ndarray):
  360. return [convert_numpy_types(item) for item in obj.tolist()]
  361. else:
  362. return obj