sale_overview_service.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 销售概览分析服务
  5. """
  6. import pandas as pd
  7. import numpy as np
  8. from datetime import datetime
  9. from collections import defaultdict
  10. import json
  11. def analyze_sale_overview(df, filename):
  12. """
  13. 分析销售概览数据
  14. Args:
  15. df: 销售数据DataFrame
  16. filename: 文件名
  17. Returns:
  18. dict: 分析结果
  19. """
  20. try:
  21. print(f"开始分析销售概览数据: {filename}")
  22. print(f"数据形状: {df.shape}")
  23. # 1. 数据预处理
  24. df = preprocess_data(df)
  25. # 2. 计算总体指标
  26. summary = calculate_summary(df)
  27. # 3. 按品类分析
  28. categories = analyze_categories(df)
  29. # 4. 按SKU分析
  30. sku_data = analyze_skus(df)
  31. # 5. 趋势分析
  32. trends = analyze_trends(df)
  33. # 6. 异常检测
  34. anomalies = detect_anomalies(df)
  35. # 7. 构建结果
  36. results = {
  37. 'summary': summary,
  38. 'categories': categories['categories_data'],
  39. 'category_list': categories['category_list'],
  40. 'category_skus': categories['category_skus'],
  41. 'data': sku_data['sku_data'],
  42. 'sku_list': sku_data['sku_list'],
  43. 'trends': trends,
  44. 'anomalies': anomalies
  45. }
  46. print("销售概览分析完成")
  47. return results
  48. except Exception as e:
  49. print(f"分析销售概览时出错: {str(e)}")
  50. raise
  51. def preprocess_data(df):
  52. """
  53. 预处理数据
  54. """
  55. # 复制数据以避免修改原始数据
  56. df = df.copy()
  57. # 处理日期列
  58. if '订单创建时间' in df.columns:
  59. df['订单创建时间'] = pd.to_datetime(df['订单创建时间'], errors='coerce')
  60. # 提取日期部分
  61. df['日期'] = df['订单创建时间'].dt.date
  62. # 处理数值列
  63. numeric_columns = ['价格', '购买数量', '买家应付货款', '买家实际支付金额', '退款金额']
  64. for col in numeric_columns:
  65. if col in df.columns:
  66. df[col] = pd.to_numeric(df[col], errors='coerce')
  67. # 填充缺失值
  68. df = df.fillna({
  69. '价格': 0,
  70. '购买数量': 0,
  71. '买家应付货款': 0,
  72. '买家实际支付金额': 0,
  73. '退款金额': 0
  74. })
  75. # 确保SKU列存在
  76. if '商家编码' in df.columns:
  77. df['SKU'] = df['商家编码']
  78. elif '外部系统编号' in df.columns:
  79. df['SKU'] = df['外部系统编号']
  80. else:
  81. # 如果没有SKU列,使用商品名称作为SKU
  82. df['SKU'] = df['商品名称']
  83. # 确保品类列存在
  84. if '品类' not in df.columns:
  85. # 简单品类划分:基于商品名称
  86. df['品类'] = df['商品名称'].apply(lambda x: categorize_product(x))
  87. return df
  88. def categorize_product(product_name):
  89. """
  90. 根据商品名称简单分类
  91. """
  92. product_name = str(product_name).lower()
  93. if '腰垫' in product_name or '靠垫' in product_name:
  94. return '家居用品'
  95. elif '手机' in product_name or '电脑' in product_name:
  96. return '电子产品'
  97. elif '服装' in product_name or '鞋' in product_name:
  98. return '服装鞋包'
  99. else:
  100. return '其他'
  101. def calculate_summary(df):
  102. """
  103. 计算总体指标
  104. """
  105. total_quantity = df['购买数量'].sum()
  106. total_revenue = df['买家实际支付金额'].sum()
  107. avg_price = total_revenue / total_quantity if total_quantity > 0 else 0
  108. # 计算促销力度
  109. df['促销力度'] = 1 - (df['买家实际支付金额'] / df['买家应付货款'])
  110. df['促销力度'] = df['促销力度'].fillna(0)
  111. avg_promotion = df['促销力度'].mean() * 100 # 转换为百分比
  112. # 计算退款相关指标
  113. refunded_orders = df[df['退款状态'] != '没有申请退款']
  114. total_refund = refunded_orders['退款金额'].sum()
  115. refund_rate = len(refunded_orders) / len(df) * 100 if len(df) > 0 else 0
  116. summary = {
  117. 'total_orders': len(df),
  118. 'total_quantity': int(total_quantity),
  119. 'total_revenue': round(total_revenue, 2),
  120. 'avg_price': round(avg_price, 2),
  121. 'avg_promotion': round(avg_promotion, 2),
  122. 'total_refund': round(total_refund, 2),
  123. 'refund_rate': round(refund_rate, 2)
  124. }
  125. return summary
  126. def analyze_categories(df):
  127. """
  128. 按品类分析
  129. """
  130. categories_data = {}
  131. category_list = []
  132. category_skus = defaultdict(list)
  133. # 按品类分组
  134. grouped = df.groupby('品类')
  135. for category, group in grouped:
  136. category_list.append(category)
  137. # 计算品类指标
  138. total_quantity = group['购买数量'].sum()
  139. total_revenue = group['买家实际支付金额'].sum()
  140. avg_price = total_revenue / total_quantity if total_quantity > 0 else 0
  141. # 计算促销力度
  142. group['促销力度'] = 1 - (group['买家实际支付金额'] / group['买家应付货款'])
  143. group['促销力度'] = group['促销力度'].fillna(0)
  144. avg_promotion = group['促销力度'].mean() * 100
  145. # 计算退款相关指标
  146. refunded_orders = group[group['退款状态'] != '没有申请退款']
  147. total_refund = refunded_orders['退款金额'].sum()
  148. refund_rate = len(refunded_orders) / len(group) * 100 if len(group) > 0 else 0
  149. # 趋势数据
  150. date_series = []
  151. quantity_series = []
  152. price_series = []
  153. # 按日期排序
  154. date_grouped = group.groupby('日期')
  155. for date in sorted(date_grouped.groups.keys()):
  156. date_series.append(str(date))
  157. date_data = date_grouped.get_group(date)
  158. quantity_series.append(int(date_data['购买数量'].sum()))
  159. avg_date_price = date_data['买家实际支付金额'].sum() / date_data['购买数量'].sum() if date_data['购买数量'].sum() > 0 else 0
  160. price_series.append(round(avg_date_price, 2))
  161. categories_data[category] = {
  162. 'total_quantity': int(total_quantity),
  163. 'total_revenue': round(total_revenue, 2),
  164. 'avg_price': round(avg_price, 2),
  165. 'avg_promotion': round(avg_promotion, 2),
  166. 'total_refund': round(total_refund, 2),
  167. 'refund_rate': round(refund_rate, 2),
  168. 'date_series': date_series,
  169. 'quantity_series': quantity_series,
  170. 'price_series': price_series
  171. }
  172. # 收集该品类下的SKU
  173. skus = group['SKU'].unique().tolist()
  174. category_skus[category] = skus
  175. return {
  176. 'categories_data': categories_data,
  177. 'category_list': category_list,
  178. 'category_skus': dict(category_skus)
  179. }
  180. def analyze_skus(df):
  181. """
  182. 按SKU分析
  183. """
  184. sku_data = {}
  185. sku_list = []
  186. # 按SKU分组
  187. grouped = df.groupby('SKU')
  188. for sku, group in grouped:
  189. sku_list.append(sku)
  190. # 计算SKU指标
  191. total_quantity = group['购买数量'].sum()
  192. total_revenue = group['买家实际支付金额'].sum()
  193. avg_price = total_revenue / total_quantity if total_quantity > 0 else 0
  194. # 计算促销力度
  195. group['促销力度'] = 1 - (group['买家实际支付金额'] / group['买家应付货款'])
  196. group['促销力度'] = group['促销力度'].fillna(0)
  197. avg_promotion = group['促销力度'].mean() * 100
  198. # 计算退款相关指标
  199. refunded_orders = group[group['退款状态'] != '没有申请退款']
  200. total_refund = refunded_orders['退款金额'].sum()
  201. refund_rate = len(refunded_orders) / len(group) * 100 if len(group) > 0 else 0
  202. # 趋势数据
  203. date_series = []
  204. quantity_series = []
  205. price_series = []
  206. # 按日期排序
  207. date_grouped = group.groupby('日期')
  208. for date in sorted(date_grouped.groups.keys()):
  209. date_series.append(str(date))
  210. date_data = date_grouped.get_group(date)
  211. quantity_series.append(int(date_data['购买数量'].sum()))
  212. avg_date_price = date_data['买家实际支付金额'].sum() / date_data['购买数量'].sum() if date_data['购买数量'].sum() > 0 else 0
  213. price_series.append(round(avg_date_price, 2))
  214. sku_data[sku] = {
  215. 'total_quantity': int(total_quantity),
  216. 'total_revenue': round(total_revenue, 2),
  217. 'avg_price': round(avg_price, 2),
  218. 'avg_promotion': round(avg_promotion, 2),
  219. 'total_refund': round(total_refund, 2),
  220. 'refund_rate': round(refund_rate, 2),
  221. 'date_series': date_series,
  222. 'quantity_series': quantity_series,
  223. 'price_series': price_series
  224. }
  225. return {
  226. 'sku_data': sku_data,
  227. 'sku_list': sku_list
  228. }
  229. def analyze_trends(df):
  230. """
  231. 分析趋势数据
  232. """
  233. # 按日期分组
  234. date_grouped = df.groupby('日期')
  235. date_series = []
  236. quantity_series = []
  237. revenue_series = []
  238. avg_price_series = []
  239. avg_promotion_series = []
  240. for date in sorted(date_grouped.groups.keys()):
  241. date_series.append(str(date))
  242. date_data = date_grouped.get_group(date)
  243. quantity = date_data['购买数量'].sum()
  244. revenue = date_data['买家实际支付金额'].sum()
  245. avg_price = revenue / quantity if quantity > 0 else 0
  246. # 计算促销力度
  247. date_data['促销力度'] = 1 - (date_data['买家实际支付金额'] / date_data['买家应付货款'])
  248. date_data['促销力度'] = date_data['促销力度'].fillna(0)
  249. avg_promotion = date_data['促销力度'].mean() * 100
  250. quantity_series.append(int(quantity))
  251. revenue_series.append(round(revenue, 2))
  252. avg_price_series.append(round(avg_price, 2))
  253. avg_promotion_series.append(round(avg_promotion, 2))
  254. trends = {
  255. 'date_series': date_series,
  256. 'quantity_series': quantity_series,
  257. 'revenue_series': revenue_series,
  258. 'avg_price_series': avg_price_series,
  259. 'avg_promotion_series': avg_promotion_series
  260. }
  261. return trends
  262. def detect_anomalies(df):
  263. """
  264. 检测异常数据
  265. """
  266. anomalies = []
  267. # 按SKU分组检测异常
  268. grouped = df.groupby('SKU')
  269. for sku, group in grouped:
  270. # 检测销量异常
  271. quantity_mean = group['购买数量'].mean()
  272. quantity_std = group['购买数量'].std()
  273. if quantity_std > 0:
  274. for idx, row in group.iterrows():
  275. quantity = row['购买数量']
  276. z_score = abs((quantity - quantity_mean) / quantity_std)
  277. if z_score > 2.5:
  278. anomalies.append({
  279. 'date': str(row['日期']) if pd.notna(row['日期']) else '未知',
  280. 'sku': sku,
  281. 'type': 'quantity_spike' if quantity > quantity_mean else 'quantity_drop',
  282. 'reason': f'销量异常,偏离均值 {z_score:.2f} 个标准差',
  283. 'value': float(quantity),
  284. 'expected': float(quantity_mean),
  285. 'deviation': float(z_score)
  286. })
  287. # 检测价格异常
  288. price_mean = group['价格'].mean()
  289. price_std = group['价格'].std()
  290. if price_std > 0:
  291. for idx, row in group.iterrows():
  292. price = row['价格']
  293. z_score = abs((price - price_mean) / price_std)
  294. if z_score > 2.5:
  295. anomalies.append({
  296. 'date': str(row['日期']) if pd.notna(row['日期']) else '未知',
  297. 'sku': sku,
  298. 'type': 'price_spike' if price > price_mean else 'price_drop',
  299. 'reason': f'价格异常,偏离均值 {z_score:.2f} 个标准差',
  300. 'value': float(price),
  301. 'expected': float(price_mean),
  302. 'deviation': float(z_score)
  303. })
  304. anomaly_count = len(anomalies)
  305. anomaly_rate = (anomaly_count / len(df)) * 100 if len(df) > 0 else 0
  306. return {
  307. 'anomaly_count': anomaly_count,
  308. 'anomaly_rate': round(anomaly_rate, 2),
  309. 'anomalies': anomalies
  310. }