#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Sales trend analysis and forecasting service (sale_trend_service.py).
"""
import json
from collections import defaultdict
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
  11. def analyze_sale_trend(df, filename):
  12. """
  13. 分析销售趋势数据
  14. Args:
  15. df: 销售数据DataFrame
  16. filename: 文件名
  17. Returns:
  18. dict: 分析结果
  19. """
  20. try:
  21. print(f"开始分析销售趋势数据: {filename}")
  22. print(f"数据形状: {df.shape}")
  23. # 1. 数据预处理
  24. df = preprocess_data(df)
  25. # 2. 计算总体指标
  26. summary = calculate_summary(df)
  27. # 3. 按品类分析趋势
  28. categories = analyze_categories(df)
  29. # 4. 按SKU分析趋势
  30. sku_data = analyze_skus(df)
  31. # 5. 整体趋势分析
  32. trends = analyze_trends(df)
  33. # 6. 季节性分析
  34. seasonality = analyze_seasonality(df)
  35. # 7. 计算特征重要性
  36. feature_importance = calculate_feature_importance(df)
  37. # 8. 构建结果
  38. results = {
  39. 'summary': summary,
  40. 'categories': categories['categories_data'],
  41. 'category_list': categories['category_list'],
  42. 'category_skus': categories['category_skus'],
  43. 'data': sku_data['sku_data'],
  44. 'sku_list': sku_data['sku_list'],
  45. 'trends': trends,
  46. 'seasonality': seasonality,
  47. 'feature_importance': feature_importance
  48. }
  49. # 转换所有 numpy 类型为 Python 原生类型
  50. results = convert_numpy_types(results)
  51. print("销售趋势分析完成")
  52. return results
  53. except Exception as e:
  54. print(f"分析销售趋势时出错: {str(e)}")
  55. raise
  56. def predict_sales_trend(df, filename, predict_days=30):
  57. """
  58. 预测销售趋势
  59. Args:
  60. df: 销售数据DataFrame
  61. filename: 文件名
  62. predict_days: 预测天数
  63. Returns:
  64. dict: 预测结果
  65. """
  66. try:
  67. print(f"开始预测销售趋势: {filename}")
  68. print(f"预测天数: {predict_days}")
  69. # 1. 数据预处理
  70. df = preprocess_data(df)
  71. # 2. 计算基础指标
  72. summary = calculate_summary(df)
  73. # 3. 整体趋势预测
  74. overall_prediction = predict_overall_trend(df, predict_days)
  75. # 4. 按品类预测
  76. category_predictions = predict_category_trends(df, predict_days)
  77. # 5. 按SKU预测
  78. sku_predictions = predict_sku_trends(df, predict_days)
  79. # 6. 计算特征重要性
  80. feature_importance = calculate_feature_importance(df)
  81. # 7. 构建结果
  82. results = {
  83. 'summary': summary,
  84. 'overall_prediction': overall_prediction,
  85. 'category_predictions': category_predictions,
  86. 'sku_predictions': sku_predictions,
  87. 'feature_importance': feature_importance,
  88. 'predict_days': predict_days
  89. }
  90. # 转换所有 numpy 类型为 Python 原生类型
  91. results = convert_numpy_types(results)
  92. print("销售趋势预测完成")
  93. return results
  94. except Exception as e:
  95. print(f"预测销售趋势时出错: {str(e)}")
  96. raise
  97. def convert_numpy_types(obj):
  98. """
  99. 递归转换所有 numpy 类型为 Python 原生类型
  100. Args:
  101. obj: 要转换的对象
  102. Returns:
  103. 转换后的对象
  104. """
  105. if isinstance(obj, dict):
  106. return {key: convert_numpy_types(value) for key, value in obj.items()}
  107. elif isinstance(obj, list):
  108. return [convert_numpy_types(item) for item in obj]
  109. elif isinstance(obj, np.integer):
  110. return int(obj)
  111. elif isinstance(obj, np.floating):
  112. return float(obj)
  113. elif isinstance(obj, np.ndarray):
  114. return [convert_numpy_types(item) for item in obj.tolist()]
  115. else:
  116. return obj
  117. def calculate_feature_importance(df):
  118. """
  119. 计算特征重要性
  120. Args:
  121. df: 销售数据DataFrame
  122. Returns:
  123. dict: 特征重要性数据
  124. """
  125. try:
  126. print("计算特征重要性")
  127. # 特征重要性数据
  128. feature_importance = {
  129. 'features': [
  130. '价格',
  131. '促销力度',
  132. '退款率',
  133. '季节性',
  134. '时间趋势',
  135. '品类影响',
  136. 'SKU影响'
  137. ],
  138. 'importance': []
  139. }
  140. # 计算价格重要性
  141. if '价格' in df.columns:
  142. price_std = df['价格'].std()
  143. price_importance = min(100, (price_std / df['价格'].mean() * 100) if df['价格'].mean() > 0 else 50)
  144. else:
  145. price_importance = 50
  146. feature_importance['importance'].append(round(price_importance, 2))
  147. # 计算促销力度重要性
  148. if '促销力度' in df.columns:
  149. promotion_std = df['促销力度'].std()
  150. promotion_importance = min(100, (promotion_std * 100) if promotion_std > 0 else 40)
  151. else:
  152. # 计算促销力度
  153. df['促销力度'] = 1 - (df['买家实际支付金额'] / df['买家应付货款'])
  154. df['促销力度'] = df['促销力度'].fillna(0)
  155. promotion_std = df['促销力度'].std()
  156. promotion_importance = min(100, (promotion_std * 100) if promotion_std > 0 else 40)
  157. feature_importance['importance'].append(round(promotion_importance, 2))
  158. # 计算退款率重要性
  159. refunded_orders = df[df['退款状态'] != '没有申请退款']
  160. refund_rate = len(refunded_orders) / len(df) * 100 if len(df) > 0 else 0
  161. refund_importance = min(100, refund_rate * 2)
  162. feature_importance['importance'].append(round(refund_importance, 2))
  163. # 计算季节性重要性
  164. # 基于销量的波动性
  165. if '购买数量' in df.columns:
  166. quantity_std = df['购买数量'].std()
  167. quantity_mean = df['购买数量'].mean()
  168. seasonality_importance = min(100, (quantity_std / quantity_mean * 100) if quantity_mean > 0 else 30)
  169. else:
  170. seasonality_importance = 30
  171. feature_importance['importance'].append(round(seasonality_importance, 2))
  172. # 计算时间趋势重要性
  173. # 基于销量的变化趋势
  174. time_importance = 60 # 默认值
  175. feature_importance['importance'].append(time_importance)
  176. # 计算品类影响重要性
  177. if '品类' in df.columns:
  178. category_count = df['品类'].nunique()
  179. category_importance = min(100, category_count * 10)
  180. else:
  181. category_importance = 40
  182. feature_importance['importance'].append(round(category_importance, 2))
  183. # 计算SKU影响重要性
  184. if 'SKU' in df.columns:
  185. sku_count = df['SKU'].nunique()
  186. sku_importance = min(100, sku_count * 5)
  187. else:
  188. sku_importance = 30
  189. feature_importance['importance'].append(round(sku_importance, 2))
  190. # 标准化重要性值,确保总和为100
  191. total_importance = sum(feature_importance['importance'])
  192. if total_importance > 0:
  193. feature_importance['importance'] = [round((imp / total_importance) * 100, 2) for imp in feature_importance['importance']]
  194. print("特征重要性计算完成")
  195. return feature_importance
  196. except Exception as e:
  197. print(f"计算特征重要性时出错: {str(e)}")
  198. # 返回默认值
  199. return {
  200. 'features': [
  201. '价格',
  202. '促销力度',
  203. '退款率',
  204. '季节性',
  205. '时间趋势',
  206. '品类影响',
  207. 'SKU影响'
  208. ],
  209. 'importance': [15, 15, 10, 15, 20, 10, 15]
  210. }
  211. def preprocess_data(df):
  212. """
  213. 预处理数据
  214. """
  215. # 复制数据以避免修改原始数据
  216. df = df.copy()
  217. # 处理日期列
  218. if '订单创建时间' in df.columns:
  219. df['订单创建时间'] = pd.to_datetime(df['订单创建时间'], errors='coerce')
  220. # 提取日期部分
  221. df['日期'] = df['订单创建时间'].dt.date
  222. # 处理数值列
  223. numeric_columns = ['价格', '购买数量', '买家应付货款', '买家实际支付金额', '退款金额']
  224. for col in numeric_columns:
  225. if col in df.columns:
  226. df[col] = pd.to_numeric(df[col], errors='coerce')
  227. # 填充缺失值
  228. df = df.fillna({
  229. '价格': 0,
  230. '购买数量': 0,
  231. '买家应付货款': 0,
  232. '买家实际支付金额': 0,
  233. '退款金额': 0
  234. })
  235. # 确保SKU列存在
  236. if '商家编码' in df.columns:
  237. df['SKU'] = df['商家编码']
  238. elif '外部系统编号' in df.columns:
  239. df['SKU'] = df['外部系统编号']
  240. else:
  241. # 如果没有SKU列,使用商品名称作为SKU
  242. df['SKU'] = df['商品名称']
  243. # 确保品类列存在
  244. if '品类' not in df.columns:
  245. # 简单品类划分:基于商品名称
  246. df['品类'] = df['商品名称'].apply(lambda x: categorize_product(x))
  247. return df
  248. def categorize_product(product_name):
  249. """
  250. 根据商品名称简单分类
  251. """
  252. product_name = str(product_name).lower()
  253. if '腰垫' in product_name or '靠垫' in product_name:
  254. return '家居用品'
  255. elif '手机' in product_name or '电脑' in product_name:
  256. return '电子产品'
  257. elif '服装' in product_name or '鞋' in product_name:
  258. return '服装鞋包'
  259. else:
  260. return '其他'
  261. def calculate_summary(df):
  262. """
  263. 计算总体指标
  264. """
  265. total_quantity = df['购买数量'].sum()
  266. total_revenue = df['买家实际支付金额'].sum()
  267. avg_price = total_revenue / total_quantity if total_quantity > 0 else 0
  268. # 计算促销力度
  269. df['促销力度'] = 1 - (df['买家实际支付金额'] / df['买家应付货款'])
  270. df['促销力度'] = df['促销力度'].fillna(0)
  271. avg_promotion = df['促销力度'].mean() * 100 # 转换为百分比
  272. # 计算退款相关指标
  273. refunded_orders = df[df['退款状态'] != '没有申请退款']
  274. total_refund = refunded_orders['退款金额'].sum()
  275. refund_rate = len(refunded_orders) / len(df) * 100 if len(df) > 0 else 0
  276. summary = {
  277. 'total_orders': len(df),
  278. 'total_quantity': int(total_quantity),
  279. 'total_revenue': round(total_revenue, 2),
  280. 'avg_price': round(avg_price, 2),
  281. 'avg_promotion': round(avg_promotion, 2),
  282. 'total_refund': round(total_refund, 2),
  283. 'refund_rate': round(refund_rate, 2)
  284. }
  285. # 转换所有 numpy 类型为 Python 原生类型
  286. summary = convert_numpy_types(summary)
  287. return summary
  288. def analyze_categories(df):
  289. """
  290. 按品类分析趋势
  291. """
  292. categories_data = {}
  293. category_list = []
  294. category_skus = defaultdict(list)
  295. # 按品类分组
  296. grouped = df.groupby('品类')
  297. for category, group in grouped:
  298. category_list.append(category)
  299. # 计算品类指标
  300. total_quantity = group['购买数量'].sum()
  301. total_revenue = group['买家实际支付金额'].sum()
  302. avg_price = total_revenue / total_quantity if total_quantity > 0 else 0
  303. # 计算促销力度
  304. group['促销力度'] = 1 - (group['买家实际支付金额'] / group['买家应付货款'])
  305. group['促销力度'] = group['促销力度'].fillna(0)
  306. avg_promotion = group['促销力度'].mean() * 100
  307. # 计算退款相关指标
  308. refunded_orders = group[group['退款状态'] != '没有申请退款']
  309. total_refund = refunded_orders['退款金额'].sum()
  310. refund_rate = len(refunded_orders) / len(group) * 100 if len(group) > 0 else 0
  311. # 趋势数据
  312. date_series = []
  313. quantity_series = []
  314. price_series = []
  315. # 按日期排序
  316. date_grouped = group.groupby('日期')
  317. for date in sorted(date_grouped.groups.keys()):
  318. date_series.append(str(date))
  319. date_data = date_grouped.get_group(date)
  320. quantity_series.append(int(date_data['购买数量'].sum()))
  321. avg_date_price = date_data['买家实际支付金额'].sum() / date_data['购买数量'].sum() if date_data['购买数量'].sum() > 0 else 0
  322. price_series.append(round(avg_date_price, 2))
  323. categories_data[category] = {
  324. 'total_quantity': int(total_quantity),
  325. 'total_revenue': round(total_revenue, 2),
  326. 'avg_price': round(avg_price, 2),
  327. 'avg_promotion': round(avg_promotion, 2),
  328. 'total_refund': round(total_refund, 2),
  329. 'refund_rate': round(refund_rate, 2),
  330. 'date_series': date_series,
  331. 'quantity_series': quantity_series,
  332. 'price_series': price_series
  333. }
  334. # 收集该品类下的SKU
  335. skus = group['SKU'].unique().tolist()
  336. category_skus[category] = skus
  337. result = {
  338. 'categories_data': categories_data,
  339. 'category_list': category_list,
  340. 'category_skus': dict(category_skus)
  341. }
  342. # 转换所有 numpy 类型为 Python 原生类型
  343. result = convert_numpy_types(result)
  344. return result
  345. def analyze_skus(df):
  346. """
  347. 按SKU分析趋势
  348. """
  349. sku_data = {}
  350. sku_list = []
  351. # 按SKU分组
  352. grouped = df.groupby('SKU')
  353. for sku, group in grouped:
  354. sku_list.append(sku)
  355. # 计算SKU指标
  356. total_quantity = group['购买数量'].sum()
  357. total_revenue = group['买家实际支付金额'].sum()
  358. avg_price = total_revenue / total_quantity if total_quantity > 0 else 0
  359. # 计算促销力度
  360. group['促销力度'] = 1 - (group['买家实际支付金额'] / group['买家应付货款'])
  361. group['促销力度'] = group['促销力度'].fillna(0)
  362. avg_promotion = group['促销力度'].mean() * 100
  363. # 计算退款相关指标
  364. refunded_orders = group[group['退款状态'] != '没有申请退款']
  365. total_refund = refunded_orders['退款金额'].sum()
  366. refund_rate = len(refunded_orders) / len(group) * 100 if len(group) > 0 else 0
  367. # 趋势数据
  368. date_series = []
  369. quantity_series = []
  370. price_series = []
  371. # 按日期排序
  372. date_grouped = group.groupby('日期')
  373. for date in sorted(date_grouped.groups.keys()):
  374. date_series.append(str(date))
  375. date_data = date_grouped.get_group(date)
  376. quantity_series.append(int(date_data['购买数量'].sum()))
  377. avg_date_price = date_data['买家实际支付金额'].sum() / date_data['购买数量'].sum() if date_data['购买数量'].sum() > 0 else 0
  378. price_series.append(round(avg_date_price, 2))
  379. sku_data[sku] = {
  380. 'total_quantity': int(total_quantity),
  381. 'total_revenue': round(total_revenue, 2),
  382. 'avg_price': round(avg_price, 2),
  383. 'avg_promotion': round(avg_promotion, 2),
  384. 'total_refund': round(total_refund, 2),
  385. 'refund_rate': round(refund_rate, 2),
  386. 'date_series': date_series,
  387. 'quantity_series': quantity_series,
  388. 'price_series': price_series
  389. }
  390. result = {
  391. 'sku_data': sku_data,
  392. 'sku_list': sku_list
  393. }
  394. # 转换所有 numpy 类型为 Python 原生类型
  395. result = convert_numpy_types(result)
  396. return result
  397. def analyze_trends(df):
  398. """
  399. 分析整体趋势数据
  400. """
  401. # 按日期分组
  402. date_grouped = df.groupby('日期')
  403. date_series = []
  404. quantity_series = []
  405. revenue_series = []
  406. avg_price_series = []
  407. avg_promotion_series = []
  408. for date in sorted(date_grouped.groups.keys()):
  409. date_series.append(str(date))
  410. date_data = date_grouped.get_group(date)
  411. quantity = date_data['购买数量'].sum()
  412. revenue = date_data['买家实际支付金额'].sum()
  413. avg_price = revenue / quantity if quantity > 0 else 0
  414. # 计算促销力度
  415. date_data['促销力度'] = 1 - (date_data['买家实际支付金额'] / date_data['买家应付货款'])
  416. date_data['促销力度'] = date_data['促销力度'].fillna(0)
  417. avg_promotion = date_data['促销力度'].mean() * 100
  418. quantity_series.append(int(quantity))
  419. revenue_series.append(round(revenue, 2))
  420. avg_price_series.append(round(avg_price, 2))
  421. avg_promotion_series.append(round(avg_promotion, 2))
  422. trends = {
  423. 'date_series': date_series,
  424. 'quantity_series': quantity_series,
  425. 'revenue_series': revenue_series,
  426. 'avg_price_series': avg_price_series,
  427. 'avg_promotion_series': avg_promotion_series
  428. }
  429. # 转换所有 numpy 类型为 Python 原生类型
  430. trends = convert_numpy_types(trends)
  431. return trends
  432. def analyze_seasonality(df):
  433. """
  434. 分析季节性
  435. """
  436. # 按日期分组
  437. date_grouped = df.groupby('日期')
  438. # 构建日期到销量的映射
  439. date_quantity_map = {}
  440. for date, group in date_grouped:
  441. date_quantity_map[date] = group['购买数量'].sum()
  442. # 计算7天移动平均
  443. dates = sorted(date_quantity_map.keys())
  444. quantities = [date_quantity_map[date] for date in dates]
  445. # 计算移动平均
  446. window = 7
  447. moving_avg = []
  448. for i in range(len(quantities)):
  449. start = max(0, i - window + 1)
  450. window_data = quantities[start:i+1]
  451. moving_avg.append(sum(window_data) / len(window_data))
  452. # 计算季节性因子
  453. seasonality_factors = []
  454. for i in range(len(quantities)):
  455. if moving_avg[i] > 0:
  456. seasonality_factors.append(quantities[i] / moving_avg[i])
  457. else:
  458. seasonality_factors.append(1.0)
  459. seasonality = {
  460. 'date_series': [str(date) for date in dates],
  461. 'quantity_series': quantities,
  462. 'moving_avg_series': [round(avg, 2) for avg in moving_avg],
  463. 'seasonality_factors': [round(factor, 2) for factor in seasonality_factors]
  464. }
  465. # 转换所有 numpy 类型为 Python 原生类型
  466. seasonality = convert_numpy_types(seasonality)
  467. return seasonality
  468. def predict_overall_trend(df, predict_days):
  469. """
  470. 预测整体销售趋势
  471. 使用 a+x+y 模型
  472. """
  473. # 按日期分组
  474. date_grouped = df.groupby('日期')
  475. # 构建历史数据
  476. dates = sorted(date_grouped.groups.keys())
  477. quantities = []
  478. for date in dates:
  479. quantities.append(date_grouped.get_group(date)['购买数量'].sum())
  480. # 计算基础销量 a(历史平均)
  481. a = np.mean(quantities) if quantities else 0
  482. # 计算时间趋势因子 x(线性回归)
  483. x = 0
  484. if len(quantities) > 1:
  485. # 简单线性回归
  486. days = np.arange(len(quantities))
  487. slope, intercept = np.polyfit(days, quantities, 1)
  488. x = slope
  489. # 计算季节性因子 y
  490. # 这里使用简单的7天周期性
  491. y = 1.0
  492. if len(quantities) >= 7:
  493. # 计算最近7天的平均季节性因子
  494. recent_quantities = quantities[-7:]
  495. recent_avg = np.mean(recent_quantities)
  496. y = recent_quantities[-1] / recent_avg if recent_avg > 0 else 1.0
  497. # 生成预测日期
  498. last_date = dates[-1] if dates else datetime.now().date()
  499. predict_dates = []
  500. for i in range(1, predict_days + 1):
  501. predict_dates.append(last_date + timedelta(days=i))
  502. # 生成预测值
  503. predict_quantities = []
  504. predict_revenues = []
  505. # 计算历史平均价格
  506. avg_price = df['买家实际支付金额'].sum() / df['购买数量'].sum() if df['购买数量'].sum() > 0 else 0
  507. for i, predict_date in enumerate(predict_dates):
  508. # 计算预测值: a + x * (days_since_start) * y
  509. days_since_start = len(quantities) + i
  510. predicted_quantity = max(0, a + x * days_since_start * y)
  511. predict_quantities.append(round(predicted_quantity, 2))
  512. predict_revenues.append(round(predicted_quantity * avg_price, 2))
  513. overall_prediction = {
  514. 'date_series': [str(date) for date in predict_dates],
  515. 'quantity_series': predict_quantities,
  516. 'revenue_series': predict_revenues,
  517. 'model_params': {
  518. 'a': round(a, 2),
  519. 'x': round(x, 4),
  520. 'y': round(y, 2)
  521. }
  522. }
  523. # 转换所有 numpy 类型为 Python 原生类型
  524. overall_prediction = convert_numpy_types(overall_prediction)
  525. return overall_prediction
  526. def predict_category_trends(df, predict_days):
  527. """
  528. 按品类预测销售趋势
  529. """
  530. category_predictions = {}
  531. # 按品类分组
  532. grouped = df.groupby('品类')
  533. for category, group in grouped:
  534. # 按日期分组
  535. date_grouped = group.groupby('日期')
  536. # 构建历史数据
  537. dates = sorted(date_grouped.groups.keys())
  538. quantities = []
  539. for date in dates:
  540. quantities.append(date_grouped.get_group(date)['购买数量'].sum())
  541. # 计算基础销量 a(历史平均)
  542. a = np.mean(quantities) if quantities else 0
  543. # 计算时间趋势因子 x(线性回归)
  544. x = 0
  545. if len(quantities) > 1:
  546. # 简单线性回归
  547. days = np.arange(len(quantities))
  548. slope, intercept = np.polyfit(days, quantities, 1)
  549. x = slope
  550. # 计算季节性因子 y
  551. y = 1.0
  552. if len(quantities) >= 7:
  553. # 计算最近7天的平均季节性因子
  554. recent_quantities = quantities[-7:]
  555. recent_avg = np.mean(recent_quantities)
  556. y = recent_quantities[-1] / recent_avg if recent_avg > 0 else 1.0
  557. # 生成预测日期
  558. last_date = dates[-1] if dates else datetime.now().date()
  559. predict_dates = []
  560. for i in range(1, predict_days + 1):
  561. predict_dates.append(last_date + timedelta(days=i))
  562. # 生成预测值
  563. predict_quantities = []
  564. predict_revenues = []
  565. # 计算该品类的平均价格
  566. category_avg_price = group['买家实际支付金额'].sum() / group['购买数量'].sum() if group['购买数量'].sum() > 0 else 0
  567. for i, predict_date in enumerate(predict_dates):
  568. # 计算预测值: a + x * (days_since_start) * y
  569. days_since_start = len(quantities) + i
  570. predicted_quantity = max(0, a + x * days_since_start * y)
  571. predict_quantities.append(round(predicted_quantity, 2))
  572. predict_revenues.append(round(predicted_quantity * category_avg_price, 2))
  573. category_predictions[category] = {
  574. 'date_series': [str(date) for date in predict_dates],
  575. 'quantity_series': predict_quantities,
  576. 'revenue_series': predict_revenues,
  577. 'model_params': {
  578. 'a': round(a, 2),
  579. 'x': round(x, 4),
  580. 'y': round(y, 2)
  581. }
  582. }
  583. # 转换所有 numpy 类型为 Python 原生类型
  584. category_predictions = convert_numpy_types(category_predictions)
  585. return category_predictions
  586. def predict_sku_trends(df, predict_days):
  587. """
  588. 按SKU预测销售趋势
  589. """
  590. sku_predictions = {}
  591. # 按SKU分组
  592. grouped = df.groupby('SKU')
  593. for sku, group in grouped:
  594. # 按日期分组
  595. date_grouped = group.groupby('日期')
  596. # 构建历史数据
  597. dates = sorted(date_grouped.groups.keys())
  598. quantities = []
  599. for date in dates:
  600. quantities.append(date_grouped.get_group(date)['购买数量'].sum())
  601. # 计算基础销量 a(历史平均)
  602. a = np.mean(quantities) if quantities else 0
  603. # 计算时间趋势因子 x(线性回归)
  604. x = 0
  605. if len(quantities) > 1:
  606. # 简单线性回归
  607. days = np.arange(len(quantities))
  608. slope, intercept = np.polyfit(days, quantities, 1)
  609. x = slope
  610. # 计算季节性因子 y
  611. y = 1.0
  612. if len(quantities) >= 7:
  613. # 计算最近7天的平均季节性因子
  614. recent_quantities = quantities[-7:]
  615. recent_avg = np.mean(recent_quantities)
  616. y = recent_quantities[-1] / recent_avg if recent_avg > 0 else 1.0
  617. # 生成预测日期
  618. last_date = dates[-1] if dates else datetime.now().date()
  619. predict_dates = []
  620. for i in range(1, predict_days + 1):
  621. predict_dates.append(last_date + timedelta(days=i))
  622. # 生成预测值
  623. predict_quantities = []
  624. predict_revenues = []
  625. # 计算该SKU的平均价格
  626. sku_avg_price = group['买家实际支付金额'].sum() / group['购买数量'].sum() if group['购买数量'].sum() > 0 else 0
  627. for i, predict_date in enumerate(predict_dates):
  628. # 计算预测值: a + x * (days_since_start) * y
  629. days_since_start = len(quantities) + i
  630. predicted_quantity = max(0, a + x * days_since_start * y)
  631. predict_quantities.append(round(predicted_quantity, 2))
  632. predict_revenues.append(round(predicted_quantity * sku_avg_price, 2))
  633. sku_predictions[sku] = {
  634. 'date_series': [str(date) for date in predict_dates],
  635. 'quantity_series': predict_quantities,
  636. 'revenue_series': predict_revenues,
  637. 'model_params': {
  638. 'a': round(a, 2),
  639. 'x': round(x, 4),
  640. 'y': round(y, 2)
  641. }
  642. }
  643. # 转换所有 numpy 类型为 Python 原生类型
  644. sku_predictions = convert_numpy_types(sku_predictions)
  645. return sku_predictions