lifecycle_analyzer_service.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. """
  2. 生命周期分析核心模块
  3. """
  4. import pandas as pd
  5. import numpy as np
  6. from scipy.signal import savgol_filter, find_peaks
  7. import hashlib
  8. from .utils import ensure_native_type
# Analysis parameters
MIN_PROMINENCE = 0.1  # minimum peak prominence, as a fraction of the data range (max - min)
HOT_PRODUCT_THRESHOLD = 0.6  # hot-product threshold
COMPLETENESS_SCORE_THRESHOLD = 80  # completeness score at or above which a lifecycle counts as complete
MIN_DATA_DAYS = 30  # minimum number of days of data
  14. def get_dataframe_hash(df):
  15. """计算DataFrame的哈希值用于缓存键"""
  16. sample_data = df[['订单状态', '订单付款时间', '买家实际支付金额', 'SKU']].head(100)
  17. hash_str = f"{sample_data.to_string().encode()}_{df.shape}"
  18. return hashlib.md5(hash_str.encode()).hexdigest()
  19. def enhanced_data_preprocessing(revenue_series, quantity_series):
  20. """增强的数据预处理,使用多种平滑技术"""
  21. # 移除异常值
  22. def remove_outliers(series):
  23. Q1 = series.quantile(0.25)
  24. Q3 = series.quantile(0.75)
  25. IQR = Q3 - Q1
  26. lower_bound = Q1 - 1.5 * IQR
  27. upper_bound = Q3 + 1.5 * IQR
  28. return series.clip(lower=max(0, lower_bound), upper=upper_bound)
  29. revenue_clean = remove_outliers(revenue_series)
  30. quantity_clean = remove_outliers(quantity_series)
  31. # 自适应平滑窗口大小
  32. data_length = len(revenue_series)
  33. if data_length <= 30:
  34. window_length = 5
  35. elif data_length <= 90:
  36. window_length = 7
  37. elif data_length <= 180:
  38. window_length = 11
  39. else:
  40. window_length = 15
  41. window_length = min(window_length, data_length - 1 if data_length % 2 == 0 else data_length)
  42. if window_length < 3:
  43. window_length = 3
  44. if window_length % 2 == 0:
  45. window_length -= 1
  46. # 使用Savgol滤波器进行平滑
  47. smoothed_revenue = savgol_filter(revenue_clean.values, window_length, 2)
  48. smoothed_quantity = savgol_filter(quantity_clean.values, window_length, 2)
  49. # 应用移动平均进行二次平滑
  50. ma_window = max(3, window_length // 3)
  51. revenue_ma = pd.Series(smoothed_revenue, index=revenue_series.index).rolling(window=ma_window, center=True).mean()
  52. revenue_ma = pd.Series(revenue_ma).bfill().ffill()
  53. quantity_ma = pd.Series(smoothed_quantity, index=quantity_series.index).rolling(window=ma_window, center=True).mean()
  54. quantity_ma = pd.Series(quantity_ma).bfill().ffill()
  55. return revenue_ma, quantity_ma
  56. def detect_significant_peaks(data, min_prominence=MIN_PROMINENCE):
  57. """检测显著峰值,考虑多峰情况"""
  58. data_range = np.max(data) - np.min(data)
  59. prominence_threshold = data_range * min_prominence
  60. peaks, properties = find_peaks(data, prominence=prominence_threshold, distance=10)
  61. if len(peaks) == 0:
  62. return [np.argmax(data)]
  63. if len(peaks) == 1:
  64. return peaks.tolist()
  65. else:
  66. main_peak_idx = np.argmax(properties['prominences'])
  67. return [peaks[main_peak_idx]]
  68. def calculate_lifecycle_indicators(revenue_data, quantity_data):
  69. """计算生命周期关键指标"""
  70. revenue_peaks = detect_significant_peaks(revenue_data)
  71. quantity_peaks = detect_significant_peaks(quantity_data)
  72. revenue_peak_idx = revenue_peaks[0] if revenue_peaks else len(revenue_data) // 2
  73. quantity_peak_idx = quantity_peaks[0] if quantity_peaks else len(quantity_data) // 2
  74. revenue_peak = revenue_data.iloc[revenue_peak_idx]
  75. quantity_peak = quantity_data.iloc[quantity_peak_idx]
  76. revenue_growth_rate = (revenue_peak - revenue_data.iloc[0]) / (revenue_data.iloc[0] + 1e-8)
  77. quantity_growth_rate = (quantity_peak - quantity_data.iloc[0]) / (quantity_data.iloc[0] + 1e-8)
  78. revenue_decline_rate = (revenue_data.iloc[-1] - revenue_peak) / (revenue_peak + 1e-8)
  79. quantity_decline_rate = (quantity_data.iloc[-1] - quantity_peak) / (quantity_peak + 1e-8)
  80. revenue_mean = np.mean(revenue_data)
  81. quantity_mean = np.mean(quantity_data)
  82. revenue_cv = np.std(revenue_data) / (revenue_mean + 1e-8)
  83. quantity_cv = np.std(quantity_data) / (quantity_mean + 1e-8)
  84. return {
  85. 'revenue_peak_idx': revenue_peak_idx,
  86. 'quantity_peak_idx': quantity_peak_idx,
  87. 'revenue_peak': revenue_peak,
  88. 'quantity_peak': quantity_peak,
  89. 'revenue_growth_rate': revenue_growth_rate,
  90. 'quantity_growth_rate': quantity_growth_rate,
  91. 'revenue_decline_rate': revenue_decline_rate,
  92. 'quantity_decline_rate': quantity_decline_rate,
  93. 'revenue_cv': revenue_cv,
  94. 'quantity_cv': quantity_cv,
  95. 'total_length': len(revenue_data),
  96. 'revenue_mean': revenue_mean,
  97. 'quantity_mean': quantity_mean
  98. }
  99. def assess_lifecycle_completeness(indicators):
  100. """评估生命周期完整性"""
  101. score = 0
  102. breakdown = {}
  103. # 1. 时间长度(权重15)
  104. sufficient_time = indicators['total_length'] >= 120
  105. score += 15 if sufficient_time else 0
  106. breakdown['sufficient_time'] = {'hit': sufficient_time, 'weight': 15}
  107. # 2. 峰值位置合理性(权重15)
  108. reasonable_peak_position = (
  109. 0.25 <= indicators['revenue_peak_idx'] / indicators['total_length'] <= 0.75 and
  110. 0.25 <= indicators['quantity_peak_idx'] / indicators['total_length'] <= 0.75
  111. )
  112. score += 15 if reasonable_peak_position else 0
  113. breakdown['reasonable_peak_position'] = {'hit': reasonable_peak_position, 'weight': 15}
  114. # 3. 显著增长(权重15)
  115. significant_growth = (
  116. indicators['revenue_growth_rate'] >= 1.0 or
  117. indicators['quantity_growth_rate'] >= 0.8
  118. )
  119. score += 15 if significant_growth else 0
  120. breakdown['significant_growth'] = {'hit': significant_growth, 'weight': 15}
  121. # 4. 明显衰退(权重15)
  122. noticeable_decline = (
  123. indicators['revenue_decline_rate'] <= -0.35 or
  124. indicators['quantity_decline_rate'] <= -0.30
  125. )
  126. score += 15 if noticeable_decline else 0
  127. breakdown['noticeable_decline'] = {'hit': noticeable_decline, 'weight': 15}
  128. # 5. 生命周期形状(权重10)
  129. has_lifecycle_shape = (
  130. indicators['revenue_cv'] >= 0.3 and
  131. indicators['quantity_cv'] >= 0.25
  132. )
  133. score += 10 if has_lifecycle_shape else 0
  134. breakdown['has_lifecycle_shape'] = {'hit': has_lifecycle_shape, 'weight': 10}
  135. # 6. 峰值显著性(权重10)
  136. peak_significance = (
  137. indicators['revenue_peak'] >= indicators['revenue_mean'] * 1.8 and
  138. indicators['quantity_peak'] >= indicators['quantity_mean'] * 1.8
  139. )
  140. score += 10 if peak_significance else 0
  141. breakdown['peak_significance'] = {'hit': peak_significance, 'weight': 10}
  142. # 7. 数据质量检查(权重10)
  143. data_quality_check = (
  144. indicators['total_length'] >= 120 and
  145. indicators['revenue_peak'] > 0 and
  146. indicators['quantity_peak'] > 0 and
  147. (indicators['revenue_growth_rate'] > 0 or indicators['quantity_growth_rate'] > 0) and
  148. (indicators['revenue_decline_rate'] < 0 or indicators['quantity_decline_rate'] < 0)
  149. )
  150. score += 10 if data_quality_check else 0
  151. breakdown['data_quality_check'] = {'hit': data_quality_check, 'weight': 10}
  152. # 8. 周期完整性(权重5)
  153. cycle_completeness = (
  154. sufficient_time and reasonable_peak_position and
  155. significant_growth and noticeable_decline and has_lifecycle_shape
  156. )
  157. score += 5 if cycle_completeness else 0
  158. breakdown['cycle_completeness'] = {'hit': cycle_completeness, 'weight': 5}
  159. # 9. 趋势一致性(权重5)
  160. trend_consistency = (
  161. abs(indicators['revenue_growth_rate'] - indicators['quantity_growth_rate']) <= 0.8 or
  162. abs(indicators['revenue_decline_rate'] - indicators['quantity_decline_rate']) <= 0.4 or
  163. (indicators['revenue_growth_rate'] * indicators['quantity_growth_rate'] > 0 and
  164. indicators['revenue_decline_rate'] * indicators['quantity_decline_rate'] > 0)
  165. )
  166. score += 5 if trend_consistency else 0
  167. breakdown['trend_consistency'] = {'hit': trend_consistency, 'weight': 5}
  168. is_complete = bool(score >= COMPLETENESS_SCORE_THRESHOLD)
  169. return is_complete, float(score) if isinstance(score, (np.integer, np.floating)) else score, breakdown
  170. def calculate_hot_product_coefficient(result, indicators, data_length):
  171. """计算爆款系数"""
  172. peak_score = min(100, (indicators['revenue_peak'] / (indicators['revenue_mean'] + 1e-8)) * 25)
  173. growth_score = max(0, min(100, (indicators['revenue_growth_rate'] + indicators['quantity_growth_rate']) * 10))
  174. stability_score = max(0, min(100, (1 - indicators['revenue_cv'] - indicators['quantity_cv']) * 15))
  175. completeness_factor = 20 if result.get('is_complete', False) else 0
  176. growth_stage_score = 0
  177. if '成长期' in result.get('stages_map', []):
  178. growth_stage_score = 20
  179. total_score = (peak_score + growth_score + stability_score + completeness_factor + growth_stage_score) / 100
  180. return min(1.0, total_score)