1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
| import pandas as pd import numpy as np from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error, r2_score import matplotlib.pyplot as plt
# Example data generation (replace with real data):
# dates = pd.date_range(start="2020-01-01", end="2025-06-26", freq="D")
# prices = np.cumsum(np.random.normal(0.1, 2, len(dates))) + 1500  # simulated gold futures prices
# df = pd.DataFrame({"收盘价": prices}, index=dates)
# df = df[["收盘价"]]  # keep only the close-price column, still as a DataFrame
# 1. Data preprocessing
def preprocess_data(df):
    """Forward-fill missing values and add calendar columns taken from the index.

    The frame is modified in place and the same object is returned.

    Args:
        df: DataFrame whose index exposes ``year``, ``month`` and ``day``
            attributes (for example a ``DatetimeIndex``).

    Returns:
        The same ``df`` object, with gaps forward-filled and ``year``,
        ``month`` and ``day`` columns added.
    """
    # DataFrame.ffill replaces fillna(method="ffill"); the ``method`` argument
    # of fillna is deprecated. Missing values with no earlier observation to
    # copy from stay missing.
    df.ffill(inplace=True)

    # Optional calendar features derived from the index.
    df["year"] = df.index.year
    df["month"] = df.index.month
    df["day"] = df.index.day
    return df
# Run the preprocessing step; preprocess_data returns the frame it was given.
# NOTE(review): `df` must already be defined at this point. The sample-data
# generator earlier in this file is commented out, so load real data first.
df = preprocess_data(df)
# 2. Feature engineering: lag features and a trailing-mean trend feature
def create_lag_features(df, target_col="收盘价", lags=(1, 2, 3, 5, 7)):
    """Build lagged copies and a 7-row trailing mean of ``target_col``.

    The input frame is left untouched; all new columns are added to a copy.

    Args:
        df: Source DataFrame containing ``target_col``, ordered by time.
        target_col: Name of the column to derive features from.
        lags: Iterable of positive row offsets; each ``k`` produces a
            ``lag_k`` column holding the value from ``k`` rows earlier.

    Returns:
        A new DataFrame with the original columns, one ``lag_k`` column per
        requested lag and a ``rolling_mean_7`` column, with every row that
        contains a missing value removed.

    Raises:
        KeyError: If ``target_col`` is not a column of ``df``.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    out = df.copy()
    series = out[target_col]

    for lag in lags:
        out[f"lag_{lag}"] = series.shift(lag)

    # Shift by one row before averaging so the window covers only the seven
    # *previous* rows. Averaging the unshifted series would include the
    # current row's value, leaking the prediction target into its features.
    out["rolling_mean_7"] = series.shift(1).rolling(7).mean()

    # The first rows have incomplete lags/windows; drop them.
    return out.dropna()
# Build the feature table from the preprocessed frame.
df_lagged = create_lag_features(df)

# 3. Chronological split: the first 80% of rows train, the remainder tests.
target = df_lagged["收盘价"]
features = df_lagged.drop(columns="收盘价")  # every column except the target
split_idx = int(len(df_lagged) * 0.8)
X_train = features.iloc[:split_idx]
X_test = features.iloc[split_idx:]
y_train = target.iloc[:split_idx]
y_test = target.iloc[split_idx:]

# 4. Fit the linear regression model on the training slice.
model = LinearRegression()
model.fit(X_train, y_train)

# 5. Predict on the held-out slice and score the result.
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"RMSE: {rmse:.2f}, R²: {r2:.2f}")

# 6. Show the fitted coefficients, largest first.
coef_df = pd.DataFrame({"特征": X_train.columns, "系数": model.coef_})
print("\n模型系数权重:")
print(coef_df.sort_values("系数", ascending=False))

# 7. Plot actual vs. predicted prices with a band of +/- 1.96 residual
#    standard deviations around the prediction.
band = 1.96 * np.std(y_test - y_pred)
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(y_test.index, y_test, label="实际价格", color="blue", alpha=0.7)
ax.plot(y_test.index, y_pred, label="预测价格", color="red", linestyle="--")
ax.fill_between(
    y_test.index,
    y_pred - band,
    y_pred + band,
    color="pink",
    alpha=0.2,
    label="95%置信区间",
)
ax.set_title(f"黄金期货价格预测 (RMSE={rmse:.2f}, R²={r2:.2f})")
ax.set_xlabel("日期")
ax.set_ylabel("收盘价")
ax.legend()
ax.grid(alpha=0.3)
plt.show()

# Predict from the second-to-last feature row.
# NOTE(review): the model is fitted on (features of a row -> close of that
# same row), so this looks like an estimate for that row's own date rather
# than a forecast past the end of the data; confirm the intent.
next_day_close = model.predict(features.iloc[[-2]])
print("预测的下一个时间点的收盘价:", next_day_close)  # the actual value was 410.72
|