#个人练手 python代码实现自动采集入库建模训练预测福彩3D
import re
import requests
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# ---------------------------------------------------------------
# Step 1: bring the local CSV up to date, one draw per iteration.
#
# Each pass re-reads the CSV, compares its newest date with the
# newest date shown on the results page, and appends the draw for
# the day after the newest local date.  The loop ends once the
# page has nothing newer than the CSV.
# ---------------------------------------------------------------
file_path = r'D:\shaoxiao\文档\机器学习\福彩3D.csv'
url = "https://6.17500.cn/?lottery=more&lotteryId=sd&page=1"

while True:
    df = pd.read_csv(file_path)
    # Date of the newest draw already stored locally.
    bd_lastday = pd.to_datetime(df['datetime'].iloc[-1])

    # The timeout keeps a stalled connection from blocking the script
    # forever; raise_for_status stops us from parsing an error page.
    resp = requests.get(url, headers={'user-agent': 'Mozilla/5.0'}, timeout=30)
    resp.raise_for_status()

    # The first list item on the page carries the newest published date.
    page_dates = re.findall(r'<ul class="lskj_list"> <li> <i>(.*?)</i>', resp.text)
    if not page_dates:
        raise RuntimeError(f'Could not find the latest draw date on {url}')
    lastday = pd.to_datetime(page_dates[0])

    if lastday > bd_lastday:
        # Fetch exactly the day after the newest local row.
        caijiriqi = (bd_lastday + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
        draws = re.findall(rf'<i>{caijiriqi}</i> <font> 第 <b>\d+</b> 期 </font> <span class="red_ball">(.*?)</span> <span class="red_ball">(.*?)</span> <span class="red_ball">(.*?)</span>', resp.text)
        if not draws:
            # Fail loudly instead of with a bare IndexError: the date is
            # either not listed on this page or the page layout changed.
            raise RuntimeError(f'No draw for {caijiriqi} found on {url}')

        num1, num2, num3 = (int(ball) for ball in draws[0])
        # Row layout: date, combined number (as an int, so leading zeros
        # are dropped), then the three individual digits.
        row = [caijiriqi, int(f'{num1}{num2}{num3}'), num1, num2, num3]
        with open(file_path, 'a') as xieru:
            xieru.write(','.join(map(str, row)) + '\n')
        print(f'更新{caijiriqi}数据成功')
    else:
        # Double quotes outside: reusing the same quote character inside an
        # f-string expression is a syntax error before Python 3.12.
        print(f"数据已更新完毕,最新数据日期:{bd_lastday.strftime('%Y-%m-%d')}")
        break

# Step 2: reload the (possibly extended) CSV for modelling.
print("开始根据最新数据建模分析预测")
df = pd.read_csv(file_path)
def create_dataset(data_df, look_back=5):
    """Build sliding-window training samples from the draw history.

    Each sample's features are the ``num1``/``num2``/``num3`` values of
    ``look_back`` consecutive rows, flattened row by row; its target is the
    three values of the row that immediately follows the window.

    Args:
        data_df: DataFrame containing the columns ``num1``, ``num2``, ``num3``.
        look_back: Number of consecutive rows per feature window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays with shapes
        ``(n_samples, look_back * 3)`` and ``(n_samples, 3)``, where
        ``n_samples = max(len(data_df) - look_back, 0)``.  When there are too
        few rows the arrays are empty but keep their 2-D shape, so column
        indexing such as ``y[:, 0]`` still works.

    Raises:
        ValueError: If ``look_back`` is negative.
    """
    if look_back < 0:
        raise ValueError("look_back must be non-negative")

    data = data_df[['num1', 'num2', 'num3']].values
    n_columns = data.shape[1]

    # One flattened window per start index; empty when the data is too short.
    windows = [
        data[start:start + look_back].flatten()
        for start in range(len(data) - look_back)
    ]
    # Explicit dtype and shape keep the result 2-D even with zero windows.
    X = np.array(windows, dtype=data.dtype).reshape(len(windows), look_back * n_columns)
    # The target of window i is row i + look_back, i.e. every row from look_back on.
    y = data[look_back:].copy()
    return X, y
# Number of past draws that make up one feature window.
look_back = 20
X, y = create_dataset(df, look_back)

# ----------------------------
# Model construction: one independent classifier per digit position
# ----------------------------
model1, model2, model3 = (
    RandomForestClassifier(n_estimators=100, random_state=42) for _ in range(3)
)

# Train each classifier on its own target column (position 0, 1, 2).
for position, forest in enumerate((model1, model2, model3)):
    forest.fit(X, y[:, position])

# ----------------------------
# Model evaluation
# ----------------------------
# NOTE(review): predictions are made on the same X the models were fitted
# on, so these are in-sample accuracies; nothing below reads acc1..acc3.
pred1, pred2, pred3 = (forest.predict(X) for forest in (model1, model2, model3))
acc1, acc2, acc3 = (
    accuracy_score(y[:, position], predicted)
    for position, predicted in enumerate((pred1, pred2, pred3))
)

# ----------------------------
# Input for predicting the next draw
# ----------------------------
# The most recent `look_back` draws, flattened into a single feature row.
recent_draws = df[['num1', 'num2', 'num3']].tail(look_back).values
last_data = recent_draws.flatten().reshape(1, -1)
def predict_top_n_combinations(model1, model2, model3, last_data, top_n=5):
    """Return the ``top_n`` most probable three-digit combinations.

    Each model supplies a probability distribution for one digit position.
    The ``top_n`` most probable digits of every position are combined, each
    combination is scored by the product of its three probabilities, and the
    highest-scoring combinations are returned.

    Args:
        model1: Fitted classifier for the first digit (needs ``predict_proba``).
        model2: Fitted classifier for the second digit.
        model3: Fitted classifier for the third digit.
        last_data: Feature array with a single row, passed to ``predict_proba``.
        top_n: Number of candidate digits per position and of combinations
            returned.

    Returns:
        A list of at most ``top_n`` strings such as ``"509"``, ordered from
        most to least probable.
    """

    def ranked_candidates(model):
        """Return (digit, probability) pairs for one position, best first."""
        proba = model.predict_proba(last_data)[0]
        # The columns of predict_proba follow model.classes_, which leaves out
        # any digit never seen in training.  Translate column index -> label
        # so the reported digit is the real one, not its column position.
        labels = getattr(model, "classes_", None)
        if labels is None:
            labels = np.arange(len(proba))
        best_columns = np.argsort(proba)[-top_n:][::-1]
        return [(labels[col], proba[col]) for col in best_columns]

    candidates1 = ranked_candidates(model1)
    candidates2 = ranked_candidates(model2)
    candidates3 = ranked_candidates(model3)

    # Score every combination of the per-position candidates by joint
    # probability (the positions are treated as independent).
    all_combos = [
        (f"{d1}{d2}{d3}", p1 * p2 * p3)
        for d1, p1 in candidates1
        for d2, p2 in candidates2
        for d3, p3 in candidates3
    ]

    # Stable sort by joint probability, then keep the best top_n.
    all_combos.sort(key=lambda combo: combo[1], reverse=True)
    return [digits for digits, _ in all_combos[:top_n]]
# Predict and print the N most likely combinations for the day after the
# newest draw stored locally.
N = 5
top_5_predictions = predict_top_n_combinations(model1, model2, model3, last_data, top_n=N)
# Format the date outside the f-string: reusing the same quote character
# inside an f-string expression is a syntax error before Python 3.12.
next_draw_date = (bd_lastday + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
print(f'\n【{next_draw_date}预测结果 - 前{N}组】')
for i, combo in enumerate(top_5_predictions, 1):
    print(f'第{i}组预测号码为: {combo}')
# Stray page text that was fused onto the print line above, kept for
# reference: "5月25日随机森林模型预测福彩3D"
# 本节3907字 2025-05-29 20:43:14