"""Personal practice script: collect Fucai 3D draws, store them, train and predict.

Workflow when run as a script:

1. Read the local CSV and download the first page of recent draws.
2. Append every draw that is newer than the CSV's last row.
3. Train one random forest per digit position on sliding windows of past draws.
4. Print the most probable three-digit combinations for the next draw.
"""
import itertools
import re

import numpy as np
import pandas as pd

FILE_PATH = r'D:\shaoxiao\文档\机器学习\福彩3D.csv'
URL = "https://6.17500.cn/?lottery=more&lotteryId=sd&page=1"
DIGIT_COLUMNS = ['num1', 'num2', 'num3']
ONE_DAY = pd.Timedelta(days=1)


def fetch_page(url=URL, timeout=10):
    """Download *url* and return the response body as text.

    Raises:
        requests.RequestException: on timeout, connection failure or an HTTP
            error status.
    """
    # Imported lazily so the parsing and modelling helpers stay importable
    # on machines without the HTTP stack installed.
    import requests

    resp = requests.get(url, headers={'user-agent': 'Mozilla/5.0'}, timeout=timeout)
    resp.raise_for_status()
    return resp.text


def latest_draw_date(html):
    """Return the date text of the first (newest) entry of the draw list.

    Raises:
        ValueError: if the draw list cannot be located in *html*.
    """
    found = re.search(r'<ul class="lskj_list"> <li> <i>(.*?)</i>', html)
    if found is None:
        raise ValueError('could not locate the latest draw date in the page')
    return found.group(1)


def listed_draw_dates(html):
    """Return every ``YYYY-MM-DD`` date wrapped in ``<i>`` tags, as Timestamps."""
    return [pd.to_datetime(text) for text in re.findall(r'<i>(\d{4}-\d{2}-\d{2})</i>', html)]


def find_draw(html, date_text):
    """Return the three digits drawn on *date_text*, or ``None`` if not listed.

    Args:
        html: page source returned by :func:`fetch_page`.
        date_text: draw date formatted as ``YYYY-MM-DD``.
    """
    pattern = (
        rf'<i>{re.escape(date_text)}</i> <font> 第 <b>\d+</b> 期 </font> '
        r'<span class="red_ball">(.*?)</span> '
        r'<span class="red_ball">(.*?)</span> '
        r'<span class="red_ball">(.*?)</span>'
    )
    found = re.search(pattern, html)
    if found is None:
        return None
    return tuple(int(ball) for ball in found.groups())


def format_row(date_text, digits):
    """Build the CSV line ``date,combined,num1,num2,num3`` for one draw.

    ``combined`` is the three digits concatenated and parsed as an int, so a
    leading zero is dropped (0, 4, 7 -> 47); this matches the rows already
    stored in the CSV.
    """
    combined = int(''.join(str(digit) for digit in digits))
    return ','.join(map(str, [date_text, combined, *digits])) + '\n'


def update_csv(file_path=FILE_PATH, url=URL):
    """Append all draws newer than the CSV's last row; return the last stored date.

    Only the first result page is consulted. Days inside the page's range that
    have no entry are treated as no-draw days and skipped with a notice.

    Raises:
        RuntimeError: if the page does not reach back to the day after the
            CSV's last row, because appending would leave a hole in the data.
    """
    df = pd.read_csv(file_path)
    local_last = pd.to_datetime(df['datetime'].iloc[-1])

    html = fetch_page(url)
    remote_last = pd.to_datetime(latest_draw_date(html))
    if remote_last <= local_last:
        return local_last

    listed = listed_draw_dates(html)
    if not listed or min(listed) > local_last + ONE_DAY:
        raise RuntimeError(
            'the first result page does not cover the gap since '
            f'{local_last.strftime("%Y-%m-%d")}; backfill older draws first'
        )

    day = local_last + ONE_DAY
    with open(file_path, 'a', encoding='utf-8') as out:
        while day <= remote_last:
            date_text = day.strftime('%Y-%m-%d')
            digits = find_draw(html, date_text)
            if digits is None:
                print(f'{date_text}无开奖数据,已跳过')
            else:
                out.write(format_row(date_text, digits))
                print(f'更新{date_text}数据成功')
                local_last = day
            day += ONE_DAY
    return local_last


def create_dataset(data_df, look_back=5):
    """Turn the draw history into supervised sliding-window samples.

    Args:
        data_df: frame with the columns ``num1``, ``num2`` and ``num3``.
        look_back: number of consecutive past draws used as features.

    Returns:
        ``(X, y)`` where each row of ``X`` is ``look_back`` draws flattened to
        one vector and the matching row of ``y`` holds the three digits of the
        draw that followed. Both are empty when there are not more than
        ``look_back`` rows.
    """
    data = data_df[['num1', 'num2', 'num3']].values
    starts = range(len(data) - look_back)
    X = [data[i:i + look_back].flatten() for i in starts]
    y = [data[i + look_back] for i in starts]
    return np.array(X), np.array(y)


def train_models(X, y, n_estimators=100, random_state=42):
    """Fit one random forest per digit position and return them as a tuple."""
    # Imported lazily so the pure helpers stay importable without scikit-learn.
    from sklearn.ensemble import RandomForestClassifier

    models = []
    for position in range(y.shape[1]):
        model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
        model.fit(X, y[:, position])
        models.append(model)
    return tuple(models)


def predict_top_n_combinations(model1, model2, model3, last_data, top_n=5):
    """Return the ``top_n`` most probable three-digit combinations.

    Each model supplies a probability distribution for one digit position.
    The ``top_n`` likeliest digits per position are combined, every
    combination is scored by the product of its three probabilities, and the
    best ``top_n`` are returned as strings such as ``"047"``.

    Args:
        model1, model2, model3: fitted classifiers exposing ``predict_proba``
            (and normally ``classes_``) for positions one to three.
        last_data: feature matrix of shape ``(1, n_features)``.
        top_n: number of digits kept per position and of results returned.
    """
    ranked = []
    for model in (model1, model2, model3):
        proba = model.predict_proba(last_data)[0]
        # predict_proba columns follow model.classes_, which omits digits never
        # seen in training, so a column index is not necessarily the digit.
        classes = getattr(model, 'classes_', None)
        best = np.argsort(proba)[-top_n:][::-1]
        ranked.append([(i if classes is None else classes[i], proba[i]) for i in best])

    combos = [
        (f"{a}{b}{c}", pa * pb * pc)
        for (a, pa), (b, pb), (c, pc) in itertools.product(*ranked)
    ]
    combos.sort(key=lambda item: item[1], reverse=True)
    return [label for label, _ in combos[:top_n]]


def main():
    """Update the CSV, train on the full history and print the predictions."""
    last_date = update_csv(FILE_PATH, URL)
    print(f'数据已更新完毕,最新数据日期:{last_date.strftime("%Y-%m-%d")}')

    print("开始根据最新数据建模分析预测")
    df = pd.read_csv(FILE_PATH)

    look_back = 20
    X, y = create_dataset(df, look_back)
    if len(X) == 0:
        raise ValueError(f'need more than {look_back} draws to train, got {len(df)}')

    model1, model2, model3 = train_models(X, y)

    # The most recent look_back draws form the feature vector for the next draw.
    last_data = df[DIGIT_COLUMNS].tail(look_back).values.flatten().reshape(1, -1)

    N = 5
    top_5_predictions = predict_top_n_combinations(model1, model2, model3, last_data, top_n=N)

    next_day = (last_date + ONE_DAY).strftime('%Y-%m-%d')
    print(f'\n【{next_day}预测结果 - 前{N}组】')
    for i, combo in enumerate(top_5_predictions, 1):
        print(f'第{i}组预测号码为: {combo}')


if __name__ == "__main__":
    main()
5月25日随机森林模型预测福彩3D
本节3907字2025-05-29 20:43:14