""" Auther:少校 Time:2025/5/26 14:28 越努力,越幸运 """ # 数据来源:https://p2.ssl.qhimgs1.com/t024638e771ce2aa922.jpg import numpy as np play_yes = 9/14 play_no = 5/14 play = -(play_yes * np.log2(play_yes) + play_no * np.log2(play_no)) * (14 / 14) print("play:",play) print("===================分割线===================") hot_play_yes = 2/4 hot_play_no = 2/4 hot = -(hot_play_yes * np.log2(hot_play_yes) + hot_play_no * np.log2(hot_play_no)) * (4 / 14) mild_play_yes = 4/6 mild_play_no = 2/6 mild = -(mild_play_yes * np.log2(mild_play_yes) + mild_play_no * np.log2(mild_play_no)) * (6 / 14) cool_play_yes = 3/4 cool_play_no = 1/4 cool = -(cool_play_yes * np.log2(cool_play_yes) + cool_play_no * np.log2(cool_play_no)) * (4 / 14) temperature = hot + mild + cool high_play_yes = 3/7 high_play_no = 4/7 high = -(high_play_yes * np.log2(high_play_yes) + high_play_no * np.log2(high_play_no)) * (7 / 14) normal_play_yes = 6/7 normal_play_no = 1/7 normal = -(normal_play_yes * np.log2(normal_play_yes) + normal_play_no * np.log2(normal_play_no)) * (7 / 14) humidity = high + normal windy_false_play_yes = 6/8 windy_false_play_no = 2/8 windy_false = -(windy_false_play_yes * np.log2(windy_false_play_yes) + windy_false_play_no * np.log2(windy_false_play_no)) * (8 / 14) windy_true_play_yes = 3/6 windy_true_play_no = 3/6 windy_true = -(windy_true_play_yes * np.log2(windy_true_play_yes) + windy_true_play_no * np.log2(windy_true_play_no)) * (6 / 14) windy = windy_false + windy_true sunny_play_yes = 2/5 sunny_play_no = 3/5 sunny = -(sunny_play_yes * np.log2(sunny_play_yes) + sunny_play_no * np.log2(sunny_play_no)) * (5 / 14) overcast_play_yes = 4/4 overcast_play_no = 0/4 overcast = 0 rain_play_yes = 2/5 rain_play_no = 3/5 rain = -(rain_play_yes * np.log2(rain_play_yes) + rain_play_no * np.log2(rain_play_no)) * (5 / 14) outlook= sunny+overcast+rain print("outlook 熵增益:",play-outlook) print("temperature 熵增益:",play-temperature) print("humidity 熵增益:",play-humidity) print("windy 熵增益:",play-windy) #运行结果 """ play: 
0.9402859586706311 ===================分割线=================== outlook 熵增益: 0.24674981977443933 temperature 熵增益: 0.02922256565895487 humidity 熵增益: 0.15183550136234159 windy 熵增益: 0.04812703040826949 """ # 数据来源: 账号是否真实_yes = 7/10 账号是否真实_no = 3/10 账号是否真实 = -(账号是否真实_yes*np.log2(账号是否真实_yes) + 账号是否真实_no*np.log2(账号是否真实_no)) print("账号是否真实:",账号是否真实) print("===================分割线===================") 日志密度_s_yes = 1/3 日志密度_s_no = 2/3 日志密度_s = -(日志密度_s_yes*np.log2(日志密度_s_yes) + 日志密度_s_no*np.log2(日志密度_s_no)) * (3/10) 日志密度_m_yes = 3/4 日志密度_m_no = 1/4 日志密度_m = -(日志密度_m_yes*np.log2(日志密度_m_yes) + 日志密度_m_no*np.log2(日志密度_m_no)) * (4/10) 日志密度_l_yes = 3/3 日志密度_l_no = 0/3 日志密度_l = 0 日志密度 = 日志密度_s + 日志密度_m + 日志密度_l print("日志密度 熵增益:",账号是否真实-日志密度) 好友密度_s_yes = 1/4 好友密度_s_no = 3/4 好友密度_s = -(好友密度_s_yes*np.log2(好友密度_s_yes) + 好友密度_s_no*np.log2(好友密度_s_no)) * (4/10) 好友密度_l = 0 好友密度_m = 0 好友密度 = 好友密度_s + 好友密度_l + 好友密度_m print("好友密度 熵增益:",账号是否真实-好友密度) 真实头像_yes_yes = 4/5 真实头像_yes_no = 1/5 真实头像_yes = -(真实头像_yes_yes*np.log2(真实头像_yes_yes) + 真实头像_yes_no*np.log2(真实头像_yes_no)) * (5/10) 真实头像_no_yes = 3/5 真实头像_no_no = 2/5 真实头像_no = -(真实头像_no_yes*np.log2(真实头像_no_yes) + 真实头像_no_no*np.log2(真实头像_no_no)) * (5/10) 真实头像 = 真实头像_yes + 真实头像_no print("真实头像 熵增益:",账号是否真实-真实头像) #运行结果 """ 账号是否真实: 0.8812908992306927 ===================分割线=================== 日志密度 熵增益: 0.2812908992306927 好友密度 熵增益: 0.5567796494470396 真实头像 熵增益: 0.034851554559677256 """
10. 决策树信息增益(熵增益)计算练习
本节 3556 字 · 2025-05-26 17:49:08