import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from deap import base, creator, tools, algorithms
import random
import matplotlib.font_manager as fm

warnings.filterwarnings("ignore")

plt.rcParams['font.sans-serif'] = ['SimHei']  # use the SimHei font for figure text
plt.rcParams['axes.unicode_minus'] = False  # work around the minus sign not displaying on axes

data = pd.read_excel('堤防训练集数据0.xlsx')

raw_inputs = data.iloc[:, :-1].values  # features: every column except the last
output = data.iloc[:, -1].values.astype(int)  # labels: last column, forced to int

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# validation rows influence the min/max used to scale the training rows
# (data leakage), which makes the validation scores optimistic.
train_raw, val_raw, train_output, val_output = train_test_split(
    raw_inputs, output, test_size=0.2, random_state=20, shuffle=True)

scaler = MinMaxScaler()
train_inputs = scaler.fit_transform(train_raw)  # statistics come from the training rows only
val_inputs = scaler.transform(val_raw)  # reuse the training statistics; values may fall outside [0, 1]

# Kept so that `inputs` still names the scaled feature matrix, as before.
inputs = scaler.transform(raw_inputs)

def evaluate(individual):
    """Fitness function: validation accuracy of an MLP built from ``individual``.

    Genes 0 and 1 are decoded as the two hidden-layer sizes (absolute value,
    truncated to int, at least 1). Gene 2 is decoded as the initial learning
    rate (absolute value, at least 0.0001). The network is trained on the
    module-level ``train_inputs`` / ``train_output`` and scored on
    ``val_inputs`` / ``val_output``.

    Returns:
        A one-element tuple ``(accuracy,)``.
    """
    layer_sizes = tuple(max(1, int(abs(individual[k]))) for k in (0, 1))
    step_size = max(0.0001, float(abs(individual[2])))

    model = MLPClassifier(
        hidden_layer_sizes=layer_sizes,
        learning_rate_init=step_size,
        max_iter=100,  # iteration cap used while searching
        solver='sgd',
        random_state=42,
        batch_size=min(200, train_inputs.shape[0]),  # never larger than the training set
        tol=1e-6,
        n_iter_no_change=10,
    )
    model.fit(train_inputs, train_output)

    predicted = model.predict(val_inputs)
    return (accuracy_score(val_output, predicted),)

def custom_mutate(individual, indpb=0.2):
    """Mutate ``individual`` in place, one gene at a time.

    Each gene is changed independently with probability ``indpb``:

    * index 0 and 1 (hidden-layer sizes): add a random integer from
      [-10, 10], then force the result to an int that is at least 1;
    * any later index (learning rate): add a random offset from
      [-0.01, 0.01], then clamp the result to [0.0001, 0.1].

    Returns:
        A one-element tuple containing the same, mutated individual.
    """
    for position, gene in enumerate(individual):
        if not (random.random() < indpb):
            continue  # this gene is left untouched
        if position < 2:  # hidden-layer neuron count
            shifted = gene + random.randint(-10, 10)
            individual[position] = int(max(1, shifted))
        else:  # learning rate
            shifted = gene + random.uniform(-0.01, 0.01)
            individual[position] = float(max(0.0001, min(0.1, shifted)))
    return (individual,)

POP_SIZE = 20  # population size
N_GEN = 10  # number of generations
GA_SEED = 42  # seed for Python's `random`, which drives every GA operator below

# The data split and the classifiers already use fixed seeds; seeding the GA
# as well makes the whole search repeatable from run to run.
random.seed(GA_SEED)

# Single-objective fitness to be maximised (validation accuracy).
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_int", random.randint, 1, 100)  # hidden-layer neuron count, never zero
toolbox.register("attr_float", random.uniform, 0.0001, 0.1)  # learning rate
# An individual is [hidden_layer_1, hidden_layer_2, learning_rate].
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_int, toolbox.attr_int, toolbox.attr_float), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", custom_mutate, indpb=0.2)  # use the custom mutation function
toolbox.register("select", tools.selTournament, tournsize=3)

population = toolbox.population(n=POP_SIZE)

CXPB = 0.5  # probability that a pair of offspring is crossed over
MUTPB = 0.2  # probability that an offspring is mutated

# Score the initial population before the first selection. Without this the
# first tournament compares individuals that have no fitness values yet, so
# it cannot prefer better candidates.
for ind in population:
    if not ind.fitness.valid:
        ind.fitness.values = toolbox.evaluate(ind)

for gen in range(N_GEN):
    # Select parents, then clone them so the operators below do not modify
    # the members of the current population.
    offspring = toolbox.select(population, len(population))
    offspring = list(map(toolbox.clone, offspring))

    # Crossover on consecutive pairs; changed children lose their fitness.
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if random.random() < CXPB:
            toolbox.mate(child1, child2)
            del child1.fitness.values
            del child2.fitness.values

    # Mutation; a mutated individual must be re-evaluated as well.
    for mutant in offspring:
        if random.random() < MUTPB:
            toolbox.mutate(mutant)
            del mutant.fitness.values

    # Only individuals whose fitness was invalidated are trained again.
    invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    # Full generational replacement.
    population[:] = offspring

best_ind = tools.selBest(population, 1)[0]
print("最佳个体的参数:", best_ind)
print("验证集准确率:", best_ind.fitness.values[0])

# Decode the genes exactly as `evaluate` does. Blend crossover can leave a
# gene fractional, below 1, or negative; a plain int() would then hand
# MLPClassifier a zero/negative layer size or a non-positive learning rate,
# and the final model would not match the one that earned the fitness.
best_hidden_layer_1 = max(1, int(abs(best_ind[0])))
best_hidden_layer_2 = max(1, int(abs(best_ind[1])))
best_learning_rate_init = max(0.0001, float(abs(best_ind[2])))

best_clf = MLPClassifier(hidden_layer_sizes=(best_hidden_layer_1, best_hidden_layer_2),
                         learning_rate_init=best_learning_rate_init,
                         max_iter=300,  # the final fit is allowed more iterations than the search
                         solver='sgd',
                         random_state=42,
                         batch_size=min(200, train_inputs.shape[0]),
                         tol=1e-6,
                         n_iter_no_change=10)
best_clf.fit(train_inputs, train_output)

# Final evaluation: predictions of the tuned model on both subsets.
train_pred = best_clf.predict(train_inputs)
val_pred = best_clf.predict(val_inputs)

# Accuracy, then macro-averaged recall, precision and F1, for each subset.
metrics = ['准确率', '召回率', '精度', 'F1分数']

train_metrics = [accuracy_score(train_output, train_pred)] + [
    scorer(train_output, train_pred, average='macro')
    for scorer in (recall_score, precision_score, f1_score)
]

val_metrics = [accuracy_score(val_output, val_pred)] + [
    scorer(val_output, val_pred, average='macro')
    for scorer in (recall_score, precision_score, f1_score)
]

# Print the training block first, then the validation block.
for heading, scores in (("\n训练集指标：", train_metrics),
                        ("\n验证集指标：", val_metrics)):
    print(heading)
    for metric, value in zip(metrics, scores):
        print(f'{metric}: {value:.4f}')

# Confusion matrices for the training and validation predictions.
train_cm = confusion_matrix(train_output, train_pred)
val_cm = confusion_matrix(val_output, val_pred)

# One annotated heatmap per matrix, shown in order: training, then validation.
for matrix, title in ((train_cm, '训练集混淆矩阵'), (val_cm, '验证集混淆矩阵')):
    plt.figure(figsize=(8, 6))
    sns.heatmap(matrix, annot=True, fmt="d", cmap="Blues", cbar=False)
    plt.title(title)
    plt.xlabel('预测类别')
    plt.ylabel('实际类别')
    plt.show()

output_file = '预测结果.xlsx'  # name of the exported workbook

# Pair true labels with predictions for each subset.
train_results = pd.DataFrame({'真实值': train_output, '预测值': train_pred})
val_results = pd.DataFrame({'真实值': val_output, '预测值': val_pred})

# Write both tables to one workbook, one sheet per subset.
with pd.ExcelWriter(output_file) as writer:
    for sheet, frame in (('训练集结果', train_results), ('验证集结果', val_results)):
        frame.to_excel(writer, sheet_name=sheet, index=False)

print(f"\n训练集和验证集的真实值与预测值已成功导出到 {output_file}")