-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathBat Algorithm Feature Selection.py
More file actions
125 lines (103 loc) · 4.89 KB
/
Bat Algorithm Feature Selection.py
File metadata and controls
125 lines (103 loc) · 4.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
# Bat Algorithm Parameters
POPULATION_SIZE = 3  # number of bats (candidate feature masks) kept in the population
MAX_GENERATIONS = 30  # number of outer optimisation iterations
LOUDNESS = 0.5  # an improving candidate replaces its bat only if a uniform draw is below this
PULSE_RATE = 0.5  # a uniform draw above this swaps the candidate for a one-bit flip of the best mask
FREQ_MIN = 0  # lower bound of the frequency drawn for each bat update
FREQ_MAX = 2  # upper bound of the frequency drawn for each bat update
# Synthetic dataset parameters (consumed by main via generate_random_data)
NUM_SAMPLES = 500  # rows in the generated dataset
NUM_FEATURES = 10  # columns in the generated dataset
NUM_CLASSES = 3  # labels are drawn from 0 .. NUM_CLASSES - 1
NUM_SELECTED_FEATURES = 5  # features switched on in every initial mask
# Generate random dataset
def generate_random_data(num_samples, num_features, num_classes):
    """Build a synthetic classification dataset from NumPy's global RNG.

    Args:
        num_samples: Number of rows to generate.
        num_features: Number of feature columns per row.
        num_classes: Number of distinct labels; labels fall in
            ``0 .. num_classes - 1``.

    Returns:
        A ``(features, labels)`` tuple: a ``(num_samples, num_features)``
        array of uniform values in ``[0, 1)`` and a length-``num_samples``
        integer label array drawn independently of the features.
    """
    shape = (num_samples, num_features)
    # Features are drawn before labels; keeping this order keeps results
    # reproducible for a given global seed.
    features = np.random.rand(*shape)
    labels = np.random.randint(low=0, high=num_classes, size=num_samples)
    return features, labels
# Cost function
def cost_function(X, y, selected_features):
    """Score a feature subset by its hold-out classification error.

    Args:
        X: 2-D feature matrix (samples x features).
        y: Class labels aligned with the rows of ``X``.
        selected_features: Vector with one entry per column of ``X``;
            columns whose entry equals 1 are kept.

    Returns:
        ``1 - accuracy`` of an XGBoost classifier evaluated on a fixed 30%
        hold-out split, or ``float('inf')`` when the entries of
        ``selected_features`` sum to zero (nothing selected).
    """
    # An empty subset cannot be trained on: give it the worst possible cost.
    if np.sum(selected_features) == 0:
        return float('inf')

    keep_mask = selected_features == 1
    subset = X[:, keep_mask]

    # Fixed random_state so every candidate is judged on the same split.
    train_X, test_X, train_y, test_y = train_test_split(
        subset, y, test_size=0.3, random_state=42
    )

    classifier = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
    classifier.fit(train_X, train_y)
    predictions = classifier.predict(test_X)

    error_rate = 1 - accuracy_score(test_y, predictions)  # Minimize error
    return error_rate
# Initialize population
def initialize_population(size, num_features):
    """Create the initial population of binary feature masks.

    Each individual is a length-``num_features`` vector of 0.0/1.0 values
    with ``NUM_SELECTED_FEATURES`` randomly chosen positions set to 1. When
    the dataset has fewer columns than ``NUM_SELECTED_FEATURES``, every
    feature is selected instead of raising.

    Args:
        size: Number of individuals (rows) to create.
        num_features: Length of each mask (columns).

    Returns:
        A ``(size, num_features)`` float array of zeros and ones.
    """
    # Sampling without replacement cannot draw more items than exist, so
    # clamp the count; otherwise np.random.choice raises ValueError for
    # datasets narrower than NUM_SELECTED_FEATURES.
    num_active = min(NUM_SELECTED_FEATURES, num_features)

    population = np.zeros((size, num_features))
    for mask in population:  # each row is a view, so writes land in population
        chosen = np.random.choice(num_features, num_active, replace=False)
        mask[chosen] = 1
    return population
# Update position using Bat Algorithm
def update_position(individual, velocity, frequency, best_individual):
    """Advance one bat and return its new binary position.

    The velocity is increased by ``(individual - best_individual) *
    frequency``, added to the current position, and the result is rounded
    and clipped to the range ``[0, 1]``.

    Args:
        individual: Current position (feature mask) of the bat.
        velocity: Current velocity of the bat. Updated IN PLACE, so the
            caller's array holds the new velocity afterwards.
        frequency: Scalar step factor for this update.
        best_individual: Best position found so far.

    Returns:
        A new array holding the bat's updated position.
    """
    step = (individual - best_individual) * frequency
    velocity += step  # in-place on purpose: the caller's array is updated too
    moved = individual + velocity
    return np.round(moved).clip(0, 1)
# Bat Algorithm for Feature Selection
def bat_algorithm(X, y):
    """Search for a low-error feature subset with a binary Bat Algorithm.

    A population of ``POPULATION_SIZE`` binary masks is evolved for
    ``MAX_GENERATIONS`` generations. For each bat, a candidate is produced
    by ``update_position``; with probability ``1 - PULSE_RATE`` it is
    replaced by a one-bit flip of the best mask found so far. An improving
    candidate replaces its bat with probability ``LOUDNESS``, and the
    global best is updated whenever a candidate beats it.

    Args:
        X: 2-D feature matrix (samples x features).
        y: Class labels aligned with the rows of ``X``.

    Returns:
        A tuple ``(best_individual, best_accuracy, costs_over_time)``: the
        best mask found, ``1 - best_cost`` for that mask, and a list with
        the best cost recorded at the end of every generation.
    """
    num_features = X.shape[1]
    population = initialize_population(POPULATION_SIZE, num_features)
    velocities = np.zeros((POPULATION_SIZE, num_features))
    cost = np.array([cost_function(X, y, individual) for individual in population])

    best_index = np.argmin(cost)
    # Copy the row: indexing returns a view, and rows of `population` are
    # overwritten below, which would silently change the recorded best.
    best_individual = population[best_index].copy()
    best_cost = cost[best_index]
    costs_over_time = []

    for generation in range(MAX_GENERATIONS):
        for i in range(POPULATION_SIZE):
            frequency = FREQ_MIN + (FREQ_MAX - FREQ_MIN) * random.random()

            # update_position advances velocities[i] in place (the row is a
            # view) and returns the new position. Call it exactly once:
            # assigning its result to velocities[i] would overwrite the
            # velocity with a position, and a second call would apply the
            # frequency step twice.
            candidate = update_position(
                population[i], velocities[i], frequency, best_individual
            )

            # Local search: explore a one-bit neighbour of the best mask.
            if random.random() > PULSE_RATE:
                candidate = best_individual.copy()
                mutation_index = random.randint(0, num_features - 1)
                candidate[mutation_index] = 1 - candidate[mutation_index]

            candidate_cost = cost_function(X, y, candidate)

            # Accept an improving candidate only with probability LOUDNESS.
            if candidate_cost < cost[i] and random.random() < LOUDNESS:
                population[i] = candidate
                cost[i] = candidate_cost

            # The global best is tracked independently of acceptance.
            if candidate_cost < best_cost:
                best_individual = candidate
                best_cost = candidate_cost

        costs_over_time.append(best_cost)
        print(f"Generation {generation + 1}, Best Cost: {best_cost:.4f}")

    return best_individual, 1 - best_cost, costs_over_time
def _holdout_accuracy(features, labels):
    """Train XGBoost on a fixed 70/30 split and return the test accuracy."""
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.3, random_state=42
    )
    model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
    model.fit(train_X, train_y)
    return accuracy_score(test_y, model.predict(test_X))


def main():
    """Run the demo: generate data, select features, report and plot.

    Generates a random dataset, measures baseline accuracy with all
    features, runs the Bat Algorithm, re-measures accuracy on the selected
    features, prints a summary, and plots the best cost per generation.
    """
    X, y = generate_random_data(NUM_SAMPLES, NUM_FEATURES, NUM_CLASSES)

    # Baseline: accuracy using every feature.
    original_accuracy = _holdout_accuracy(X, y)

    # The accuracy returned by the search is not used; it is re-measured below.
    best_features, _best_accuracy, costs_over_time = bat_algorithm(X, y)

    # Evaluate accuracy with selected features
    selected_accuracy = _holdout_accuracy(X[:, best_features == 1], y)

    print("\nOriginal Accuracy with All Features:", original_accuracy)
    print("Selected Features (1=selected, 0=not selected):", best_features)
    print("Accuracy with Selected Features:", selected_accuracy)

    # Count selected features
    print("Number of Selected Features:", np.sum(best_features))

    # Plot optimization progress
    plt.figure(figsize=(10, 6))
    plt.plot(costs_over_time, marker='o')
    plt.title("Bat Algorithm Optimization Progress")
    plt.xlabel("Generation")
    plt.ylabel("Best Cost")
    plt.grid()
    plt.show()


if __name__ == "__main__":
    main()