-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluate.py
More file actions
169 lines (132 loc) · 5.91 KB
/
evaluate.py
File metadata and controls
169 lines (132 loc) · 5.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import numpy as np
from tensorflow.keras.models import load_model
import pandas as pd
import autokeras as ak
from sklearn.metrics import roc_curve, auc, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# --- Configuration --------------------------------------------------------
model_path = 'Model_path'          # path to the saved Keras/AutoKeras model
test_data_path = 'test_data_path'  # CSV: features + [label, data_type] columns
Run_ID = 'Run_ID'                  # tag appended to saved figure filenames

# --- Unseen test set (CSV) ------------------------------------------------
# Column layout: all but the last two columns are features, the
# second-to-last column is the binary label, and the last column is a
# data-type code (see the per-type breakdown further down).
test_data = pd.read_csv(test_data_path, header=None)
X_test = test_data.iloc[:, :-2].values      # feature matrix
y_test = test_data.iloc[:, -2].values       # binary labels
data_types = test_data.iloc[:, -1].values   # data-type codes

# --- Real data (NumPy archive) --------------------------------------------
path = 'Real_data_path'
Test_data = np.load(path)
X = Test_data[:, :-1]  # features: every column except the last
y = Test_data[:, -1]   # labels: the last column

# Indices to spot-check: the first five examples of each block of ten,
# for the blocks starting at 0, 10, 20, 40 and 50.
#example_no = list(range(45))
example_no = [start + offset for start in (0, 10, 20, 40, 50) for offset in range(5)]
model = load_model(model_path)

# Spot-check individual real-data examples and tabulate the outcomes.
results = []
for index in example_no:
    example_data = X[index].reshape(1, -1)    # model expects a 2-D batch
    probability = model.predict(example_data) # sigmoid output = P(class 1)
    prediction = (probability > 0.5).astype(int)[0][0]

    # BUG FIX: the raw sigmoid output is P(class 1); the confidence in the
    # *chosen* class is that probability for a positive prediction and its
    # complement for a negative one. The original np.max over a single-value
    # array always reported P(class 1), understating confidence whenever the
    # prediction was 0.
    p = float(np.ravel(probability)[0])
    confidence = p if prediction == 1 else 1.0 - p
    formatted_confidence = f"{confidence:.3f}"  # 3 decimal places for display

    results.append({
        "Index": index,
        "Binary Prediction": prediction,
        "Confidence": formatted_confidence,
        "Actual Classification": y[index]
    })

# Tabulate per-example outcomes and report overall spot-check accuracy.
results_df = pd.DataFrame(results)
results_df['Prediction Correct?'] = results_df['Binary Prediction'] == results_df['Actual Classification']
correct_predictions_percentage = results_df['Prediction Correct?'].mean() * 100

print(results_df.to_string(index=False))
print(f"Percentage of correct predictions: {correct_predictions_percentage:.2f}%")
# --- Held-out CSV test set: ROC curve and overall confusion matrix --------
y_pred_prob = model.predict(X_test).ravel()

# ROC curve and area under it, from the raw probabilities.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

# Hard 0/1 labels at the conventional 0.5 threshold.
y_pred = (y_pred_prob > 0.5).astype(int)

fig, (roc_ax, cm_ax) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: ROC curve with the chance diagonal for reference.
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve')
roc_ax.legend(loc="lower right")

# Right panel: confusion matrix over the whole test set.
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=cm_ax)
cm_ax.set_title('Overall Confusion Matrix')
cm_ax.set_ylabel('Actual label')
cm_ax.set_xlabel('Predicted label')

# Save the two-panel figure and free it.
plt.tight_layout()
plt.savefig(f'ML_stuff/figs/overall_roc_cm_{Run_ID}.png')
plt.close()
def safe_predict(model, X):
    """Run ``model.predict`` on ``X`` and return a flattened 1-D array.

    An empty input yields an empty array instead of an error, so callers
    can predict on data-type subsets that may contain no rows.
    """
    if X.size:
        return model.predict(X).ravel()
    return np.array([])
# --- Per-data-type breakdown of the CSV test set --------------------------
# Data-type codes: 2 = target, 3 = mix, 4 = false alarm.
X_test_target = X_test[data_types == 2]
X_test_mix = X_test[data_types == 3]
X_test_false_alarm = X_test[data_types == 4]

# Probabilities per subset; safe_predict tolerates empty subsets.
predictions_target = safe_predict(model, X_test_target)
predictions_mix = safe_predict(model, X_test_mix)
predictions_false_alarm = safe_predict(model, X_test_false_alarm)

# Ground truth: targets and mixes are positives, false alarms are negatives.
actual_target = np.ones(len(predictions_target))
actual_mix = np.ones(len(predictions_mix))
actual_false_alarm = np.zeros(len(predictions_false_alarm))


def _binary_predict(probs):
    # Threshold probabilities into hard 0/1 labels; safe on empty arrays.
    return (probs > 0.5).astype(int)


predictions_target = _binary_predict(predictions_target)
predictions_mix = _binary_predict(predictions_mix)
predictions_false_alarm = _binary_predict(predictions_false_alarm)

# One row per test example: ground truth, prediction, and its data type.
results = pd.DataFrame({
    'Actual': np.concatenate([actual_target, actual_mix, actual_false_alarm]),
    'Predicted': np.concatenate([predictions_target, predictions_mix, predictions_false_alarm]),
    'Data Type': ['Target'] * len(actual_target) + ['Mix'] * len(actual_mix) + ['False Alarm'] * len(actual_false_alarm)
})

# Label each row Correct/Incorrect and keep a boolean column for averaging.
results['Outcome'] = np.where(results['Actual'] == results['Predicted'], 'Correct', 'Incorrect')
results['Correct'] = results['Actual'] == results['Predicted']

accuracy = results['Correct'].mean() * 100
print(f"Accuracy: {accuracy:.2f}%")
# --- Outcome-by-data-type heatmap (percentages) ---------------------------
# Pivot to a matrix of counts: rows = Correct/Incorrect, columns = data type.
matrix = pd.pivot_table(results, index='Outcome', columns='Data Type', aggfunc='size', fill_value=0)

# Normalise each column so it shows percentages of that data type's examples.
for col in matrix.columns:
    matrix[col] = (matrix[col] / matrix[col].sum()) * 100

plt.figure(figsize=(9, 6))
sns.heatmap(matrix, annot=True, fmt=".2f", cmap="Greens", cbar_kws={'label': 'Percentage (%)'})
plt.title('Classification Results by Data Type and Outcome (Percentages)')
plt.ylabel('Outcome')
plt.xlabel('Data Type')
plt.tight_layout()  # prevent clipping of the y-label

# BUG FIX: save *before* show. A blocking plt.show() tears down the active
# figure when its window closes, so the original savefig-after-show call
# wrote out a blank image.
plt.savefig(f'ML_stuff/figs/cm_data_type_{Run_ID}.png')
plt.show()
plt.close()

print("All plots have been saved.")