-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.py
More file actions
130 lines (97 loc) · 4.75 KB
/
Copy pathmodel.py
File metadata and controls
130 lines (97 loc) · 4.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# -*- coding: utf-8 -*-
"""Machine Knight Season 2- Personalized Learning Paths.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1ZoUS9JyV59xUt-ygU0y7vdXvwi_woxdc
"""
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import jaccard_score
# Load the student dataset once at import time; the functions defined in this
# module read it through the module-level name ``df``.
# (The notebook export also contained a bare ``df`` expression here, which only
# displays the frame inside a notebook and does nothing in a script.)
df = pd.read_csv("data.csv")
def train_stacking_model():
    """Fit a stacking classifier on the module-level ``df`` and report its score.

    Features are the columns at positions 7..33 of ``df`` and the target is the
    ``'Learning preference'`` column.  The data is split 80/20 with a fixed
    seed, a random forest and a gradient-boosting model are stacked under a
    logistic-regression final estimator, and accuracy plus a classification
    report for the held-out 20% are printed.

    Returns:
        The fitted ``StackingClassifier``.
    """
    features = df.iloc[:, 7:34]
    target = df['Learning preference']

    # Fixed random_state keeps the split (and the reported metrics) reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    stacking_model = StackingClassifier(
        estimators=[
            ('random_forest', RandomForestClassifier(n_estimators=100, random_state=42)),
            ('gradient_boosting', GradientBoostingClassifier(n_estimators=100, random_state=42)),
        ],
        final_estimator=LogisticRegression(),
    )
    stacking_model.fit(X_train, y_train)

    # Score on the held-out split; compute both metrics before printing anything.
    predictions = stacking_model.predict(X_test)
    accuracy, report = (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
    )

    print(f"Stacking Accuracy: {accuracy}")
    print("Classification Report:")
    print(report)
    return stacking_model
# Train (and print the evaluation of) the stacking classifier as soon as the
# module runs, keeping the fitted estimator available as ``model``.
model = train_stacking_model()
def cosine_similarity_test(set1, set2):
    """Return the cosine similarity between two equal-length numeric vectors.

    Args:
        set1: First vector (e.g. a list of 0/1 flags).
        set2: Second vector, same length as ``set1``.

    Returns:
        The single similarity score as a scalar.
    """
    # cosine_similarity works on 2-D (samples x features) input, so each vector
    # becomes a one-row matrix.
    row_a = np.array(set1)[np.newaxis, ...]
    row_b = np.array(set2)[np.newaxis, ...]
    scores = cosine_similarity(row_a, row_b)
    # One row against one row yields a 1x1 matrix; unwrap its only entry.
    return scores[0][0]
def jaccard_similarity(set1, set2):
    """Return the Jaccard index ``|A & B| / |A | B|`` of two collections.

    Args:
        set1: Iterable of hashable items (a set, list, tuple, ...).
        set2: Iterable of hashable items.

    Returns:
        A float between 0.0 and 1.0.  Two empty collections have no union to
        divide by and score 0.0.
    """
    # Coerce to sets so callers may pass any iterable, not only ``set`` objects;
    # duplicates in the inputs are ignored.
    first, second = set(set1), set(set2)
    union_size = len(first | second)
    if union_size == 0:
        return 0.0
    return len(first & second) / union_size
def get_recommendations(student_row, num_recommendations=5):
    """Recommend the activities of the students most similar to ``student_row``.

    Every row of the module-level ``df`` whose ``'ID'`` differs from the
    student's is scored with ``jaccard_similarity`` over the columns at
    positions 2..35 whose value equals 1.  The ``num_recommendations``
    best-scoring rows are selected; rows with equal scores keep the order in
    which they appear in ``df``.

    Args:
        student_row: One row of ``df`` as a Series; must contain ``'ID'``.
        num_recommendations: Number of similar students to draw from.

    Returns:
        A ``(activities, student_ids)`` tuple.  ``activities`` is the
        de-duplicated list of column names flagged 1 for the selected
        students, in first-seen order (so the result is the same on every
        run).  ``student_ids`` lists the selected students' IDs, most similar
        first.
    """
    def flagged_columns(row):
        # NOTE(review): positions 2..35 are presumed to be the 0/1 activity
        # columns -- confirm against the layout of data.csv.
        window = row.iloc[2:36]
        return window[window == 1].index.tolist()

    student_id = student_row['ID']
    student_activities = set(flagged_columns(student_row))

    # Keep each candidate's ID and activities next to its score.  The earlier
    # version stored the iterrows() index *label* and later passed it to the
    # positional ``df.iloc``, which selects the wrong row (or raises) whenever
    # ``df`` does not carry a default 0..n-1 index.
    scored = []
    for _, row in df.iterrows():
        if row['ID'] != student_id:
            activities = flagged_columns(row)
            similarity = jaccard_similarity(student_activities, set(activities))
            scored.append((similarity, row['ID'], activities))

    # list.sort is stable, so ties stay in DataFrame order.
    scored.sort(key=lambda entry: entry[0], reverse=True)
    closest = scored[:num_recommendations]

    recommended_students = [other_id for _, other_id, _ in closest]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique_recommendations = list(
        dict.fromkeys(
            activity for _, _, activities in closest for activity in activities
        )
    )
    return unique_recommendations, recommended_students
# ----------- EXAMPLE -----------
# Build the recommended learning path for one student and compare it with the
# path that student actually followed.
STUDENT_ID = '01HNCCSMC2GPAGY8VAZVWEK4J1'

# Look the student up once and fail with a readable message if the ID is absent
# (otherwise ``.iloc[0]`` raises an opaque "out-of-bounds" IndexError).
student_rows = df.loc[df['ID'] == STUDENT_ID]
if student_rows.empty:
    raise IndexError(f"No student with ID {STUDENT_ID} found in the data")
row_of_data = student_rows.iloc[0]

recommended_learning_paths, recommended_students = get_recommendations(row_of_data)
print(f"Recommended Learning Paths and Students for {STUDENT_ID}:\n{recommended_learning_paths}\n{recommended_students}")

# Encode the recommendation as a 0/1 vector over columns 7..-2 of the frame:
# 1 where the column name was recommended, 0 otherwise.
# NOTE(review): get_recommendations reads columns 2..35 while this comparison
# uses 7..-2 -- confirm that the two ranges are meant to differ.
recommended_lookup = set(recommended_learning_paths)
recommended_set = [
    1 if column_name in recommended_lookup else 0
    for column_name in df.columns[7:-2]
]
print("Recommended Learning Path:", recommended_set)

# The student's actual flags over the same columns.  Only the first matching
# row is used (the same row the recommendation was built from), so the two
# vectors always have equal length even if the ID appears more than once.
actual_set = student_rows.iloc[:1, 7:-2].values.flatten().tolist()
print("Actual Learning Path:", actual_set)

# Despite its name, ``similarity_matrix`` holds a single scalar score.
similarity_matrix = cosine_similarity_test(recommended_set, actual_set)
print(f"Cosine Similarity Score: {similarity_matrix}")