-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgocerry project.R
More file actions
224 lines (198 loc) · 9.85 KB
/
Copy pathgocerry project.R
File metadata and controls
224 lines (198 loc) · 9.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#Final update in Project
library(shiny)
library(ggplot2)
library(dplyr)# duplicated&& group by
library(arules)
library(readr)
# Data cleaning function
#
# Coerces the known columns to their expected types, then removes incomplete
# and duplicated rows.
#
# df: a data frame (typically the result of read.csv()). Columns named
#   count, rnd, age (integer), total (numeric) and items, customer, city
#   (character) are coerced when present; all other columns are left as-is.
# Returns: a data frame with no NA values and no duplicated rows, with row
#   names renumbered from 1.
clean_data <- function(df) {
  # For each known column: the predicate that says "already the right type"
  # and the converter to apply otherwise.
  column_types <- list(
    count    = list(is_type = is.integer,   convert = as.integer),
    total    = list(is_type = is.numeric,   convert = as.numeric),
    rnd      = list(is_type = is.integer,   convert = as.integer),
    age      = list(is_type = is.integer,   convert = as.integer),
    items    = list(is_type = is.character, convert = as.character),
    customer = list(is_type = is.character, convert = as.character),
    city     = list(is_type = is.character, convert = as.character)
  )

  # Coerce BEFORE dropping NA rows: a value that cannot be converted (for
  # example a non-numeric "count") becomes NA here and must be removed below,
  # otherwise the "cleaned" data would still contain missing values.
  for (col in intersect(names(df), names(column_types))) {
    spec <- column_types[[col]]
    values <- df[[col]]
    if (!spec$is_type(values)) {
      if (is.factor(values)) {
        # Go through the labels; converting a factor directly would yield
        # its internal level codes instead of the values.
        values <- as.character(values)
      }
      df[[col]] <- spec$convert(values)
    }
  }

  # Remove rows with any NA values
  df <- na.omit(df)
  # Remove duplicates (after coercion, so rows that only differed in how a
  # value was written are recognised as duplicates too)
  df <- df[!duplicated(df), , drop = FALSE]
  rownames(df) <- NULL
  df # This is the cleaned data
}
# Define UI: the appearance of the application.
# A sidebar collects the inputs; the main area shows one tab per result.
ui <- fluidPage( # fluid page: elements stretch to fill the screen automatically
  titlePanel("Dataset Analysis App"), # title of the program
  sidebarLayout( # arranges the page into a sidebar and a main area
    sidebarPanel(
      width = 3,
      # Inputs
      fileInput("file_path", "Upload CSV File:", accept = ".csv"),
      numericInput(
        "numofcls", "Number of Groups (2-4):",
        value = 2, min = 2, max = 4, step = 1
      ),
      numericInput(
        "minsupp", "Minimum Support:",
        value = 0.01, min = 0, max = 1, step = 0.01
      ),
      numericInput(
        "minconf", "Minimum Confidence:",
        value = 0.5, min = 0, max = 1, step = 0.01
      ),
      actionButton("process", "Process Data"),
      actionButton("run_button", "Run Apriori")
    ),
    mainPanel( # outputs appear here
      tabsetPanel(
        # Each tab is selected from the strip at the top of the main area.
        tabPanel("Cleaned Data", tableOutput("cleaned_data")),
        tabPanel("Clustered Data", plotOutput("plot")),
        tabPanel("Clustered Data Summary", tableOutput("cluster_summary")),
        tabPanel(
          "Visualization",
          selectInput(
            "tools",
            "Choose a graph to be displayed",
            choices = c(
              "Compare cash and credit totals." = "piechart",
              "Display each city total spending" = "barplot",
              "Comparing each age and sum of total spending" = "plot",
              "Distribution of total spending" = "boxplot",
              "Dashboard" = "dashboard"
            )
          ),
          plotOutput("visualization") # shows the graph chosen above
        ),
        tabPanel(
          "Apriori Association Rule Mining",
          dataTableOutput("rules_table"),
          plotOutput("relative_plot"),
          plotOutput("absolute_plot")
        )
      )
    )
  )
)
# Define server logic

# Sum spending per customer and assign every customer to a k-means cluster.
#
# df: cleaned data containing `customer`, `age` and `total` columns.
# nclusters: number of clusters passed to kmeans().
# Returns: a data frame with columns name, age, total and Cluster_Group.
#   Cluster_Group is a factor so that plots colour it per group instead of
#   on a continuous gradient.
cluster_customers <- function(df, nclusters) {
  # One row per (customer, age) with that customer's summed spending.
  per_customer <- df %>%
    group_by(customer, age) %>%
    summarise(total = sum(total), .groups = "drop")
  points <- data.frame(age = per_customer$age, total = per_customer$total)
  fit <- kmeans(points, centers = nclusters)
  data.frame(
    name = per_customer$customer,
    age = per_customer$age,
    total = per_customer$total,
    Cluster_Group = factor(fit$cluster)
  )
}

# Total spending per city, largest first.
spending_by_city <- function(df) {
  by_city <- df %>%
    group_by(city) %>%
    summarise(total_spending = sum(total), .groups = "drop")
  by_city[order(by_city$total_spending, decreasing = TRUE), ]
}

# Total spending per age.
spending_by_age <- function(df) {
  df %>%
    group_by(age) %>%
    summarise(total_spending = sum(total), .groups = "drop")
}

# Bar chart of total spending per city; `label_size` scales the city labels.
draw_city_bars <- function(df, label_size) {
  by_city <- spending_by_city(df)
  barplot(
    height = by_city$total_spending,
    names.arg = by_city$city,
    col = "violetred3",
    main = "display each city and its total spending",
    xlab = "cities",
    ylab = "total spending",
    las = 2, # draw the axis labels perpendicular to the axis
    cex.names = label_size
  )
}

# Pie chart of how often each payment type occurs, labelled in percent.
draw_payment_pie <- function(df, colours) {
  counts <- table(df$paymentType) # number of rows per payment type
  shares <- paste0(round(100 * counts / sum(counts)), "%")
  pie(counts, shares, main = " payment type percentage", col = colours)
  # Take the legend labels from the table itself so that they always match
  # the slice order, whatever the payment types are called in the data.
  legend("bottomright", legend = names(counts), fill = colours)
}

# Box plot of the `total` column.
draw_total_boxplot <- function(df, colour) {
  boxplot(
    x = df$total,
    main = "Distribution of total spending",
    xlab = "total spending",
    col = colour
  )
}

# All four charts on one 2 x 2 page.
draw_dashboard <- function(df) {
  # The layout is not restored afterwards: this is only called from
  # renderPlot(), which is expected to draw each render on its own device.
  par(mfrow = c(2, 2))
  draw_payment_pie(df, c("paleturquoise2", "violetred3"))
  by_age <- spending_by_age(df)
  plot(
    x = by_age$age, y = by_age$total_spending,
    main = "comparing each age with the sum of total spending",
    xlab = "age", ylab = "total spending of a specific generation",
    col = "maroon4"
  )
  draw_total_boxplot(df, "paleturquoise2")
  draw_city_bars(df, label_size = 0.4)
  invisible(NULL)
}

# Server function passed to shinyApp().
#
# input: reads file_path, numofcls, minsupp, minconf, tools and the
#   process / run_button action buttons.
# output: fills cleaned_data, plot, cluster_summary, visualization,
#   rules_table, relative_plot and absolute_plot.
#
# Every output is registered once, at the top level of the server function,
# and pulls its data from reactive expressions. Errors (for example from
# kmeans()) are therefore raised while rendering an output and shown in that
# output, instead of being raised inside an observer.
server <- function(input, output) {
  # Cleaned data; re-read only when "Process Data" is pressed.
  processed <- eventReactive(input$process, {
    req(input$file_path)
    clean_data(read.csv(input$file_path$datapath))
  })

  # Clustered customers; recomputed only when "Process Data" is pressed,
  # using the number of groups entered at that moment.
  clustered <- eventReactive(input$process, {
    n_groups <- input$numofcls
    validate(need(
      isTRUE(n_groups %in% 2:4),
      "Number of groups must be a whole number between 2 and 4."
    ))
    cluster_customers(processed(), n_groups)
  })

  output$cleaned_data <- renderTable({
    processed()
  })

  output$plot <- renderPlot({
    # One colour per cluster group.
    ggplot(clustered(), aes(x = age, y = total, color = Cluster_Group)) +
      geom_point(size = 5) +
      labs(title = "KMeans Clustering Results")
  })

  output$cluster_summary <- renderTable({
    clustered()
  })

  output$visualization <- renderPlot({
    df_cleaned <- processed()
    # switch() yields the value of the chosen branch, so the ggplot object of
    # the "plot" branch is what renderPlot() receives and draws.
    switch(input$tools,
      barplot = draw_city_bars(df_cleaned, label_size = 0.7),
      piechart = draw_payment_pie(df_cleaned, c("purple", "pink")),
      plot = ggplot(
        data = spending_by_age(df_cleaned),
        mapping = aes(x = age, y = total_spending)
      ) +
        geom_point(size = 5) +
        geom_line(colour = "hotpink"),
      boxplot = draw_total_boxplot(df_cleaned, "mediumaquamarine"),
      dashboard = draw_dashboard(df_cleaned)
    )
  })

  # Transactions for Apriori, built when "Run Apriori" is pressed from the
  # most recently processed data.
  dataset <- eventReactive(input$run_button, {
    # Each row's `items` string is a comma-separated basket.
    baskets <- strsplit(processed()$items, ",")
    # Trim stray spaces and drop empty or repeated items so that " bread" and
    # "bread" count as one item and no basket lists an item twice.
    baskets <- lapply(baskets, function(basket) {
      basket <- trimws(basket)
      unique(basket[nzchar(basket)])
    })
    as(baskets, "transactions")
  })

  # Run Apriori with the support / confidence entered by the user.
  apriori_rules <- eventReactive(input$run_button, {
    apriori(
      dataset(),
      parameter = list(
        supp = input$minsupp,
        conf = input$minconf,
        minlen = 2
      )
    )
  })

  # Output rules as a table
  output$rules_table <- renderDataTable({
    rules <- apriori_rules()
    validate(need(
      length(rules) > 0,
      "No rules found for the chosen support and confidence."
    ))
    # Coerce instead of inspect(): inspect() prints the rules as a side effect.
    as(rules, "data.frame")
  })

  # Plot relative item frequencies of the 5 most frequent items
  output$relative_plot <- renderPlot({
    itemFrequencyPlot(dataset(), topN = 5, type = "relative", col = "lightblue1")
  })

  # Plot absolute item frequencies of the 5 most frequent items
  output$absolute_plot <- renderPlot({
    itemFrequencyPlot(dataset(), topN = 5, type = "absolute", col = "plum3")
  })
}
# Launch the app from the UI definition and the server function above.
shinyApp(ui, server)