-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwebad-optimization_upper-confidence-bound_reinforcement-learning.py
More file actions
101 lines (75 loc) · 2.94 KB
/
webad-optimization_upper-confidence-bound_reinforcement-learning.py
File metadata and controls
101 lines (75 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# ==========================================
# Web Ad Optimization using UCB Algorithm
# Reinforcement Learning Project
# ==========================================
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
# ==========================================
# Step 1: Load Dataset
# ==========================================
# The interactive upload widget only exists inside Google Colab; importing it
# elsewhere raises ImportError and kills the script. Guard it so the script
# also runs locally, where dataset.csv is expected next to this file.
try:
    from google.colab import files
    uploaded = files.upload()
except ImportError:
    pass  # Not running in Colab; read dataset.csv from the working directory.
# Read dataset (per Step 3 below, each cell holds a 0/1 reward per user/ad).
dataset = pd.read_csv('dataset.csv')
# Display dataset info
print("Dataset Shape:", dataset.shape)
print("\nFirst 5 Rows:\n", dataset.head())
# ==========================================
# Step 2: Initialize Variables for UCB
# ==========================================
# Derive the problem size from the loaded data instead of hard-coding
# 10000 rounds / 10 ads: the hard-coded values raise an IndexError in
# Step 3 whenever the CSV is smaller, and these expressions give the
# same numbers (10000, 10) for the standard dataset.
observations = dataset.shape[0]           # Total number of rounds/users
no_of_ads = dataset.shape[1]              # Total number of ads
ads_selected = []                         # Stores selected ads at each round
numbers_of_selections = [0] * no_of_ads   # Count of ad selections
sums_of_rewards = [0] * no_of_ads         # Total reward per ad
total_reward = 0                          # Total accumulated reward
# ==========================================
# Step 3: Implement UCB Algorithm
# ==========================================
for round_idx in range(observations):
    # Track the ad with the largest upper confidence bound this round.
    best_ad = 0
    best_bound = 0
    for candidate in range(no_of_ads):
        times_picked = numbers_of_selections[candidate]
        if times_picked > 0:
            # Exploitation term: empirical mean reward of this ad so far.
            mean_reward = sums_of_rewards[candidate] / times_picked
            # Exploration term: grows with the round count, shrinks the
            # more often this ad has been tried.
            exploration = math.sqrt(1.5 * math.log(round_idx + 1) / times_picked)
            bound = mean_reward + exploration
        else:
            # Never-tried ads get an effectively infinite bound so each ad
            # is guaranteed to be selected at least once.
            bound = 1e400
        if bound > best_bound:
            best_bound = bound
            best_ad = candidate
    # Commit this round's choice and observe its 0/1 reward from the data.
    ads_selected.append(best_ad)
    numbers_of_selections[best_ad] += 1
    reward = dataset.values[round_idx, best_ad]
    sums_of_rewards[best_ad] += reward
    total_reward += reward
# ==========================================
# Step 4: Results
# ==========================================
# Report per-ad reward totals, the grand total, and the full selection trace.
for label, value in (
    ("\nRewards by Ads:", sums_of_rewards),
    ("Total Reward (UCB):", total_reward),
    ("\nAds selected at each round:\n", ads_selected),
):
    print(label, value)
# ==========================================
# Step 5: Visualization
# ==========================================
plt.figure(figsize=(8, 5))  # Medium size for screenshot
# Use explicit half-integer bin edges so there is exactly one bar per ad,
# centered on its integer index. Plain bins=no_of_ads would spread the
# edges over [min(ads_selected), max(ads_selected)], misaligning bars with
# the x-ticks (and shifting whenever some ad is never selected).
plt.hist(ads_selected,
         bins=[i - 0.5 for i in range(no_of_ads + 1)],
         edgecolor='black')
plt.xticks(range(no_of_ads))  # label every ad index explicitly
plt.title('Histogram of Ad Selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()