ML-Lab/ok.py at main · Sanketmandwal/ML-Lab · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import pandas as pd
import numpy as np

# Configuration: how many rows to generate and which CSV file receives them
n_samples = 1000
file_name = 'polynomialdata.csv'

# Fix NumPy's global seed so every run draws exactly the same numbers
np.random.seed(42)

# --- Build the synthetic data ---

# Feature 'X': standard-normal draws, scaled by -3 and shifted by 2
standard_draws = np.random.normal(loc=0, scale=1, size=n_samples)
X = 2 - 3 * standard_draws

# Target 'y': the degree-2 polynomial X - 2*(X^2) plus Gaussian noise.
# The noise is drawn with mean -3 and standard deviation 3, so it is not
# centred on zero.
noise = np.random.normal(loc=-3, scale=3, size=n_samples)
y = X - 2 * (X ** 2) + noise

# --- Assemble the table and write it out ---

# Two-column frame: feature first, target second
df = pd.DataFrame({'X': X, 'y': y})

# index=False keeps the row index out of the CSV file
df.to_csv(file_name, index=False)

# Report what was written, then preview the first five rows
print(f"Successfully created the file '{file_name}' with {len(df)} entries.")
print("\n--- First 5 Rows of the Dataset ---")
print(df.head(5))