# Create the project skeleton: raw and processed data, scripts, SQL,
# notebooks and plot output. -p makes this safe to re-run on an existing tree.
mkdir -p \
  linux-ds-project/data/raw \
  linux-ds-project/data/processed \
  linux-ds-project/scripts \
  linux-ds-project/sql \
  linux-ds-project/notebooks \
  linux-ds-project/output_plots

# Enter the project root; the commands that follow use paths relative to it.
cd linux-ds-project

# Check file size:
ls -lh data/raw/superstore_sales.csv
# Count total rows:
# (wc -l counts every line of the file, so the header line is included)
wc -l data/raw/superstore_sales.csv
# View header row:
# (the first line of the file; use it to confirm the column numbers used below)
head -n 1 data/raw/superstore_sales.csv
# Find how many unique Categories exist:
# Prints one line per distinct value of column 15, prefixed by its row count;
# the number of output lines is the number of unique categories.
# tail -n +2 drops the header so the column title is not tallied as a category.
# Caveat: cut splits on every comma and does not understand CSV quoting, so the
# column number shifts on rows where an earlier field has a comma inside quotes.
tail -n +2 data/raw/superstore_sales.csv | cut -d',' -f15 | sort | uniq -c
# Find all orders from "Kentucky":
# Shows only the first 5 matching lines. The text is matched anywhere on the
# line, not just in one column. -F treats the search text as a literal string
# and -- ends option parsing before it.
grep -F -- 'Kentucky' data/raw/superstore_sales.csv | head -n 5
# Check for missing values (empty commas):
# grep -c prints the number of LINES that contain two adjacent commas (a line
# with several empty fields still counts once). An empty first or last field
# has no adjacent comma pair and is not detected by this pattern.
grep -c ',,' data/raw/superstore_sales.csv
# Calculate Total Sales for the "West" Region using AWK:
# (Column 18 is Sales, Column 13 is Region - verify with head)
# printf is used instead of print: print formats non-integer numbers with
# OFMT (%.6g by default), which rounds a large total to six significant digits
# or switches to exponent notation. printf also yields 0.00 when no row matches.
# NOTE(review): -F',' splits on every comma, including commas inside quoted
# fields; on such rows $18 is no longer the Sales value - confirm the data has
# no quoted commas before trusting the total.
awk -F',' '$13 == "West" { sum += $18 } END { printf "%.2f\n", sum }' data/raw/superstore_sales.csv
# Sort the Top 5 largest sales transactions:
# (Skips header, sorts by column 18 numerically reverse, takes top 5)
# The key is bounded as -k18,18 so only field 18 is compared; a bare -k18 would
# extend the key from field 18 to the end of the line.
tail -n +2 data/raw/superstore_sales.csv | sort -t',' -k18,18 -n -r | head -n 5
# Extract emails or patterns (if applicable) using SED:
# -n suppresses sed's default output and /@/p prints only lines containing "@";
# head then keeps the first 10 of those lines.
sed -n '/@/p' data/raw/superstore_sales.csv | head