diff --git a/.github/workflows/restore-staging-mongodb.yml b/.github/workflows/restore-staging-mongodb.yml new file mode 100644 index 0000000..0485757 --- /dev/null +++ b/.github/workflows/restore-staging-mongodb.yml @@ -0,0 +1,145 @@ +name: Restore Staging MongoDB from Production + +on: + workflow_dispatch: + inputs: + anonymize_data: + description: 'Anonymize PII data after restore' + required: true + type: boolean + default: true + skip_snapshot: + description: 'Skip snapshot creation (use existing volume)' + required: false + type: boolean + default: false + +env: + AWS_REGION: us-west-2 + +jobs: + restore-mongodb: + name: Clone Production EBS to Staging + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4  # v4 runs on Node 20; v3 targeted the retired Node 16 runtime + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4  # v4 runs on Node 20; same inputs as v2 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + + - name: Verify AWS authentication + run: | + aws sts get-caller-identity + echo "✅ AWS authentication successful" + + - name: Install Ansible + run: | + sudo apt-get update + sudo apt-get install -y ansible + ansible --version + + - name: Verify prerequisites + run: | + echo "Checking AWS CLI..." + aws --version + + echo "Checking Python..." 
+ python3 --version + + echo "✅ All prerequisites met" + + - name: Run MongoDB restore playbook + working-directory: ansible/playbook + run: | + ansible-playbook -i staging/inventory mongodb-restore.yml \ + -e "anonymize_data=${{ github.event.inputs.anonymize_data }}" \ + -v + timeout-minutes: 30 + + - name: Get restore summary + if: success() + id: summary + run: | + echo "restore_complete=true" >> $GITHUB_OUTPUT + echo "timestamp=$(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> $GITHUB_OUTPUT + + - name: Post results to summary + if: success() + run: | + cat >> $GITHUB_STEP_SUMMARY << 'EOF' + ## ✅ MongoDB Staging Restore Completed + + **Timestamp:** ${{ steps.summary.outputs.timestamp }} + + **Configuration:** + - Source: Production MongoDB EBS Volume + - Destination: Staging MongoDB Instance + - Region: ${{ env.AWS_REGION }} + - Data Anonymized: ${{ github.event.inputs.anonymize_data }} + + **Next Steps:** + 1. Verify MongoDB is running on staging + 2. Test application connectivity + 3. Verify data integrity + 4. Clean up old volumes if needed + + **Useful Commands:** + ```bash + # SSH into staging + ssh ec2-user@ + + # Check MongoDB status + sudo systemctl status mongod + + # Verify data + mongosh + use userdb + db.users.countDocuments() + ``` + EOF + + - name: Handle failure + if: failure() + run: | + cat >> $GITHUB_STEP_SUMMARY << 'EOF' + ## ❌ MongoDB Staging Restore Failed + + Please check the workflow logs for detailed error information. + + **Common Issues:** + - AWS permissions insufficient + - SSM Agent not running on instances + - MongoDB not installed on staging + - Network connectivity issues + + **Troubleshooting:** + 1. Check AWS IAM permissions + 2. Verify EC2 instance tags + 3. Check SSM Agent status + 4. 
Review Ansible output logs + EOF + + notify: + name: Send Notification + runs-on: ubuntu-latest + needs: restore-mongodb + if: always() + + steps: + - name: Notify on success + if: needs.restore-mongodb.result == 'success' + run: | + echo "✅ MongoDB staging restore completed successfully" + # Add your notification logic here (Slack, Teams, email, etc.) + + - name: Notify on failure + if: needs.restore-mongodb.result == 'failure' + run: | + echo "❌ MongoDB staging restore failed" + # Add your notification logic here (Slack, Teams, email, etc.) diff --git a/README.md b/README.md index 549d475..f0c8865 100644 --- a/README.md +++ b/README.md @@ -1 +1,262 @@ -# replication-db \ No newline at end of file +# MongoDB Database Replication & Anonymization + +Automated infrastructure for deploying MongoDB on AWS EC2 with EBS volumes, and safely cloning production data to staging environments with PII anonymization. + +## 🎯 Overview + +This project provides: +- **Terraform Infrastructure**: Deploy MongoDB on EC2 with dedicated EBS data volumes +- **Ansible Automation**: Clone production MongoDB volumes to staging +- **Data Anonymization**: Automatically anonymize PII data in staging +- **CI/CD Ready**: GitHub Actions workflows for automated deployments + +## 📁 Project Structure + +``` +. 
+├── terraform/ # Infrastructure as Code +│ ├── modules/ +│ │ └── ec2/ # EC2 + EBS module for MongoDB +│ └── stacks/ +│ ├── production/ # Production environment +│ └── staging/ # Staging environment +├── ansible/ # Automation playbooks +│ ├── README.md # Detailed Ansible documentation +│ ├── QUICKSTART.md # Quick start guide +│ └── playbook/ +│ ├── mongodb-restore.yml +│ └── roles/ +│ ├── clone-mount/ # EBS volume cloning +│ └── mongodb/ # MongoDB operations +├── mongodb/ # Database scripts +│ ├── setup_database.js # Create DB with mock PII data +│ ├── anonymize_data.js # Simple anonymization +│ ├── anonymize_with_hash.js # Hash-based anonymization +│ └── restore_original_data.js +└── .github/ + └── workflows/ + └── restore-staging-mongodb.yml # GitHub Actions workflow +``` + +## 🚀 Quick Start + +### 1. Deploy Infrastructure with Terraform + +```bash +# Deploy staging environment +cd terraform/stacks/staging +terraform init +terraform plan +terraform apply + +# Deploy production environment +cd ../production +terraform init +terraform apply +``` + +**What gets created:** +- EC2 instance with Amazon Linux 2023 +- Root volume (8GB) for OS +- Data volume (20GB) for MongoDB +- Security groups for SSH and MongoDB access +- MongoDB 7.0 installed and configured + +### 2. Setup Initial Database + +```bash +# SSH into production instance +ssh ec2-user@ + +# Copy and run the setup script +mongosh < setup_database.js +``` + +### 3. Clone Production to Staging + +**Option A: Using Ansible Locally** +```bash +cd ansible/playbook +ansible-playbook -i staging/inventory mongodb-restore.yml +``` + +**Option B: Using GitHub Actions** (Recommended) +1. Add AWS credentials to GitHub Secrets +2. Go to Actions → "Restore Staging MongoDB from Production" +3. 
Click "Run workflow" + +## 📋 Features + +### Infrastructure (Terraform) + +- ✅ EC2 instances with MongoDB 7.0 +- ✅ Separate EBS volumes for data +- ✅ Auto-mounting and configuration via user_data +- ✅ Security groups with proper access controls +- ✅ Support for multiple environments (staging/production) +- ✅ XFS filesystem (MongoDB recommended) + +### Automation (Ansible) + +- ✅ Automatic EBS volume cloning +- ✅ Snapshot-based replication +- ✅ Zero-downtime for production +- ✅ Automatic volume attachment/detachment +- ✅ SSM-based command execution +- ✅ Data anonymization support + +### Data Management + +- ✅ Mock PII data generation +- ✅ Simple anonymization (User 1, User 2, etc.) +- ✅ Hash-based anonymization (non-reversible) +- ✅ Data restoration scripts +- ✅ 10 sample users with realistic PII + +## 🔐 Security & Compliance + +### PII Data Protection + +The project includes two anonymization strategies: + +**Simple Anonymization:** +- Names → "User [ID]" +- Email → "user[ID]@anonymized.local" +- SSN → "XXX-XX-[ID]" +- Address → Redacted values + +**Hash-Based Anonymization:** +- One-way hash functions +- Non-reversible transformation +- Maintains referential consistency +- Suitable for production-like testing + +### AWS Security + +- EC2 security groups restrict access +- EBS volumes encrypted at rest +- SSM for secure command execution +- IAM roles with least privilege +- Snapshots properly tagged + +## 📖 Documentation + +- **[Ansible README](ansible/README.md)**: Complete Ansible documentation +- **[Quick Start Guide](ansible/QUICKSTART.md)**: Get started in 5 minutes +- **[MongoDB Scripts](mongodb/README.md)**: Database setup and anonymization + +## 🛠️ Prerequisites + +### For Terraform +- Terraform 1.0+ +- AWS CLI configured +- AWS credentials with EC2/VPC permissions + +### For Ansible +- Ansible 2.9+ +- AWS CLI v2 +- EC2 instances with SSM Agent +- IAM permissions for EC2, EBS, SSM + +### For GitHub Actions +- AWS credentials (stored as GitHub Secrets) +- 
Repository access to GitHub Actions + +## 🔄 Typical Workflow + +1. **Initial Setup**: Deploy infrastructure with Terraform +2. **Populate Production**: Load production data +3. **Clone to Staging**: Use Ansible to clone and anonymize +4. **Test in Staging**: Verify functionality with anonymized data +5. **Repeat**: Schedule regular refreshes (weekly/monthly) + +## 💰 Cost Estimate + +**Per Environment (Monthly):** +- EC2 t3.large: ~$60/month +- EBS Root (8GB gp3): ~$0.64/month +- EBS Data (20GB gp3): ~$1.60/month +- Data transfer: Free (same region) +- **Total: ~$62/month per environment** + +**Snapshots:** +- ~$0.05/GB/month (incremental) +- 20GB snapshot: ~$1/month + +## 🎯 Use Cases + +- **Development/Testing**: Safe staging environment with anonymized data +- **Compliance**: Meet GDPR/CCPA requirements for test data +- **Disaster Recovery**: Practice restoration procedures +- **Performance Testing**: Use production-sized datasets +- **Training**: Onboard new team members safely + +## 🔧 Configuration + +### Terraform Variables + +Edit `terraform/stacks/staging/terraform.tfvars`: +```hcl +instance_type = "t3.large" +root_volume_size = 8 +mongodb_data_volume_size = 20 +``` + +### Ansible Variables + +Edit `ansible/playbook/staging/group_vars/mongodb_restore.yml`: +```yaml +prod_instance_name: "MongoDB-production" +staging_instance_name: "MongoDB-staging" +anonymize_data: true +``` + +## 📊 Monitoring + +Check MongoDB status: +```bash +sudo systemctl status mongod +``` + +View data volume: +```bash +df -h | grep mongodb +``` + +Count documents: +```bash +mongosh --eval "use userdb; db.users.countDocuments()" +``` + +## 🐛 Troubleshooting + +See detailed troubleshooting guides in: +- [Ansible README](ansible/README.md#troubleshooting) +- [Quick Start Guide](ansible/QUICKSTART.md#common-issues--solutions) + +## 🤝 Contributing + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Test thoroughly +5. 
Submit a pull request + +## 📄 License + +This project is provided as-is for educational and internal use purposes. + +## 🙋 Support + +For issues or questions: +1. Check the documentation in `ansible/README.md` +2. Review troubleshooting guides +3. Check AWS CloudWatch logs +4. Verify IAM permissions + +## 🎓 Learning Resources + +- [MongoDB Production Notes](https://docs.mongodb.com/manual/administration/production-notes/) +- [AWS EBS Volume Types](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html) +- [Terraform AWS Provider](https://registry.terraform.io/providers/hashicorp/aws/latest/docs) +- [Ansible AWS Modules](https://docs.ansible.com/ansible/latest/collections/amazon/aws/) \ No newline at end of file diff --git a/ansible/QUICKSTART.md b/ansible/QUICKSTART.md new file mode 100644 index 0000000..514794c --- /dev/null +++ b/ansible/QUICKSTART.md @@ -0,0 +1,190 @@ +# Quick Start Guide: MongoDB Volume Cloning + +## Overview +This guide helps you quickly clone your production MongoDB EBS volume to staging using Ansible. + +## Prerequisites Checklist + +- [ ] AWS CLI installed and configured +- [ ] Ansible installed on your machine +- [ ] AWS IAM permissions for EC2 and SSM +- [ ] Production and Staging EC2 instances running +- [ ] SSM Agent installed on both instances +- [ ] MongoDB installed on staging instance + +## Quick Setup (5 minutes) + +### 1. Configure AWS Credentials + +```bash +# Set AWS credentials +export AWS_ACCESS_KEY_ID="your-access-key" +export AWS_SECRET_ACCESS_KEY="your-secret-key" +export AWS_DEFAULT_REGION="us-west-2" + +# Or use AWS CLI configure +aws configure +``` + +### 2. Update Configuration + +Edit `ansible/playbook/staging/group_vars/mongodb_restore.yml`: + +```yaml +prod_instance_name: "MongoDB-production" # Your production instance Name tag +staging_instance_name: "MongoDB-staging" # Your staging instance Name tag +anonymize_data: true # true to anonymize PII +``` + +### 3. 
Run the Playbook + +```bash +cd ansible/playbook +ansible-playbook -i staging/inventory mongodb-restore.yml +``` + +That's it! The playbook will: +- ✅ Snapshot production volume +- ✅ Create new volume from snapshot +- ✅ Detach old staging volume +- ✅ Attach new volume to staging +- ✅ Mount and start MongoDB +- ✅ Anonymize data (if enabled) + +## Using GitHub Actions (Recommended for Teams) + +### One-Time Setup + +1. **Add AWS credentials to GitHub Secrets:** + - Go to: Repository → Settings → Secrets and variables → Actions + - Add: `AWS_ACCESS_KEY_ID` + - Add: `AWS_SECRET_ACCESS_KEY` + +2. **Push the workflow file** (already created at `.github/workflows/restore-staging-mongodb.yml`) + +### Run the Workflow + +1. Go to: Repository → Actions → "Restore Staging MongoDB from Production" +2. Click "Run workflow" +3. Select options: + - ✅ Anonymize data: Yes/No +4. Click "Run workflow" +5. Watch the progress in real-time + +## Verification + +After the restore completes: + +```bash +# SSH into staging +ssh ec2-user@ + +# Check MongoDB status +sudo systemctl status mongod + +# Verify data +mongosh +use userdb +db.users.countDocuments() # Should show count +db.users.findOne() # Should show a user (anonymized if enabled) +``` + +## Timeline + +- **Snapshot creation:** 2-5 minutes (depending on volume size) +- **Volume creation:** 1-2 minutes +- **Attachment & mounting:** 30 seconds +- **Data anonymization:** 5-10 seconds +- **Total:** ~5-10 minutes + +## Common Issues & Solutions + +### Issue: "Instance not found" +**Solution:** Verify instance Name tags match configuration + +```bash +aws ec2 describe-instances \ + --filters "Name=tag:Name,Values=MongoDB-staging" \ + --query "Reservations[].Instances[].[InstanceId,Tags]" +``` + +### Issue: "SSM command failed" +**Solution:** Ensure SSM Agent is running and instance has IAM role + +```bash +# Check SSM agent status +aws ssm describe-instance-information + +# If not listed, install SSM agent on the instance +sudo yum 
install -y amazon-ssm-agent +sudo systemctl enable amazon-ssm-agent +sudo systemctl start amazon-ssm-agent +``` + +### Issue: "MongoDB won't start" +**Solution:** Check logs and volume mount + +```bash +# Check MongoDB logs +sudo tail -100 /var/log/mongod.log + +# Verify volume is mounted +df -h | grep mongodb + +# Try manual mount +sudo mount /dev/nvme1n1 /data/mongodb +sudo chown -R mongod:mongod /data/mongodb +sudo systemctl restart mongod +``` + +## Safety Notes + +✅ **Safe Operations:** +- Production is never stopped or modified +- Snapshots are created without downtime +- Old staging volume is only detached (not deleted) + +⚠️ **Important:** +- Staging MongoDB will be stopped during restore +- Old staging data will be replaced +- Snapshot costs are minimal but ongoing + +## Cost Estimate + +- **Snapshot storage:** ~$0.05/GB/month for 20GB = $1/month +- **EBS volume:** ~$0.08/GB/month for 20GB = $1.60/month +- **Data transfer:** Free (same region) +- **Total:** ~$2.60/month (if you keep one snapshot) + +## Next Steps + +1. ✅ Run your first restore +2. 📝 Schedule regular restores (weekly/monthly) +3. 🧹 Clean up old snapshots periodically +4. 🔐 Ensure anonymization is working correctly +5. 
📊 Monitor staging environment + +## Support + +- **Documentation:** See `ansible/README.md` for detailed guide +- **Troubleshooting:** Check workflow logs in GitHub Actions +- **Manual intervention:** SSH into instances for debugging + +## Advanced Usage + +### Restore without anonymization +```bash +ansible-playbook -i staging/inventory mongodb-restore.yml -e "anonymize_data=false" +``` + +### Use specific instance IDs +```bash +ansible-playbook -i staging/inventory mongodb-restore.yml \ + -e "prod_instance_id=i-xxxxx" \ + -e "staging_instance_id=i-yyyyy" +``` + +### Test connection only +```bash +ansible-playbook -i staging/inventory mongodb-restore.yml --tags verify +``` diff --git a/ansible/README.md b/ansible/README.md new file mode 100644 index 0000000..d0122b3 --- /dev/null +++ b/ansible/README.md @@ -0,0 +1,295 @@ +# MongoDB EBS Volume Cloning with Ansible + +This Ansible playbook automates the process of cloning a production MongoDB EBS volume to staging, including volume snapshot, detachment, attachment, and optional data anonymization. + +## Architecture + +The playbook performs the following operations: +1. **Discovery**: Identifies production and staging EC2 instances +2. **Snapshot**: Creates a snapshot of the production MongoDB data volume +3. **Clone**: Creates a new volume from the snapshot +4. **Detach**: Detaches the existing staging MongoDB volume +5. **Attach**: Attaches the cloned volume to staging +6. **Mount**: Mounts the volume and starts MongoDB +7. 
**Anonymize** (optional): Anonymizes PII data in the staging database + +## Prerequisites + +### AWS Permissions Required + +The IAM user/role running this playbook needs the following AWS permissions: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:DescribeInstances", + "ec2:DescribeVolumes", + "ec2:DescribeSnapshots", + "ec2:CreateSnapshot", + "ec2:CreateVolume", + "ec2:AttachVolume", + "ec2:DetachVolume", + "ec2:CreateTags", + "ssm:SendCommand", + "ssm:GetCommandInvocation", + "ssm:ListCommands" + ], + "Resource": "*" + } + ] +} +``` + +### EC2 Instance Requirements + +- **SSM Agent**: Both production and staging instances must have SSM Agent installed and running +- **IAM Role**: Instances need an IAM role with `AmazonSSMManagedInstanceCore` policy +- **MongoDB**: MongoDB should be installed and configured on staging +- **Tags**: Instances must be tagged with `Name` tag matching the configuration + +### Local Requirements + +- Ansible 2.9+ +- AWS CLI v2 +- Configured AWS credentials (`~/.aws/credentials` or environment variables) + +## Directory Structure + +``` +ansible/ +├── README.md +├── playbook/ +│ ├── mongodb-restore.yml # Main playbook +│ ├── production/ +│ │ ├── inventory # Production inventory +│ │ └── group_vars/ +│ │ └── mongodb_restore.yml # Production variables +│ ├── staging/ +│ │ ├── inventory # Staging inventory +│ │ └── group_vars/ +│ │ └── mongodb_restore.yml # Staging variables +│ └── roles/ +│ ├── clone-mount/ # Volume cloning role +│ │ ├── defaults/ +│ │ │ └── main.yml +│ │ └── tasks/ +│ │ └── main.yml +│ └── mongodb/ # MongoDB post-restore role +│ ├── defaults/ +│ │ └── main.yml +│ └── tasks/ +│ └── main.yml +``` + +## Configuration + +### Staging Configuration + +Edit `playbook/staging/group_vars/mongodb_restore.yml`: + +```yaml +aws_region: "us-west-2" +prod_instance_name: "MongoDB-production" +staging_instance_name: "MongoDB-staging" +device_name: "/dev/sdf" +volume_type: "gp3" 
+anonymize_data: true # Set to true to anonymize PII data +``` + +### Production Configuration + +Edit `playbook/production/group_vars/mongodb_restore.yml` if needed. + +## Usage + +### Running Locally + +#### Clone Production to Staging (with anonymization) + +```bash +cd ansible/playbook + +# Run the full restore process +ansible-playbook -i staging/inventory mongodb-restore.yml +``` + +#### Clone Production to Staging (without anonymization) + +```bash +ansible-playbook -i staging/inventory mongodb-restore.yml -e "anonymize_data=false" +``` + +#### Clone Volume Only (skip MongoDB tasks) + +```bash +ansible-playbook -i staging/inventory mongodb-restore.yml --tags clone +``` + +### Running in GitHub Actions + +Create `.github/workflows/restore-staging-mongodb.yml`: + +```yaml +name: Restore Staging MongoDB from Production + +on: + workflow_dispatch: + inputs: + anonymize_data: + description: 'Anonymize PII data after restore' + required: true + type: boolean + default: true + +jobs: + restore-mongodb: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Install Ansible + run: | + sudo apt-get update + sudo apt-get install -y ansible + + - name: Verify AWS CLI + run: aws --version + + - name: Run MongoDB restore playbook + working-directory: ansible/playbook + run: | + ansible-playbook -i staging/inventory mongodb-restore.yml \ + -e "anonymize_data=${{ github.event.inputs.anonymize_data }}" + + - name: Summary + if: success() + run: | + echo "✅ MongoDB staging restore completed successfully" + echo "Anonymization: ${{ github.event.inputs.anonymize_data }}" +``` + +### Required GitHub Secrets + +Add these secrets to your GitHub repository: +- `AWS_ACCESS_KEY_ID` +- `AWS_SECRET_ACCESS_KEY` 
+ +## Roles + +### clone-mount Role + +Handles the EBS volume cloning process: +- Discovers EC2 instance IDs from tags +- Creates snapshot of production volume +- Creates new volume from snapshot +- Detaches old staging volume +- Attaches new volume to staging +- Mounts the volume + +**Variables:** +- `aws_region`: AWS region +- `prod_instance_name`: Production instance Name tag +- `staging_instance_name`: Staging instance Name tag +- `device_name`: EBS device name (default: `/dev/sdf`) +- `volume_type`: EBS volume type (default: `gp3`) + +### mongodb Role + +Handles post-restore MongoDB tasks: +- Starts MongoDB service +- Verifies data migration +- Optionally anonymizes PII data + +**Variables:** +- `db_name`: MongoDB database name +- `anonymize_data`: Whether to anonymize data (default: `false`) + +## Troubleshooting + +### SSM Command Issues + +If SSM commands fail: + +```bash +# Check SSM agent status on instance +aws ssm describe-instance-information \ + --filters "Key=tag:Name,Values=MongoDB-staging" + +# Verify instance has proper IAM role +aws ec2 describe-instances \ + --filters "Name=tag:Name,Values=MongoDB-staging" \ + --query "Reservations[].Instances[].[InstanceId,IamInstanceProfile]" +``` + +### Volume Attachment Issues + +If volume fails to attach: + +```bash +# Check volume availability +aws ec2 describe-volumes --volume-ids vol-xxxxx + +# Check instance state +aws ec2 describe-instances --instance-ids i-xxxxx +``` + +### MongoDB Not Starting + +If MongoDB fails to start after restore: + +```bash +# SSH into staging instance +ssh ec2-user@STAGING_INSTANCE_IP + +# Check MongoDB status +sudo systemctl status mongod + +# Check logs +sudo tail -f /var/log/mongod.log + +# Verify volume is mounted +df -h | grep mongodb +``` + +## Cleanup + +After successful restore, you may want to: + +1. **Delete old staging volume** (optional): +```bash +aws ec2 delete-volume --volume-id vol-xxxxx +``` + +2. 
**Delete snapshot** (after verification): +```bash +aws ec2 delete-snapshot --snapshot-id snap-xxxxx +``` + +## Security Notes + +- This playbook stops MongoDB during the restore process +- Production database remains running and unaffected +- Snapshots are created without impacting production performance +- Old staging volumes are detached but NOT automatically deleted +- Enable `anonymize_data: true` for staging environments with real data + +## Support + +For issues or questions: +1. Check the logs in Ansible output +2. Verify AWS permissions +3. Ensure SSM Agent is running on both instances +4. Check MongoDB logs on the staging instance diff --git a/ansible/playbook/mongodb-restore.yml b/ansible/playbook/mongodb-restore.yml new file mode 100644 index 0000000..6752f94 --- /dev/null +++ b/ansible/playbook/mongodb-restore.yml @@ -0,0 +1,19 @@ +--- +# MongoDB Data Restore Playbook +# Clones production MongoDB EBS volume to staging and optionally anonymizes data + +- name: Clone Production MongoDB Volume to Staging + hosts: mongodb_restore + connection: local + gather_facts: yes + + roles: + - clone-mount + +- name: Post-Restore MongoDB Tasks + hosts: mongodb_restore + connection: local + gather_facts: yes + + roles: + - mongodb \ No newline at end of file diff --git a/ansible/playbook/production/group_vars/mongodb_restore.yml b/ansible/playbook/production/group_vars/mongodb_restore.yml new file mode 100644 index 0000000..c0a3a09 --- /dev/null +++ b/ansible/playbook/production/group_vars/mongodb_restore.yml @@ -0,0 +1,22 @@ +--- +# Production MongoDB Configuration + +# AWS Configuration +aws_region: "us-west-2" + +# Production EC2 Instance +prod_instance_name: "MongoDB-production" +# Optional: Specify instance ID directly if known +prod_instance_id: "i-0e360e7615a63a796" + +# EBS Volume Configuration +device_name: "/dev/sdf" +volume_type: "gp3" + +# Tags +environment_tag: "production" +project_tag: "mongodb-replication" + +# MongoDB Configuration +db_name: "userdb" 
+mongodb_port: 27017 \ No newline at end of file diff --git a/ansible/playbook/production/inventory b/ansible/playbook/production/inventory new file mode 100644 index 0000000..477e18c --- /dev/null +++ b/ansible/playbook/production/inventory @@ -0,0 +1,2 @@ +[mongodb_restore] +mongodb.restore ansible_host=localhost \ No newline at end of file diff --git a/ansible/playbook/roles/clone-mount/defaults/main.yml b/ansible/playbook/roles/clone-mount/defaults/main.yml new file mode 100644 index 0000000..8405616 --- /dev/null +++ b/ansible/playbook/roles/clone-mount/defaults/main.yml @@ -0,0 +1,28 @@ +--- +# Default variables for clone-mount role + +# AWS Region +aws_region: "us-west-2" + +# Production EC2 instance details +prod_instance_name: "MongoDB-production" +prod_instance_id: "" # Will be auto-discovered if empty + +# Staging EC2 instance details +staging_instance_name: "MongoDB-staging" +staging_instance_id: "" # Will be auto-discovered if empty + +# EBS Volume settings +device_name: "/dev/sdf" +volume_type: "gp3" + +# Snapshot settings +snapshot_description: "MongoDB data volume snapshot for staging restore" +snapshot_wait_timeout: 600 # 10 minutes + +# Volume attachment settings +attachment_wait_timeout: 120 # 2 minutes + +# Tags +environment_tag: "staging" +project_tag: "mongodb-replication" \ No newline at end of file diff --git a/ansible/playbook/roles/clone-mount/tasks/main.yml b/ansible/playbook/roles/clone-mount/tasks/main.yml new file mode 100644 index 0000000..efcd122 --- /dev/null +++ b/ansible/playbook/roles/clone-mount/tasks/main.yml @@ -0,0 +1,293 @@ +--- +# Clone and Mount EBS Volume from Production to Staging + +- name: Display clone operation details + debug: + msg: "Cloning MongoDB volume from {{ prod_instance_name }} to {{ staging_instance_name }}" + +# Step 1: Discover Production Instance ID +- name: Get Production EC2 instance ID + shell: | + aws ec2 describe-instances \ + --region {{ aws_region }} \ + --filters "Name=tag:Name,Values={{ 
prod_instance_name }}" "Name=instance-state-name,Values=running" \ + --query "Reservations[0].Instances[0].InstanceId" \ + --output text + register: prod_instance_result + when: prod_instance_id == "" + changed_when: false + +- name: Set production instance ID + set_fact: + prod_instance_id: "{{ prod_instance_result.stdout if prod_instance_id == '' else prod_instance_id }}" + +- name: Display production instance ID + debug: + msg: "Production Instance ID: {{ prod_instance_id }}" + +# Fail fast: with --output text the AWS CLI prints the literal "None" when the query matches nothing +- name: Fail if production instance not found + fail: + msg: "No running production instance found for Name tag '{{ prod_instance_name }}' in {{ aws_region }}" + when: prod_instance_id == "None" or prod_instance_id == "" + +# Step 2: Discover Staging Instance ID +- name: Get Staging EC2 instance ID + shell: | + aws ec2 describe-instances \ + --region {{ aws_region }} \ + --filters "Name=tag:Name,Values={{ staging_instance_name }}" "Name=instance-state-name,Values=running" \ + --query "Reservations[0].Instances[0].InstanceId" \ + --output text + register: staging_instance_result + when: staging_instance_id == "" + changed_when: false + +- name: Set staging instance ID + set_fact: + staging_instance_id: "{{ staging_instance_result.stdout if staging_instance_id == '' else staging_instance_id }}" + +- name: Display staging instance ID + debug: + msg: "Staging Instance ID: {{ staging_instance_id }}" + +# Stop here rather than after the production snapshot has already been created for an unknown target +- name: Fail if staging instance not found + fail: + msg: "No running staging instance found for Name tag '{{ staging_instance_name }}' in {{ aws_region }}" + when: staging_instance_id == "None" or staging_instance_id == "" + +# Step 3: Get Production MongoDB Data Volume ID +- name: Get production MongoDB data volume ID + shell: | + aws ec2 describe-volumes \ + --region {{ aws_region }} \ + --filters "Name=attachment.instance-id,Values={{ prod_instance_id }}" "Name=attachment.device,Values={{ device_name }}" \ + --query "Volumes[0].VolumeId" \ + --output text + register: prod_volume_result + changed_when: false + +- name: Set production volume ID + set_fact: + prod_volume_id: "{{ prod_volume_result.stdout }}" + +- name: Display production volume ID + debug: + msg: "Production Volume ID: {{ prod_volume_id }}" + +- name: Fail if production volume not found + fail: + msg: "Production MongoDB volume not found on instance {{ prod_instance_id }}" + when: prod_volume_id == "None" or prod_volume_id == "" + +# Step 4: Get Staging MongoDB Data 
Volume ID (to detach) +- name: Get staging MongoDB data volume ID + shell: | + aws ec2 describe-volumes \ + --region {{ aws_region }} \ + --filters "Name=attachment.instance-id,Values={{ staging_instance_id }}" "Name=attachment.device,Values={{ device_name }}" \ + --query "Volumes[0].VolumeId" \ + --output text + register: staging_volume_result + changed_when: false + failed_when: false + +- name: Set staging volume ID + set_fact: + staging_volume_id: "{{ staging_volume_result.stdout }}" + when: staging_volume_result.stdout != "None" and staging_volume_result.stdout != "" + +- name: Display staging volume ID + debug: + msg: "Existing Staging Volume ID: {{ staging_volume_id | default('None') }}" + +# Step 5: Stop MongoDB on Staging (if running) +- name: Stop MongoDB service on staging instance + shell: | + aws ssm send-command \ + --region {{ aws_region }} \ + --instance-ids {{ staging_instance_id }} \ + --document-name "AWS-RunShellScript" \ + --parameters 'commands=["sudo systemctl stop mongod","sudo umount /data/mongodb || true"]' \ + --query "Command.CommandId" \ + --output text + register: ssm_stop_mongo + changed_when: true + failed_when: false + +- name: Wait for MongoDB stop command to complete + shell: | + aws ssm wait command-executed \ + --region {{ aws_region }} \ + --command-id {{ ssm_stop_mongo.stdout }} \ + --instance-id {{ staging_instance_id }} + when: ssm_stop_mongo.stdout is defined and ssm_stop_mongo.stdout != "" + changed_when: false + failed_when: false + +# Step 6: Create Snapshot of Production Volume +- name: Create snapshot of production MongoDB volume + shell: | + aws ec2 create-snapshot \ + --region {{ aws_region }} \ + --volume-id {{ prod_volume_id }} \ + --description "{{ snapshot_description }} - $(date '+%Y-%m-%d %H:%M:%S')" \ + --tag-specifications 'ResourceType=snapshot,Tags=[{Key=Name,Value=mongodb-staging-restore},{Key=Environment,Value={{ environment_tag }}},{Key=Project,Value={{ project_tag }}},{Key=Source,Value={{ prod_volume_id 
}}}]' \ + --query "SnapshotId" \ + --output text + register: snapshot_result + changed_when: true + +- name: Set snapshot ID + set_fact: + snapshot_id: "{{ snapshot_result.stdout }}" + +- name: Display snapshot ID + debug: + msg: "Created Snapshot ID: {{ snapshot_id }}" + +# Step 7: Wait for Snapshot to Complete +- name: Wait for snapshot to complete + shell: | + aws ec2 wait snapshot-completed \ + --region {{ aws_region }} \ + --snapshot-ids {{ snapshot_id }} + changed_when: false + async: "{{ snapshot_wait_timeout }}" + poll: 10 + +- name: Snapshot completed successfully + debug: + msg: "Snapshot {{ snapshot_id }} is now available" + +# Step 8: Get Availability Zone of Staging Instance +- name: Get staging instance availability zone + shell: | + aws ec2 describe-instances \ + --region {{ aws_region }} \ + --instance-ids {{ staging_instance_id }} \ + --query "Reservations[0].Instances[0].Placement.AvailabilityZone" \ + --output text + register: staging_az_result + changed_when: false + +- name: Set staging availability zone + set_fact: + staging_az: "{{ staging_az_result.stdout }}" + +# Step 9: Create New Volume from Snapshot +- name: Create new volume from snapshot in staging AZ + shell: | + aws ec2 create-volume \ + --region {{ aws_region }} \ + --availability-zone {{ staging_az }} \ + --snapshot-id {{ snapshot_id }} \ + --volume-type {{ volume_type }} \ + --tag-specifications 'ResourceType=volume,Tags=[{Key=Name,Value=mongodb-staging-data},{Key=Environment,Value={{ environment_tag }}},{Key=Project,Value={{ project_tag }}},{Key=SourceSnapshot,Value={{ snapshot_id }}}]' \ + --query "VolumeId" \ + --output text + register: new_volume_result + changed_when: true + +- name: Set new volume ID + set_fact: + new_volume_id: "{{ new_volume_result.stdout }}" + +- name: Display new volume ID + debug: + msg: "Created New Volume ID: {{ new_volume_id }}" + +# Step 10: Wait for New Volume to be Available +- name: Wait for new volume to be available + shell: | + aws ec2 wait 
volume-available \
+      --region {{ aws_region }} \
+      --volume-ids {{ new_volume_id }}
+  changed_when: false
+
+# Step 11: Detach Existing Staging Volume
+# Only runs when Step 4 found a volume on the device. Errors from the detach
+# and its waiter are ignored (failed_when: false).
+- name: Detach existing staging MongoDB volume
+  shell: |
+    aws ec2 detach-volume \
+      --region {{ aws_region }} \
+      --volume-id {{ staging_volume_id }} \
+      --force
+  when: staging_volume_id is defined and staging_volume_id != "None" and staging_volume_id != ""
+  register: detach_result
+  changed_when: true
+  failed_when: false
+
+- name: Wait for old volume to detach
+  shell: |
+    aws ec2 wait volume-available \
+      --region {{ aws_region }} \
+      --volume-ids {{ staging_volume_id }}
+  when: staging_volume_id is defined and staging_volume_id != "None" and staging_volume_id != ""
+  changed_when: false
+  failed_when: false
+
+- name: Old volume detached successfully
+  debug:
+    msg: "Old volume {{ staging_volume_id | default('N/A') }} detached"
+  when: staging_volume_id is defined
+
+# Step 12: Attach New Volume to Staging Instance
+- name: Attach new volume to staging instance
+  shell: |
+    aws ec2 attach-volume \
+      --region {{ aws_region }} \
+      --volume-id {{ new_volume_id }} \
+      --instance-id {{ staging_instance_id }} \
+      --device {{ device_name }}
+  register: attach_result
+  changed_when: true
+
+- name: Wait for volume to attach
+  shell: |
+    aws ec2 wait volume-in-use \
+      --region {{ aws_region }} \
+      --volume-ids {{ new_volume_id }}
+  changed_when: false
+
+- name: Volume attached successfully
+  debug:
+    msg: "New volume {{ new_volume_id }} attached to {{ staging_instance_id }}"
+
+# Step 13: Mount the Volume on Staging Instance
+# The block device is resolved on the instance from its udev by-id symlink.
+# For NVMe-attached EBS volumes the symlink name embeds the volume ID with the
+# dash removed, so the ID known on the controller is enough: the staging host
+# needs no AWS CLI or IAM permissions, and the command list needs no nested
+# quoting. If the symlink does not resolve to a block device, the previous
+# hard-coded fallback /dev/nvme1n1 is used.
+- name: Mount MongoDB volume on staging instance
+  vars:
+    nvme_by_id: "/dev/disk/by-id/nvme-Amazon_Elastic_Block_Store_{{ new_volume_id | replace('-', '') }}"
+  shell: |
+    aws ssm send-command \
+      --region {{ aws_region }} \
+      --instance-ids {{ staging_instance_id }} \
+      --document-name "AWS-RunShellScript" \
+      --parameters 'commands=["sleep 5","DATA_DEVICE=$(readlink -f {{ nvme_by_id }})","if [ ! -b \"$DATA_DEVICE\" ]; then DATA_DEVICE=/dev/nvme1n1; fi","echo \"Mounting device: $DATA_DEVICE\"","sudo mkdir -p /data/mongodb","sudo mount \"$DATA_DEVICE\" /data/mongodb 2>/dev/null || sudo mount /dev/nvme1n1 /data/mongodb","sudo chown -R mongod:mongod /data/mongodb","df -h | grep mongodb"]' \
+      --query "Command.CommandId" \
+      --output text
+  register: ssm_mount
+  changed_when: true
+
+- name: Wait for mount command to complete
+  shell: |
+    aws ssm wait command-executed \
+      --region {{ aws_region }} \
+      --command-id {{ ssm_mount.stdout }} \
+      --instance-id {{ staging_instance_id }}
+  changed_when: false
+
+- name: Get mount command output
+  shell: |
+    aws ssm get-command-invocation \
+      --region {{ aws_region }} \
+      --command-id {{ ssm_mount.stdout }} \
+      --instance-id {{ staging_instance_id }} \
+      --query "StandardOutputContent" \
+      --output text
+  register: mount_output
+  changed_when: false
+
+- name: Display mount output
+  debug:
+    msg: "{{ mount_output.stdout_lines }}"
+
+# Step 14: Summary
+- name: Clone and mount operation summary
+  debug:
+    msg:
+      - "=== Clone and Mount Operation Completed ==="
+      - "Source Volume (Production): {{ prod_volume_id }}"
+      - "Snapshot Created: {{ snapshot_id }}"
+      - "New Volume (Staging): {{ new_volume_id }}"
+      - "Old Staging Volume: {{ staging_volume_id | default('None') }}"
+      - "Attached to Instance: {{ staging_instance_id }}"
+      - "Device: {{ device_name }}"
+      - "Mount Point: /data/mongodb"
+      - "Note: Old volume {{ staging_volume_id | default('N/A') }} can be deleted if no longer needed"
\ No newline at end of file
diff --git a/ansible/playbook/roles/mongodb/defaults/main.yml b/ansible/playbook/roles/mongodb/defaults/main.yml
new file mode 100644
index 0000000..41244f0
--- /dev/null
+++ b/ansible/playbook/roles/mongodb/defaults/main.yml
@@ -0,0 +1,18 @@
+---
+# MongoDB role default variables
+
+# AWS Configuration
+aws_region: "us-west-2"
+
+# Instance details
+staging_instance_id: ""
+staging_instance_name: "MongoDB-staging"
+
+# MongoDB Configuration
+db_name: "userdb"
+mongodb_port: 27017
+
+# Anonymization
+anonymize_data: false
+# Anchored on playbook_dir (ansible/playbook) so the path resolves to
+# <repo>/mongodb/anonymize_data.js whatever the working directory is.
+anonymization_script_local_path: "{{ playbook_dir }}/../../mongodb/anonymize_data.js"
+anonymization_script_remote_path: "/tmp/anonymize_data.js"
\ No newline at end of file
diff --git a/ansible/playbook/roles/mongodb/tasks/main.yml b/ansible/playbook/roles/mongodb/tasks/main.yml
new file mode 100644
index 0000000..5f40f3a
--- /dev/null
+++ b/ansible/playbook/roles/mongodb/tasks/main.yml
@@ -0,0 +1,168 @@
+---
+# MongoDB Post-Restore Tasks
+
+- name: Display MongoDB post-restore tasks
+  debug:
+    msg: "Starting MongoDB service and running post-restore tasks"
+
+# Get staging instance ID if not provided
+# With --output text the CLI prints the literal "None" when nothing matches the
+# filters, so that value (and empty output) is treated as a failure instead of
+# being passed on as an instance ID.
+- name: Get Staging EC2 instance ID
+  shell: |
+    aws ec2 describe-instances \
+      --region {{ aws_region }} \
+      --filters "Name=tag:Name,Values={{ staging_instance_name }}" "Name=instance-state-name,Values=running" \
+      --query "Reservations[0].Instances[0].InstanceId" \
+      --output text
+  register: staging_instance_result
+  when: staging_instance_id == ""
+  changed_when: false
+  failed_when: staging_instance_result.rc != 0 or (staging_instance_result.stdout | trim) in ['', 'None']
+
+# Keeps an explicitly configured ID; otherwise uses the ID looked up above.
+- name: Set staging instance ID
+  set_fact:
+    staging_instance_id: "{{ staging_instance_result.stdout if staging_instance_id == '' else staging_instance_id }}"
+
+# Start MongoDB service
+- name: Start MongoDB service on staging
+  shell: |
+    aws ssm send-command \
+      --region {{ aws_region }} \
+      --instance-ids {{ staging_instance_id }} \
+      --document-name "AWS-RunShellScript" \
+      --parameters 'commands=["sudo systemctl start mongod","sleep 3","sudo systemctl status mongod"]' \
+      --query "Command.CommandId" \
+      --output text
+  register: ssm_start_mongo
+  changed_when: true
+
+- name: Wait for MongoDB start command
+  shell: |
+    aws ssm wait command-executed \
+      --region {{ aws_region }} \
+      --command-id {{ ssm_start_mongo.stdout }} \
+      --instance-id {{ staging_instance_id }}
+  changed_when:
false
+
+- name: Get MongoDB status
+  shell: |
+    aws ssm get-command-invocation \
+      --region {{ aws_region }} \
+      --command-id {{ ssm_start_mongo.stdout }} \
+      --instance-id {{ staging_instance_id }} \
+      --query "StandardOutputContent" \
+      --output text
+  register: mongo_status
+  changed_when: false
+
+- name: Display MongoDB status
+  debug:
+    msg: "{{ mongo_status.stdout_lines }}"
+
+# Verify MongoDB data
+# The database is selected with mongosh's positional db address rather than a
+# `use <db>;` prefix inside --eval: `use` is a shell helper, not JavaScript,
+# and is not reliable when followed by more statements on the same line.
+- name: Verify MongoDB data exists
+  shell: |
+    aws ssm send-command \
+      --region {{ aws_region }} \
+      --instance-ids {{ staging_instance_id }} \
+      --document-name "AWS-RunShellScript" \
+      --parameters 'commands=["mongosh {{ db_name }} --quiet --eval \"db.users.countDocuments()\""]' \
+      --query "Command.CommandId" \
+      --output text
+  register: ssm_verify
+  changed_when: false
+
+- name: Wait for verify command
+  shell: |
+    aws ssm wait command-executed \
+      --region {{ aws_region }} \
+      --command-id {{ ssm_verify.stdout }} \
+      --instance-id {{ staging_instance_id }}
+  changed_when: false
+
+- name: Get document count
+  shell: |
+    aws ssm get-command-invocation \
+      --region {{ aws_region }} \
+      --command-id {{ ssm_verify.stdout }} \
+      --instance-id {{ staging_instance_id }} \
+      --query "StandardOutputContent" \
+      --output text
+  register: doc_count
+  changed_when: false
+
+- name: Display document count
+  debug:
+    msg: "Database {{ db_name }} has {{ doc_count.stdout | trim }} documents"
+
+# Anonymize data if requested
+# The script is shipped base64-encoded. Its raw text contains quotes, commas,
+# backticks and `$`, which break when spliced into the SSM parameter string;
+# the base64 alphabet needs no escaping inside JSON or the shell.
+# A missing or empty local script fails the task instead of writing an empty
+# file on the staging host.
+- name: Copy anonymization script to staging
+  shell: |
+    set -e
+    SCRIPT_B64=$(base64 < "{{ anonymization_script_local_path }}" | tr -d '\n')
+    if [ -z "$SCRIPT_B64" ]; then
+      echo "Anonymization script is missing or empty: {{ anonymization_script_local_path }}" >&2
+      exit 1
+    fi
+    aws ssm send-command \
+      --region {{ aws_region }} \
+      --instance-ids {{ staging_instance_id }} \
+      --document-name "AWS-RunShellScript" \
+      --parameters "{\"commands\":[\"echo $SCRIPT_B64 | base64 -d > {{ anonymization_script_remote_path }}\"]}" \
+      --query "Command.CommandId" \
+      --output text
+  register: ssm_copy_script
+  when: anonymize_data | bool
+  changed_when: true
+
+- name: Wait for script
copy
+  shell: |
+    aws ssm wait command-executed \
+      --region {{ aws_region }} \
+      --command-id {{ ssm_copy_script.stdout }} \
+      --instance-id {{ staging_instance_id }}
+  when: anonymize_data | bool
+  changed_when: false
+
+# Feeds the copied script to mongosh on the staging instance via stdin.
+- name: Run anonymization script
+  shell: |
+    aws ssm send-command \
+      --region {{ aws_region }} \
+      --instance-ids {{ staging_instance_id }} \
+      --document-name "AWS-RunShellScript" \
+      --parameters 'commands=["mongosh < {{ anonymization_script_remote_path }}"]' \
+      --query "Command.CommandId" \
+      --output text
+  register: ssm_anonymize
+  when: anonymize_data | bool
+  changed_when: true
+
+- name: Wait for anonymization to complete
+  shell: |
+    aws ssm wait command-executed \
+      --region {{ aws_region }} \
+      --command-id {{ ssm_anonymize.stdout }} \
+      --instance-id {{ staging_instance_id }}
+  when: anonymize_data | bool
+  changed_when: false
+
+- name: Get anonymization output
+  shell: |
+    aws ssm get-command-invocation \
+      --region {{ aws_region }} \
+      --command-id {{ ssm_anonymize.stdout }} \
+      --instance-id {{ staging_instance_id }} \
+      --query "StandardOutputContent" \
+      --output text
+  register: anonymize_output
+  when: anonymize_data | bool
+  changed_when: false
+
+- name: Display anonymization results
+  debug:
+    msg: "{{ anonymize_output.stdout_lines }}"
+  when: anonymize_data | bool
+
+# Final summary
+# anonymize_data arrives as a string when passed with `-e anonymize_data=false`,
+# and a non-empty string is truthy in Jinja. It is cast with `| bool` here, the
+# same way the `when:` guards above do, so the summary matches what actually ran.
+- name: Post-restore tasks completed
+  debug:
+    msg:
+      - "=== MongoDB Post-Restore Tasks Completed ==="
+      - "Instance: {{ staging_instance_id }}"
+      - "Database: {{ db_name }}"
+      - "Document Count: {{ doc_count.stdout | trim }}"
+      - "Data Anonymized: {{ 'Yes' if (anonymize_data | bool) else 'No' }}"
+      - "MongoDB is ready for use on staging environment"
\ No newline at end of file
diff --git a/ansible/playbook/staging/group_vars/mongodb_restore.yml b/ansible/playbook/staging/group_vars/mongodb_restore.yml
new file mode 100644
index 0000000..db5e4ea
--- /dev/null
+++ b/ansible/playbook/staging/group_vars/mongodb_restore.yml
@@ -0,0 +1,32 @@
+---
+#
Staging MongoDB Restore Configuration + +# AWS Configuration +aws_region: "us-west-2" + +# Production EC2 Instance +prod_instance_name: "MongoDB-production" +# Optional: Specify instance ID directly if known +prod_instance_id: "i-0e360e7615a63a796" + +# Staging EC2 Instance +staging_instance_name: "MongoDB-staging" +# Optional: Specify instance ID directly if known +staging_instance_id: "i-05661b198eb8d9b0a" + +# EBS Volume Configuration +device_name: "/dev/sdf" +volume_type: "gp3" + +# Snapshot Configuration +snapshot_description: "MongoDB staging restore from production" +snapshot_wait_timeout: 600 # 10 minutes + +# Tags +environment_tag: "staging" +project_tag: "mongodb-replication" + +# MongoDB Configuration (for mongodb role) +db_name: "userdb" +mongodb_port: 27017 +anonymize_data: true # Set to true to run anonymization after restore \ No newline at end of file diff --git a/ansible/playbook/staging/inventory b/ansible/playbook/staging/inventory new file mode 100644 index 0000000..477e18c --- /dev/null +++ b/ansible/playbook/staging/inventory @@ -0,0 +1,2 @@ +[mongodb_restore] +mongodb.restore ansible_host=localhost \ No newline at end of file diff --git a/ansible/test-prerequisites.sh b/ansible/test-prerequisites.sh new file mode 100644 index 0000000..8b637a4 --- /dev/null +++ b/ansible/test-prerequisites.sh @@ -0,0 +1,194 @@ +#!/bin/bash +# Test script for MongoDB restore playbook +# This script validates prerequisites before running the Ansible playbook + +set -e + +echo "=========================================" +echo "MongoDB Restore - Prerequisites Check" +echo "=========================================" +echo "" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +check_passed=0 +check_failed=0 + +# Function to check command existence +check_command() { + if command -v "$1" &> /dev/null; then + echo -e "${GREEN}✓${NC} $1 is installed" + ((check_passed++)) + return 0 + else + echo -e "${RED}✗${NC} $1 is NOT 
installed" + ((check_failed++)) + return 1 + fi +} + +# Function to check AWS credentials +check_aws_credentials() { + if aws sts get-caller-identity &> /dev/null; then + echo -e "${GREEN}✓${NC} AWS credentials are configured" + aws sts get-caller-identity --query "Arn" --output text | sed 's/^/ Account: /' + ((check_passed++)) + return 0 + else + echo -e "${RED}✗${NC} AWS credentials are NOT configured" + echo " Run: aws configure" + ((check_failed++)) + return 1 + fi +} + +# Function to check EC2 instance exists +check_instance() { + local instance_name=$1 + local instance_id + + instance_id=$(aws ec2 describe-instances \ + --filters "Name=tag:Name,Values=${instance_name}" "Name=instance-state-name,Values=running" \ + --query "Reservations[0].Instances[0].InstanceId" \ + --output text 2>/dev/null) + + if [ "$instance_id" != "None" ] && [ -n "$instance_id" ]; then + echo -e "${GREEN}✓${NC} Instance '${instance_name}' found: ${instance_id}" + ((check_passed++)) + return 0 + else + echo -e "${RED}✗${NC} Instance '${instance_name}' NOT found or not running" + ((check_failed++)) + return 1 + fi +} + +# Function to check SSM connectivity +check_ssm() { + local instance_name=$1 + local instance_id + + instance_id=$(aws ec2 describe-instances \ + --filters "Name=tag:Name,Values=${instance_name}" "Name=instance-state-name,Values=running" \ + --query "Reservations[0].Instances[0].InstanceId" \ + --output text 2>/dev/null) + + if [ "$instance_id" != "None" ] && [ -n "$instance_id" ]; then + if aws ssm describe-instance-information \ + --filters "Key=InstanceIds,Values=${instance_id}" \ + --query "InstanceInformationList[0].PingStatus" \ + --output text 2>/dev/null | grep -q "Online"; then + echo -e "${GREEN}✓${NC} SSM Agent is online for '${instance_name}'" + ((check_passed++)) + return 0 + else + echo -e "${RED}✗${NC} SSM Agent is NOT online for '${instance_name}'" + echo " Install SSM agent or attach IAM role with AmazonSSMManagedInstanceCore" + ((check_failed++)) + 
return 1 + fi + else + echo -e "${YELLOW}⊘${NC} Cannot check SSM for '${instance_name}' (instance not found)" + return 1 + fi +} + +echo "1. Checking required tools..." +echo "----------------------------" +check_command "ansible" +check_command "aws" +check_command "python3" +echo "" + +echo "2. Checking AWS configuration..." +echo "--------------------------------" +check_aws_credentials +echo "" + +# Get region from AWS CLI config or use default +AWS_REGION=${AWS_REGION:-$(aws configure get region 2>/dev/null || echo "us-west-2")} +export AWS_REGION +echo "Using AWS Region: ${AWS_REGION}" +echo "" + +echo "3. Checking EC2 instances..." +echo "----------------------------" +PROD_INSTANCE_NAME=${PROD_INSTANCE_NAME:-"MongoDB-production"} +STAGING_INSTANCE_NAME=${STAGING_INSTANCE_NAME:-"MongoDB-staging"} + +check_instance "${PROD_INSTANCE_NAME}" +check_instance "${STAGING_INSTANCE_NAME}" +echo "" + +echo "4. Checking SSM Agent connectivity..." +echo "--------------------------------------" +check_ssm "${PROD_INSTANCE_NAME}" +check_ssm "${STAGING_INSTANCE_NAME}" +echo "" + +echo "5. Checking Ansible playbook files..." 
+echo "--------------------------------------" +PLAYBOOK_DIR="ansible/playbook" + +if [ -f "${PLAYBOOK_DIR}/mongodb-restore.yml" ]; then + echo -e "${GREEN}✓${NC} Playbook found: mongodb-restore.yml" + ((check_passed++)) +else + echo -e "${RED}✗${NC} Playbook NOT found: mongodb-restore.yml" + ((check_failed++)) +fi + +if [ -f "${PLAYBOOK_DIR}/staging/inventory" ]; then + echo -e "${GREEN}✓${NC} Inventory found: staging/inventory" + ((check_passed++)) +else + echo -e "${RED}✗${NC} Inventory NOT found: staging/inventory" + ((check_failed++)) +fi + +if [ -f "${PLAYBOOK_DIR}/staging/group_vars/mongodb_restore.yml" ]; then + echo -e "${GREEN}✓${NC} Variables found: staging/group_vars/mongodb_restore.yml" + ((check_passed++)) +else + echo -e "${RED}✗${NC} Variables NOT found: staging/group_vars/mongodb_restore.yml" + ((check_failed++)) +fi +echo "" + +echo "=========================================" +echo "Prerequisites Summary" +echo "=========================================" +echo -e "${GREEN}Passed: ${check_passed}${NC}" +echo -e "${RED}Failed: ${check_failed}${NC}" +echo "" + +if [ $check_failed -eq 0 ]; then + echo -e "${GREEN}✓ All prerequisites met!${NC}" + echo "" + echo "Ready to run the playbook:" + echo "" + echo " cd ${PLAYBOOK_DIR}" + echo " ansible-playbook -i staging/inventory mongodb-restore.yml" + echo "" + echo "Or with custom options:" + echo " ansible-playbook -i staging/inventory mongodb-restore.yml -e \"anonymize_data=false\"" + echo "" + exit 0 +else + echo -e "${RED}✗ Some prerequisites are missing${NC}" + echo "" + echo "Please fix the issues above before running the playbook." 
+ echo "" + echo "Common fixes:" + echo " - Install Ansible: pip install ansible" + echo " - Configure AWS: aws configure" + echo " - Check EC2 instance Names match configuration" + echo " - Install SSM Agent on EC2 instances" + echo " - Attach IAM role with AmazonSSMManagedInstanceCore to instances" + echo "" + exit 1 +fi diff --git a/mongodb/README.md b/mongodb/README.md new file mode 100644 index 0000000..ddeeb11 --- /dev/null +++ b/mongodb/README.md @@ -0,0 +1,128 @@ +# MongoDB Setup + +## Database Structure + +- **Database**: `userdb` +- **Collection**: `users` +- **Records**: 10 users with mock PII data + +## PII Data Included + +Each user document contains: +- First Name & Last Name +- Email Address +- Phone Number +- Social Security Number (SSN) +- Date of Birth +- Full Address (street, city, state, zip, country) +- Timestamps (createdAt, updatedAt) + +## Available Scripts + +1. **setup_database.js** - Creates the database with mock PII data +2. **anonymize_data.js** - Simple anonymization (User 1, User 2, etc.) +3. **anonymize_with_hash.js** - Hash-based anonymization (non-reversible) +4. **restore_original_data.js** - Restores original mock PII data + +## Setup Instructions + +### 1. Create Database with Mock PII Data + +```bash +# Connect to MongoDB and run the setup script +mongosh < setup_database.js +``` + +Or run it directly in mongosh: + +```bash +# Connect to MongoDB +mongosh + +# Load and execute the script +load("setup_database.js") +``` + +### 2. Anonymize PII Data + +**Option A: Simple Anonymization** +```bash +# Anonymizes to User 1, User 2, etc. +mongosh < anonymize_data.js +``` + +**Option B: Hash-Based Anonymization** +```bash +# Uses one-way hash functions for consistent anonymization +mongosh < anonymize_with_hash.js +``` + +### 3. 
Restore Original Data (if needed)
+
+```bash
+# Restores the original mock PII data
+mongosh < restore_original_data.js
+```
+
+### Verify the Setup
+
+```javascript
+// Connect to the database
+use userdb;
+
+// Count documents
+db.users.countDocuments();
+```
+
+## Anonymization Details
+
+### Simple Anonymization (anonymize_data.js)
+- Names → "User [ID]"
+- Email → "user[ID]@anonymized.local"
+- Phone → "+1-XXX-XXX-[ID]"
+- SSN → "XXX-XX-[ID]"
+- Date of Birth → Year only (YYYY-01-01)
+- Address → Redacted values
+
+### Hash-Based Anonymization (anonymize_with_hash.js)
+- Uses one-way hash functions
+- Creates consistent but non-reversible values
+- Original data cannot be recovered
+- Maintains referential consistency
+- Stores hash of original email for tracking
+
+### Comparison
+
+| Method | Pros | Cons | Use Case |
+|--------|------|------|----------|
+| Simple | Easy to understand, predictable | Pattern visible | Testing, demos |
+| Hash-Based | More realistic, non-reversible | Less predictable | Production-like testing |
+
+```javascript
+// View all users
+db.users.find().pretty();
+
+// Find a specific user by email
+db.users.findOne({ email: "john.smith@email.com" });
+
+// View indexes
+db.users.getIndexes();
+```
+
+## Sample Queries
+
+```javascript
+// Find users by city
+db.users.find({ "address.city": "New York" });
+
+// Find users born after 1990
+db.users.find({ dateOfBirth: { $gte: new Date("1990-01-01") } });
+
+// Update a user's phone number
+db.users.updateOne(
+  { email: "john.smith@email.com" },
+  { $set: { phone: "+1-555-9999", updatedAt: new Date() } }
+);
+
+// Delete a user
+db.users.deleteOne({ email: "john.smith@email.com" });
+```
diff --git a/mongodb/anonymize_data.js b/mongodb/anonymize_data.js
new file mode 100644
index 0000000..9f47173
--- /dev/null
+++ b/mongodb/anonymize_data.js
@@ -0,0 +1,50 @@
+// MongoDB Data Anonymization Script
+// Run this with: mongosh < anonymize_data.js
+// This script anonymizes all PII data in the users
collection + +// Switch to the database +use userdb; + +print("\n🔒 Starting data anonymization...\n"); + +// Get all users +const users = db.users.find().toArray(); +let anonymizedCount = 0; + +// Anonymize each user +users.forEach((user, index) => { + const anonymizedData = { + firstName: `User`, + lastName: `${user.userId}`, + email: `user${user.userId}@anonymized.local`, + phone: `+1-XXX-XXX-${String(user.userId).padStart(4, '0')}`, + ssn: `XXX-XX-${String(user.userId).padStart(4, '0')}`, + dateOfBirth: new Date(`${user.dateOfBirth.getFullYear()}-01-01`), // Keep year, anonymize day/month + address: { + street: `${user.userId} Anonymous Street`, + city: "Redacted City", + state: "XX", + zipCode: "00000", + country: "Redacted" + }, + updatedAt: new Date() + }; + + db.users.updateOne( + { userId: user.userId }, + { $set: anonymizedData } + ); + + anonymizedCount++; + print(`✓ Anonymized user ${user.userId} (${user.firstName} ${user.lastName} → ${anonymizedData.firstName} ${anonymizedData.lastName})`); +}); + +print(`\n✅ Successfully anonymized ${anonymizedCount} user records`); +print("\nAnonymization applied to:"); +print(" - First/Last Names → User [ID]"); +print(" - Email → user[ID]@anonymized.local"); +print(" - Phone → +1-XXX-XXX-[ID]"); +print(" - SSN → XXX-XX-[ID]"); +print(" - Date of Birth → [Year]-01-01"); +print(" - Address → Redacted/Anonymous values"); +print("\nTo verify, run: db.users.find().pretty()"); diff --git a/mongodb/setup_database.js b/mongodb/setup_database.js new file mode 100644 index 0000000..ca22aa2 --- /dev/null +++ b/mongodb/setup_database.js @@ -0,0 +1,204 @@ +// MongoDB Database Setup Script +// Run this with: mongosh < setup_database.js + +// Switch to the database (creates it if doesn't exist) +use userdb; + +// Drop the collection if it exists (for clean setup) +db.users.drop(); + +// Create users collection with mock PII data +db.users.insertMany([ + { + userId: 1, + firstName: "John", + lastName: "Smith", + email: 
"john.smith@email.com", + phone: "+1-555-0101", + ssn: "123-45-6789", + dateOfBirth: new Date("1985-03-15"), + address: { + street: "123 Main Street", + city: "New York", + state: "NY", + zipCode: "10001", + country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + }, + { + userId: 2, + firstName: "Sarah", + lastName: "Johnson", + email: "sarah.johnson@email.com", + phone: "+1-555-0102", + ssn: "234-56-7890", + dateOfBirth: new Date("1990-07-22"), + address: { + street: "456 Oak Avenue", + city: "Los Angeles", + state: "CA", + zipCode: "90001", + country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + }, + { + userId: 3, + firstName: "Michael", + lastName: "Williams", + email: "michael.williams@email.com", + phone: "+1-555-0103", + ssn: "345-67-8901", + dateOfBirth: new Date("1988-11-30"), + address: { + street: "789 Pine Road", + city: "Chicago", + state: "IL", + zipCode: "60601", + country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + }, + { + userId: 4, + firstName: "Emily", + lastName: "Brown", + email: "emily.brown@email.com", + phone: "+1-555-0104", + ssn: "456-78-9012", + dateOfBirth: new Date("1992-05-18"), + address: { + street: "321 Elm Street", + city: "Houston", + state: "TX", + zipCode: "77001", + country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + }, + { + userId: 5, + firstName: "David", + lastName: "Garcia", + email: "david.garcia@email.com", + phone: "+1-555-0105", + ssn: "567-89-0123", + dateOfBirth: new Date("1987-09-25"), + address: { + street: "654 Maple Drive", + city: "Phoenix", + state: "AZ", + zipCode: "85001", + country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + }, + { + userId: 6, + firstName: "Jessica", + lastName: "Martinez", + email: "jessica.martinez@email.com", + phone: "+1-555-0106", + ssn: "678-90-1234", + dateOfBirth: new Date("1995-01-12"), + address: { + street: "987 Cedar Lane", + city: "Philadelphia", + state: "PA", + zipCode: "19101", + 
country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + }, + { + userId: 7, + firstName: "Robert", + lastName: "Anderson", + email: "robert.anderson@email.com", + phone: "+1-555-0107", + ssn: "789-01-2345", + dateOfBirth: new Date("1983-12-08"), + address: { + street: "147 Birch Court", + city: "San Antonio", + state: "TX", + zipCode: "78201", + country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + }, + { + userId: 8, + firstName: "Jennifer", + lastName: "Taylor", + email: "jennifer.taylor@email.com", + phone: "+1-555-0108", + ssn: "890-12-3456", + dateOfBirth: new Date("1991-04-20"), + address: { + street: "258 Spruce Way", + city: "San Diego", + state: "CA", + zipCode: "92101", + country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + }, + { + userId: 9, + firstName: "Christopher", + lastName: "Thomas", + email: "christopher.thomas@email.com", + phone: "+1-555-0109", + ssn: "901-23-4567", + dateOfBirth: new Date("1989-08-14"), + address: { + street: "369 Willow Street", + city: "Dallas", + state: "TX", + zipCode: "75201", + country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + }, + { + userId: 10, + firstName: "Amanda", + lastName: "Moore", + email: "amanda.moore@email.com", + phone: "+1-555-0110", + ssn: "012-34-5678", + dateOfBirth: new Date("1994-02-28"), + address: { + street: "741 Hickory Avenue", + city: "San Jose", + state: "CA", + zipCode: "95101", + country: "USA" + }, + createdAt: new Date(), + updatedAt: new Date() + } +]); + +// Create an index on email for faster lookups +db.users.createIndex({ email: 1 }, { unique: true }); + +// Create an index on userId +db.users.createIndex({ userId: 1 }, { unique: true }); + +// Print confirmation +print("\n✓ Database 'userdb' created"); +print("✓ Collection 'users' created with 10 records"); +print("✓ Indexes created on 'email' and 'userId'"); +print("\nTo verify, run: db.users.countDocuments()"); diff --git a/scripts/setup_mongodb_userdata.sh 
b/scripts/setup_mongodb_userdata.sh new file mode 100644 index 0000000..8cc17e6 --- /dev/null +++ b/scripts/setup_mongodb_userdata.sh @@ -0,0 +1,42 @@ +sudo bash <<'SCRIPT' +# Setup MongoDB data volume +DATA_DEVICE="/dev/nvme1n1" + +# Format the volume with XFS +mkfs.xfs $DATA_DEVICE + +# Create mount point +mkdir -p /data/mongodb + +# Mount the volume +mount $DATA_DEVICE /data/mongodb + +# Add to fstab +UUID=$(blkid -s UUID -o value $DATA_DEVICE) +echo "UUID=$UUID /data/mongodb xfs defaults,nofail 0 2" >> /etc/fstab + +# Add MongoDB repository +cat > /etc/yum.repos.d/mongodb-org-7.0.repo <<'REPO' +[mongodb-org-7.0] +name=MongoDB Repository +baseurl=https://repo.mongodb.org/yum/redhat/9/mongodb-org/7.0/x86_64/ +gpgcheck=1 +enabled=1 +gpgkey=https://pgp.mongodb.com/server-7.0.asc +REPO + +# Install MongoDB +yum install -y mongodb-org + +# Set ownership +chown -R mongod:mongod /data/mongodb + +# Configure MongoDB +sed -i 's/bindIp: 127.0.0.1/bindIp: 0.0.0.0/' /etc/mongod.conf +sed -i 's|dbPath: /var/lib/mongo|dbPath: /data/mongodb|' /etc/mongod.conf + +# Start MongoDB +systemctl start mongod +systemctl enable mongod +systemctl status mongod +SCRIPT \ No newline at end of file diff --git a/terraform/modules/ec2/main.tf b/terraform/modules/ec2/main.tf index 744a75a..a3dff4a 100644 --- a/terraform/modules/ec2/main.tf +++ b/terraform/modules/ec2/main.tf @@ -39,6 +39,8 @@ resource "aws_instance" "mongodb" { key_name = var.key_name vpc_security_group_ids = [aws_security_group.mongodb.id] + + user_data_replace_on_change = true root_block_device { volume_size = var.root_volume_size