diff --git a/.databricks/commit_outputs b/.databricks/commit_outputs new file mode 100644 index 0000000..c63b073 --- /dev/null +++ b/.databricks/commit_outputs @@ -0,0 +1,16 @@ + +# The .databricks/commit_outputs file controls whether a notebook's output is included when committing changes to Git. +# Outputs are included/excluded at a per-notebook level; committing only selected outputs within a specific notebook is not possible. +# +# Reference: +# each line in .databricks/commit_outputs represents a glob pattern +# a line starting with # represents a comment +# a notebook that matches a glob pattern will have its output included +# a line starting with ! represents an exclusion pattern (e.g. !folder_a/\* will exclude output for all notebooks in folder_a) +# if a notebook path matches multiple glob patterns, the last matching glob pattern will be used +# note that glob patterns are case-sensitive +# more examples can be found here: https://docs.databricks.com/repos/repos-setup.html#patterns-for-a-repo-config-file + +# Uncomment the following pattern to include output for all notebooks +# ** + \ No newline at end of file diff --git a/DE_Bronze/Support Ticket 2026-01-25.ipynb b/DE_Bronze/Support Ticket 2026-01-25.ipynb new file mode 100644 index 0000000..bb6773b --- /dev/null +++ b/DE_Bronze/Support Ticket 2026-01-25.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "e133225d-daf5-4fd9-8a8b-39607eb6aad8", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "spark.read.format(\"csv\").load(\"/Volumes/de_use_cases/support_tickets/support_tickets_volume\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + 
"rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "89e19a3b-6c44-47a4-8129-11493db2b9c7", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "df = spark.read.format(\"csv\").load(\"/Volumes/de_use_cases/support_tickets/support_tickets_volume\")\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "d801d834-febb-4194-ab63-439936717660", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Cell 3" + } + }, + "outputs": [], + "source": [ + "df = spark.read.format(\"csv\")\\\n", + " .option(\"header\", \"true\")\\\n", + " .load(\"/Volumes/de_use_cases/support_tickets/support_tickets_volume\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "c8819041-dcf6-4e84-b951-85bba991fc4a", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "0cfbe491-6aa1-4aa2-9c80-cd5a8a704f61", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "df = spark.read.format(\"csv\")\\\n", + " .option(\"header\", \"true\")\\\n", + " .option(\"inferSchema\", \"true\")\\\n", + " .load(\"/Volumes/de_use_cases/support_tickets/support_tickets_volume\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, 
+ "inputWidgets": {}, + "nuid": "eb48ec44-0ebf-4c79-9140-6d4ce56010e6", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Cell 6" + } + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StructType, StructField, StringType, DateType\n", + "schema = StructType([\n", + " StructField(\"ticket_id\", StringType(), True),\n", + " StructField(\"description\", StringType(), True),\n", + " StructField(\"status\", StringType(), True),\n", + " StructField(\"created_date\", DateType(), True)\n", + "])\n", + "df = spark.read.format(\"csv\")\\\n", + " .option(\"header\", \"true\")\\\n", + " .schema(schema)\\\n", + " .load(\"/Volumes/de_use_cases/support_tickets/support_tickets_volume\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "5ab20122-5e65-49db-8867-d9a972fcd9f5", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "display(df)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "4" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 4 + }, + "notebookName": "Support Ticket 2026-01-25", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/DE_Bronze/Testing myname in job 2026-01-25.ipynb b/DE_Bronze/Testing myname in job 2026-01-25.ipynb new file mode 100644 index 0000000..34c8cb5 --- /dev/null +++ b/DE_Bronze/Testing myname in job 2026-01-25.ipynb @@ -0,0 +1,48 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + 
"rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6fb1bed7-f8b1-4898-888f-be65e19bead3", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "print(\"Hello Jagadeesh Reddy\")\n", + "print(\"Here am running below code for you :)\")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "4" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 4 + }, + "notebookName": "Testing myname in job 2026-01-25", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/DE_Bronze/Vendor Orders 2026-01-25.ipynb b/DE_Bronze/Vendor Orders 2026-01-25.ipynb new file mode 100644 index 0000000..86a1a3b --- /dev/null +++ b/DE_Bronze/Vendor Orders 2026-01-25.ipynb @@ -0,0 +1,434 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "657ebee4-d36b-4970-be69-3227c82ffe10", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "df = spark.read.format(\"csv\").load(\"/Volumes/de_use_cases_UC/vendor_dirty_schema/vendor_dirty_volume\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "a23187df-b0e1-476f-8014-69d2b6e12bae", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "df = spark.read.format(\"csv\").option(\"header\",\"true\").load(\"/Volumes/de_use_cases_UC/vendor_dirty_schema/vendor_dirty_volume\")" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "f069d6b0-bb2b-4435-ac66-0410e32e5659", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "d932f707-fc3e-4abb-91cc-d51a7bee4160", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "b734d3db-cd41-4797-81eb-48655dafb75a", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "df = spark.read.format(\"csv\").option(\"header\",\"true\")\\\n", + " .option(\"inferSchema\",\"true\").load(\"/Volumes/de_use_cases_UC/vendor_dirty_schema/vendor_dirty_volume\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "81e6cd0f-8a9f-4841-a73d-b89c8b652baa", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Cell 6" + } + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StructType, StructField, StringType, DateType, IntegerType\n", + "schema = StructType([\n", + " StructField(\"order_id\", IntegerType(), True),\n", + " StructField(\"amount\", IntegerType(), True),\n", + " StructField(\"currency\", StringType(), True),\n", + " 
StructField(\"order_date\", DateType(), True)\n", + "])\n", + "df = spark.read.format(\"csv\")\\\n", + " .option(\"header\",\"true\")\\\n", + " .schema(schema)\\\n", + " .load(\"/Volumes/de_use_cases_UC/vendor_dirty_schema/vendor_dirty_volume\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "97209731-faec-4372-89a1-664cf0edd018", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StructType, StructField, StringType, DateType, IntegerType\n", + "from pyspark.sql.functions import to_date, date_format, col, coalesce\n", + "schema = StructType([\n", + " StructField(\"order_id\", IntegerType(), True),\n", + " StructField(\"amount\", IntegerType(), True),\n", + " StructField(\"currency\", StringType(), True),\n", + " StructField(\"order_date\", DateType(), True)\n", + "])\n", + "df = spark.read.format(\"csv\")\\\n", + " .option(\"header\",\"true\")\\\n", + " .schema(schema)\\\n", + " .load(\"/Volumes/de_use_cases_UC/vendor_dirty_schema/vendor_dirty_volume\")\n", + "\n", + "df = df.withColumn(\n", + " \"order_date\",\n", + " coalesce(\n", + " to_date(col(\"order_date\"), \"yyyy-MM-dd\"),\n", + " to_date(col(\"order_date\"), \"yyyy/MM/dd\"),\n", + " to_date(col(\"order_date\"), \"dd-MM-yyyy\")\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "b757c459-fa9b-4ca6-8038-a342f627eaf6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "from pyspark.sql.types import StructType, StructField, StringType, IntegerType\n", + "from pyspark.sql.functions import to_date, col, 
coalesce\n", + "\n", + "schema = StructType([\n", + " StructField(\"order_id\", IntegerType(), True),\n", + " StructField(\"amount\", IntegerType(), True),\n", + " StructField(\"currency\", StringType(), True),\n", + " StructField(\"order_date\", StringType(), True) # 👈 MUST be StringType\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "07bb0d78-701b-4dbc-beff-395fa0cc5380", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "df = spark.read.format(\"csv\") \\\n", + " .option(\"header\", \"true\") \\\n", + " .schema(schema) \\\n", + " .load(\"/Volumes/de_use_cases_UC/vendor_dirty_schema/vendor_dirty_volume\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "2d6a657e-686c-4da5-b592-1f9fda995cb5", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Cell 10" + } + }, + "outputs": [], + "source": [ + "from pyspark.sql.functions import when, col, to_date\n", + "\n", + "df = df.withColumn(\n", + " \"order_date\",\n", + " when(col(\"order_date\").rlike(\"^[0-9]{4}-[0-9]{2}-[0-9]{2}$\"),\n", + " to_date(col(\"order_date\"), \"yyyy-MM-dd\"))\n", + " .when(col(\"order_date\").rlike(\"^[0-9]{4}/[0-9]{2}/[0-9]{2}$\"),\n", + " to_date(col(\"order_date\"), \"yyyy/MM/dd\"))\n", + " .when(col(\"order_date\").rlike(\"^[0-9]{2}-[0-9]{2}-[0-9]{4}$\"),\n", + " to_date(col(\"order_date\"), \"dd-MM-yyyy\"))\n", + ")\n", + "\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": 
"aac23a6a-9db0-4a74-9195-8fdc4fed8aed", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "from pyspark.sql.functions import upper, when, col\n", + "\n", + "df = df.withColumn(\n", + " \"currency\",\n", + " when(col(\"currency\") == \"usd\", \"USD\")\n", + " .when(col(\"currency\") == \"aed\", \"AED\")\n", + " .otherwise(col(\"currency\"))\n", + ")\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "944cd61f-495f-41c9-9b10-f39b982cc334", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "df = df.fillna(0)\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "657b786d-9196-4eda-bd31-ad8f9bea58ff", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Register df as temp view" + } + }, + "outputs": [], + "source": [ + "df.createOrReplaceTempView(\"df\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "implicitDf": true, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "441a27c2-da4f-4b90-aeea-16db8a82b701", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Cell 9" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "select distinct currency from df" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "84f57cae-0637-4082-ad32-7946c3bdb346", + "showTitle": false, + 
"tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "sum_amount = df.groupBy(\"currency\").agg({\"amount\": \"sum\"})\n", + "display(sum_amount)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "b7f44a08-ea7b-498d-8a80-362560ef04de", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Cell 16" + } + }, + "outputs": [], + "source": [ + "df.write.mode(\"overwrite\").saveAsTable(\"de_use_cases_UC.vendor_dirty_schema.tbl_vendor_dirty\")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "4" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "mostRecentlyExecutedCommandWithImplicitDF": { + "commandId": 5543925676959252, + "dataframes": [ + "_sqldf" + ] + }, + "pythonIndentUnit": 4 + }, + "notebookName": "Vendor Orders 2026-01-25", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/Diet Plan.pdf b/Diet Plan.pdf new file mode 100644 index 0000000..9c96db7 Binary files /dev/null and b/Diet Plan.pdf differ diff --git a/Untitled Notebook 2026-01-18 13_49_46.ipynb b/Untitled Notebook 2026-01-18 13_49_46.ipynb new file mode 100644 index 0000000..8d60ea1 --- /dev/null +++ b/Untitled Notebook 2026-01-18 13_49_46.ipynb @@ -0,0 +1,47 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "afccebd7-e7f1-4965-aced-6aa48503fbd1", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + 
"print(\"Jagadeesh Reddy\")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "4" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 4 + }, + "notebookName": "Untitled Notebook 2026-01-18 13_49_46", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}