diff --git a/examples/fabric/01-setting_up_zingg.ipynb b/examples/fabric/01-setting_up_zingg.ipynb new file mode 100644 index 000000000..4f4ee1f3f --- /dev/null +++ b/examples/fabric/01-setting_up_zingg.ipynb @@ -0,0 +1,578 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Part 1 : Setting Up Zingg\n", + "## It is responsible for initializing the Zingg environment, which includes the following steps:\n", + "- **Environment Setup:** Loads all necessary libraries and dependencies required for Zingg to run.\n", + "- **Path Setup:** Defines and sets up all relevant file paths, such as model directory, input data locations, output directories.\n", + "- **Performance Tuning:** Applies Spark and Zingg performance-related configurations to optimize the execution of data processing tasks." + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "0dc3f64b-c76b-4b4b-944d-52c993fbe873" + }, + { + "cell_type": "markdown", + "source": [ + "## Example Notebook For Training and Running Zingg Entity Resolution Workflow on Fabric\n", + "This notebook runs the Zingg Febrl Example on Fabric. Please refer to the\n", + "\n", + "- Zingg Python API\n", + "- Zingg Official Documentation for details.\n", + "\n", + "_This notebook has been tested on Runtime 1.3 version (Spark 3.5)_\n", + "\n", + "## Create a environment and install Zingg\n", + "# \n", + "- Go to the Clusters tab, hit Create Cluster, and give it a name like “Zingg.”\n", + "- Set the runtime version to a current LTS (Long-Term Support) version for compatibility.\n", + "- Next, you’ll need to install Zingg. For this, we will be need the latest Zingg JAR file.\n", + "- You need to go to the Environment tab and click on the New Environment button. 
You can name the new Environment \"Zingg Environment\".\n", + "- Visit [Zingg releases](https://github.com/zinggAI/zingg/releases), find the latest version of Zingg, download the tar file, and extract the jar file from the newly downloaded tar file.\n", + "- Open the Environment that you created earlier, then go to the custom library and upload the jar file there.\n", + "- Now go back to the Environment “Zingg Environment”, click the “new item” button and select Lakehouse inside of it.\n", + "- Zingg supports multiple file formats like CSV, Parquet, or JSON. For this example, let’s use a CSV file. You now need to go inside the Lakehouse, click on “Get data,” and upload the CSV file.\n", + "- Save and Publish the files\n", + "\n", + "### Please execute each cell one by one as per the instructions provided.\n" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "d4f49099-039b-468a-95b2-5de71b0b73cd" + }, + { + "cell_type": "markdown", + "source": [ + "## Install Zingg\n", + "## Check if all the Zingg wheels are installed correctly" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "6afaa74f-a348-4416-a7d4-3b427fbd09be" + }, + { + "cell_type": "code", + "source": [ + "pip install zingg" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "4dddda43-3901-445e-a0ea-b84b903bb493" + }, + { + "cell_type": "code", + "source": [ + "!pip show zingg" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "ea2d3e54-9adf-4a62-8570-d1ff1b46c51f" + }, + { + "cell_type": "markdown", + 
"source": [ + "## Set Checkpoint directory" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "1fb07f68-037b-4806-9d1f-7286c597a0af" + }, + { + "cell_type": "code", + "source": [ + "spark.sparkContext.setCheckpointDir(\"Files/checkpoint\")" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "8a1cc235-bd84-4766-be00-05d9c0a9b23c" + }, + { + "cell_type": "markdown", + "source": [ + "## Import the required libraries" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "8a506714-0bde-4920-bbaa-711f5aee6841" + }, + { + "cell_type": "code", + "source": [ + "## Import necessary libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "from ipywidgets import widgets, interact, GridspecLayout\n", + "import base64\n", + "import pyspark.sql.functions as fn\n", + "\n", + "\n", + "# Zingg libraries\n", + "from zingg.client import *\n", + "from zingg.pipes import *\n", + "\n", + "# Function to count labeled pairs\n", + "def count_labeled_pairs(marked_pd):\n", + " '''\n", + " The purpose of this function is to count the labeled pairs in the marked folder.\n", + " '''\n", + " n_total = len(np.unique(marked_pd['z_cluster']))\n", + " n_positive = len(np.unique(marked_pd[marked_pd['z_isMatch'] == 1]['z_cluster']))\n", + " n_negative = len(np.unique(marked_pd[marked_pd['z_isMatch'] == 0]['z_cluster']))\n", + " n_uncertain = len(np.unique(marked_pd[marked_pd['z_isMatch'] == 2]['z_cluster']))\n", + "\n", + " return n_positive, n_negative, n_uncertain, n_total\n", + "\n", + "# Setup interactive widget\n", + "available_labels = {\n", + " 'No Match': 0,\n", + " 'Match': 1,\n", + " 'Uncertain': 2\n", + "}" + ], + 
"outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "cee4aa35-0654-4e2e-b567-fc06099fff0e" + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "## Define locations for the model\n", + "The Zingg models and training data are persisted in storage.\n", + "\n", + "Please edit the model id in the cell below to reflect your model." + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "id": "ab8a42bd-b382-4a79-b586-cfc4595f7457" + }, + { + "cell_type": "code", + "source": [ + "##you can change these to the locations of your choice\n", + "##these are the only two settings that need to change\n", + "zinggDir = \"abfss://ACD@onelake.dfs.fabric.microsoft.com/newlk.Lakehouse/Files/models\"\n", + "modelId = \"oss11Dec\"\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "1759bfa9-c7dc-47a1-829b-0fc3fce1fa5a" + }, + { + "cell_type": "markdown", + "source": [ + "## Set the Directories path" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "id": "e3255ef3-fb69-461f-ae24-3f54481a9578" + }, + { + "cell_type": "code", + "source": [ + "## Define constants\n", + "MARKED_DIR = zinggDir + \"/\" + modelId + \"/trainingData/marked/\"\n", + "UNMARKED_DIR = zinggDir + \"/\" + modelId + \"/trainingData/unmarked/\"\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "06799675-549b-402d-a50f-16eed643eb99" + }, + { + "cell_type": "markdown", + "source": [ + "## Start building the Zingg program\n", + "The following cell sets up the initial arguments for Zingg." 
+ ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "id": "ede4efff-293f-4b74-9b7f-ad55ea9d4f0d" + }, + { + "cell_type": "code", + "source": [ + "#build the arguments for zingg\n", + "args = Arguments()\n", + "# Set the modelid and the zingg dir. You can use this as is\n", + "args.setModelId(modelId)\n", + "args.setZinggDir(zinggDir)" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "abd427f3-44cb-400b-ab5d-cdd86aa68d8c" + }, + { + "cell_type": "markdown", + "source": [ + "## Performance settings\n", + "The numPartitions define how data is split across the cluster. Please change this as per your data and cluster size \n", + "\n", + "For details, refer to [Zingg performance tuning documentation](https://docs.zingg.ai/latest/stepbystep/configuration/tuning-label-match-and-link-jobs).\n", + "In general:\n", + "- keep `numPartitions` to ~20-30x the worker vCPU count \n", + "- Disable Spark's Adaptive Query Execution\n", + "\n", + "__NOTE__: *Please modify this for your use case*" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "e86e55fd-1783-4576-bd7f-e12993cb174d" + }, + { + "cell_type": "code", + "source": [ + "args.setNumPartitions(4)\n", + "args.setLabelDataSampleSize(0.4)\n" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "e504700c-ae16-436f-bd55-53b59d171c13" + }, + { + "cell_type": "code", + "source": [ + "spark.conf.set(\"spark.sql.adaptive.enabled\", False)" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "6b38d8ff-9841-4a18-b46a-f526c788c9ff" + }, + { + 
"cell_type": "markdown", + "source": [ + "## Define the input\n", + "Please refer to [Pipes](https://docs.zingg.ai/latest/connectors/pipes) for details on different formats.\n", + "\n", + "Please modify this for your data." + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "d5007a45-0d1a-410c-a2d8-2d700d085f27" + }, + { + "cell_type": "code", + "source": [ + "# Import pandas\n", + "import pandas as pd\n", + "\n", + "# Define the schema (optional for validation)\n", + "schema = [\"id\", \"fname\", \"lname\", \"stNo\", \"add1\", \"add2\", \"city\", \"areacode\", \"state\", \"dob\", \"ssn\"]\n", + "\n", + "# Load the CSV file\n", + "data = pd.read_csv(\"abfss://ACD@onelake.dfs.fabric.microsoft.com/newlk.Lakehouse/Files/test.csv\",header=None)\n", + "\n", + "# Ensure column names match the schema\n", + "data.columns = schema # Adjust only if the file's column names differ\n", + "\n", + "# Display the data\n", + "data.head()" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "b2f1af14-da2e-4eea-9791-4955fe4a50e9" + }, + { + "cell_type": "code", + "source": [ + "schema = \"rec_id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string\"\n", + "inputPipe = CsvPipe(\"inputpipe\", \"abfss://ACD@onelake.dfs.fabric.microsoft.com/newlk.Lakehouse/Files/test.csv\", schema)\n", + "\n", + "args.setData(inputPipe)" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "016f8f6f-a65e-4c18-a8d5-0760015f98cd" + }, + { + "cell_type": "markdown", + "source": [ + "# Configure the output\n", + "Here we configure the output to be a csv, but similar to the input 
above, the output can be a file format like parquet or delta or a data store like MySQL\n", + "\n", + "**Please modify this for your data.**" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "7f4cdac7-9652-4c1a-8e10-cc0ed55b8612" + }, + { + "cell_type": "code", + "source": [ + "#setting outputpipe in 'args'\n", + "output_path = \"abfss://ACD@onelake.dfs.fabric.microsoft.com/newlk.Lakehouse/Files/ossOutput\"+modelId\n", + "outputPipe = CsvPipe(\"resultOutput\", output_path)\n", + "args.setOutput(outputPipe)" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "2772b54c-0731-4d28-9c2b-e161c079d919" + }, + { + "cell_type": "markdown", + "source": [ + "# Define the match fields and their types\n", + "\n", + "The cell below is used to configure Zingg with the fields for use in matching and the match types.\n", + "Details on the field definitions can be found at [Zingg official docs](https://docs.zingg.ai/latest)\n", + "\n", + "**Please modify this for your data.**" + ], + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "e0c0c383-f5ee-4707-90fc-f7fa9fd17fe3" + }, + { + "cell_type": "code", + "source": [ + "# Set field definitions\n", + "rec_id = FieldDefinition(\"rec_id\", \"string\", MatchType.DONT_USE) \n", + "fname = FieldDefinition(\"fname\", \"string\", MatchType.FUZZY) # First Name\n", + "lname = FieldDefinition(\"lname\", \"string\", MatchType.FUZZY) # Last Name\n", + "stNo = FieldDefinition(\"stNo\", \"string\", MatchType.FUZZY) # Street Number\n", + "add1 = FieldDefinition(\"add1\", \"string\", MatchType.FUZZY) # Address Line 1\n", + "add2 = FieldDefinition(\"add2\", \"string\", MatchType.FUZZY) # Address 
Line 2\n", + "city = FieldDefinition(\"city\", \"string\", MatchType.FUZZY) # City\n", + "areacode = FieldDefinition(\"areacode\", \"string\", MatchType.FUZZY) # areacode\n", + "state = FieldDefinition(\"state\", \"string\", MatchType.FUZZY) # State\n", + "dob = FieldDefinition(\"dob\", \"string\", MatchType.EXACT) # Date of Birth (prefer exact match)\n", + "ssn = FieldDefinition(\"ssn\", \"string\", MatchType.EXACT) # SSN (should use exact match)\n", + "\n", + "# Create the field definitions list\n", + "fieldDefs = [rec_id, fname, lname, stNo, add1, add2, city, areacode, state, dob, ssn]\n", + "\n", + "# Set field definitions in args\n", + "args.setFieldDefinition(fieldDefs)" + ], + "outputs": [], + "execution_count": null, + "metadata": { + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark" + } + }, + "id": "be245be2-bbff-4a98-a899-2907a4641e76" + } + ], + "metadata": { + "kernel_info": { + "name": "synapse_pyspark" + }, + "kernelspec": { + "name": "synapse_pyspark", + "display_name": "synapse_pyspark" + }, + "language_info": { + "name": "python" + }, + "microsoft": { + "language": "python", + "language_group": "synapse_pyspark", + "ms_spell_check": { + "ms_spell_check_language": "en" + } + }, + "nteract": { + "version": "nteract-front-end@1.0.0" + }, + "spark_compute": { + "compute_id": "/trident/default", + "session_options": { + "conf": { + "spark.synapse.nbs.session.timeout": "1200000" + } + } + }, + "dependencies": { + "lakehouse": { + "known_lakehouses": [ + { + "id": "9431468e-7392-49f7-972e-854b427cc833" + }, + { + "id": "e12962eb-1b6b-4d11-bef0-b23c2565587f" + } + ], + "default_lakehouse": "e12962eb-1b6b-4d11-bef0-b23c2565587f", + "default_lakehouse_name": "newlk", + "default_lakehouse_workspace_id": "41811c91-fcf2-4aba-8980-4c64b90166ad" + }, + "environment": { + "environmentId": "e9fdac13-03c9-4d44-befe-d7ee9618b120", + "workspaceId": "41811c91-fcf2-4aba-8980-4c64b90166ad" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} 
diff --git a/examples/fabric/02-label_training_data.ipynb b/examples/fabric/02-label_training_data.ipynb new file mode 100644 index 000000000..38aee2301 --- /dev/null +++ b/examples/fabric/02-label_training_data.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","source":["# Part 2: FindTrainingData and Label Phase\n","## We have completed setting up Zingg in the previous step. In this part, we will run the **_FindTrainingData_** and **_Label_** phases. \n","This involves generating candidate record pairs for training, presenting them for manual labeling, and saving the labeled data for use in model training. This step is essential for building a high-quality training dataset for entity resolution."],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"31dff71f-1168-4472-a2ef-462647f21e27"},{"cell_type":"markdown","source":["## Run Setup Zingg"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"5bca831e-6962-45fa-8d41-dbff018987ca"},{"cell_type":"code","source":["%run 01-setting_up_zingg"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"269f743d-8542-44a7-931b-a276674a890e"},{"cell_type":"markdown","source":["## Performance setting\n","The numPartitions define how data is split across the cluster. 
\n","\n"," Please change the following as per your data and cluster size by referring to the docs."],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"dd722f1a-e634-407a-9295-a2aa08be6739"},{"cell_type":"code","source":["\n","args.setNumPartitions(4)\n","args.setLabelDataSampleSize(0.4)"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"e27da579-374e-400d-8ce0-2ca8dd4f7c72"},{"cell_type":"markdown","source":["## Finding Records For Training Set Creation\n","Zingg uses Active Learning to accumulate training data.\n","\n","__NOTE__: *Iterate through the following steps to label a few rounds of pairs*"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"2f31ac47-1572-4179-a75a-ae322463c43f"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"findTrainingData\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.initAndExecute()"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"cbeb3ca0-a5dc-41e0-91c8-fbcc1d1d6cc7"},{"cell_type":"markdown","source":["## Select pairs for user labeling\n","\n","No change is needed in the cell below."],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"0645b964-b8ab-44d9-a45b-d48c52d969d3"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"label\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.init()"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"e03fa8c6-9c13-4545-bb84-3d26888a60aa"},{"cell_type":"markdown","source":["## See if 
we have records for labeling\n","\n","No change is needed to the cell below."],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"df6e2abd-229a-4b32-a50b-77b7f5bb6110"},{"cell_type":"code","source":["# get candidate pairs\n","candidate_pairs_pd = getPandasDfFromDs(zingg.getUnmarkedRecords())\n"," \n","# if no candidate pairs, run job and wait\n","if candidate_pairs_pd.shape[0] == 0:\n"," print('No unlabeled candidate pairs found. Run findTraining job ...')\n","\n","else:\n"," # get list of pairs (as identified by z_cluster) to label \n"," z_clusters = list(np.unique(candidate_pairs_pd['z_cluster'])) \n","\n"," # print candidate pair stats\n"," print('{0} candidate pairs found for labeling'.format(len(z_clusters)))"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"27128fe7-49b7-41cb-88a1-f5d48597e2f7"},{"cell_type":"markdown","source":["## Label the pairs\n","\n","A drop down widget is shown which will let the user mark matching, non matching and unsure pairs.\n","\n","No change is needed in the cell below."],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"65fee170-bf68-48b7-b8c5-aa87b97e804c"},{"cell_type":"code","source":["# Label Training Set\n","\n","# define variable to avoid duplicate saves\n","ready_for_save = False\n","\n","# user-friendly labels and corresponding zingg numerical value\n","# (the order in the dictionary affects how displayed below)\n","LABELS = {\n"," 'Uncertain':2,\n"," 'Match':1,\n"," 'No Match':0 \n"," }\n","\n","# GET CANDIDATE PAIRS\n","\n","n_pairs = int(candidate_pairs_pd.shape[0]/2)\n","\n","# DEFINE IPYWIDGET DISPLAY\n","# ========================================================\n","display_pd = candidate_pairs_pd.drop(\n"," labels=[\n"," 'z_zid', 'z_prediction', 'z_score', 'z_isMatch', 
'z_zsource'\n"," ], \n"," axis=1)\n","\n","# define header to be used with each displayed pair\n","html_prefix = \"

\"\n","html_suffix = \"

\"\n","header = widgets.HTML(value=f\"{html_prefix}\" + \"
\".join([str(i)+\"  \" for i in display_pd.columns.to_list()]) + f\"
{html_suffix}\")\n","\n","# initialize display\n","vContainers = []\n","vContainers.append(widgets.HTML(value=f'

Indicate if each of the {n_pairs} record pairs is a match or not

'))\n","\n","# for each set of pairs\n","for n in range(n_pairs):\n","\n"," # get candidate records\n"," candidate_left = display_pd.loc[2*n].to_list()\n","\n"," candidate_right = display_pd.loc[(2*n)+1].to_list()\n","\n","\n"," # define grid to hold values\n"," html = ''\n","\n"," for i in range(display_pd.shape[1]):\n","\n"," # get column name\n"," column_name = display_pd.columns[i]\n","\n"," # if field is image\n"," if column_name == 'image_path':\n","\n"," # define row header\n"," html += ''\n"," html += 'image'\n","\n"," # read left image to encoded string\n"," l_endcode = ''\n"," if candidate_left[i] != '':\n"," with open(candidate_left[i], \"rb\") as l_file:\n"," l_encode = base64.b64encode( l_file.read() ).decode()\n","\n"," # read right image to encoded string\n"," r_encode = ''\n"," if candidate_right[i] != '':\n"," with open(candidate_right[i], \"rb\") as r_file:\n"," r_encode = base64.b64encode( r_file.read() ).decode() \n","\n"," # present images\n"," html += f''\n"," html += f''\n"," html += ''\n","\n"," elif column_name != 'image_path': # display text values\n","\n"," if column_name == 'z_cluster': z_cluster = candidate_left[i]\n","\n"," html += ''\n"," html += f'{column_name}'\n"," html += f'{str(candidate_left[i])}'\n"," html += f'{str(candidate_right[i])}'\n"," html += ''\n","\n"," # insert data table\n"," table = widgets.HTML(value=f''+html+'
')\n"," z_cluster = None\n","\n"," # assign label options to pair\n"," label = widgets.ToggleButtons(\n"," options=LABELS.keys(), \n"," button_style='info'\n"," )\n","\n"," # define blank line between displayed pair and next\n"," blankLine=widgets.HTML(value='
')\n","\n"," # append pair, label and blank line to widget structure\n"," vContainers.append(widgets.VBox(children=[table, label, blankLine]))\n","\n","\n","display(widgets.VBox(children=vContainers))\n","# ========================================================\n","\n","# mark flag to allow save \n","ready_for_save = True"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"0d071c12-8d25-4be6-a50b-78c511a812fc"},{"cell_type":"markdown","source":["## Save all the labels provided by the user\n","#### Recommendation: 40+ matches and 40+ non-matches, though more will be better\n","No change is needed to the cell below."],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"8f1393dd-e543-4a49-8023-402b72a2b515"},{"cell_type":"code","source":["if not ready_for_save:\n"," print('No labels have been assigned. Run the previous cell to create candidate pairs and assign labels to them before re-running this cell.')\n","\n","else:\n","\n"," # ASSIGN LABEL VALUE TO CANDIDATE PAIRS IN DATAFRAME\n"," # ========================================================\n"," # for each pair in displayed widget\n"," for pair in vContainers[1:]:\n","\n"," # get pair and assigned label\n"," html_content = pair.children[1].get_interact_value() # the displayed pair as html\n"," user_assigned_label = pair.children[1].get_interact_value() # the assigned label\n","\n"," # extract candidate pair id from html pair content\n"," start = pair.children[0].value.find('data-title=\"')\n"," if start > 0: \n"," start += len('data-title=\"') \n"," end = pair.children[0].value.find('\"', start+2)\n"," pair_id = pair.children[0].value[start:end]\n","\n","\n","\n"," # assign label to candidate pair entry in dataframe\n"," candidate_pairs_pd.loc[candidate_pairs_pd['z_cluster']==pair_id, 'z_isMatch'] = LABELS.get(user_assigned_label)\n","\n"," # SAVE LABELED DATA 
TO ZINGG FOLDER\n"," # ========================================================\n"," # make target directory if needed\n"," notebookutils.fs.mkdirs(MARKED_DIR)\n"," \n"," # save label assignments\n"," zingg.writeLabelledOutputFromPandas(candidate_pairs_pd,args)\n","\n"," # count labels accumulated\n"," marked_pd_df = getPandasDfFromDs(zingg.getMarkedRecords())\n"," n_pos, n_neg, n_uncer, n_tot = count_labeled_pairs(marked_pd_df)\n"," print(f'Out of total {n_tot} pairs,')\n"," print(f'You have accumulated {n_pos} pairs labeled as positive matches.')\n"," print(f'You have accumulated {n_neg} pairs labeled as not matches.')\n"," print(f'You have accumulated {n_uncer} pairs labeled as uncertain.')\n"," print(\"If you need more pairs to label, re-run the cell for 'findTrainingData'\")\n"," # ======================================================== \n","\n"," # save completed\n"," ready_for_save = False"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"dcee9cab-44ce-47dd-9fed-499a457a129e"}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"name":"synapse_pyspark","display_name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"nteract-front-end@1.0.0"},"widgets":{"application/vnd.jupyter.widget-state+json":{"version_major":2,"version_minor":0,"state":{"f121693c544144f69f8f089ee8f1fad3":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"3b5354e95cee45029d8d899bfac3a88d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_00b4e7a5e9ca40eab0ccf033a3619891","style":"IPY_MODEL_1409df8968c042089d180a2cd6f47db7"}},"89caedb1deba42c8844e5afd38c8d047":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_453b8b80465841dba9bd3f3aa7a9ffb6","style":"IPY_MODEL_bdb398974889427e98601fc8f24b53f2"}},"f2f20f20ad7a4a6895dc394460a926bc":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_43d9e839ea914b5aa4e6734b8f70ad4e","style":"IPY_MODEL_6f6b6dec2fe84be3884763d703c6b30a"}},"0d3fca61007f4b6fbd373fe247f4b37b":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:51778558290992:5
rec_idrec-1041-dup-0rec-1021-org
fname tyler thomas
lname frojd george
stNo 1
add1 burramurra avenue mcmanus place
add2 kmart p plaza stoney creek
city san rmeo north turramurra
areacode36703130
state sa sa
dob1980091619630225
ssn78122195460534
","layout":"IPY_MODEL_4bd200d12cf642d9bf2b7fc9210cf0e3","style":"IPY_MODEL_2622ff541995471bb8c976eb91181218"}},"edc5132e328c4fa9b5b1bc3a7718c452":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_add452369df3466581cbe12634526370","tooltips":[],"style":"IPY_MODEL_a0dd609280c14b74a626654774a306b3","icons":[]}},"4bd200d12cf642d9bf2b7fc9210cf0e3":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d4bb5fb68500472fa69f49bc6833459e":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"fc83460e047045608c98e98df66e34f5":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6e4720b9557c4c96a5961c2326e06dde":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"6c0fdbf1d4194b5eb424e044e314d993":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"af846425acb24a248d18f43396c591a7":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:61778558290992:6
rec_idrec-1022-orgrec-1022-dup-3
fname jackson jackson
lname eglinton christo
stNo840840
add1 fowles street fowles street
add2 mountview mou ntveiw
city burleigh heads burleig heads
areacode28302830
state sa sa
dob1983080719830807
ssn29328372932837
","layout":"IPY_MODEL_daf3287789654524ae1a7ed42ccd3155","style":"IPY_MODEL_f05979cae6614841bf305e70e64c219b"}},"aec3e114a0144cc0b0424095893aa03a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"58fbda6dbfc74ec6a61bcb5bccf75d8a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"bdafb58945b14422b0029d59a8f74e20":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"863f7efcaa3e4edfb8bfc1bcf99b75eb":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"61021577dbbd43479c8601daecaa3085":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"be4367a12b4140efb00e0ea346ab37e8":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_260f4ac43ac8455fa44761f9c4a710b9","tooltips":[],"style":"IPY_MODEL_7d33739649494276b06785954d900a42","icons":[]}},"4a9cf73aa2e9467492e09cad83d14a01":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d4eaaba3bf604a7aa1907a670a29c50b":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_c92bc92bb7c24063adc65253fc534599","style":"IPY_MODEL_e88e1f7ff88148a58b3f8c7d9db84443"}},"fda9317cfe704af78bee7d1618c2f26d":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"7b36104cb757461ea3b0a1ddc6fa7d4b":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_0cf1af3ebdb14a14bd7495a040b87367","style":"IPY_MODEL_bdafb58945b14422b0029d59a8f74e20"}},"ac835ccf19104fe89a7e48e2d8e38ea3":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_177eda1695ef463bb435f790a0d04657","IPY_MODEL_3e4c0e432fa84cad93618c571d5f60d0","IPY_MODEL_003f048beb844865bf43b29838877a3a"],"layout":"IPY_MODEL_c845fe4969f44065bb72b8d88674939c"}},"1a39e120d9624d5f86c92afdbb6c4248":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"a660a416df994bf6996e77a9bfa2ff76":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"83d24aee8de74579872cb20b22746223":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"264bb94c389d42bebafe7b49691baea5":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_6c0fdbf1d4194b5eb424e044e314d993","tooltips":[],"style":"IPY_MODEL_fda9317cfe704af78bee7d1618c2f26d","icons":[]}},"e9983630c02745d5a0adcef808a284c4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f7cb4f70f33c40f08f932b1b70443fb2":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"ae73fac927fe40c9b872a7f1692e0c12":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_58e83533ac9445d0bb52bde37a5e7ef6","style":"IPY_MODEL_a660a416df994bf6996e77a9bfa2ff76"}},"ed30070853964ca28d8ab363bc1200a9":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"43d9e839ea914b5aa4e6734b8f70ad4e":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d200e29345bc4ae893e4657de22f5f39":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_a2f7cc02c6334b239488ef87744e0c74","style":"IPY_MODEL_cd460afd975b46db92b76cce6780e8e6"}},"9c329b40a81f43feb8d7f7553e431860":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_7d52577ee23e4cb9ab0f20bc158fa141","style":"IPY_MODEL_504ab3248e9a479dbf142659b9d8fefa"}},"dbeb500413d947a8881ce9c164a70efb":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:31778558290992:3
rec_idrec-1042-dup-0rec-1033-dup-0
fname kiandra keziah
lname painter
stNo218
add1 gatliff place ainsli e avenue
add2 rustenburg sth sec 1
city girgarre torquay
areacode39953205
state qld vic
dob1980112519191031
ssn33282057801066
","layout":"IPY_MODEL_a70d0d89103949538b8d7e34eec44b07","style":"IPY_MODEL_3bc1b5fad4794e4294c7cbe0e02d8b41"}},"12b152e48d044d26831217199600a839":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_8cec7a222d7648788ec7b57007c5b15d","tooltips":[],"style":"IPY_MODEL_1c5659916e7f43ad88b0bf0ba3d4a38a","icons":[]}},"0b938a61449143d4a8a3128207ac41cf":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"81c809539c0b43cfa1520e1418e5df3f":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_24b780ce2e4948a38a755492478ba69f","tooltips":[],"style":"IPY_MODEL_93d45c209e6c4db7b4f1893e7a07fa33","icons":[]}},"4f7b87179562435782403adf075ed320":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"83ee1b80050b49af90bfb8d42bd95098":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_dbeb500413d947a8881ce9c164a70efb","IPY_MODEL_7949872cb6d24c06af06235eda96ff3c","IPY_MODEL_b10553e0f1c14696b68c1590064c2c9c"],"layout":"IPY_MODEL_e9983630c02745d5a0adcef808a284c4"}},"a0fd60a2a98748488df2eba0da433cdb":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"2ae909d8009e4805b56d5bbf97382481":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"d213b09fb1014baa82e91308101a48a2":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"168f0eb2f6c
14e9a98b3ef26489baec3":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_badc5f3432ce45d68da13fa6018fd6a5","IPY_MODEL_8c78775a84fc4f699bfa2bbfe3dab7c5","IPY_MODEL_3b5354e95cee45029d8d899bfac3a88d"],"layout":"IPY_MODEL_73d15ef654894812acc4793f619e173c"}},"830d71d6419b40fd8776893f2d691169":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7d6d3fc9be064fa5948b71135a517c97":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_9e524b20ca86440c8d6d05d9e57d10a4","tooltips":[],"style":"IPY_MODEL_f121693c544144f69f8f089ee8f1fad3","icons":[]}},"5c6c1c9203fb4b72859c2524e2b0bc4d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"8bd5f18acdc841bda78ddfa56dfaca47":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_353fc5896e9d427facb8c7ab092c7aec","tooltips":[],"style":"IPY_MODEL_f4a0925b42fa4fcab7f1ee751abb01ae","icons":[]}},"b9ab056b322f4ef1b9c9090aaf2ce4bd":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"e390344731ff46a997b22b912a6cf6cf":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"01f28b4f778e4b2fb6365a030cccf913":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_ed30070853964ca28d8ab363bc1200a9","tooltips":[],"style":"IPY_MODEL_2fc823b6ff4d427fb5571244820da108","icons":[]}},"58e83533ac9445d0bb52bde37a5e7ef6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"22e063f5749e4021b2feee111dd98ad0":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7c584c6817e64829b3fb53ffbec0f9a8":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"9e32a901d0ce414580c8bbaffde46086":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:121778558290992:12
rec_idrec-1035-orgrec-1035-dup-2
fname jaiden jaiden
lname rollins rolilns
stNo4848
add1 rossarden street swinden street
add2 tulgeywood tulgeywood
city balwyn north balwyn north
areacode22242224
state nt nt
dob1928072219280722
ssn76263967626396
","layout":"IPY_MODEL_06fd76a14ee84b9eba54bffc6ac1e001","style":"IPY_MODEL_2ae909d8009e4805b56d5bbf97382481"}},"755a276636da43daab47c62d23cfcb2b":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_d213b09fb1014baa82e91308101a48a2","tooltips":[],"style":"IPY_MODEL_638a50340a054b4ea96c1161c456fa9b","icons":[]}},"bf7f21656e6d4415874c9ae55dd23fba":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d78f12e795214636847ce7cc1b930a52":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0f5cbc13c5e643b1a7535db02db3f69d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"9e524b20ca86440c8d6d05d9e57d10a4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"3d05b7d854f741ecbc2e1db1010798ab":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"81c2eed31b1a4ebd8db900ea57352d19":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"e73b751f1bb14a8d9e59982b8beaac2f":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:111778558290992:11
rec_idrec-1035-dup-2rec-1022-org
fname jaiden jackson
lname rolilns eglinton
stNo48840
add1 swinden street fowles street
add2 tulgeywood mountview
city balwyn north burleigh heads
areacode22242830
state nt sa
dob1928072219830807
ssn76263962932837
","layout":"IPY_MODEL_1e2854b7bd5f4187adafa1d0166583e0","style":"IPY_MODEL_e8bc7fc720bf4237b722fa25ccdb1875"}},"ad05a19064b64550b011ae8fa8a34dd6":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"bdb398974889427e98601fc8f24b53f2":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"34257aeb771f4ba1a90800c582771274":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ba2be471e6fa4d5d9a2ed41219972e22":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b2c40a0aee8d44ccb35c48638a0b01fc":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"eab0d30832f145b09276faf3286a8b1d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"af4f5dc7af734f399c292fa8d5d1afa7":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"add452369df3466581cbe12634526370":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"dfdbc2ffd55f4e7daf9f0b7bcc64cce1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a8ce89a36b954bd5ba6c5204821075c5":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_3d9927621bba4645af879e20d16dec67","style":"IPY_MODEL_a49bf7896a38474b8d25d5baa1d26c10"}},"d00e7e76bb83422e9583e37f545122ee":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f2dc1b4432f741eeafc44675ab83f576":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"75da2f082ad245e88ee80916ed700b09":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_7204933ceef244649bfc0c795eb82476","style":"IPY_MODEL_1a39e120d9624d5f86c92afdbb6c4248"}},"3bc1b5fad4794e4294c7cbe0e02d8b41":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2d0c9f9d66214a7889243c794c690d04":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_dfdbc2ffd55f4e7daf9f0b7bcc64cce1","style":"IPY_MODEL_e3e3df0f2b184f32a0787fd3f48cb518"}},"2876b4f8886b4f158ee7950db6a6649a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"99ba3b7d0f704f7e9126eb23c9ad1839":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_ba4aaeca9ac8434d8f06083a5bbafbed","IPY_MODEL_264bb94c389d42bebafe7b49691baea5","IPY_MODEL_eabe71f987c2434bb68616b54f85af29"],"layout":"IPY_MODEL_7c584c6817e64829b3fb53ffbec0f9a8"}},"0a553997f46e401cbdd9f7de26d08fad":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_99c522159d2e4a9fb9081c33bb7a8afc","IPY_MODEL_74be767e86d0473f93e5490f555342f6","IPY_MODEL_ac4175421e5042e8b1e80371bc6fae4c","IPY_MODEL_d085e3c0fc77495597dd3842686426b6","IPY_MODEL_8e7cc470fbdf436e8e59f8d9739a5519","IPY_MODEL_d6ca02c0533146aa9aebbabf34f16455","IPY_MODEL_1abd7a413e9b43c88b637725d7218ffc","IPY_MODEL_30e3c09525b14da0a8db49e15797ea3e","IPY_MODEL_4fe53864f3d94b4580e4f065c90e8346","IPY_MODEL_94149e87a8bf4a27b848cceed81ca749","IPY_MODEL_1adc473fea594bab9ee35bb462f09bda","IPY_MODEL_83ee1b80050b49af90bfb8d42bd95098","IPY_MODEL_a8bd668040d244bba99b5e9d0533c88f","IPY_MODEL_94bbcf3b21e84b3883920d909f93f0c3","IPY_MODEL_168f0eb2f6c14e9a98b3ef26489baec3","IPY_MODEL_ac835ccf19104fe89a7e48e2d8e38ea3","IPY_MODEL_0c5a547ca456482c910a35e0dc7d8ac3","IPY_MODEL_2d014e69339d4025b50acba8f53d119a","IPY_MODEL_99ba3b7d0f704f7e9126eb23c9ad1839","IPY_MODEL_d1149fe939b04798a7ce3e05751d14aa","IPY_MODEL_dc8018a2208d46a08193355b6b59c782"],"layout":"IPY_MODEL_aba5de83ed974dbb8db559a6f9aa6557"}},"c845fe4969f44065bb72b8d88674939c":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a0c6989023564446bfd47941169e9d54":{"model_name":"HTMLModel","model_module":"@jupyter-widgets
/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:21778558290992:2
rec_idrec-1029-dup-4rec-1029-dup-0
fname kylee kylee
lname stephenson stepehndon
stNo8181
add1 cordoba manor rose scott circuit
add2 rose scott circuit cordobak anor
city ashfield ashfield
areacode42264226
state vic vic
dob1946110119461101
ssn47830854783085
","layout":"IPY_MODEL_4f7b87179562435782403adf075ed320","style":"IPY_MODEL_e390344731ff46a997b22b912a6cf6cf"}},"5f21b4cd03ea45f5abe37f33a1163ff8":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:91778558290992:9
rec_idrec-1033-dup-0rec-1029-dup-0
fname keziah kylee
lname painter stepehndon
stNo1881
add1 ainsli e avenue rose scott circuit
add2 sec 1 cordobak anor
city torquay ashfield
areacode32054226
state vic vic
dob1919103119461101
ssn78010664783085
","layout":"IPY_MODEL_1c831753309b45948c88d0f7d9a08d99","style":"IPY_MODEL_ceea1337e88b4ace88df4722791d2623"}},"36b8cb80bdef41058a75707742f055f3":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"56a71a23ba7f4cc39cea1e0c23a5cfd4":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2622ff541995471bb8c976eb91181218":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"dc8018a2208d46a08193355b6b59c782":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_7342b70a94b14f398ce8f4cfb8207f05","IPY_MODEL_7d6d3fc9be064fa5948b71135a517c97","IPY_MODEL_7b36104cb757461ea3b0a1ddc6fa7d4b"],"layout":"IPY_MODEL_a18e67ef38fa4391a7711706f7e47a4e"}},"aba5de83ed974dbb8db559a6f9aa6557":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"d1149fe939b04798a7ce3e05751d14aa":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_aee006f3d8ce492986cf5790e62f4381","IPY_MODEL_12b152e48d044d26831217199600a839","IPY_MODEL_d4eaaba3bf604a7aa1907a670a29c50b"],"layout":"IPY_MODEL_ed3e2fe6027648449df2465c332d9342"}},"7d33739649494276b06785954d900a42":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"aec36a138f3d4c5cb685e6d1757a7e9b":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:11778558290992:1
rec_idrec-1032-orgrec-103-org
fname brooklyn briony
lname naar-cafentas koerbin
stNo210146
add1 duffy street violet grover place
add2 tourist park wybelanah
city berwick mill park
areacode24812446
state nsw nsw
dob1984080219210210
ssn36243043808808
","layout":"IPY_MODEL_e60d3aec9b3d4caaacd9be2a7d073036","style":"IPY_MODEL_2d5b80a272ed4f2caf37e6c8fa66d72b"}},"1e2854b7bd5f4187adafa1d0166583e0":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"003f048beb844865bf43b29838877a3a":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_34257aeb771f4ba1a90800c582771274","style":"IPY_MODEL_b9ab056b322f4ef1b9c9090aaf2ce4bd"}},"1c831753309b45948c88d0f7d9a08d99":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ba4aaeca9ac8434d8f06083a5bbafbed":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:101778558290992:10
rec_idrec-1041-orgrec-1041-dup-0
fname tyler tyler
lname froud frojd
stNo8
add1 burramurra avenue burramurra avenue
add2 kmart p plaza kmart p plaza
city san remo san rmeo
areacode36703670
state sa sa
dob1980091619800916
ssn78122197812219
","layout":"IPY_MODEL_bf7f21656e6d4415874c9ae55dd23fba","style":"IPY_MODEL_81c2eed31b1a4ebd8db900ea57352d19"}},"54763f2feac845eab7bcbd1c563abb00":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_3dd402ef8f0f4d5aa971466b4c0c68a1","style":"IPY_MODEL_21eca8ceeb654cf9be9fba939cd53021"}},"eabe71f987c2434bb68616b54f85af29":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_fc83460e047045608c98e98df66e34f5","style":"IPY_MODEL_36b8cb80bdef41058a75707742f055f3"}},"ed3e2fe6027648449df2465c332d9342":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"15b562a70b2645f7bd0f78b1766c82c0":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4b75920e91154d1c802cc6de64c95679":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_a7de6e0c8d834a7ca6ac327faf99c571","style":"IPY_MODEL_533435e35119466eaff90124ea8024ce"}},"a18e67ef38fa4391a7711706f7e47a4e":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"24b780ce2e4948a38a755492478ba69f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4c4ac18531ee44ba85a7b83c9ffc2b0f":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"3d9927621bba4645af879e20d16dec67":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"06bce4b961da48fe9fe41d5cc63d4d1e":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"f05979cae6614841bf305e70e64c219b":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"638a50340a054b4ea96c1161c456fa9b":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"8cec7a222d7648788ec7b57007c5b15d":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"51d41977eaf946b6bd076b2fa8900691":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"0c5a547ca456482c910a35e0dc7d8ac3":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_41cd4fbc1080458ba0d513c976590f6b","IPY_MODEL_81c809539c0b43cfa1520e1418e5df3f","IPY_MODEL_215f6920cca9457082e7ad94df11777d"],"layout":"IPY_MODEL_143835d7b3ab4274b2fe8878a25047e1"}},"d4b81c3c524b4709adc3d3768f0828d6":{"model_name":"HT
MLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"260f4ac43ac8455fa44761f9c4a710b9":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"66663b650882454ea7f39611d8e672e1":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"5a55211e3d5d4ca1861e6cfa9aecaf7b":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"4fe53864f3d94b4580e4f065c90e8346":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_5f21b4cd03ea45f5abe37f33a1163ff8","IPY_MODEL_01f28b4f778e4b2fb6365a030cccf913","IPY_MODEL_2d0c9f9d66214a7889243c794c690d04"],"layout":"IPY_MODEL_2876b4f8886b4f158ee7950db6a6649a"}},"ceea1337e88b4ace88df4722791d2623":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"d6ca02c0533146aa9aebbabf34f16455":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_b370d503d97444a2ba67714b167ab318","IPY_MODEL_edc5132e328c4fa9b5b1bc3a7718c452","IPY_MODEL_a8ce89a36b954bd5ba6c5204821075c5"],"layout":"IPY_MODEL_f1f24fda3e644eca983e2c1fd9885a56"}},"a7de6e0c8d834a7ca6ac327faf99c571":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0cf1af3ebdb14a14bd7495a040b87367":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"1abd7a413e9b43c88b637725d7218ffc":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["
IPY_MODEL_e04bb692eddc4c3a9527f7bd031280ee","IPY_MODEL_56de6b4d2ff9406a81ac71cdcc1f35ba","IPY_MODEL_d200e29345bc4ae893e4657de22f5f39"],"layout":"IPY_MODEL_d4bb5fb68500472fa69f49bc6833459e"}},"453b8b80465841dba9bd3f3aa7a9ffb6":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"99c522159d2e4a9fb9081c33bb7a8afc":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"

Indicate if each of the 20 record pairs is a match or not

","layout":"IPY_MODEL_15b562a70b2645f7bd0f78b1766c82c0","style":"IPY_MODEL_51d41977eaf946b6bd076b2fa8900691"}},"11a287787d6a4a06b496ffe12a6bdc64":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"8f36e05ea9dc4269a7d67d13ac96b701":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"8c78775a84fc4f699bfa2bbfe3dab7c5":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_35f0fc9864e840eeb6ccd7f2d93f447e","tooltips":[],"style":"IPY_MODEL_c527e11847424d589175d218c69992df","icons":[]}},"41cd4fbc1080458ba0d513c976590f6b":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:141778558290992:14
rec_idrec-1022-orgrec-1022-dup-4
fname jackson jackson
lname eglinton eglinton
stNo840840
add1 fowles street fowles street
add2 mountview mountv iew
city burleigh heads burleigh heads
areacode28302830
state sa sa
dob1983080719830807
ssn29328372932837
","layout":"IPY_MODEL_3d05b7d854f741ecbc2e1db1010798ab","style":"IPY_MODEL_63e4a1f51e8244f29051e4a69336cb49"}},"00f26290427e4acf94d04c28b234837d":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_72ffeaf681cf47a3925fd26300d34de1","tooltips":[],"style":"IPY_MODEL_e3f1acd70e2c4e839acd6e9e8b7e49e5","icons":[]}},"93d45c209e6c4db7b4f1893e7a07fa33":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"1c5659916e7f43ad88b0bf0ba3d4a38a":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"b370d503d97444a2ba67714b167ab318":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:151778558290992:15
rec_idrec-1042-dup-0rec-1029-dup-0
fname kiandra kylee
lname stepehndon
stNo281
add1 gatliff place rose scott circuit
add2 rustenburg sth cordobak anor
city girgarre ashfield
areacode39954226
state qld vic
dob1980112519461101
ssn33282054783085
","layout":"IPY_MODEL_aec3e114a0144cc0b0424095893aa03a","style":"IPY_MODEL_6e4720b9557c4c96a5961c2326e06dde"}},"09e432d35f094545acfbcc9d98e95e14":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"e8bc7fc720bf4237b722fa25ccdb1875":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"e3f1acd70e2c4e839acd6e9e8b7e49e5":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"94149e87a8bf4a27b848cceed81ca749":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_a0c6989023564446bfd47941169e9d54","IPY_MODEL_8bd5f18acdc841bda78ddfa56dfaca47","IPY_MODEL_0afca202d5904153a3148f737d99fb1d"],"layout":"IPY_MODEL_a0fd60a2a98748488df2eba0da433cdb"}},"e04bb692eddc4c3a9527f7bd031280ee":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:171778558290992:17
rec_idrec-1033-dup-0rec-1029-dup-4
fname keziah kylee
lname painter stephenson
stNo1881
add1 ainsli e avenue cordoba manor
add2 sec 1 rose scott circuit
city torquay ashfield
areacode32054226
state vic vic
dob1919103119461101
ssn78010664783085
","layout":"IPY_MODEL_4c4ac18531ee44ba85a7b83c9ffc2b0f","style":"IPY_MODEL_e1a8dbd2d2b14c1097accbfcfa592964"}},"94bbcf3b21e84b3883920d909f93f0c3":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_af846425acb24a248d18f43396c591a7","IPY_MODEL_00f26290427e4acf94d04c28b234837d","IPY_MODEL_9c329b40a81f43feb8d7f7553e431860"],"layout":"IPY_MODEL_dc812084de4d46439b8b1e8d17671b06"}},"a70d0d89103949538b8d7e34eec44b07":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"504ab3248e9a479dbf142659b9d8fefa":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"b0b29a6c5fdd4cccb4090bed9b60bd3a":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"2d5b80a272ed4f2caf37e6c8fa66d72b":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"f43dc03167e04e8684c806e4685879cd":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"8e7cc470fbdf436e8e59f8d9739a5519":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_55962c3259b04b7bb44dd2f5800e3f04","IPY_MODEL_108b233b867b4523828c224e7bbfe235","IPY_MODEL_f2f20f20ad7a4a6895dc394460a926bc"],"layout":"IPY_MODEL_22e063f5749e4021b2feee111dd98ad0"}},"c1597b87211f4902b6c2c5c1f740019d":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"73d15ef654
894812acc4793f619e173c":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"2d014e69339d4025b50acba8f53d119a":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_9e32a901d0ce414580c8bbaffde46086","IPY_MODEL_be4367a12b4140efb00e0ea346ab37e8","IPY_MODEL_ae73fac927fe40c9b872a7f1692e0c12"],"layout":"IPY_MODEL_5c6c1c9203fb4b72859c2524e2b0bc4d"}},"82651085baca4df597f5545f020b2e86":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"25f56bd8a40b424f88b311a715853228":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:131778558290992:13
rec_idrec-1035-orgrec-1022-dup-4
fname jaiden jackson
lname rollins eglinton
stNo48840
add1 rossarden street fowles street
add2 tulgeywood mountv iew
city balwyn north burleigh heads
areacode22242830
state nt sa
dob1928072219830807
ssn76263962932837
","layout":"IPY_MODEL_0b938a61449143d4a8a3128207ac41cf","style":"IPY_MODEL_11a287787d6a4a06b496ffe12a6bdc64"}},"a0dd609280c14b74a626654774a306b3":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"bb0ee709ad194dbeb293d65f3705a540":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"21eca8ceeb654cf9be9fba939cd53021":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"374c8221eeb847d58680f17a5942c7fc":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"d30a2e33d4cf4adda194d84c396f5c6e":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7949872cb6d24c06af06235eda96ff3c":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_bb0ee709ad194dbeb293d65f3705a540","tooltips":[],"style":"IPY_MODEL_8de8f8b8d1ff4d62a321cfdd6b264ab0","icons":[]}},"7d52577ee23e4cb9ab0f20bc158fa141":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6a4880614d53435f93bf9e0a3265ec42":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"daf3287789654524ae1a7ed42ccd3155":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"06fd76a14ee84b9eba54bffc6ac1e001":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"70954a558837497093f2700674cdadf1":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"dc812084de4d46439b8b1e8d17671b06":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"143835d7b3ab4274b2fe8878a25047e1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7204933ceef244649bfc0c795eb82476":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"6f6b6dec2fe84be3884763d703c6b30a":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"c92bc92bb7c24063adc65253fc534599":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"2fc823b6ff4d427fb5571244820da108":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"a1881865283046429020422933f73a27":{"model_name":"LayoutM
odel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"c527e11847424d589175d218c69992df":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"cd460afd975b46db92b76cce6780e8e6":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"d29b9d77e4f5486da0aabb17d1c5a981":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"b3b5b42f269548b4973e106b6cd4a0d4":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:191778558290992:19
rec_idrec-1042-orgrec-1033-dup-0
fname kiandra keziah
lname cowle painter
stNo218
add1 gatliff place ainsli e avenue
add2 rustenburg sth sec 1
city girgarre torquay
areacode39953205
state qld vic
dob1980112519191031
ssn33282057801066
","layout":"IPY_MODEL_d30a2e33d4cf4adda194d84c396f5c6e","style":"IPY_MODEL_374c8221eeb847d58680f17a5942c7fc"}},"e60d3aec9b3d4caaacd9be2a7d073036":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"533435e35119466eaff90124ea8024ce":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"3fe56e5f41184e4bbbc05193d05ad835":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"3dd402ef8f0f4d5aa971466b4c0c68a1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a49bf7896a38474b8d25d5baa1d26c10":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"bc1a0629a8324f02a735790059c4ca36":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"72ffeaf681cf47a3925fd26300d34de1":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"df5ae174e14346c3a53a41b49c3d93ca":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"108b233b867b4523828c224e7bbfe235":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_61021577dbbd43479c8601daecaa3085","tooltips":[],"style":"IPY_MODEL_66663b650882454ea7f39611d8e672e1","icons":[]}},"56de6b4d2ff9406a81ac71cdcc1f35ba":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_58fbda6dbfc74ec6a61bcb5bccf75d8a","tooltips":[],"style":"IPY_MODEL_82651085baca4df597f5545f020b2e86","icons":[]}},"38e6408e68054910b6e5fbffc4cac2d4":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"30e3c09525b14da0a8db49e15797ea3e":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_0d3fca61007f4b6fbd373fe247f4b37b","IPY_MODEL_a8234fefe9fd475891897e7e74e8a599","IPY_MODEL_75da2f082ad245e88ee80916ed700b09"],"layout":"IPY_MODEL_c762dd5002554e238e01a89dca077a63"}},"1628ed8be05c444b93f86691c81fdd1a":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"a4950c291d034be2ad31535d404dc7f7":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_f2dc1b4432f741eeafc44675ab83f576","tooltips":[],"style":"IPY_MODEL_152c58a33ce441d4aafd34bbe0b08adf","icons":[]}},"7342b70a94b14f398ce8f4cfb8207f05":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:161778558290992:16
rec_idrec-1026-orgrec-1026-dup-0
fname xani xani
lname green green
stNo2
add1 phillip avenue phill ip avenue
add2 abbey green
city armidale armidale
areacode51085108
state nsw nsw
dob1939041019390410
ssn92010579201057
","layout":"IPY_MODEL_0f5cbc13c5e643b1a7535db02db3f69d","style":"IPY_MODEL_f85b283843ae4ca4b6683ace99a6d038"}},"1adc473fea594bab9ee35bb462f09bda":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_b3b5b42f269548b4973e106b6cd4a0d4","IPY_MODEL_a4950c291d034be2ad31535d404dc7f7","IPY_MODEL_08a91d4d71744c17b97235c9264830cd"],"layout":"IPY_MODEL_f17f4c5d40c44634be8de8943379d703"}},"152c58a33ce441d4aafd34bbe0b08adf":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"e7df664f9ec146d395ec22be18c7d346":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:41778558290992:4
rec_idrec-1022-dup-4rec-1022-dup-3
fname jackson jackson
lname eglinton christo
stNo840840
add1 fowles street fowles street
add2 mountv iew mou ntveiw
city burleigh heads burleig heads
areacode28302830
state sa sa
dob1983080719830807
ssn29328372932837
","layout":"IPY_MODEL_a1881865283046429020422933f73a27","style":"IPY_MODEL_af4f5dc7af734f399c292fa8d5d1afa7"}},"e1a8dbd2d2b14c1097accbfcfa592964":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"1409df8968c042089d180a2cd6f47db7":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"55962c3259b04b7bb44dd2f5800e3f04":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:71778558290992:7
rec_idrec-1035-orgrec-1022-org
fname jaiden jackson
lname rollins eglinton
stNo48840
add1 rossarden street fowles street
add2 tulgeywood mountview
city balwyn north burleigh heads
areacode22242830
state nt sa
dob1928072219830807
ssn76263962932837
","layout":"IPY_MODEL_ba2be471e6fa4d5d9a2ed41219972e22","style":"IPY_MODEL_261fd2425a7b49e1989194506cd2e9a7"}},"f85b283843ae4ca4b6683ace99a6d038":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"aee006f3d8ce492986cf5790e62f4381":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:181778558290992:18
rec_idrec-1042-orgrec-1042-dup-0
fname kiandra kiandra
lname cowle
stNo22
add1 gatliff place gatliff place
add2 rustenburg sth rustenburg sth
city girgarre girgarre
areacode39953995
state qld qld
dob1980112519801125
ssn33282053328205
","layout":"IPY_MODEL_d29b9d77e4f5486da0aabb17d1c5a981","style":"IPY_MODEL_c1597b87211f4902b6c2c5c1f740019d"}},"c762dd5002554e238e01a89dca077a63":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"e88e1f7ff88148a58b3f8c7d9db84443":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"a8234fefe9fd475891897e7e74e8a599":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_830d71d6419b40fd8776893f2d691169","tooltips":[],"style":"IPY_MODEL_c63eee8e53664cc9be38ae8dbcdad788","icons":[]}},"090bcb2840a34fb295228180eb1af239":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"215f6920cca9457082e7ad94df11777d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_d00e7e76bb83422e9583e37f545122ee","style":"IPY_MODEL_b0b29a6c5fdd4cccb4090bed9b60bd3a"}},"ecf72ca62a2f44c3a81f2399df483247":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_1628ed8be05c444b93f86691c81fdd1a","style":"IPY_MODEL_70954a558837497093f2700674cdadf1"}},"e3e3df0f2b184f32a0787fd3f48cb518":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"badc5f3432ce45d68da13fa6018fd6a5":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:01778558290992:0
rec_idrec-1035-dup-2rec-1035-dup-0
fname jaiden jaiden
lname rolilns rollins
stNo4848
add1 swinden street tulgeywood
add2 tulgeywood rossarden street
city balwyn north balwyn north
areacode22242224
state nt nt
dob1928072219280722
ssn76263967626396
","layout":"IPY_MODEL_4a9cf73aa2e9467492e09cad83d14a01","style":"IPY_MODEL_f7cb4f70f33c40f08f932b1b70443fb2"}},"63e4a1f51e8244f29051e4a69336cb49":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"f17f4c5d40c44634be8de8943379d703":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"35f0fc9864e840eeb6ccd7f2d93f447e":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"1f12f41d3b4a4e89a586eead23d1e07b":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_b2c40a0aee8d44ccb35c48638a0b01fc","tooltips":[],"style":"IPY_MODEL_ad05a19064b64550b011ae8fa8a34dd6","icons":[]}},"00b4e7a5e9ca40eab0ccf033a3619891":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"ac4175421e5042e8b1e80371bc6fae4c":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_e73b751f1bb14a8d9e59982b8beaac2f","IPY_MODEL_755a276636da43daab47c62d23cfcb2b","IPY_MODEL_ecf72ca62a2f44c3a81f2399df483247"],"layout":"IPY_MODEL_df5ae174e14346c3a53a41b49c3d93ca"}},"1c88a042fede41f3a427f54e010e6706":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No 
Match"],"button_style":"info","layout":"IPY_MODEL_d78f12e795214636847ce7cc1b930a52","tooltips":[],"style":"IPY_MODEL_f43dc03167e04e8684c806e4685879cd","icons":[]}},"c63eee8e53664cc9be38ae8dbcdad788":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"f1f24fda3e644eca983e2c1fd9885a56":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"74be767e86d0473f93e5490f555342f6":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_25f56bd8a40b424f88b311a715853228","IPY_MODEL_1c88a042fede41f3a427f54e010e6706","IPY_MODEL_89caedb1deba42c8844e5afd38c8d047"],"layout":"IPY_MODEL_5a55211e3d5d4ca1861e6cfa9aecaf7b"}},"3e4c0e432fa84cad93618c571d5f60d0":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_863f7efcaa3e4edfb8bfc1bcf99b75eb","tooltips":[],"style":"IPY_MODEL_09e432d35f094545acfbcc9d98e95e14","icons":[]}},"353fc5896e9d427facb8c7ab092c7aec":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"7e6023c9b25946e99a181ae3130c2fcb":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"0afca202d5904153a3148f737d99fb1d":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_38e6408e68054910b6e5fbffc4cac2d4","style":"IPY_MODEL_d4b81c3c524b4709adc3d3768f0828d6"}},"d085e3c0fc77495597dd3842686426b6":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_aec36a138f3d4c5cb685e6d1757a7e9b","IPY_MODEL_a3639e5cf2c54340bda93d5703fcd752","IPY_MODEL_54763f2feac845eab7bcbd1c563abb00"],"layout":"IPY_MODEL_eab0d30832f145b09276faf3286a8b1d"}},"177eda1695ef463bb435f790a0d04657":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
z_cluster1778558290992:81778558290992:8
rec_idrec-1035-orgrec-1035-dup-0
fname jaiden jaiden
lname rollins rollins
stNo4848
add1 rossarden street tulgeywood
add2 tulgeywood rossarden street
city balwyn north balwyn north
areacode22242224
state nt nt
dob1928072219280722
ssn76263967626396
","layout":"IPY_MODEL_06bce4b961da48fe9fe41d5cc63d4d1e","style":"IPY_MODEL_56a71a23ba7f4cc39cea1e0c23a5cfd4"}},"a2f7cc02c6334b239488ef87744e0c74":{"model_name":"LayoutModel","model_module":"@jupyter-widgets/base","model_module_version":"2.0.0","state":{}},"8de8f8b8d1ff4d62a321cfdd6b264ab0":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"f4a0925b42fa4fcab7f1ee751abb01ae":{"model_name":"ToggleButtonsStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","button_width":""}},"b10553e0f1c14696b68c1590064c2c9c":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_83d24aee8de74579872cb20b22746223","style":"IPY_MODEL_3fe56e5f41184e4bbbc05193d05ad835"}},"261fd2425a7b49e1989194506cd2e9a7":{"model_name":"HTMLStyleModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"description_width":"","font_size":null,"text_color":null}},"a3639e5cf2c54340bda93d5703fcd752":{"model_name":"ToggleButtonsModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"index":0,"_options_labels":["Uncertain","Match","No Match"],"button_style":"info","layout":"IPY_MODEL_6a4880614d53435f93bf9e0a3265ec42","tooltips":[],"style":"IPY_MODEL_8f36e05ea9dc4269a7d67d13ac96b701","icons":[]}},"08a91d4d71744c17b97235c9264830cd":{"model_name":"HTMLModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"value":"
","layout":"IPY_MODEL_bc1a0629a8324f02a735790059c4ca36","style":"IPY_MODEL_090bcb2840a34fb295228180eb1af239"}},"a8bd668040d244bba99b5e9d0533c88f":{"model_name":"VBoxModel","model_module":"@jupyter-widgets/controls","model_module_version":"2.0.0","state":{"children":["IPY_MODEL_e7df664f9ec146d395ec22be18c7d346","IPY_MODEL_1f12f41d3b4a4e89a586eead23d1e07b","IPY_MODEL_4b75920e91154d1c802cc6de64c95679"],"layout":"IPY_MODEL_7e6023c9b25946e99a181ae3130c2fcb"}}}}},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.timeout":"1200000"}}},"dependencies":{"lakehouse":{"known_lakehouses":[{"id":"9431468e-7392-49f7-972e-854b427cc833"},{"id":"e12962eb-1b6b-4d11-bef0-b23c2565587f"}],"default_lakehouse":"e12962eb-1b6b-4d11-bef0-b23c2565587f","default_lakehouse_name":"newlk","default_lakehouse_workspace_id":"41811c91-fcf2-4aba-8980-4c64b90166ad"},"environment":{"environmentId":"e9fdac13-03c9-4d44-befe-d7ee9618b120","workspaceId":"41811c91-fcf2-4aba-8980-4c64b90166ad"}}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file diff --git a/examples/fabric/03-generate_documents.ipynb b/examples/fabric/03-generate_documents.ipynb new file mode 100644 index 000000000..fbd07e4b8 --- /dev/null +++ b/examples/fabric/03-generate_documents.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","source":["# Part 3: Documenting the model\n","## We have completed setting up Zingg and labeled the training data in the previous steps. In this part, we will run the **_generateDocs_** phase. \n","#### This phase processes the labeled data to create readable documentation about the training data, including the pairs marked as matches as well as those marked as non-matches. 
\n"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"20a505d7-fe1b-4353-bd52-367510182cb9"},{"cell_type":"markdown","source":["## Run Setup Zingg"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"23c72ae7-6b21-4fd9-97c4-0ad93cec18e5"},{"cell_type":"code","source":["%run 01-setting_up_zingg"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"5f2e2c46-704d-4050-82ed-96acb98d25b6"},{"cell_type":"markdown","source":["## Documenting The Model"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"f97cbf6d-f495-4f99-a05d-ff1433791909"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"generateDocs\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.initAndExecute()"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"605c65e5-94e5-4a22-b376-b1e7f4cd2dcd"},{"cell_type":"markdown","source":["\n","## View the generated docs"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"97b9f5a5-33b5-4f16-8632-046f7c431bc5"},{"cell_type":"code","source":["#Set generated docs directory path\n","DOCS_DIR = zinggDir + \"/\" + modelId + \"/docs/\""],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"3347806e-fecf-453b-bfa6-4f10814fb023"},{"cell_type":"code","source":["displayHTML(open(DOCS_DIR+\"model.html\", 
'r').read())"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"6fba8364-11f3-4a32-90fe-de917b116d24"},{"cell_type":"code","source":["displayHTML(open(DOCS_DIR+\"data.html\", 'r').read())"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"85ca2d29-cf2c-4fa9-a920-a03bc5f14745"}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"name":"synapse_pyspark","display_name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.timeout":"1200000"}}},"dependencies":{"lakehouse":{"known_lakehouses":[{"id":"9431468e-7392-49f7-972e-854b427cc833"},{"id":"e12962eb-1b6b-4d11-bef0-b23c2565587f"}],"default_lakehouse":"e12962eb-1b6b-4d11-bef0-b23c2565587f","default_lakehouse_name":"newlk","default_lakehouse_workspace_id":"41811c91-fcf2-4aba-8980-4c64b90166ad"},"environment":{"environmentId":"e9fdac13-03c9-4d44-befe-d7ee9618b120","workspaceId":"41811c91-fcf2-4aba-8980-4c64b90166ad"}}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file diff --git a/examples/fabric/04-train_and_match.ipynb b/examples/fabric/04-train_and_match.ipynb new file mode 100644 index 000000000..be6e82132 --- /dev/null +++ b/examples/fabric/04-train_and_match.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","source":["# Part 4: Train and Match Phase\n","## We have completed setting up Zingg, labeled the training data, and generated the model documentation in the previous steps. In this part, we will run the **_Train_** and **_Match_** phases. 
\n","#### This involves training the entity resolution model using the labeled data and then applying the trained model to match records in your dataset. This step is crucial for identifying and matching similar entities across your data sources."],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"5d7ddd33-2a70-4bd3-9826-167c7e5f459a"},{"cell_type":"markdown","source":["## Run Setup Zingg"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"9886397a-3bff-4240-8e5c-c7eaeb5ca0dd"},{"cell_type":"code","source":["%run 01-setting_up_zingg"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"9802dff9-def7-4a1e-a665-b8beda054c29"},{"cell_type":"markdown","source":["## Build the Zingg models and Predict Matches\n","No change is needed to the cell below."],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"0b688acb-55da-491b-b793-f4402a1b55d5"},{"cell_type":"code","source":["options = ClientOptions([ClientOptions.PHASE,\"trainMatch\"])\n","\n","#Zingg execution for the given phase\n","zingg = ZinggWithSpark(args, options)\n","zingg.initAndExecute()"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"8e7ad019-10d6-4487-b63e-529d1b1f0d09"},{"cell_type":"markdown","source":["## Display the output"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"d1afd7fb-8345-4eb7-91b1-4a6e95196e5c"},{"cell_type":"code","source":["colNames = [\"z_minScore\", \"z_maxScore\", \"z_cluster\", \"id\", \"fname\", \"lname\", \"stNo\", \"add1\", \"add2\", \"city\",\"areacode\", \"state\", \"dob\", \"ssn\"]\n","outputDF = 
spark.read.csv(output_path)\n","outputDF = outputDF.toDF(*colNames)\n","display(outputDF)"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"},"collapsed":false},"id":"673a4edd-7968-4b59-8329-5d7cd0f00458"},{"cell_type":"code","source":["print(outputDF.count())"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"2f318f0b-7fd3-4522-becc-8e99ccf9e445"}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"name":"synapse_pyspark","display_name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"nteract-front-end@1.0.0"},"synapse_widget":{"version":"0.1","state":{}},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.timeout":"1200000"}}},"dependencies":{"lakehouse":{"known_lakehouses":[{"id":"9431468e-7392-49f7-972e-854b427cc833"},{"id":"e12962eb-1b6b-4d11-bef0-b23c2565587f"}],"default_lakehouse":"e12962eb-1b6b-4d11-bef0-b23c2565587f","default_lakehouse_name":"newlk","default_lakehouse_workspace_id":"41811c91-fcf2-4aba-8980-4c64b90166ad"},"environment":{"environmentId":"e9fdac13-03c9-4d44-befe-d7ee9618b120","workspaceId":"41811c91-fcf2-4aba-8980-4c64b90166ad"}}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file