Skip to content

NameError: name 'np' is not defined #45

@Daizwpa

Description

@Daizwpa

Here is my code:

# Build a synthetic copy of a CSV dataset with DataSynthesizer in
# correlated-attribute mode: describe the input, save the description to
# JSON, display the learned Bayesian network, then generate rows and save
# them to a CSV file.
import pandas as pd
from DataSynthesizer.DataDescriber import DataDescriber
from DataSynthesizer.DataGenerator import DataGenerator
from DataSynthesizer.lib.utils import display_bayesian_network

# Load your dataset from the CSV file
input_data_file = 'C:\\Users\\DAIZO\\Documents\\Python\\thyroid-cancer-dataset-2\\dataset\\data_train.csv'
data = pd.read_csv(input_data_file)

# Backup the original dataset
data_backup = data.copy()

# Specify categorical attributes: column name -> True when the column is
# to be treated as categorical by the describer.
categorical_attributes = {
    # Binary
    "binary__SEXE": True,
    "binary__NIVEAU_INSTRUC": True,
    "binary__SECURITE_SOCIALE": True,
    "binary__ACTIVITE_VIGOUREUSE": True,
    "binary__ATCD_PER_KC": True,
    "binary__MCV_FAM": True,
    "binary__M": True,
    "binary__INVASION_VASCULAIRE": True,
    "binary__MULTIFOCALITE": True,
    "binary__META": True,
    "binary__MALADIE_CV": True,
    "binary__Tabagisme": True,
    "binary__Alcoolisme": True,
    "binary__Papillaire": True,
    "binary__Medullaire": True,
    "binary__Vésiculaire": True,
    "binary__b_ETE": True,
    # Ordinal
    "ordinal__REVENU_ANNUEL": True,
    "ordinal__RISQUE_RECIDIVE_ATA": True,
    "ordinal__YEAR_CHIRURGIE": True,
    "ordinal__T": True,
    "ordinal__DOSE_CUMULEE_IODE": True,
    "ordinal__ETE": True,
    "ordinal__RISK_AJCC8": True,
    # Nominal
    "nominal__STATUT_MATRIMONIAL Marié": True,
    "nominal__STATUT_MATRIMONIAL Célibataire": True,
    "nominal__STATUT_MATRIMONIAL Divorcé": True,
    "nominal__STATUT_MATRIMONIAL Veuf": True,
    "nominal__ACTIVITE_POFESSIONNELLE Employé": True,
    "nominal__ACTIVITE_POFESSIONNELLE Indépendant": True,
    "nominal__ACTIVITE_POFESSIONNELLE Maître (sse) de maison": True,
    "nominal__ACTIVITE_POFESSIONNELLE Retraité(e)": True,
    "nominal__ACTIVITE_POFESSIONNELLE Étudiant": True,
    "nominal__ACTIVITE_POFESSIONNELLE Chômeur (se)": True,
    "nominal__TYPE_HISTOLOGIQUE NIFT": True,
    "nominal__TYPE_HISTOLOGIQUE Tumeur vesiculaire à potentiel de malignité incertain": True,
    "nominal__TYPE_HISTOLOGIQUE Papillaire": True,
    "nominal__TYPE_HISTOLOGIQUE Vésiculaire": True,
    "nominal__TYPE_HISTOLOGIQUE Peu différencié": True,
    "nominal__TYPE_HISTOLOGIQUE Anaplasique": True,
    "nominal__TYPE_HISTOLOGIQUE Medullaire": True,
    "nominal__MALADIE_CV_CONNUE Non": True,
    "nominal__MALADIE_CV_CONNUE cardiopathie ischémique": True,
    "nominal__MALADIE_CV_CONNUE Insuffisance cardiaque": True,
    "nominal__MALADIE_CV_CONNUE Maladie rythmique": True,
    "nominal__MALADIE_CV_CONNUE AOMI": True,
    "nominal__MALADIE_CV_CONNUE Maladie rythmique+ IC": True,
    "nominal__MALADIE_CV_CONNUE TVP": True,
    "nominal__MALADIE_CV_CONNUE AVC": True,
    "nominal__TABAC_STAT Jamais": True,
    "nominal__TABAC_STAT Actif": True,
    "nominal__TABAC_STAT Ancien": True,
    "nominal__ALCOOL_STATUS Jamais": True,
    "nominal__ALCOOL_STATUS Actif": True,
    "nominal__ALCOOL_STATUS Ancien": True,
    "nominal__RISK_DYNAMIQ Excellente réponse": True,
    "nominal__RISK_DYNAMIQ Excellente Réponse indeterminée": True,
    "nominal__RISK_DYNAMIQ Excellente Réponse biologique incomplète": True,
    "nominal__RISK_DYNAMIQ Excellente Réponse radiologique incomplète": True,
    "N": True,
}

# Define privacy settings
epsilon = 0.1
degree_of_bayesian_network = 2
num_tuples_to_generate = 1000

# Initialize DataDescriber with category threshold
describer = DataDescriber(category_threshold=5)
# Describe the dataset to create a Bayesian network
describer.describe_dataset_in_correlated_attribute_mode(
    dataset_file=input_data_file,
    epsilon=epsilon,
    k=degree_of_bayesian_network,
    attribute_to_is_categorical=categorical_attributes,
)

description_file = 'C:\\Users\\DAIZO\\Documents\\Python\\thyroid-cancer-dataset-2\\Notebooks\\models\\out.json'
describer.save_dataset_description_to_file(description_file)
# Each statement must end on its own line: a trailing backslash here would
# splice the call onto the next assignment and make the script unparsable.
display_bayesian_network(describer.bayesian_network)

# NOTE(review): the reported NameError is raised inside DataGenerator while
# it eval()s a key read from the description file, so that key contains the
# name `np`. Presumably the describer wrote NumPy 2 style scalar reprs such
# as `np.int64(...)` into the JSON -- confirm against out.json and the
# installed NumPy / DataSynthesizer versions.
generator = DataGenerator()
generator.generate_dataset_in_correlated_attribute_mode(num_tuples_to_generate, description_file)
# Save synthetic data to a CSV file
synthetic_data_file = 'synthetic__data.csv'
generator.save_synthetic_data(synthetic_data_file)

Here is the error I got:

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[7], [line 2](vscode-notebook-cell:?execution_count=7&line=2)
      1 generator = DataGenerator()
----> [2](vscode-notebook-cell:?execution_count=7&line=2) generator.generate_dataset_in_correlated_attribute_mode(num_tuples_to_generate, description_file)
      3 # Save synthetic data to a CSV file
      4 synthetic_data_file = 'synthetic_retail_data.csv'

File c:\Users\DAIZO\miniconda3\envs\BRAF_lab\Lib\site-packages\DataSynthesizer\DataGenerator.py:65, in DataGenerator.generate_dataset_in_correlated_attribute_mode(self, n, description_file, seed)
     63 all_attributes = self.description['meta']['all_attributes']
     64 candidate_keys = set(self.description['meta']['candidate_keys'])
---> [65](file:///C:/Users/DAIZO/miniconda3/envs/BRAF_lab/Lib/site-packages/DataSynthesizer/DataGenerator.py:65) self.encoded_dataset = DataGenerator.generate_encoded_dataset(self.n, self.description)
     66 self.synthetic_dataset = DataFrame(columns=all_attributes)
     67 for attr in all_attributes:

File c:\Users\DAIZO\miniconda3\envs\BRAF_lab\Lib\site-packages\DataSynthesizer\DataGenerator.py:99, in DataGenerator.generate_encoded_dataset(n, description)
     97 for parents_instance in child_conditional_distributions.keys():
     98     dist = child_conditional_distributions[parents_instance]
---> [99](file:///C:/Users/DAIZO/miniconda3/envs/BRAF_lab/Lib/site-packages/DataSynthesizer/DataGenerator.py:99)     parents_instance = list(eval(parents_instance))
    101     filter_condition = ''
    102     for parent, value in zip(parents, parents_instance):

File <string>:1

NameError: name 'np' is not defined

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions