Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@

gspread==6.1.2
hubspot-api-client==8.2.1
notion-client==2.2.1
numpy==1.26.4
pandas==1.3.4
pyodbc==5.1.0
pytest==8.3.2
python-slugify==8.0.4
requests==2.32.3


Empty file.
89 changes: 89 additions & 0 deletions wherescape/connectors/gsheet/create_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import logging
from datetime import datetime, UTC

from ...helper_functions import create_column_names, create_display_names, prepare_metadata_query
from ...wherescape import WhereScape
from .gsheets_wrapper import Gsheet, set_gsheet_variables
from .gsheets_parsing import parse_gspread_arguments


def gsheet_create_metadata():
    """
    Function that creates a load table in Wherescape based on the data
    in a provided Google sheet file.

    The sheet URL and the argument string are read from the
    ``lt_file_path`` and ``lt_file_name`` columns of ``ws_load_tab`` for
    the current load table. The header row and column types of the sheet
    are then turned into a metadata query that is pushed to the
    WhereScape repository.
    """
    start_time = datetime.now(tz=UTC)
    # Initialize Wherescape
    logging.info("Connecting to WhereScape")
    wherescape_instance = WhereScape()
    logging.info(
        "Start time: %s for gsheet_create_metadata.",
        start_time.strftime("%Y-%m-%d %H:%M:%S"),
    )
    gsheet: Gsheet = Gsheet()

    load_table_name = wherescape_instance.table
    url = wherescape_instance.query_meta(
        "select lt_file_path from ws_load_tab where lt_table_name = ?",
        [load_table_name],
    )[0][0]
    workbook_details = wherescape_instance.query_meta(
        "select lt_file_name from ws_load_tab where lt_table_name = ?",
        [load_table_name],
    )[0][0]
    logging.info(f"Metadata. URL: {url} ; Details : {workbook_details}")

    args = parse_gspread_arguments(workbook_details)
    if args.debug:
        logging.warning("Debug mode on -> do not use for production.")

    set_gsheet_variables(gsheet, url, args)

    header_row = gsheet.get_header()
    column_types = gsheet.get_column_types()
    title = gsheet.get_worksheet().title
    lt_obj_key = wherescape_instance.object_key

    display_names = create_display_names(header_row)
    column_names = create_column_names(header_row)
    source_columns, comments = set_source_columns_and_comments(header_row)

    sql = prepare_metadata_query(
        lt_obj_key=lt_obj_key,
        src_table_name=title,
        columns=column_names,
        display_names=display_names,
        types=column_types,
        comments=comments,
        source_columns=source_columns,
    )
    logging.info(f"Stored details for {len(header_row)} columns")

    wherescape_instance.push_to_meta(sql)
    logging.info("--> Metadata updated. Table can be created.")

    end_time = datetime.now(tz=UTC)
    logging.info("End time: %s", end_time.strftime("%Y-%m-%d %H:%M:%S"))
    # total_seconds() covers the whole duration; timedelta.seconds alone
    # leaves out the days component.
    elapsed_seconds = int((end_time - start_time).total_seconds())
    logging.info("Time elapsed: %s seconds", elapsed_seconds)


def set_source_columns_and_comments(header_row: list):
    """
    Function to determine source_column and comments for metadata.

    Params:
    header_row (list): header values.

    Returns:
    - list: source_column values (headers without trailing whitespace).
    - list: comment values (the same names cut to 1023 characters, with
      every single quote doubled).
    """
    source_columns = [header.rstrip() for header in header_row]
    # Truncate first, then double the quotes, exactly in that order.
    comments = [name[:1023].replace("'", "''") for name in source_columns]
    return source_columns, comments
81 changes: 81 additions & 0 deletions wherescape/connectors/gsheet/gsheets_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import argparse
import logging
import shlex
Comment thread
welmaris marked this conversation as resolved.

from gspread.utils import a1_range_to_grid_range


def parse_gspread_arguments(argument: str) -> argparse.Namespace:
    """
    Converts an argument string into args object.

    The string is split shell-style and handed to the parser from
    create_parser(). The range and header_range values are upper-cased.
    Inconsistent combinations of options are reported through logging
    only; they do not stop the parsing.

    Parameters:
    - argument (str): arguments for the parser collected in a string.
      An empty string or None is parsed as "no arguments".

    Returns
    - args (Namespace): object with all arguments provided stored within.

    Raises
    - ValueError: when the parser rejects the arguments.
    """
    if not argument:
        logging.info("No arguments provided. Using defaults.")

    # shlex.split must not receive None, so fall back to an empty string.
    argument_list = shlex.split(argument or "")

    parser = create_parser()

    try:
        args = parser.parse_args(argument_list)
    except SystemExit as ex:
        logging.warning("There might be a mistake with the arguments. Ensure it's all correct.")
        logging.error(ex)
        # Without a parsed namespace there is nothing to continue with;
        # raise a clear error instead of failing later on an unbound name.
        raise ValueError(
            f"Could not parse the Google Sheet arguments: {argument!r}"
        ) from ex

    if args.range:
        args.range = args.range.upper()
    if args.header_range:
        args.header_range = args.header_range.upper()

    logging.info(
        f"workbook_name: {args.workbook_name}, sheet: {args.sheet}, range: {args.range}, hr: {args.header_range}, no_header: {str(args.no_header)}, debug: {args.debug}"
    )

    if args.header_range and args.no_header:
        logging.error(
            "You cannot specify both a header_range and --no_header in the object source File Name."
        )
    if args.header_range and not args.range:
        logging.error(
            "A --header_range can not be specified without specifying a --range."
        )

    if args.header_range and args.range:
        # Compare the first row of both ranges.
        row_index_header_range = a1_range_to_grid_range(args.header_range).get(
            "startRowIndex"
        )
        row_index_range = a1_range_to_grid_range(args.range).get("startRowIndex")
        if row_index_header_range != row_index_range:
            logging.warning(
                "If both a range and a header_range are specified, they should overlap."
            )
    return args


def create_parser():
    """
    Method to create parser with arguments for workbook_details.

    Arguments known to the parser:
    - workbook_name (positional, optional): name of the workbook; None
      when it is left out.
    - --sheet: name of the sheet in the workbook.
    - --range: cell range to retrieve.
    - --header_range: cell range to be used as header.
    - --no_header: flag, True when given.
    - -d / --debug: flag, True when given.

    Return:
    - parser containing possible args.
    """
    parser = argparse.ArgumentParser()
    # nargs="?" is what makes the default apply: argparse ignores the
    # default of a positional argument that is required.
    parser.add_argument(
        "workbook_name",
        nargs="?",
        default=None,
        help="Name of the Google Sheet/ workbook",
    )  # positional argument
    parser.add_argument("--sheet", help="Name of the sheet in the workbook")
    parser.add_argument("--range", help="Cell range to retrieve")
    parser.add_argument("--header_range", help="Cell range to be used as header")
    parser.add_argument(
        "--no_header", action="store_true", help="Specify if table has no header"
    )
    parser.add_argument(
        "-d", "--debug", action="store_true", help="Print debug messages"
    )
    return parser
Loading