diff --git a/mindsdb/integrations/handlers/duckdb_handler/README.md b/mindsdb/integrations/handlers/duckdb_handler/README.md index 54c1040a42c..5fa9125b940 100644 --- a/mindsdb/integrations/handlers/duckdb_handler/README.md +++ b/mindsdb/integrations/handlers/duckdb_handler/README.md @@ -1,41 +1,62 @@ -# DuckDB Handler +# DuckDB Handler This is the implementation of the DuckDB handler for MindsDB. ## DuckDB DuckDB is an open-source analytical database system. DuckDB is designed for fast execution of analytical queries. -There are no external dependencies and the DBMS runs completly embedded within a host process, similar to SQLite. +There are no external dependencies, and the DBMS runs completely embedded within a host process, similar to SQLite. DuckDB provides a rich SQL dialect with support for complex queries with transactional guarantees (ACID). -## Implementation -This handler was implemented using the `duckdb` python client library. +## Implementation +This handler was implemented using the `duckdb` Python client library. ### DuckDB version -The DuckDB handler is currently using the `0.7.1.dev187` pre-relase version of the python client library. In case of issues, make sure your DuckDB database is compatible with this version. See the DuckDB handler [requirements.txt](requirements.txt) for details. - +The DuckDB handler is currently using the `1.1.3` release version of the Python client library. In case of issues, make sure your DuckDB or MotherDuck database is compatible with this version. See the DuckDB handler [requirements.txt](requirements.txt) for details. The required arguments to establish a connection are: -* `database`: the name of the DuckDB database file. May also be set to `:memory:`, which will create an in-memory database. +* `database`: the name of the DuckDB database file, or the name of the MotherDuck database. + - Set to `:memory:` to create an in-memory database. + - For MotherDuck, set this to the MotherDuck database name and also provide `motherduck_token`. 
-The optional arguments are: +Additional optional arguments include: +* `motherduck_token`: a token to authenticate with MotherDuck. * `read_only`: a flag that specifies if the connection should be made in read-only mode. -This is required if multiple processes want to access the same database file at the same time. - + - This is required if multiple processes want to access the same database file simultaneously. ## Usage -In order to make use of this handler and connect to a DuckDB database in MindsDB, the following syntax can be used: +To connect to a DuckDB or MotherDuck database in MindsDB, the following syntax can be used: +### DuckDB Example ```sql CREATE DATABASE duckdb_datasource WITH engine='duckdb', parameters={ - "database":"db.duckdb" + "database": "db.duckdb" }; ``` -Now, you can use this established connection to query your database as follows: +### MotherDuck Example +```sql +CREATE DATABASE md_datasource +WITH +engine='duckdb', +parameters={ + "database": "sample_data", + "motherduck_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9." +}; +``` + +Once the connection is established, you can query the database: + ```sql SELECT * FROM duckdb_datasource.my_table; -``` \ No newline at end of file +``` + +For MotherDuck: +```sql +SELECT * FROM md_datasource.movies; +``` + +By leveraging these features, MindsDB provides powerful integrations with DuckDB and MotherDuck for scalable analytics. \ No newline at end of file diff --git a/mindsdb/integrations/handlers/duckdb_handler/connection_args.py b/mindsdb/integrations/handlers/duckdb_handler/connection_args.py index e5a372f9e88..4d9591e5eb6 100644 --- a/mindsdb/integrations/handlers/duckdb_handler/connection_args.py +++ b/mindsdb/integrations/handlers/duckdb_handler/connection_args.py @@ -2,16 +2,26 @@ from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE - connection_args = OrderedDict( database={ - 'type': ARG_TYPE.STR, - 'description': 'The database file to read and write from. 
The special value :memory: (default) can be used to create an in-memory database.', + "type": ARG_TYPE.STR, + "description": ( + "The database file to read and write from. The special value :memory: (default) " + "can be used to create an in-memory database." + ), + }, + motherduck_token={ + "type": ARG_TYPE.STR, + "description": "Motherduck access token if want to connect motherduck database.", }, read_only={ - 'type': ARG_TYPE.BOOL, - 'description': 'A flag that specifies if the connection should be made in read-only mode.', + "type": ARG_TYPE.BOOL, + "description": ("A flag that specifies if the connection should be made in read-only mode."), }, ) -connection_args_example = OrderedDict(database='db.duckdb', read_only=True) +connection_args_example = OrderedDict( + database="sample_data", + read_only=True, + motherduck_token="ey...enKoT.SsEcCa......", +) diff --git a/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py b/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py index 7ae5423859c..bc407ef0575 100644 --- a/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py +++ b/mindsdb/integrations/handlers/duckdb_handler/duckdb_handler.py @@ -19,14 +19,14 @@ class DuckDBHandler(DatabaseHandler): """This handler handles connection and execution of the DuckDB statements.""" - name = 'duckdb' + name = "duckdb" def __init__(self, name: str, **kwargs): super().__init__(name) self.parser = parse_sql - self.dialect = 'postgresql' - self.connection_data = kwargs.get('connection_data') - self.renderer = SqlalchemyRender('postgres') + self.dialect = "postgresql" + self.connection_data = kwargs.get("connection_data") + self.renderer = SqlalchemyRender("postgres") self.connection = None self.is_connected = False @@ -44,10 +44,17 @@ def connect(self) -> DuckDBPyConnection: if self.is_connected is True: return self.connection + motherduck_token = self.connection_data.get("motherduck_token") + if motherduck_token: + database = ( + 
f"md:{self.connection_data.get('database')}?motherduck_token={motherduck_token}&attach_mode=single" + ) + else: + database = self.connection_data.get("database") args = { - 'database': self.connection_data.get('database'), - 'read_only': self.connection_data.get('read_only'), + "database": database, + "read_only": self.connection_data.get("read_only"), } self.connection = duckdb.connect(**args) @@ -78,9 +85,7 @@ def check_connection(self) -> StatusResponse: self.connect() response.success = True except Exception as e: - logger.error( - f'Error connecting to DuckDB {self.connection_data["database"]}, {e}!' - ) + logger.error(f"Error connecting to DuckDB {self.connection_data['database']}, {e}!") response.error_message = str(e) finally: if response.success is True and need_to_close: @@ -111,17 +116,13 @@ def native_query(self, query: str) -> Response: if result: response = Response( RESPONSE_TYPE.TABLE, - data_frame=pd.DataFrame( - result, columns=[x[0] for x in cursor.description] - ), + data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description]), ) else: connection.commit() response = Response(RESPONSE_TYPE.OK) except Exception as e: - logger.error( - f'Error running query: {query} on {self.connection_data["database"]}!' - ) + logger.error(f"Error running query: {query} on {self.connection_data['database']}!") response = Response(RESPONSE_TYPE.ERROR, error_message=str(e)) cursor.close() @@ -150,10 +151,10 @@ def get_tables(self) -> Response: Response: Names of the tables in the database. """ - q = 'SHOW TABLES;' + q = "SHOW TABLES;" result = self.native_query(q) df = result.data_frame - result.data_frame = df.rename(columns={df.columns[0]: 'table_name'}) + result.data_frame = df.rename(columns={df.columns[0]: "table_name"}) return result def get_columns(self, table_name: str) -> Response: @@ -166,5 +167,5 @@ def get_columns(self, table_name: str) -> Response: Response: Details of the table. 
""" - query = f'DESCRIBE {table_name};' + query = f"DESCRIBE {table_name};" return self.native_query(query)