2929import sys
3030import struct
3131import traceback
32+ import warnings
3233from contextlib import contextmanager
3334from glob import glob
3435from queue import Queue
3536from subprocess import PIPE , Popen
3637from threading import Thread
37- from time import sleep , time
38+ from time import sleep
3839from typing import Dict , Iterable , Sequence , Tuple , Union
3940from concurrent .futures import ThreadPoolExecutor
4041
4142import numpy as np
4243import pandas as pd
44+ import scipy .sparse as sp
4345from py4j .java_gateway import GatewayParameters , JavaGateway , Py4JNetworkError
4446from systemds .operator import (
4547 Frame ,
@@ -77,6 +79,7 @@ class SystemDSContext(object):
7779 _FIFO_JAVA2PY_PIPES = []
7880 _data_transfer_mode = 0
7981 _multi_pipe_enabled = False
82+ _sparse_data_transfer = True
8083 _logging_initialized = False
8184 _executor_pool = ThreadPoolExecutor (max_workers = os .cpu_count () * 2 or 4 )
8285
@@ -89,6 +92,7 @@ def __init__(
8992 py4j_logging_level : int = 50 ,
9093 data_transfer_mode : int = 1 ,
9194 multi_pipe_enabled : bool = False ,
95+ sparse_data_transfer : bool = True ,
9296 ):
9397 """Starts a new instance of SystemDSContext, in which the connection to a JVM systemds instance is handled
9498 Any new instance of this SystemDS Context, would start a separate new JVM.
@@ -103,14 +107,26 @@ def __init__(
103107 The logging levels are as follows: 10 DEBUG, 20 INFO, 30 WARNING, 40 ERROR, 50 CRITICAL.
104108 :param py4j_logging_level: The logging level for Py4j to use, since all communication to the JVM is done through this,
105109 it can be verbose if not set high.
106- :param data_transfer_mode: default 0,
110+ :param data_transfer_mode: default 1, 0 for py4j, 1 for using pipes (on unix systems)
111+ :param multi_pipe_enabled: default False, if True, use multiple pipes for data transfer
112+ only used if data_transfer_mode is 1.
113+ .. experimental:: This parameter is experimental and may be removed in a future version.
114+ :param sparse_data_transfer: default True, if True, use optimized sparse matrix transfer,
115+ if False, convert sparse matrices to dense arrays before transfer
107116 """
108117
118+ if multi_pipe_enabled :
119+ warnings .warn (
120+ "The 'multi_pipe_enabled' parameter is experimental and may be removed in a future version." ,
121+ DeprecationWarning ,
122+ stacklevel = 2 ,
123+ )
109124 self .__setup_logging (logging_level , py4j_logging_level )
110125 self .__start (port , capture_stdout )
111126 self .capture_stats (capture_statistics )
112127 self ._log .debug ("Started JVM and SystemDS python context manager" )
113128 self .__setup_data_transfer (data_transfer_mode , multi_pipe_enabled )
129+ self ._sparse_data_transfer = sparse_data_transfer
114130
115131 def __setup_data_transfer (self , data_transfer_mode = 0 , multi_pipe_enabled = False ):
116132 self ._data_transfer_mode = data_transfer_mode
@@ -771,14 +787,14 @@ def scalar(self, v: Dict[str, VALID_INPUT_TYPES]) -> Scalar:
771787
772788 def from_numpy (
773789 self ,
774- mat : np .array ,
790+ mat : Union [ np .ndarray , sp . spmatrix ] ,
775791 * args : Sequence [VALID_INPUT_TYPES ],
776792 ** kwargs : Dict [str , VALID_INPUT_TYPES ],
777793 ) -> Matrix :
778- """Generate DAGNode representing matrix with data given by a numpy array, which will be sent to SystemDS
779- on need.
794+ """Generate DAGNode representing matrix with data given by a numpy array or scipy sparse matrix,
795+ which will be sent to SystemDS on need.
780796
781- :param mat: the numpy array
797+ :param mat: the numpy array or scipy sparse matrix
782798 :param args: unnamed parameters
783799 :param kwargs: named parameters
784800 :return: A Matrix
0 commit comments