@@ -5533,7 +5533,7 @@ cannot be safely cast to variable data type""" % attname
55335533 if data.dtype.kind in [' S' ,' U' ] and data.dtype.itemsize > 1 :
55345534 # if data is a numpy string array, convert it to an array
55355535 # of characters with one more dimension.
5536- data = stringtochar(data, encoding = encoding)
5536+ data = stringtochar(data, encoding = encoding, n_strlen = self .shape[ - 1 ] )
55375537
55385538 # if structured data has strings (and _Encoding att set), create view as char arrays
55395539 # (issue #773)
@@ -6775,9 +6775,9 @@ returns a rank 1 numpy character array of length NUMCHARS with datatype `'S1'`
67756775 arr[0 :len (string)] = tuple (string)
67766776 return arr
67776777
def stringtochar(a, encoding='utf-8', n_strlen=None):
    """
**`stringtochar(a,encoding='utf-8',n_strlen=None)`**

convert a string array to a character array with one extra dimension

**`a`**:  Input numpy string array with numpy datatype `'SN'` or `'UN'`
(fixed-width bytes or unicode strings). It may have any number of
dimensions, including zero.

optional kwarg `encoding` can be used to specify character encoding (default
`utf-8`). If `encoding` is 'none' or 'bytes', the input array is treated as
raw byte strings: the bytes stored in the array buffer are copied unchanged.

optional kwarg `n_strlen` is the number of characters in each string, i.e.
the length of the new trailing dimension. Default is None, which means
`n_strlen` will be set to a.itemsize (the number of bytes used to represent
each string in the input array). Strings shorter than `n_strlen` are padded
with null bytes, longer ones are truncated.

returns a numpy character array with shape `a.shape + (n_strlen,)`. The
datatype is `'S1'`, except for unicode (`'U'`) input with
`encoding='ascii'`, where it is `'U1'`.

raises `ValueError` if `a` is not a string array or `n_strlen` is negative,
and a `UnicodeError` subclass if the data cannot be represented in
`encoding`."""
    kind = a.dtype.kind
    # Validate the input type before touching anything else.
    if kind not in ("S", "U"):
        raise ValueError("type must string or unicode ('S' or 'U')")
    itemsize = a.dtype.itemsize
    if n_strlen is None:
        n_strlen = itemsize
    n_strlen = int(n_strlen)
    if n_strlen < 0:
        raise ValueError("n_strlen must be non-negative")

    raw_bytes = encoding in ('none', 'None', 'bytes')
    if raw_bytes or kind == "S":
        # Byte strings are already encoded (and raw mode asks for the buffer
        # verbatim), so slice the C-ordered buffer into one chunk per element.
        buf = a.tobytes()
        if encoding == 'ascii':
            # Keep rejecting non-ASCII bytes (raises UnicodeDecodeError).
            buf.decode('ascii')
        if itemsize:
            chunks = [buf[i:i + itemsize] for i in range(0, len(buf), itemsize)]
        else:
            chunks = [b''] * a.size
    else:
        # Unicode input: encode every element individually.  ravel() handles
        # 0-d and N-d arrays alike and preserves C order.
        chunks = [text.encode(encoding) for text in a.ravel().tolist()]

    # Null-pad or truncate each element to exactly n_strlen bytes.
    pad = b'\0' * n_strlen
    blob = b''.join((chunk + pad)[:n_strlen] for chunk in chunks)

    # frombuffer maps each byte to one 'S1' element (no int->str conversion);
    # copy() turns the read-only view into a writable array.
    b = numpy.frombuffer(blob, dtype='S1').reshape(a.shape + (n_strlen,)).copy()
    if encoding == 'ascii' and kind == "U":
        # Keep the input kind for ascii, as the 'S1'/'U1' contract states.
        b = b.astype('U1')
    return b
68036816
68046817def chartostring (b ,encoding = ' utf-8' ):
0 commit comments