Skip to content

Commit b0aa776

Browse files
committed
Start SparkSQL backend for PySpark (new version number: 1.0.5)
1 parent 1b141d3 commit b0aa776

4 files changed

Lines changed: 34 additions & 5 deletions

File tree

docs/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@
4747
# built documents.
4848
#
4949
# The short X.Y version.
50-
version = "1.0.5-pre2"
50+
version = "1.0.5"
5151
# The full version, including alpha/beta/rc tags.
52-
release = "1.0.5-pre2"
52+
release = "1.0.5"
5353

5454
# The language for content autogenerated by Sphinx. Refer to documentation
5555
# for a list of supported languages.

histogrammar/defs.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,9 @@ def _makeNPWeights(self, weights, shape):
10901090
else:
10911091
return weights * numpy.ones(shape, dtype=numpy.float64)
10921092

1093+
def fillsparksql(self, data):
1094+
pass
1095+
10931096
# useful functions
10941097

10951098
unweighted = named("unweighted", lambda datum: 1.0)

histogrammar/util.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def __init__(self, container, fill):
4646
self.root = container.fillroot
4747
self.pycuda = container.fillpycuda
4848
self.numpy = container.fillnumpy
49+
self.sparksql = container.fillsparksql
4950
def __call__(self, *args, **kwds):
5051
return self.fill(*args, **kwds)
5152

@@ -212,8 +213,26 @@ def __init__(self, expr, name=None):
212213
self.name = expr.__name__
213214
else:
214215
self.name = name
215-
if expr is not None and not isinstance(expr, (basestring, types.FunctionType)):
216-
raise TypeError("quantity ({0}) must be a string or function".format(expr))
216+
217+
if expr is None:
218+
ok = True
219+
elif isinstance(expr, basestring):
220+
ok = True
221+
elif isinstance(expr, types.FunctionType):
222+
ok = True
223+
else:
224+
try:
225+
from pyspark.sql.column import Column
226+
except ImportError:
227+
ok = False
228+
else:
229+
if isinstance(expr, Column):
230+
if self.name is None:
231+
self.name = str(expr)[7:-1]
232+
ok = True
233+
if not ok:
234+
raise TypeError("quantity ({0}) must be a string, function, or SparkSQL Column".format(expr))
235+
217236
if name is not None and not isinstance(name, basestring):
218237
raise TypeError("function name must be a string, not {0} (perhaps your arguments are reversed)".format(name))
219238

@@ -287,6 +306,13 @@ def function(datum):
287306
raise TypeError("immutable container (created from JSON or .ed) cannot be filled")
288307

289308
else:
309+
try:
310+
from pyspark.sql.column import Column
311+
except ImportError:
312+
pass
313+
else:
314+
if isinstance(self.expr, Column):
315+
raise TypeError("cannot use SparkSQL Column with the normal fill method; use fill.sparksql")
290316
raise TypeError("unrecognized type for function: {0}".format(type(self.expr)))
291317

292318
return self.fcn(*args, **kwds)

histogrammar/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import re
1818

19-
__version__ = "1.0.5-pre2"
19+
__version__ = "1.0.5"
2020

2121
version = __version__
2222

0 commit comments

Comments
 (0)