Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Deeploy/Targets/Neureka/Engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@
ConvLayer([NeurekaPWConv2DMapper, NeurekaDWConv2DMapper, NeurekaDenseConv2DMapper]),
}

_includeList = ["pulp_nnx_neureka.h", "pulp_nnx_util.h", "neureka_siracusa_bsp.h", "neureka.h", "neureka_task.h"]
_includeList = [
"pulp_nnx_neureka.h", "pulp_nnx_util.h", "neureka_siracusa_bsp.h", "neureka.h", "neureka_task.h", "neureka_gvsoc.h"
]

_neurekaInitCode = r"""
neureka_siracusa_conf_t conf = {.max_stall = 8};
neureka_nnx_init(neureka_siracusa_get_dev(), &conf);
neureka_gvsoc_log_activate(neureka_siracusa_get_dev(), NEUREKA_GVSOC_LOG_LEVEL_ALL, NEUREKA_GVSOC_LOG_FORMAT_HEXADECIMAL);
"""


Expand Down
62 changes: 54 additions & 8 deletions Deeploy/Targets/Neureka/Parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,18 @@ def parseNodeCtxt(self,
# and enforcing that the channels_first is false
data_in = newCtxt.lookup(self.operatorRepresentation['data_in'])
data_out = newCtxt.lookup(self.operatorRepresentation['data_out'])
weight = newCtxt.lookup(self.operatorRepresentation['weight'])
# MARCHIOA: the weight rank depends on the type of convolution, so it needs to be parsed by the child parsers
# - PW -> 3-dim
# - DW -> 4-dim
# - Dense -> 4-dim
# weight = newCtxt.lookup(self.operatorRepresentation['weight'])

if not all([
channels_first == False,
len(data_in.shape) == 4,
# LMACAN: weight shape should be equal to 3 because we have to do the neureka's
# special weight encoding
len(weight.shape) == 3,
# # LMACAN: weight shape should be equal to 3 because we have to do the neureka's
# # special weight encoding
# len(weight.shape) == 3,
]):
return newCtxt, False

Expand All @@ -83,18 +87,36 @@ def parseNode(self, node: gs.Node) -> bool:
if not super().parseNode(node):
return False

ch_im_out = node.inputs[1].shape[0]
ch_im_in = node.inputs[1].shape[1]
weights = node.inputs[1]

# weights reshaped by the weights encoder into
# (cout, cinMajor, bits, weightBandwidthBytes)
# where:
# - cout: 1 by definition (it is cin from ONNX)
# - cinMajor: number of tiles over the channels
# - bits: weight bit width (only 8 is supported)
# - weightBandwidthBytes: which is 32 in Siracusa
if not all([
self.operatorRepresentation['kernel_shape'] == [3, 3],
self.operatorRepresentation['group'] == ch_im_out,
self.operatorRepresentation['group'] == ch_im_in,
len(weights.shape) == 4,
weights.shape[0] == 1, # ch_im_out
]):
return False

return True

def parseNodeCtxt(self, ctxt, node, channels_first = True):
    """Run the parent context parsing, then require a rank-4 weight buffer.

    Args:
        ctxt: Network context forwarded to the parent parser.
        node: Graph node being parsed; forwarded to the parent parser.
        channels_first: Layout flag forwarded unchanged to the parent parser.

    Returns:
        A ``(context, success)`` tuple. ``success`` is ``False`` when the
        parent parser rejects the node or when the weight looked up through
        ``operatorRepresentation['weight']`` does not have exactly 4
        dimensions.
    """
    newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
    if not ret:
        # Always hand back a (context, flag) pair: the success path returns a
        # tuple, so a bare ``False`` here would break callers that unpack it.
        return newCtxt, False

    weight = newCtxt.lookup(self.operatorRepresentation['weight'])
    if len(weight.shape) != 4:
        return newCtxt, False

    return newCtxt, True


class NeurekaRQSDWConv2DParser(NeurekaDWConv2DParser, RQSParserInterface):

Expand Down Expand Up @@ -136,6 +158,18 @@ def parseNode(self, node: gs.Node) -> bool:

return True

def parseNodeCtxt(self, ctxt, node, channels_first = True):
    """Run the parent context parsing, then require a rank-3 weight buffer.

    Args:
        ctxt: Network context forwarded to the parent parser.
        node: Graph node being parsed; forwarded to the parent parser.
        channels_first: Layout flag forwarded unchanged to the parent parser.

    Returns:
        A ``(context, success)`` tuple. ``success`` is ``False`` when the
        parent parser rejects the node or when the weight looked up through
        ``operatorRepresentation['weight']`` does not have exactly 3
        dimensions.
    """
    newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
    if not ret:
        # Always hand back a (context, flag) pair: the success path returns a
        # tuple, so a bare ``False`` here would break callers that unpack it.
        return newCtxt, False

    weight = newCtxt.lookup(self.operatorRepresentation['weight'])
    if len(weight.shape) != 3:
        return newCtxt, False

    return newCtxt, True


class NeurekaRQSPWConv2DParser(NeurekaPWConv2DParser, RQSParserInterface):

Expand Down Expand Up @@ -176,6 +210,18 @@ def parseNode(self, node: gs.Node) -> bool:

return True

def parseNodeCtxt(self, ctxt, node, channels_first = True):
    """Run the parent context parsing, then require a rank-4 weight buffer.

    Args:
        ctxt: Network context forwarded to the parent parser.
        node: Graph node being parsed; forwarded to the parent parser.
        channels_first: Layout flag forwarded unchanged to the parent parser.

    Returns:
        A ``(context, success)`` tuple. ``success`` is ``False`` when the
        parent parser rejects the node or when the weight looked up through
        ``operatorRepresentation['weight']`` does not have exactly 4
        dimensions.
    """
    newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
    if not ret:
        # Always hand back a (context, flag) pair: the success path returns a
        # tuple, so a bare ``False`` here would break callers that unpack it.
        return newCtxt, False

    weight = newCtxt.lookup(self.operatorRepresentation['weight'])
    if len(weight.shape) != 4:
        return newCtxt, False

    return newCtxt, True


class NeurekaRQSDenseConv2DParser(NeurekaDenseConv2DParser, RQSParserInterface):

Expand Down
4 changes: 2 additions & 2 deletions Deeploy/Targets/Neureka/Templates/ConvTemplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,12 @@ def getCounters(
operatorRepresentation: OperatorRepresentation) -> Tuple[int, int, int, int, int, int, int, int, int, int]:
_ = operatorRepresentation # operatorRepresentation not accessed for now because it's just for pointwise kernels

n_channel_out_subtiles = _getNumTiles(channel_out, 28)
n_channel_out_subtiles = _getNumTiles(channel_out, 32)
n_channel_in_subtiles = _getNumTiles(channel_in, 28)
n_height_out_subtiles = _getNumTiles(height_out, 6)
n_width_out_subtiles = _getNumTiles(width_out, 6)

channel_out_border = _getBorderTileSize(channel_out, 28)
channel_out_border = _getBorderTileSize(channel_out, 32)
channel_in_border = _getBorderTileSize(channel_in, 28)
height_out_border = _getBorderTileSize(height_out, 6)
width_out_border = _getBorderTileSize(width_out, 6)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,9 @@ def serializeTilingSolution(
replacementTypes['weight_addr_offset'] = PointerClass(uint32_t)
for absoluteCube in absoluteOutputCubes:
COffset, CSize = absoluteCube.absoluteOffset[-1], absoluteCube.rectangle.dims[-1]
WeightCube = HyperRectangle((COffset, 0, 0), (CSize, weightShape[-2], weightShape[-1]))
# WeightCube = HyperRectangle((COffset, 0, 0), (CSize, weightShape[-2], weightShape[-1]))
WeightCube = HyperRectangle((COffset, 0, 0, 0),
(CSize, weightShape[-3], weightShape[-2], weightShape[-1]))
replacements['weight_addr_offset'].append(calculateFlatOffsetInBytes(WeightCube, weightBuffer))
else:
inputWeightBaseOffsets, outputWeightBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel,
Expand All @@ -228,7 +230,8 @@ def serializeTilingSolution(

for cube, load in zip(outputCubes, inputLoadSchedule):
COffset, CSize = cube.offset[-1], cube.dims[-1]
load['weight'] = HyperRectangle((COffset, 0, 0), (CSize, weightShape[-2], weightShape[-1]))
load['weight'] = HyperRectangle((COffset, 0, 0, 0),
(CSize, weightShape[-3], weightShape[-2], weightShape[-1]))

tilingSchedule = TilingSchedule(inputBaseOffsets, outputBaseOffsets, inputLoadSchedule, outputLoadSchedule)
variableReplacementSchedule = VariableReplacementScheme(replacements, replacementTypes)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,7 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw
tilerModel.addConstraint(outputBatchVar == inputBatchVar)
tilerModel.addConstraint(outputChannelVar == inputChannelVar)

weightBuffer = ctxt.lookup(weightBufferName)
if hasattr(weightBuffer, "_memoryLevel") and weightBuffer._memoryLevel == "WeightMemory_SRAM":
tilerModel.addConstraint(weightOutChannelVar == weightOutChannelVar.Max())
else:
tilerModel.addConstraint(weightOutChannelVar == outputChannelVar)
tilerModel.addConstraint(weightOutChannelVar == weightOutChannelVar.Max())

tilerModel.addConstraint(inputHeightVar >= 3)
tilerModel.addConstraint(inputWidthVar >= 3)
Expand Down Expand Up @@ -214,7 +210,8 @@ def serializeTilingSolution(
replacementTypes['weight_addr_offset'] = PointerClass(uint32_t)
for absoluteCube in absoluteOutputCubes:
COffset, CSize = absoluteCube.absoluteOffset[-1], absoluteCube.rectangle.dims[-1]
WeightCube = HyperRectangle((COffset, 0, 0), (CSize, weightShape[-2], weightShape[-1]))
WeightCube = HyperRectangle((COffset, 0, 0, 0),
(CSize, weightShape[-3], weightShape[-2], weightShape[-1]))
replacements['weight_addr_offset'].append(calculateFlatOffsetInBytes(WeightCube, weightBuffer))
else:
inputWeightBaseOffsets, outputWeightBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel,
Expand All @@ -223,8 +220,7 @@ def serializeTilingSolution(
outputBaseOffsets.update(outputWeightBaseOffsets)

for cube, load in zip(outputCubes, inputLoadSchedule):
COffset, CSize = cube.offset[-1], cube.dims[-1]
load['weight'] = HyperRectangle((COffset, 0, 0), (CSize, weightShape[-2], weightShape[-1]))
load['weight'] = HyperRectangle((0,) * len(weightShape), tuple(weightShape))

tilingSchedule = TilingSchedule(inputBaseOffsets, outputBaseOffsets, inputLoadSchedule, outputLoadSchedule)
variableReplacementSchedule = VariableReplacementScheme(replacements, replacementTypes)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ def _weightEncode(weight: npt.NDArray[np.uint8], bits: int, depthwise: bool = Fa
_NEUREKA_CIN_SUBTILE_1x1 = 32
_NEUREKA_CIN_SUBTILE_3x3 = 28

if depthwise:
weight = weight.transpose(1, 0, 2, 3) # Swap cout and cin

cout, cin, height, width = weight.shape
cinSubtile = (_NEUREKA_CIN_SUBTILE_3x3 if height == 3 else _NEUREKA_CIN_SUBTILE_1x1)

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 4 additions & 0 deletions DeeployTest/testUtils/deeployRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,10 @@ def create_config_from_args(args: argparse.Namespace,
gen_args_list.append(f"--searchStrategy={args.searchStrategy}")
if hasattr(args, 'plotMemAlloc') and args.plotMemAlloc:
gen_args_list.append("--plotMemAlloc")
if hasattr(args, 'enable_3x3') and args.enable_3x3:
gen_args_list.append("--enable-3x3")
if hasattr(args, 'neureka_wmem') and args.neureka_wmem:
gen_args_list.append("--neureka-wmem")

if not tiling and getattr(args, 'profileUntiled', False):
gen_args_list.append("--profileUntiled")
Expand Down
Loading