diff --git a/software/glasgow/applet/interface/better_la/__init__.py b/software/glasgow/applet/interface/better_la/__init__.py
new file mode 100644
index 000000000..3507eb279
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/__init__.py
@@ -0,0 +1,205 @@
+from collections import defaultdict
+import io
+import logging
+import argparse
+from vcd import VCDWriter
+from amaranth import *
+from amaranth.lib.cdc import FFSynchronizer
+
+from ....gateware.pads import *
+from ....gateware.analyzer import *
+from ... import *
+from .signal_compressor import SignalCompressor
+from .arbiter import LAArbiter
+
+# This LA uses a simple protocol for sending compressed values over the FIFO which is explained
+# in the arbiter.py (high level chunks) and signal_compressor.py (low level packets) files.
+# The basic architecture is as follows:
+#          +------------------+       +--------+
+# Pin0 --->| SignalCompressor |------>|  FIFO  |-----+
+#          +------------------+       +--------+     |
+#                                                    |
+#          +------------------+       +--------+     |
+# Pin1 --->| SignalCompressor |------>|  FIFO  |-----+     +-----------+      +----------+
+#          +------------------+       +--------+     |     |           |      |          |
+#                                                    +---->| LAArbiter |----->| USB-FIFO |
+#          +------------------+       +--------+     |     |           |      |          |
+# Pin2 --->| SignalCompressor |------>|  FIFO  |-----+     +-----------+      +----------+
+#          +------------------+       +--------+     |
+#                                                    |
+#          +------------------+       +--------+     |
+# PinN --->|       ...        |------>|  ...   |-----+
+#          +------------------+       +--------+
+
+class BetterLASubtarget(Elaboratable):
+    """Gateware that feeds either the synchronized input pins or a free-running test counter
+    into an LAArbiter, which writes the compressed stream into `in_fifo`.
+    """
+    def __init__(self, pads, in_fifo, counter_target=False):
+        self.pads = pads
+        self.in_fifo = in_fifo
+        self.counter_target = counter_target
+
+        self.la = LAArbiter(in_fifo)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules += self.la
+
+        if self.counter_target:
+            print("building bitstream with simulated counter target")
+            counter = Signal(len(self.pads.i_t.i)+2)
+            m.d.sync += counter.eq(counter + 1)
+            # the two lowest counter bits are dropped, so channel 0 toggles every 4 cycles
+            m.d.comb += self.la.input.eq(counter[2:])
+        else:
+            print("building bitstream connected to real target")
+            pins_i = Signal.like(self.pads.i_t.i)
+            # the pins are asynchronous to the sampling clock, so synchronize them first
+            m.submodules += FFSynchronizer(self.pads.i_t.i, pins_i)
+            m.d.comb += self.la.input.eq(pins_i)
+
+        return m
+
+
+class BetterLAApplet(GlasgowApplet):
+    logger = logging.getLogger(__name__)
+    help = "capture logic waveforms"
+    description = """
+    A somewhat better logic analyzer applet that allows for the capture of traces as VCD files.
+    """
+
+    # The FPGA on revA/revB is too slow for the complicated logic in this Applet
+    required_revision = "C0"
+
+    @classmethod
+    def add_build_arguments(cls, parser, access):
+        super().add_build_arguments(parser, access)
+
+        access.add_pin_set_argument(parser, "i", width=range(1, 17), default=1)
+        parser.add_argument(
+            "--counter-target", default=False, action="store_true",
+            help="simulate a target with a counter signal",
+        )
+
+    def build(self, target, args):
+        self.mux_interface = iface = target.multiplexer.claim_interface(self, args)
+        iface.add_subtarget(BetterLASubtarget(
+            pads=iface.get_pads(args, pin_sets=("i",)),
+            in_fifo=iface.get_in_fifo(depth=512*16, auto_flush=False),
+            counter_target=args.counter_target
+        ))
+
+        self._sample_freq = target.sys_clk_freq
+        self._pins = getattr(args, "pin_set_i")
+
+    @classmethod
+    def add_run_arguments(cls, parser, access):
+        super().add_run_arguments(parser, access)
+
+        g_pulls = parser.add_mutually_exclusive_group()
+        g_pulls.add_argument(
+            "--pull-ups", default=False, action="store_true",
+            help="enable pull-ups on all pins")
+        g_pulls.add_argument(
+            "--pull-downs", default=False, action="store_true",
+            help="enable pull-downs on all pins")
+
+    async def run(self, device, args):
+        pull_low = set()
+        pull_high = set()
+        if args.pull_ups:
+            pull_high = set(args.pin_set_i)
+        if args.pull_downs:
+            pull_low = set(args.pin_set_i)
+        iface = await device.demultiplexer.claim_interface(self, self.mux_interface, args,
+            pull_low=pull_low, pull_high=pull_high)
+        return iface
+
+    @classmethod
+    def add_interact_arguments(cls, parser):
+        parser.add_argument(
+            "file", metavar="VCD-FILE", type=argparse.FileType("w"),
+            help="write VCD waveforms to VCD-FILE")
+        parser.add_argument("--buffer-size", type=int, default=10,
+            help="how much data to capture in MB")
+
+    async def interact(self, device, args, iface):
+        """Capture a buffer of compressed samples, decode it and write the result as a VCD file."""
+        # Step 1: record a buffer
+        # we do this before to get the full USB performance and not have any lag-spikes in between
+        # (start with an empty buffer so that an interrupted capture is still well-defined below)
+        buffer = b""
+        try:
+            print(f"starting capture of {args.buffer_size} MB")
+            buffer = await iface.read(1024*1024 * args.buffer_size)
+        except KeyboardInterrupt:
+            pass
+        finally:
+            print("captured buffer, converting...")
+
+
+        # Step 2: parse the packets from the captured buffer and sort them into channels
+        ptr = 0
+        async def read(size):
+            nonlocal ptr
+            # only fail if data is actually missing; a read ending exactly at the end is complete
+            if ptr + size > len(buffer):
+                return None
+            to_return = buffer[ptr:ptr+size]
+            ptr += size
+            return to_return
+        channels = defaultdict(list)
+        chunks = 0
+        while True:
+            read_result = await LAArbiter.read_chunk(read)
+            if read_result is None:
+                break
+            channel, chunk = read_result
+            # a chunk of the maximum length (255 packets) is treated as an overrun: stop parsing
+            if len(chunk) == 255:
+                print(f"channel {channel} overrun")
+                break
+            channels[self._pins[channel]].extend(chunk)
+            chunks += 1
+
+        # Step 3: convert each channels packets into events, attach timestamps and sort them by
+        # timestamp
+        events = []
+        cycles = None
+        for p, pkgs in channels.items():
+            cycle = 0
+            for pkg in pkgs:
+                for value, duration in SignalCompressor.decode_pkg(pkg):
+                    events.append((cycle, p, value))
+                    cycle += duration
+            cycles = cycle if cycles is None else min(cycles, cycle)
+        events.sort(key=lambda e: e[0])
+        if not events:
+            # nothing decodable was captured; the statistics below would divide by zero
+            print("no complete chunks captured, nothing to write")
+            return
+
+        # Step 3.5: report statistics
+        total_pkgs = sum(len(pkgs) for pkgs in channels.values())
+        total_bytes = chunks + total_pkgs * 2
+        print(f"captured {cycles} samples ({cycles / self._sample_freq * 1000}ms)")
+        print(f"chunking overhead: {chunks / total_bytes * 100}%")
+        print(f"compression gain: {100 - (total_bytes * 8 / (cycles * len(self._pins)) * 100)}%")
+
+
+        # Step 4: write out VCD file
+        vcd_writer = VCDWriter(args.file, timescale="1 ns", check_values=False)
+        vcd_signals = {
+            p: vcd_writer.register_var(scope="", name="pin[{}]".format(p), var_type="wire",
+                size=1, init=0)
+            for p in self._pins
+        }
+        for cycle, p, value in events:
+            if cycle > cycles:
+                # we dont write any timestamps for which we dont have data on all channels
+                break
+            signal = vcd_signals[p]
+            timestamp = cycle * 1_000_000_000 // self._sample_freq
+            vcd_writer.change(signal, timestamp, value)
+        vcd_writer.close(timestamp)
diff --git a/software/glasgow/applet/interface/better_la/arbiter.py b/software/glasgow/applet/interface/better_la/arbiter.py
new file mode 100644
index 000000000..e96850f3b
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/arbiter.py
@@ -0,0 +1,115 @@
+from typing import Awaitable, Callable, List, Optional
+from amaranth import *
+from amaranth.lib.fifo import SyncFIFOBuffered
+
+from .signal_compressor import SignalCompressor
+from .step_encoder import StepEncoder
+from .argmax import ArgMax
+
+class LAArbiter(Elaboratable):
+    """This Logic Analyzer arbiter instantiates n SignalCompressors and n FIFOs and arbitrates the
+    output of the FIFOs based on priority. Its output format is one header byte of
+    [4bit length encoded using the table below][4bit channel] (channel in the low nibble) followed
+    by 2*length bytes of compressed channel data (16 bit packets, low byte first).
+    """
+
+    LENGTH_ENCODING = [1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 255]
+
+    def __init__(self, output_fifo: SyncFIFOBuffered, n_channels=16):
+        self.output_fifo = output_fifo
+        assert output_fifo.width == 8
+        self.input = Signal(n_channels)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        fifos: List[SyncFIFOBuffered] = []
+        encoded_fifo_levels = []
+        for i, sig in enumerate(self.input):
+            fifo = SyncFIFOBuffered(width=16, depth=256) # this is exactly one ice40 bram
+            m.submodules[f"fifo_{i}"] = fifo
+            fifos.append(fifo)
+
+            compressor = SignalCompressor(sig)
+            m.submodules[f"compressor_{i}"] = compressor
+            m.d.comb += fifo.w_en.eq(compressor.valid)
+            m.d.comb += fifo.w_data.eq(compressor.value)
+
+            step_encoder = StepEncoder(fifo.r_level, self.LENGTH_ENCODING)
+            m.submodules[f"step_encoder_{i}"] = step_encoder
+            encoded_fifo_levels.append(step_encoder.output)
+
+        fifo_r_data = Array(fifo.r_data for fifo in fifos)
+        fifo_r_en = Array(fifo.r_en for fifo in fifos)
+        fifo_r_rdy = Array(fifo.r_rdy for fifo in fifos)
+        length_decoding = Array(self.LENGTH_ENCODING)
+
+        # the argmax introduces 2 cycles of latency with pipelining to meet timing
+        # to accommodate for that we get the real level of the selected fifo in a combinatorial path
+        # it does not matter if we select a suboptimal fifo but it is bad if we assume a wrong level
+        argmax = m.submodules.argmax = ArgMax(encoded_fifo_levels, sync_levels=[1, 3])
+        max_fifo_idx = argmax.max_idx
+        encoded_fifo_levels_array = Array(encoded_fifo_levels)
+        max_fifo_level_encoded = Signal(4)
+        m.d.comb += max_fifo_level_encoded.eq(encoded_fifo_levels_array[max_fifo_idx])
+        max_fifo_level = Signal(8)
+        m.d.comb += max_fifo_level.eq(length_decoding[max_fifo_level_encoded])
+        max_fifo_r_rdy = Signal()
+        m.d.comb += max_fifo_r_rdy.eq(fifo_r_rdy[max_fifo_idx])
+
+        # as wide as max_fifo_level (up to 255): a 4 bit counter would truncate long chunks
+        to_transfer = Signal.like(max_fifo_level)
+        current_channel = Signal(4)
+        with m.FSM():
+            with m.State("wait"):
+                with m.If(max_fifo_r_rdy):
+                    m.next = "announce"
+
+            with m.State("announce"):
+                m.d.sync += to_transfer.eq(max_fifo_level)
+                m.d.sync += current_channel.eq(max_fifo_idx)
+
+                m.d.comb += self.output_fifo.w_data.eq(Cat(max_fifo_idx, max_fifo_level_encoded))
+                m.d.comb += self.output_fifo.w_en.eq(max_fifo_r_rdy)
+                with m.If(~max_fifo_r_rdy):
+                    m.next = "wait"
+                with m.Elif(self.output_fifo.w_rdy):
+                    m.next = "send_lower"
+
+            with m.State("send_lower"):
+                m.d.comb += self.output_fifo.w_data.eq(fifo_r_data[current_channel][0:8])
+                m.d.comb += self.output_fifo.w_en.eq(1)
+                with m.If(self.output_fifo.w_rdy):
+                    m.next = "send_upper"
+            with m.State("send_upper"):
+                m.d.comb += self.output_fifo.w_data.eq(fifo_r_data[current_channel][8:16])
+                m.d.comb += self.output_fifo.w_en.eq(1)
+                with m.If(self.output_fifo.w_rdy):
+                    m.d.comb += fifo_r_en[current_channel].eq(1)
+                    with m.If(to_transfer > 1):
+                        m.next = "send_lower"
+                        m.d.sync += to_transfer.eq(to_transfer - 1)
+                    with m.Else():
+                        with m.If(max_fifo_r_rdy):
+                            m.next = "announce"
+                        with m.Else():
+                            m.next = "wait"
+
+        return m
+
+    @staticmethod
+    async def read_chunk(read: Callable[[int], Awaitable[Optional[bytes]]]):
+        """Read one chunk via the awaitable `read(size)` and return `(channel, packets)`.
+
+        Returns None if `read` runs out of data before a complete chunk could be read.
+        """
+        header = await read(1)
+        if not header:
+            return None
+        channel = header[0] & 0b1111
+        length_encoded = header[0] >> 4
+        length = LAArbiter.LENGTH_ENCODING[length_encoded]
+        contents = await read(2 * length)
+        if contents is None or len(contents) < 2 * length:
+            return None
+        return channel, [contents[2*i+1] << 8 | contents[2*i] for i in range(length)]
diff --git a/software/glasgow/applet/interface/better_la/argmax.py b/software/glasgow/applet/interface/better_la/argmax.py
new file mode 100644
index 000000000..95db8477d
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/argmax.py
@@ -0,0 +1,44 @@
+from typing import List
+from amaranth import *
+
+class ArgMax(Elaboratable):
+    """
+    Find the maximum value and the index of the maximum value of a list of signals using a
+    comparison-tree. Tree levels listed in `sync_levels` are registered (level 0 is the root),
+    all other levels are combinatorial. If two values are equal, the higher index wins.
+    """
+    def __init__(self, signals: List[Signal], sync_levels=()):
+        self.signals = signals
+
+        self.sync_levels = sync_levels
+
+        self.max_value = Signal.like(signals[0])
+        self.max_idx = Signal(range(len(signals)))
+
+    def elaborate(self, platform):
+        m = Module()
+
+        def build_tree(signals, offset=0, level=0):
+            # a single signal needs no comparison; it is passed through unregistered
+            if len(signals) == 1:
+                return signals[0], offset
+
+            suffix = f"l{level}_{offset}to{offset+len(signals)}"
+            domain = m.d.sync if level in self.sync_levels else m.d.comb
+
+            half = len(signals) // 2
+            a, a_idx = build_tree(signals[:half], offset=offset, level=level+1)
+            b, b_idx = build_tree(signals[half:], offset=offset + half, level=level+1)
+            value = Signal.like(self.signals[0], name=f"max_val_{suffix}")
+            index = Signal.like(self.max_idx, name=f"max_idx_{suffix}")
+            domain += [
+                value.eq(Mux(a > b, a, b)),
+                index.eq(Mux(a > b, a_idx, b_idx))
+            ]
+            return value, index
+
+        val, idx = build_tree(self.signals)
+        m.d.comb += self.max_value.eq(val)
+        m.d.comb += self.max_idx.eq(idx)
+
+        return m
diff --git a/software/glasgow/applet/interface/better_la/signal_compressor.py b/software/glasgow/applet/interface/better_la/signal_compressor.py
new file mode 100644
index 000000000..f405e8e0b
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/signal_compressor.py
@@ -0,0 +1,71 @@
+from itertools import chain
+from typing import List, Tuple
+from amaranth import *
+
+class SignalCompressor(Elaboratable):
+    """The SignalCompressor converts information about value changes into an efficient compressed
+    format. It outputs a stream of 16bit packets; the low bits select one of three encodings:
+
+    bit 0 = 0:      plain, no compression [15 bit value dump, oldest sample in the top bit]
+    bits 1:0 = 01:  constant 0 for n + 1 cycles [n is the upper 14 bit]
+    bits 1:0 = 11:  constant 1 for n + 1 cycles [n is the upper 14 bit]
+    """
+    def __init__(self, signal):
+        self.signal = signal
+
+        self.valid = Signal()
+        self.value = Signal(16)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        last = Signal()
+        m.d.sync += last.eq(self.signal)
+        change = Signal()
+        m.d.comb += change.eq(self.signal ^ last)
+
+
+        counter = Signal(14)
+        m.d.sync += counter.eq(counter + 1)
+
+        buffer = Signal(15)
+        m.d.sync += buffer.eq((buffer << 1) | self.signal)
+
+        plain_mode = Signal()
+
+        with m.If(change):
+            with m.If(counter < 15):
+                m.d.sync += plain_mode.eq(1)
+            with m.Elif(~plain_mode):
+                m.d.comb += self.valid.eq(1)
+                m.d.comb += self.value.eq(Cat(1, last, counter))
+                m.d.sync += counter.eq(0)
+                m.d.sync += plain_mode.eq(0)
+
+        with m.If(counter == 2**len(counter) - 1):
+            m.d.comb += self.valid.eq(1)
+            m.d.comb += self.value.eq(Cat(1, last, counter))
+            m.d.sync += counter.eq(0)
+            m.d.sync += plain_mode.eq(0)
+
+        with m.If(plain_mode & (counter == 14)):
+            m.d.comb += self.valid.eq(1)
+            m.d.comb += self.value.eq(Cat(0, buffer))
+            m.d.sync += counter.eq(0)
+            m.d.sync += plain_mode.eq(0)
+
+        return m
+
+    @staticmethod
+    def decode_pkg(pkg) -> List[Tuple[int, int]]:
+        """Decode one 16bit packet into a list of `(value, duration_in_cycles)` tuples."""
+        if pkg & 0b1:
+            value = (pkg >> 1) & 0b1
+            duration = pkg >> 2
+            return [(value, duration + 1)]
+        return [(int(x), 1) for x in '{0:015b}'.format(pkg >> 1)]
+
+    @staticmethod
+    def expand_duration_list(duration_list: List[Tuple[int, int]]) -> List[int]:
+        """Expand `(value, duration)` tuples into one value per cycle."""
+        return list(chain.from_iterable([value] * duration for value, duration in duration_list))
diff --git a/software/glasgow/applet/interface/better_la/step_encoder.py b/software/glasgow/applet/interface/better_la/step_encoder.py
new file mode 100644
index 000000000..0966b8ed3
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/step_encoder.py
@@ -0,0 +1,22 @@
+from typing import List
+from amaranth import *
+
+class StepEncoder(Elaboratable):
+    """Output the index of the last entry of `possible_values` that `input` reaches (0 if none)."""
+    def __init__(self, input: Signal, possible_values: List[int]):
+        self.input = input
+        self.possible_values = possible_values
+        self.output = Signal(range(len(possible_values)))
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # later assignments take precedence, so the last threshold that is reached wins
+        for i, v in enumerate(self.possible_values):
+            with m.If(self.input >= v):
+                m.d.comb += self.output.eq(i)
+
+        # we add this to have a sync domain and be able to use the simulation helpers
+        a = Signal()
+        m.d.sync += a.eq(~a)
+        return m
diff --git a/software/glasgow/applet/interface/better_la/test.py b/software/glasgow/applet/interface/better_la/test.py
new file mode 100644
index 000000000..9f8ec4add
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/test.py
@@ -0,0 +1,153 @@
+import unittest
+import random
+from amaranth import *
+
+from ....gateware import simulation_test
+from ....applet import GlasgowAppletTestCase, applet_simulation_test, synthesis_test
+from .signal_compressor import SignalCompressor
+from .arbiter import LAArbiter
+from .argmax import ArgMax
+from .step_encoder import StepEncoder
+from . import BetterLAApplet
+
+
+class SignalCompressorTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tb = SignalCompressor(Signal(name="input"))
+
+    @simulation_test
+    def test_rlu(self, tb):
+        for _ in range(100):
+            yield
+        yield self.tb.signal.eq(1)
+        for _ in range(100):
+            yield
+        yield self.tb.signal.eq(0)
+        yield
+
+        assert (yield self.tb.valid) == 1
+        duration_list = SignalCompressor.decode_pkg((yield self.tb.value))
+        assert SignalCompressor.expand_duration_list(duration_list) == [1] * 100
+
+    @simulation_test
+    def test_fallback(self, tb):
+        tx_string = "1011001001010000111100010010011100011100101010001010111001111000"
+        tx = [int(x) for x in tx_string]
+
+        rx = []
+        for x in tx:
+            yield self.tb.signal.eq(x)
+            if (yield self.tb.valid):
+                rx.append((yield self.tb.value))
+            yield
+
+
+        decoded = []
+        for pkg in rx:
+            decoded.extend(SignalCompressor.expand_duration_list(SignalCompressor.decode_pkg(pkg)))
+
+        print(f"saved {100 - (len(rx) * 16 / len(decoded) * 100)}%")
+        # the decoded stream starts with two samples that precede the transmitted data
+        assert decoded[2:] == tx[:len(decoded)-2]
+
+    @simulation_test
+    def test_decode(self, tb):
+        random.seed(0)
+        tx = []
+        for _ in range(100):
+            val = random.randint(0, 1)
+            length = random.randint(1, 7) if random.randint(0, 1) else random.randint(1, 250)
+            tx.extend(val for _ in range(length))
+
+        rx = []
+        for x in tx:
+            yield self.tb.signal.eq(x)
+            if (yield self.tb.valid):
+                rx.append((yield self.tb.value))
+            yield
+
+
+        decoded = []
+        for pkg in rx:
+            decoded.extend(SignalCompressor.expand_duration_list(SignalCompressor.decode_pkg(pkg)))
+
+        print(f"saved {100 - (len(rx) * 16 / len(decoded) * 100)}%")
+        # the decoded stream starts with two samples that precede the transmitted data
+        assert decoded[2:] == tx[:len(decoded)-2]
+
+
+class ArgMaxTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tb = ArgMax([Signal(8, name=f"input_{i}") for i in range(10)], sync_levels=[1, 3])
+
+    @simulation_test
+    def test(self, tb):
+        yield self.tb.signals[3].eq(10)
+        # wait for the registered tree levels to propagate the new value
+        yield
+        yield
+        yield
+        assert (yield self.tb.max_idx) == 3
+        assert (yield self.tb.max_value) == 10
+
+        yield self.tb.signals[7].eq(22)
+        yield
+        yield
+        yield
+        assert (yield self.tb.max_idx) == 7
+        assert (yield self.tb.max_value) == 22
+
+
+class StepEncoderTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tb = StepEncoder(Signal(8, name="input"), LAArbiter.LENGTH_ENCODING)
+
+    @simulation_test
+    def test(self, tb):
+        testdata = [
+            (0, 0),
+            (1, 0),
+            (10, 5),
+            (100, 12)
+        ]
+
+        for value, expected in testdata:
+            yield self.tb.input.eq(value)
+            yield
+            assert (yield self.tb.output) == expected
+
+
+class BetterLAAppletTestCase(GlasgowAppletTestCase, applet=BetterLAApplet):
+    @synthesis_test
+    def test_build(self):
+        self.assertBuilds()
+
+    def setup_demo_source(self):
+        self.build_simulated_applet()
+        mux_iface = self.applet.mux_interface
+        m = Module()
+        m.d.sync += mux_iface.pads.i_t.i.eq(mux_iface.pads.i_t.i + 1)
+        self.target.add_submodule(m)
+
+    @applet_simulation_test("setup_demo_source", ["--pins-i", "0:15"])
+    async def test_smoke(self):
+        applet = await self.run_simulated_applet()
+        channels = [[] for _ in range(16)]
+        for _ in range(100):
+            channel, chunk = await LAArbiter.read_chunk(applet.read)
+            assert len(chunk) != 255
+            for pkg in chunk:
+                duration_list = SignalCompressor.decode_pkg(pkg)
+                expanded = SignalCompressor.expand_duration_list(duration_list)
+                channels[channel].extend(expanded)
+        # the demo source is a counter, so channel i has to stay constant for 2**i cycles
+        for i, channel in enumerate(channels):
+            duration = 0
+            last = 0
+            for j, x in enumerate(channel[3:]):
+                if x == last:
+                    duration += 1
+                else:
+                    assert duration == 2**i, f"channel {i} at position {j}"
+                    duration = 1
+                    last = x
diff --git a/software/pyproject.toml b/software/pyproject.toml
index fcaab3a85..ae149ecb2 100644
--- a/software/pyproject.toml
+++ b/software/pyproject.toml
@@ -81,6 +81,7 @@ selftest = "glasgow.applet.internal.selftest:SelfTestApplet"
 benchmark = "glasgow.applet.internal.benchmark:BenchmarkApplet"
 analyzer = "glasgow.applet.interface.analyzer:AnalyzerApplet"
+better-la = "glasgow.applet.interface.better_la:BetterLAApplet"
 uart = "glasgow.applet.interface.uart:UARTApplet"
 spi-controller = "glasgow.applet.interface.spi_controller:SPIControllerApplet"
 i2c-initiator = "glasgow.applet.interface.i2c_initiator:I2CInitiatorApplet"