diff --git a/software/glasgow/applet/interface/better_la/__init__.py b/software/glasgow/applet/interface/better_la/__init__.py
new file mode 100644
index 000000000..3507eb279
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/__init__.py
@@ -0,0 +1,191 @@
+from collections import defaultdict
+import io
+import logging
+import argparse
+from vcd import VCDWriter
+from amaranth import *
+from amaranth.lib.cdc import FFSynchronizer
+
+from ....gateware.pads import *
+from ....gateware.analyzer import *
+from ... import *
+from .signal_compressor import SignalCompressor
+from .arbiter import LAArbiter
+
+# This LA uses a simple protocol for sending compressed values over the FIFO which is explained
+# in the arbiter.py (high level chunks) and signal_compressor.py (low level packets) files.
+# The basic architecture is as follows:
+#          +------------------+       +--------+
+# Pin0 --->| SignalCompressor |------>|  FIFO  |-----+
+#          +------------------+       +--------+     |
+#                                                    |
+#          +------------------+       +--------+     |
+# Pin1 --->| SignalCompressor |------>|  FIFO  |-----+     +-----------+      +----------+
+#          +------------------+       +--------+     |     |           |      |          |
+#                                                    +---->| LAArbiter |----->| USB-FIFO |
+#          +------------------+       +--------+     |     |           |      |          |
+# Pin2 --->| SignalCompressor |------>|  FIFO  |-----+     +-----------+      +----------+
+#          +------------------+       +--------+     |
+#                                                    |
+#          +------------------+       +--------+     |
+# PinN --->|       ...        |------>|   ...  |-----+
+#          +------------------+       +--------+
+
+class BetterLASubtarget(Elaboratable):
+    """Gateware that feeds the input pins, or a simulated counter, into an LAArbiter."""
+    def __init__(self, pads, in_fifo, counter_target=False):
+        self.pads    = pads
+        self.in_fifo = in_fifo
+        self.counter_target = counter_target
+        self.la = LAArbiter(in_fifo)  # the arbiter writes its output into in_fifo
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules += self.la
+
+        if self.counter_target:
+            print("building bitstream with simulated counter target")
+            counter = Signal(len(self.pads.i_t.i)+2)  # two bits wider than the pin set
+            m.d.sync += counter.eq(counter + 1)
+            m.d.comb += self.la.input.eq(counter[2:])  # the two lowest bits are not captured
+        else:
+            print("building bitstream connected to real target")
+            pins_i = Signal.like(self.pads.i_t.i)
+            m.submodules += FFSynchronizer(self.pads.i_t.i, pins_i)  # synchronized pin copy
+            m.d.comb += self.la.input.eq(pins_i)
+
+        return m
+
+
+class BetterLAApplet(GlasgowApplet):
+    logger = logging.getLogger(__name__)
+    help = "capture logic waveforms"
+    description = """
+    A somewhat better logic analyzer applet that allows for the capture of traces as VCD files.
+    """
+
+    # The FPGA on revA/revB is too slow for the complicated logic in this Applet
+    required_revision = "C0"
+
+    @classmethod
+    def add_build_arguments(cls, parser, access):
+        """Register the input pin set (1 to 16 pins) and the ``--counter-target`` flag."""
+        super().add_build_arguments(parser, access)
+        access.add_pin_set_argument(parser, "i", width=range(1, 17), default=1)
+        parser.add_argument(
+            "--counter-target", default=False, action="store_true",
+            help="simulate a target with a counter signal",
+        )
+
+    def build(self, target, args):
+        """Add the capture subtarget and remember the sample clock and the captured pins."""
+        self.mux_interface = iface = target.multiplexer.claim_interface(self, args)
+        iface.add_subtarget(BetterLASubtarget(
+            pads=iface.get_pads(args, pin_sets=("i",)),
+            in_fifo=iface.get_in_fifo(depth=512*16, auto_flush=False),
+            counter_target=args.counter_target
+        ))
+        self._sample_freq = target.sys_clk_freq
+        self._pins = getattr(args, "pin_set_i")
+
+    @classmethod
+    def add_run_arguments(cls, parser, access):
+        """Register the mutually exclusive ``--pull-ups`` and ``--pull-downs`` options."""
+        super().add_run_arguments(parser, access)
+        g_pulls = parser.add_mutually_exclusive_group()
+        g_pulls.add_argument(
+            "--pull-ups", default=False, action="store_true",
+            help="enable pull-ups on all pins")
+        g_pulls.add_argument(
+            "--pull-downs", default=False, action="store_true",
+            help="enable pull-downs on all pins")
+
+    async def run(self, device, args):
+        """Claim the demultiplexer interface, applying the requested pulls to every input pin."""
+        pull_high = set(args.pin_set_i) if args.pull_ups else set()
+        pull_low  = set(args.pin_set_i) if args.pull_downs else set()
+        return await device.demultiplexer.claim_interface(self, self.mux_interface, args,
+                                                          pull_low=pull_low, pull_high=pull_high)
+
+    @classmethod
+    def add_interact_arguments(cls, parser):
+        """Register the output VCD file and the capture size (in MB)."""
+        parser.add_argument("file", metavar="VCD-FILE", type=argparse.FileType("w"),
+                            help="write VCD waveforms to VCD-FILE")
+        parser.add_argument("--buffer-size", type=int, default=10,
+                            help="how much data to capture in MB")
+
+    async def interact(self, device, args, iface):
+        """Capture a buffer, decode it into per-pin value changes and write them as a VCD file."""
+        # Step 1: record a buffer
+        # we do this before to get the full USB performance and not have any lag-spikes in between
+        buffer = b""  # stays empty if the capture is interrupted before the read returns
+        try:
+            print(f"starting capture of {args.buffer_size} MB")
+            buffer = await iface.read(1024*1024 * args.buffer_size)
+        except KeyboardInterrupt:
+            pass
+        finally:
+            print("captured buffer, converting...")
+
+        # Step 2: parse the packets from the captured buffer and sort them into channels
+        ptr = 0
+        async def read(size, ) -> bytes:
+            nonlocal ptr
+            to_return = buffer[ptr:ptr+size]
+            ptr += size
+            if ptr > len(buffer):  # a read ending exactly at the end of the buffer is complete
+                raise EOFError
+            return to_return
+        channels = defaultdict(list)
+        chunks = 0
+        while True:
+            try:
+                channel, chunk = await LAArbiter.read_chunk(read)
+            except EOFError:
+                break
+            if len(chunk) == 255:
+                print(f"channel {channel} overrun")
+                break
+            # the arbiter serves 16 channels; chunks of channels without a pin are dropped
+            if channel < len(self._pins):
+                channels[self._pins[channel]].extend(chunk)
+                chunks += 1
+
+        # Step 3: convert each channels packets into events, attach timestamps and sort them by
+        # timestamp
+        events = []
+        cycles = None
+        for p, pkgs in channels.items():
+            cycle = 0
+            for pkg in pkgs:
+                for value, duration in SignalCompressor.decode_pkg(pkg):
+                    events.append((cycle, p, value))
+                    cycle += duration
+            cycles = cycle if cycles is None else min(cycles, cycle)
+        events.sort(key=lambda e: e[0])
+        if not cycles:
+            self.logger.warning("no complete chunk was captured; not writing any VCD data")
+            return
+
+        # Step 3.5: report statistics
+        total_pkgs = sum(len(pkgs) for pkgs in channels.values())
+        total_bytes = chunks + total_pkgs * 2
+        print(f"captured {cycles} samples ({cycles / self._sample_freq * 1000}ms)")
+        print(f"chunking overhead: {chunks / total_bytes * 100}%")
+        print(f"compression gain: {100 - (total_bytes * 8 / (cycles * len(self._pins)) * 100)}%")
+
+        # Step 4: write out VCD file
+        vcd_writer = VCDWriter(args.file, timescale="1 ns", check_values=False)
+        vcd_signals = {
+            p: vcd_writer.register_var(scope="", name=f"pin[{p}]", var_type="wire", size=1, init=0)
+            for p in self._pins
+        }
+        timestamp = 0  # closing time if not a single event gets written
+        for event_cycle, p, value in events:
+            if event_cycle > cycles:
+                # we dont write any timestamps for which we dont have data on all channels
+                break
+            timestamp = event_cycle * 1_000_000_000 // self._sample_freq
+            vcd_writer.change(vcd_signals[p], timestamp, value)
+        vcd_writer.close(timestamp)
diff --git a/software/glasgow/applet/interface/better_la/arbiter.py b/software/glasgow/applet/interface/better_la/arbiter.py
new file mode 100644
index 000000000..e96850f3b
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/arbiter.py
@@ -0,0 +1,110 @@
+from typing import Callable, List
+from amaranth import *
+from amaranth.lib.fifo import SyncFIFOBuffered
+
+from .signal_compressor import SignalCompressor
+from .step_encoder import StepEncoder
+from .argmax import ArgMax
+
+class LAArbiter(Elaboratable):
+    """This Logic Analyzer arbiter instantiates n SignalCompressors and n FIFOs and arbitrates
+    the output of the FIFOs based on their fill level. Its output format is one header byte (low
+    nibble: channel, high nibble: length encoded using the table below) followed by 2*length
+    bytes of compressed channel data (16 bit packets, low byte first).
+    """
+
+    LENGTH_ENCODING = [1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 255]
+
+    def __init__(self, output_fifo: SyncFIFOBuffered, n_channels=16):
+        self.output_fifo = output_fifo
+        assert output_fifo.width == 8
+        self.input = Signal(n_channels)  # one bit per captured channel
+
+    def elaborate(self, platform):
+        """Build the per-channel compressors and FIFOs and the FSM that drains them."""
+        m = Module()
+        fifos: List[SyncFIFOBuffered] = []
+        encoded_fifo_levels = []
+        for i, sig in enumerate(self.input):
+            fifo = SyncFIFOBuffered(width=16, depth=256)  # this is exactly one ice40 bram
+            m.submodules[f"fifo_{i}"] = fifo
+            fifos.append(fifo)
+            # every packet the compressor emits is pushed into the channel fifo
+            compressor = SignalCompressor(sig)
+            m.submodules[f"compressor_{i}"] = compressor
+            m.d.comb += fifo.w_en.eq(compressor.valid)
+            m.d.comb += fifo.w_data.eq(compressor.value)
+            # the fill level is rounded down to a chunk length that can be announced
+            step_encoder = StepEncoder(fifo.r_level, self.LENGTH_ENCODING)
+            m.submodules[f"step_encoder_{i}"] = step_encoder
+            encoded_fifo_levels.append(step_encoder.output)
+
+        fifo_r_data = Array(fifo.r_data for fifo in fifos)
+        fifo_r_en = Array(fifo.r_en for fifo in fifos)
+        fifo_r_rdy = Array(fifo.r_rdy for fifo in fifos)
+        length_decoding = Array(self.LENGTH_ENCODING)
+
+        # the argmax introduces 2 cycles of latency with pipelining to meet timing
+        # to accommodate that we get the real level of the selected fifo in a combinatorial path
+        # it does not matter if we select a suboptimal fifo but it is bad if we assume a wrong level
+        argmax = m.submodules.argmax = ArgMax(encoded_fifo_levels, sync_levels=[1, 3])
+        max_fifo_idx = argmax.max_idx
+        encoded_fifo_levels_array = Array(encoded_fifo_levels)
+        max_fifo_level_encoded = Signal(4)
+        m.d.comb += max_fifo_level_encoded.eq(encoded_fifo_levels_array[max_fifo_idx])
+        max_fifo_level = Signal(8)
+        m.d.comb += max_fifo_level.eq(length_decoding[max_fifo_level_encoded])
+        max_fifo_r_rdy = Signal()
+        m.d.comb += max_fifo_r_rdy.eq(fifo_r_rdy[max_fifo_idx])
+
+        to_transfer = Signal(8)  # must hold every value of LENGTH_ENCODING (up to 255)
+        current_channel = Signal(4)
+        with m.FSM():
+            with m.State("wait"):
+                with m.If(max_fifo_r_rdy):
+                    m.next = "announce"
+
+            with m.State("announce"):
+                m.d.sync += to_transfer.eq(max_fifo_level)
+                m.d.sync += current_channel.eq(max_fifo_idx)
+                m.d.comb += self.output_fifo.w_data.eq(Cat(max_fifo_idx, max_fifo_level_encoded))
+                m.d.comb += self.output_fifo.w_en.eq(max_fifo_r_rdy)
+                with m.If(~max_fifo_r_rdy):
+                    m.next = "wait"
+                with m.Elif(self.output_fifo.w_rdy):
+                    m.next = "send_lower"
+
+            with m.State("send_lower"):
+                m.d.comb += self.output_fifo.w_data.eq(fifo_r_data[current_channel][0:8])
+                m.d.comb += self.output_fifo.w_en.eq(1)
+                with m.If(self.output_fifo.w_rdy):
+                    m.next = "send_upper"
+            with m.State("send_upper"):
+                m.d.comb += self.output_fifo.w_data.eq(fifo_r_data[current_channel][8:16])
+                m.d.comb += self.output_fifo.w_en.eq(1)
+                with m.If(self.output_fifo.w_rdy):
+                    m.d.comb += fifo_r_en[current_channel].eq(1)
+                    with m.If(to_transfer > 1):
+                        m.next = "send_lower"
+                        m.d.sync += to_transfer.eq(to_transfer - 1)
+                    with m.Else():
+                        with m.If(max_fifo_r_rdy):
+                            m.next = "announce"
+                        with m.Else():
+                            m.next = "wait"
+
+        return m
+
+    @staticmethod
+    async def read_chunk(read: Callable[[int], bytes]):
+        """Read one chunk using ``read``; returns (channel, packets), or None if ``read`` does."""
+        header = await read(1)
+        if header is None:
+            return None
+        channel = header[0] & 0b1111
+        length_encoded = header[0] >> 4
+        length = LAArbiter.LENGTH_ENCODING[length_encoded]
+        contents = (await read(2 * length))
+        if contents is None:
+            return None
+        return channel, [contents[2*i+1] << 8 | contents[2*i] for i in range(length)]
diff --git a/software/glasgow/applet/interface/better_la/argmax.py b/software/glasgow/applet/interface/better_la/argmax.py
new file mode 100644
index 000000000..95db8477d
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/argmax.py
@@ -0,0 +1,52 @@
+from typing import List
+from amaranth import *
+
+class ArgMax(Elaboratable):
+    """
+    Find the maximum value and the index of the maximum value of a list of signals using a
+    comparison-tree.
+
+    ``signals`` is the list of signals to compare; the results are driven onto ``max_value``
+    and ``max_idx``. ``sync_levels`` lists the tree levels (0 is the root) whose comparison
+    results are registered in the sync domain instead of being combinatorial. On a tie the
+    signal with the higher index wins.
+    """
+    def __init__(self, signals: List[Signal], sync_levels=()):
+        self.signals = signals
+
+        self.sync_levels = sync_levels
+
+        self.max_value = Signal.like(signals[0])
+        self.max_idx = Signal(range(len(signals)))
+
+    def elaborate(self, platform):
+        m = Module()
+
+        def build_tree(signals, offset=0, level=0):
+            """Return ``(max value, index of max)`` for ``signals``; ``offset`` is the index of
+            ``signals[0]`` in the full list and ``level`` the depth of this node."""
+            if len(signals) == 1:
+                # a leaf: the signal itself and its constant index
+                return signals[0], offset
+
+            # a two element list needs no special case: both of its halves are leaves
+            half = len(signals) // 2
+            a, a_idx = build_tree(signals[:half], offset=offset, level=level+1)
+            b, b_idx = build_tree(signals[half:], offset=offset + half, level=level+1)
+
+            suffix = f"l{level}_{offset}to{offset+len(signals)}"
+            domain = m.d.sync if level in self.sync_levels else m.d.comb
+            value = Signal.like(self.signals[0], name=f"max_val_{suffix}")
+            index = Signal.like(self.max_idx, name=f"max_idx_{suffix}")
+            domain += [
+                value.eq(Mux(a > b, a, b)),
+                index.eq(Mux(a > b, a_idx, b_idx))
+            ]
+            return value, index
+
+        # the root node of the tree drives the outputs
+        val, idx = build_tree(self.signals)
+        m.d.comb += self.max_value.eq(val)
+        m.d.comb += self.max_idx.eq(idx)
+
+        return m
diff --git a/software/glasgow/applet/interface/better_la/signal_compressor.py b/software/glasgow/applet/interface/better_la/signal_compressor.py
new file mode 100644
index 000000000..f405e8e0b
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/signal_compressor.py
@@ -0,0 +1,70 @@
+from itertools import chain
+from typing import List, Tuple
+from amaranth import *
+
+class SignalCompressor(Elaboratable):
+    """The SignalCompressor converts information about value changes into an efficient compressed
+    format. It outputs a stream of 16 bit packets whose bit 0 selects one of two encodings:
+
+    bit 0 = 0: plain, no compression; bits 1..15 hold 15 raw samples
+    bit 0 = 1: run-length; bit 1 holds the sample value and bits 2..15 a 14 bit counter that
+               decode_pkg expands to counter + 1 cycles of that value
+    """
+    def __init__(self, signal):
+        self.signal = signal
+
+        self.valid = Signal()  # high while `value` carries a packet
+        self.value = Signal(16)
+
+    def elaborate(self, platform):
+        m = Module()
+        # `change` is high in the cycle where the input differs from its previous sample
+        last = Signal()
+        m.d.sync += last.eq(self.signal)
+        change = Signal()
+        m.d.comb += change.eq(self.signal ^ last)
+
+        # free-running cycle counter; the packet branches below reset it to 0
+        counter = Signal(14)
+        m.d.sync += counter.eq(counter + 1)
+        buffer = Signal(15)  # shift register holding the 15 most recent samples
+        m.d.sync += buffer.eq((buffer << 1) | self.signal)
+        plain_mode = Signal()  # set when the input changes while counter < 15
+
+        with m.If(change):
+            with m.If(counter < 15):
+                m.d.sync += plain_mode.eq(1)
+            with m.Elif(~plain_mode):
+                m.d.comb += self.valid.eq(1)
+                m.d.comb += self.value.eq(Cat(1, last, counter))  # run-length packet
+                m.d.sync += counter.eq(0)
+                m.d.sync += plain_mode.eq(0)
+
+        with m.If(counter == 2**len(counter) - 1):  # the counter is at its maximum value
+            m.d.comb += self.valid.eq(1)
+            m.d.comb += self.value.eq(Cat(1, last, counter))
+            m.d.sync += counter.eq(0)
+            m.d.sync += plain_mode.eq(0)
+
+        with m.If(plain_mode & (counter == 14)):
+            m.d.comb += self.valid.eq(1)
+            m.d.comb += self.value.eq(Cat(0, buffer))  # plain packet with the raw samples
+            m.d.sync += counter.eq(0)
+            m.d.sync += plain_mode.eq(0)
+
+        return m
+
+    @staticmethod
+    def decode_pkg(pkg) -> List[Tuple[int, int]]:
+        """Decode one 16 bit packet into a list of (value, duration in cycles) tuples."""
+        if pkg & 0b1:
+            value = pkg >> 1 & 0b01
+            duration = pkg >> 2
+            return [(value, duration + 1)]
+        else:
+            return [(int(x), 1) for x in list('{0:015b}'.format(pkg >> 1))]
+
+    @staticmethod
+    def expand_duration_list(duration_list: List[Tuple[int, int]]) -> List[int]:
+        """Expand (value, duration) tuples into a flat list with one value per cycle."""
+        return list(chain(*[[value] * duration for value, duration in duration_list]))
diff --git a/software/glasgow/applet/interface/better_la/step_encoder.py b/software/glasgow/applet/interface/better_la/step_encoder.py
new file mode 100644
index 000000000..0966b8ed3
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/step_encoder.py
@@ -0,0 +1,22 @@
+from typing import List
+from amaranth import *
+
+class StepEncoder(Elaboratable):
+    """Drive ``output`` with the index of the last entry of ``possible_values`` <= ``input``."""
+    def __init__(self, input: Signal, possible_values: List[int]):
+        self.input = input
+        self.possible_values = possible_values
+
+        self.output = Signal(range(len(possible_values)))
+
+    def elaborate(self, platform):
+        m = Module()
+        for i, v in enumerate(self.possible_values):
+            with m.If(self.input >= v):  # a later matching entry overrides an earlier one
+                m.d.comb += self.output.eq(i)
+
+        # we add this to have a sync domain and be able to use the simulation helpers
+        a = Signal()
+        m.d.sync += a.eq(~a)
+
+        return m
\ No newline at end of file
diff --git a/software/glasgow/applet/interface/better_la/test.py b/software/glasgow/applet/interface/better_la/test.py
new file mode 100644
index 000000000..9f8ec4add
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/test.py
@@ -0,0 +1,149 @@
+import unittest
+import random
+from amaranth import *
+
+from ....gateware import simulation_test
+from ....applet import GlasgowAppletTestCase, applet_simulation_test, synthesis_test
+from .signal_compressor import SignalCompressor
+from .arbiter import LAArbiter
+from .argmax import ArgMax
+from .step_encoder import StepEncoder
+from . import BetterLAApplet
+
+
+class SignalCompressorTestCase(unittest.TestCase):
+    """Simulation tests that compare decoded SignalCompressor packets with the driven input."""
+    def setUp(self):
+        self.tb = SignalCompressor(Signal(name="input"))
+
+    @simulation_test
+    def test_rlu(self, tb):
+        """The packet that is valid after a run of 100 ones must expand to 100 ones."""
+        for _ in range(100):
+            yield
+        yield self.tb.signal.eq(1)
+        for _ in range(100):
+            yield
+        yield self.tb.signal.eq(0)
+        yield
+
+        assert (yield self.tb.valid) == 1
+        duration_list = SignalCompressor.decode_pkg((yield self.tb.value))
+        assert SignalCompressor.expand_duration_list(duration_list) == [1] * 100
+
+    @simulation_test
+    def test_fallback(self, tb):
+        """Rapidly toggling input: the decoded stream must equal the input delayed by 2 samples."""
+        tx_string = "1011001001010000111100010010011100011100101010001010111001111000"
+        tx = [int(x) for x in tx_string]
+
+        rx = []
+        for x in tx:
+            yield self.tb.signal.eq(x)
+            if (yield self.tb.valid):
+                rx.append((yield self.tb.value))
+            yield
+
+        decoded = []
+        for pkg in rx:
+            decoded.extend(SignalCompressor.expand_duration_list(SignalCompressor.decode_pkg(pkg)))
+        print(f"saved {100 - (len(rx) * 16 / len(decoded) * 100)}%")
+        assert decoded[2:] == tx[:len(decoded)-2]
+
+    @simulation_test
+    def test_decode(self, tb):
+        """Random mix of short and long runs (fixed seed), checked like test_fallback."""
+        random.seed(0)
+        tx = []
+        for _ in range(100):
+            val = random.randint(0, 1)
+            length = random.randint(1, 7) if random.randint(0, 1) else random.randint(1, 250)
+            tx.extend(val for _ in range(length))
+
+        rx = []
+        for x in tx:
+            yield self.tb.signal.eq(x)
+            if (yield self.tb.valid):
+                rx.append((yield self.tb.value))
+            yield
+
+        decoded = []
+        for pkg in rx:
+            decoded.extend(SignalCompressor.expand_duration_list(SignalCompressor.decode_pkg(pkg)))
+        print(f"saved {100 - (len(rx) * 16 / len(decoded) * 100)}%")
+        assert decoded[2:] == tx[:len(decoded)-2]
+
+
+class ArgMaxTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tb = ArgMax([Signal(8, name=f"input_{i}") for i in range(10)], sync_levels=[1, 3])
+
+    @simulation_test
+    def test(self, tb):
+        """The outputs must report the largest input three simulation steps after it is set."""
+        yield self.tb.signals[3].eq(10)  # every other input keeps its reset value
+        yield
+        yield
+        yield
+        assert (yield self.tb.max_idx) == 3
+        assert (yield self.tb.max_value) == 10
+        yield self.tb.signals[7].eq(22)  # a larger value on another input must take over
+        yield
+        yield
+        yield
+        assert (yield self.tb.max_idx) == 7
+        assert (yield self.tb.max_value) == 22
+
+
+class StepEncoderTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tb = StepEncoder(Signal(8, name="input"), LAArbiter.LENGTH_ENCODING)
+
+    @simulation_test
+    def test(self, tb):
+        """Apply each (input, expected output) pair and check it after one simulation step."""
+        testdata = [
+            (0, 0),  # smaller than every entry of LENGTH_ENCODING
+            (1, 0),
+            (10, 5),  # LENGTH_ENCODING[5] == 8, LENGTH_ENCODING[6] == 12
+            (100, 12)  # LENGTH_ENCODING[12] == 96, LENGTH_ENCODING[13] == 128
+        ]
+        for input, output in testdata:
+            yield self.tb.input.eq(input)
+            yield
+            assert (yield self.tb.output) == output
+
+
+class BetterLAAppletTestCase(GlasgowAppletTestCase, applet=BetterLAApplet):
+    @synthesis_test
+    def test_build(self):
+        self.assertBuilds()
+
+    def setup_demo_source(self):  # referenced by name from test_smoke's decorator
+        self.build_simulated_applet()
+        mux_iface = self.applet.mux_interface
+        m = Module()
+        m.d.sync += mux_iface.pads.i_t.i.eq(mux_iface.pads.i_t.i + 1)  # the pins count up
+        self.target.add_submodule(m)
+
+    @applet_simulation_test("setup_demo_source", ["--pins-i", "0:15"])
+    async def test_smoke(self):
+        applet = await self.run_simulated_applet()
+        channels = [[] for _ in range(16)]  # decoded samples, one list per channel
+        for _ in range(100):
+            channel, chunk = await LAArbiter.read_chunk(applet.read)
+            assert len(chunk) != 255  # the largest encodable chunk length must not occur
+            for pkg in chunk:
+                duration_list = SignalCompressor.decode_pkg(pkg)
+                expanded = SignalCompressor.expand_duration_list(duration_list)
+                channels[channel].extend(expanded)
+        for i, channel in enumerate(channels):  # every run on channel i must last 2**i samples
+            duration = 0
+            last = 0
+            for j, x in enumerate(channel[3:]):  # the first three decoded samples are skipped
+                if x == last:
+                    duration += 1
+                else:
+                    assert duration == 2**i, f"channel {i} at position {j}"
+                    duration = 1
+                    last = x
diff --git a/software/pyproject.toml b/software/pyproject.toml
index fcaab3a85..ae149ecb2 100644
--- a/software/pyproject.toml
+++ b/software/pyproject.toml
@@ -81,6 +81,7 @@ selftest = "glasgow.applet.internal.selftest:SelfTestApplet"
 benchmark = "glasgow.applet.internal.benchmark:BenchmarkApplet"
 
 analyzer = "glasgow.applet.interface.analyzer:AnalyzerApplet"
+better-la = "glasgow.applet.interface.better_la:BetterLAApplet"
 uart = "glasgow.applet.interface.uart:UARTApplet"
 spi-controller = "glasgow.applet.interface.spi_controller:SPIControllerApplet"
 i2c-initiator = "glasgow.applet.interface.i2c_initiator:I2CInitiatorApplet"