diff --git a/R/summariseExpression.R b/R/summariseExpression.R
new file mode 100644
index 00000000..05597890
--- /dev/null
+++ b/R/summariseExpression.R
@@ -0,0 +1,182 @@
+#' Summarise transcript expression to a specified unit
+#' @title summarise by expression
+#' @param se a \code{SummarizedExperiment} object from \code{\link{bambu}}
+#' @param type character, the unit to summarise by. Currently supports
+#'   \code{"exon"} (default) and \code{"gene"}.
+#' @return A \code{RangedSummarizedExperiment} with expression summarised to
+#'   the requested unit. Assays \code{counts} and \code{CPM} are always
+#'   present; \code{fullLengthCounts} and \code{uniqueCounts} are included
+#'   when present in the input. For \code{type = "exon"}, rows are named
+#'   \code{EX1, EX2, ...} and \code{rowRanges} mcols include \code{GENEID},
+#'   \code{txNames}, and \code{exonClass} (\code{"annotated"} or
+#'   \code{"novel"}). For \code{type = "gene"}, rows are named by GENEID and
+#'   \code{rowRanges} is a \code{GRangesList} of reduced exon ranges per gene;
+#'   mcols include \code{GENEID} and \code{newGeneClass}.
+#' @details Counts are summed across all transcripts belonging to the same
+#'   group (e.g. identical exon locus or gene). CPM is recomputed from the
+#'   aggregated counts. For \code{type = "exon"}, an exon is \code{"novel"}
+#'   only if all contributing transcripts have \code{novelTranscript = TRUE}.
+#'   For \code{type = "gene"}, \code{newGeneClass} is derived from
+#'   \code{txClassDescription}: Ensembl genes (GENEID matching \code{"ENSG"})
+#'   are labelled \code{"annotation"}, others inherit the transcript class.
+#'   Incompatible and non-unique counts stored in \code{metadata(se)} are
+#'   added to the aggregated counts before CPM computation.
+#' @examples
+#' se <- readRDS(system.file("extdata",
+#'     "seOutput_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds",
+#'     package = "bambu"
+#' ))
+#' summariseByExpression(se, type = "exon")
+#' summariseByExpression(se, type = "gene")
+#' @importFrom Matrix sparseMatrix Matrix
+#' @importFrom S4Vectors SimpleList metadata
+#' @importFrom SummarizedExperiment rowRanges rowData assays colData SummarizedExperiment
+#' @importFrom GenomicRanges GRanges seqnames start end strand mcols
+#' @importFrom IRanges IRanges
+#' @importFrom dplyr left_join mutate group_by summarise cur_group_id ungroup
+#' @export
+summariseByExpression <- function(se, type = "exon") {
+    # build the grouping index and output ranges for the requested type
+    index <- switch(type,
+        exon = buildExonIndex(se),
+        gene = buildGeneIndex(se),
+        stop("Unsupported type: '", type, "'")
+    )
+
+    # sparse aggregation matrix: rows = groups, cols = transcripts
+    # multiplying by transcript counts gives group-level counts
+    txNames <- rownames(se)
+    nGroups <- length(index$outRanges)
+    groupNames <- names(index$outRanges)
+
+    aggregationMat <- sparseMatrix(
+        i    = index$indexDf$group_idx,
+        j    = match(index$indexDf$TXNAME, txNames),
+        x    = 1L,
+        dims = c(nGroups, length(txNames)),
+        dimnames = list(groupNames, txNames)
+    )
+
+    # aggregate transcript counts to group level
+    counts <- aggregationMat %*% assays(se)$counts
+
+    # for gene level, add reads that could not be assigned to any transcript
+    # but were localised to a gene, stored in metadata by bambu
+    if (type == "gene" && !is.null(metadata(se)$incompatibleCounts)) {
+        incompat <- metadata(se)$incompatibleCounts
+        if ("nonuniqueCounts" %in% names(metadata(se)))
+            incompat <- incompat + metadata(se)$nonuniqueCounts
+        incompat <- Matrix(
+            incompat[match(rownames(counts), rownames(incompat)), ],
+            sparse = TRUE
+        )
+        counts <- counts + incompat
+    }
+
+    # compute CPM from aggregated counts
+    counts.total <- colSums(counts)
+    counts.total[counts.total == 0] <- 1
+    cpm <- counts / counts.total * 10^6
+
+    # aggregate optional assays if present
+    outAssays <- SimpleList(counts = counts, CPM = cpm)
+    for (i in c("fullLengthCounts", "uniqueCounts")) {
+        if (i %in% names(assays(se))) {
+            outAssays[[i]] <- aggregationMat %*% assays(se)[[i]]
+        }
+    }
+
+    SummarizedExperiment(
+        assays    = outAssays,
+        rowRanges = index$outRanges,
+        colData   = colData(se)
+    )
+}
+
+#' @param se a \code{SummarizedExperiment} object from \code{\link{bambu}}
+#' @return a list with:
+#'   \code{indexDf} — data.frame with columns TXNAME and group_idx
+#'     (one row per exon-transcript pair);
+#'   \code{outRanges} — GRanges with one entry per unique exon locus,
+#'     named EX1, EX2, ..., with mcols GENEID, txNames, and exonClass.
+#' @noRd
+buildExonIndex <- function(se) {
+    # one row per exon-transcript pair, joined with transcript metadata
+    txDf <- as.data.frame(rowData(se))[, c("TXNAME", "GENEID", "novelTranscript"), drop = FALSE]
+    exonRanges <- unlist(rowRanges(se), use.names = TRUE)
+    flatDf <- data.frame(
+        TXNAME   = names(exonRanges),
+        seqnames = as.character(seqnames(exonRanges)),
+        start    = start(exonRanges),
+        end      = end(exonRanges),
+        strand   = as.character(strand(exonRanges)),
+        stringsAsFactors = FALSE
+    ) %>%
+        left_join(txDf, by = "TXNAME") %>%
+        # assign each unique exon locus a group index
+        group_by(seqnames, start, end, strand) %>%
+        mutate(group_idx = cur_group_id()) %>%
+        ungroup()
+
+    # one row per unique exon locus with summarised metadata
+    groupMeta <- flatDf %>%
+        group_by(group_idx) %>%
+        summarise(
+            seqnames  = seqnames[1],
+            start     = start[1],
+            end       = end[1],
+            strand    = strand[1],
+            GENEID    = paste(sort(unique(GENEID)), collapse = ","),
+            txNames   = paste(sort(unique(TXNAME)), collapse = ","),
+            exonClass = ifelse(all(novelTranscript), "novel", "annotated"),
+            .groups   = "drop"
+        )
+
+    # build output GRanges with exon metadata
+    outRanges <- GRanges(
+        seqnames = groupMeta$seqnames,
+        ranges   = IRanges(start = groupMeta$start, end = groupMeta$end),
+        strand   = groupMeta$strand
+    )
+    names(outRanges) <- paste0("EX", seq_len(nrow(groupMeta)))
+    mcols(outRanges)$GENEID    <- groupMeta$GENEID
+    mcols(outRanges)$txNames   <- groupMeta$txNames
+    mcols(outRanges)$exonClass <- groupMeta$exonClass
+
+    list(
+        indexDf   = flatDf[, c("TXNAME", "group_idx")],
+        outRanges = outRanges
+    )
+}
+
+#' @param se a \code{SummarizedExperiment} object from \code{\link{bambu}}
+#' @return a list with:
+#'   \code{indexDf} — data.frame with columns TXNAME and group_idx
+#'     (one row per transcript);
+#'   \code{outRanges} — GRangesList with one element per gene, named by
+#'     GENEID, with mcols GENEID and newGeneClass.
+#' @noRd
+buildGeneIndex <- function(se) {
+    # one row per transcript; group_idx preserves first-appearance order of GENEIDs
+    rd <- as.data.frame(rowData(se))[, c("TXNAME", "GENEID", "txClassDescription"), drop = FALSE] %>%
+        mutate(group_idx = match(GENEID, unique(GENEID)))
+
+    # one row per gene with class label derived from txClassDescription
+    groupMeta <- rd %>%
+        group_by(group_idx) %>%
+        summarise(
+            GENEID       = GENEID[1],
+            newGeneClass = ifelse(grepl("ENSG", GENEID[1]), "annotation", unique(txClassDescription)[1]),
+            .groups      = "drop"
+        )
+
+    # build output GRangesList of reduced exon ranges per gene
+    outRanges <- reducedRangesByGenes(rowRanges(se))[groupMeta$GENEID]
+    mcols(outRanges)$GENEID       <- groupMeta$GENEID
+    mcols(outRanges)$newGeneClass <- groupMeta$newGeneClass
+
+    list(
+        indexDf   = rd[, c("TXNAME", "group_idx")],
+        outRanges = outRanges
+    )
+}
diff --git a/tests/testthat/test_summariseExpression.R b/tests/testthat/test_summariseExpression.R
new file mode 100644
index 00000000..e77b22c2
--- /dev/null
+++ b/tests/testthat/test_summariseExpression.R
@@ -0,0 +1,57 @@
+context("Summarise expression")
+
+test_that("summariseByExpression correctly reduces transcript counts to exon and gene counts", {
+    # TX1: exons at 1-100, 200-300 -> GENE1, count = 10
+    # TX2: exons at 1-100, 400-500 -> GENE1, shares first exon with TX1, count = 5
+    # TX3: exon at 600-700         -> GENE2, count = 8
+    #
+    # Expected exon counts:
+    #   chr1:1-100   (TX1 + TX2) -> 15
+    #   chr1:200-300 (TX1 only)  -> 10
+    #   chr1:400-500 (TX2 only)  ->  5
+    #   chr1:600-700 (TX3 only)  ->  8
+    #
+    # Expected gene counts:
+    #   GENE1 (TX1 + TX2) -> 15
+    #   GENE2 (TX3 only)  ->  8
+
+    txRanges <- GRangesList(
+        TX1 = GRanges("chr1", IRanges(c(1, 200), c(100, 300)), strand = "+"),
+        TX2 = GRanges("chr1", IRanges(c(1, 400), c(100, 500)), strand = "+"),
+        TX3 = GRanges("chr1", IRanges(600, 700), strand = "+")
+    )
+    mcols(txRanges)$TXNAME             <- c("TX1", "TX2", "TX3")
+    mcols(txRanges)$GENEID             <- c("GENE1", "GENE1", "GENE2")
+    mcols(txRanges)$novelTranscript    <- c(FALSE, FALSE, FALSE)
+    mcols(txRanges)$txClassDescription <- c("annotation", "annotation", "annotation")
+
+    counts <- matrix(c(10, 5, 8), nrow = 3, ncol = 1,
+        dimnames = list(c("TX1", "TX2", "TX3"), "sample1"))
+
+    se <- SummarizedExperiment(
+        assays    = list(counts = counts),
+        rowRanges = txRanges
+    )
+
+    # exon level
+    seExon <- summariseByExpression(se, type = "exon")
+    exonCounts <- as.matrix(assays(seExon)$counts)
+    exonRanges <- rowRanges(seExon)
+
+    shared  <- which(start(exonRanges) == 1   & end(exonRanges) == 100)
+    tx1only <- which(start(exonRanges) == 200 & end(exonRanges) == 300)
+    tx2only <- which(start(exonRanges) == 400 & end(exonRanges) == 500)
+    tx3only <- which(start(exonRanges) == 600 & end(exonRanges) == 700)
+
+    expect_equal(exonCounts[shared,  1], 15)
+    expect_equal(exonCounts[tx1only, 1], 10)
+    expect_equal(exonCounts[tx2only, 1],  5)
+    expect_equal(exonCounts[tx3only, 1],  8)
+
+    # gene level
+    seGene <- summariseByExpression(se, type = "gene")
+    geneCounts <- as.matrix(assays(seGene)$counts)
+
+    expect_equal(geneCounts["GENE1", 1], 15)
+    expect_equal(geneCounts["GENE2", 1],  8)
+})