Skip to content

Commit 7162bff

Browse files
committed
[mlir][dxsa] Add unknown instruction
Example:

    dxsa.dcl_temps 1
    dxsa.unknown <tokens = [0x030007FF, 0xDEADBEEF, 0x12345678]>
    dxsa.dcl_temps 2

Signed-off-by: Vladimir Shiryaev <tagolog@users.noreply.github.com>
1 parent 6b59659 commit 7162bff

8 files changed

Lines changed: 239 additions & 4 deletions

File tree

mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,28 @@ def DXSA_Instruction : DXSA_Op<"instruction"> {
493493
let assemblyFormat = "$mnemonic $operands attr-dict";
494494
}
495495

496+
// Fallback operation that carries the raw tokens of one instruction that
// could not be decoded into a structured op.
def DXSA_Unknown : DXSA_Op<"unknown"> {
497+
let summary = "raw tokens fallback for an undecodable instruction";
498+
let description = [{
499+
The `dxsa.unknown` operation represents one instruction whose raw
500+
tokens could not be decoded into a structured op — unknown opcode,
501+
undecodable payload, length mismatch, truncated tail, etc. It acts
502+
as a disassembler-style fallback so the surrounding well-formed
503+
instructions still appear in IR.
504+
505+
Example:
506+
507+
```mlir
508+
dxsa.unknown <tokens = [0x030000FF, 0xDEADBEEF, 0x12345678]>
509+
```
510+
}];
511+
512+
// The raw instruction tokens, stored as a dense array of i32 values.
let arguments = (ins DenseI32ArrayAttr:$tokens);
513+
// The op produces no results.
let results = (outs);
514+
// Enables Unknown::verify(), which rejects an empty token list.
let hasVerifier = 1;
515+
// The token list is parsed and printed by the parseHexTokens /
// printHexTokens custom-directive helpers as `<tokens = [0x..., ...]>`.
let assemblyFormat = "custom<HexTokens>($tokens) attr-dict";
516+
}
517+
496518
def DXSA_InlineOperandType_Temp : I32EnumAttrCase<"temp", 0>;
497519
def DXSA_InlineOperandType_Input : I32EnumAttrCase<"input", 1>;
498520
def DXSA_InlineOperandType_Output : I32EnumAttrCase<"output", 2>;

mlir/lib/Dialect/DXSA/IR/DXSA.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "mlir/IR/OpImplementation.h"
1414
#include "llvm/ADT/StringExtras.h"
1515
#include "llvm/ADT/TypeSwitch.h"
16+
#include "llvm/Support/Format.h"
1617

1718
using namespace mlir;
1819
using namespace mlir::dxsa;
@@ -35,6 +36,12 @@ void DXSADialect::initialize() {
3536
>();
3637
}
3738

39+
/// Forward declarations of the `custom<HexTokens>` directive helpers. They
40+
/// are declared here, ahead of the TableGen-generated print/parse methods
/// that reference them, and are defined further down in this file.
41+
static ParseResult parseHexTokens(OpAsmParser &parser, DenseI32ArrayAttr &attr);
42+
static void printHexTokens(OpAsmPrinter &printer, Operation *,
43+
DenseI32ArrayAttr attr);
44+
3845
//===----------------------------------------------------------------------===//
3946
// TableGen'd op method definitions
4047
//===----------------------------------------------------------------------===//
@@ -162,6 +169,50 @@ LogicalResult DclConstantBuffer::verify() {
162169
return success();
163170
}
164171

172+
//===----------------------------------------------------------------------===//
173+
// UnknownOp
174+
//===----------------------------------------------------------------------===//
175+
176+
/// Verifier for `dxsa.unknown`: the op must carry at least one token.
LogicalResult Unknown::verify() {
  const auto rawTokens = getTokens();
  if (!rawTokens.empty())
    return success();
  return emitOpError("tokens must not be empty");
}
181+
182+
/// Parse `<tokens = [0x..., ...]>` for the unknown op.
183+
static ParseResult parseHexTokens(OpAsmParser &parser,
184+
DenseI32ArrayAttr &attr) {
185+
SmallVector<int32_t> tokens;
186+
auto parseOneToken = [&]() -> ParseResult {
187+
uint32_t value;
188+
if (parser.parseInteger(value))
189+
return failure();
190+
tokens.push_back(static_cast<int32_t>(value));
191+
return success();
192+
};
193+
194+
if (parser.parseLess() || parser.parseKeyword("tokens") ||
195+
parser.parseEqual() ||
196+
parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Square,
197+
parseOneToken) ||
198+
parser.parseGreater())
199+
return failure();
200+
201+
attr = DenseI32ArrayAttr::get(parser.getContext(), tokens);
202+
return success();
203+
}
204+
205+
/// Print the tokens array as uppercase, 8-digit, 0x-prefixed hex.
206+
static void printHexTokens(OpAsmPrinter &printer, Operation *,
207+
DenseI32ArrayAttr attr) {
208+
printer << "<tokens = [";
209+
llvm::interleaveComma(attr.asArrayRef(), printer.getStream(), [&](int32_t t) {
210+
printer.getStream() << llvm::format_hex(static_cast<uint32_t>(t),
211+
/*Width=*/10, /*Upper=*/true);
212+
});
213+
printer << "]>";
214+
}
215+
165216
//===----------------------------------------------------------------------===//
166217
// TableGen'd attribute method definitions
167218
//===----------------------------------------------------------------------===//

mlir/lib/Target/DXSA/BinaryParser.cpp

Lines changed: 146 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "mlir/Target/DXSA/BinaryParser.h"
1010
#include "mlir/Dialect/DXSA/IR/DXSA.h"
1111
#include "mlir/IR/Builders.h"
12+
#include "mlir/IR/Diagnostics.h"
1213
#include "mlir/IR/Location.h"
1314
#include "llvm/ADT/ArrayRef.h"
1415
#include "llvm/ADT/SmallVector.h"
@@ -531,6 +532,43 @@ class DXBuilder {
531532
builder.getStringAttr(name));
532533
}
533534

535+
/// Create a `dxsa.unknown` op at the current insertion point that carries
/// `tokens` as its raw token array.
Instruction buildUnknown(ArrayRef<int32_t> tokens, Location loc) {
  auto tokensAttr = DenseI32ArrayAttr::get(builder.getContext(), tokens);
  return dxsa::Unknown::create(builder, loc, tokensAttr);
}
539+
540+
/// RAII guard for speculative IR construction. On destruction, erases
541+
/// every op appended to the active insertion block after the guard
542+
/// was created, unless release() was called first.
543+
class RewindGuard {
544+
public:
545+
explicit RewindGuard(DXBuilder &dxBuilder)
546+
: builder(dxBuilder.builder),
547+
block(dxBuilder.builder.getInsertionBlock()),
548+
numOps(block->getOperations().size()) {}
549+
550+
~RewindGuard() {
551+
if (released)
552+
return;
553+
while (block->getOperations().size() > numOps)
554+
block->back().erase();
555+
builder.setInsertionPointToEnd(block);
556+
}
557+
558+
RewindGuard(const RewindGuard &) = delete;
559+
RewindGuard &operator=(const RewindGuard &) = delete;
560+
561+
/// Accept the speculative work: keep all ops added since
562+
/// construction and disarm the destructor.
563+
void release() { released = true; }
564+
565+
private:
566+
OpBuilder &builder;
567+
Block *block;
568+
size_t numOps;
569+
bool released = false;
570+
};
571+
534572
Instruction buildDclGlobalFlags(dxsa::GlobalFlags flags, Location loc) {
535573
auto flagsAttr = dxsa::GlobalFlagsAttr::get(builder.getContext(), flags);
536574
return dxsa::DclGlobalFlags::create(builder, loc, flagsAttr);
@@ -726,9 +764,11 @@ class Parser {
726764
using Instruction = DXBuilder::Instruction;
727765
using Module = DXBuilder::Module;
728766

767+
/// Size in bytes of a single token in the program binary stream; each
/// token is one 32-bit word.
768+
static constexpr size_t tokenSize = sizeof(uint32_t);
769+
729770
/// Parse the current token and move the cursor to the next one.
730771
Token parseToken() {
731-
constexpr size_t tokenSize = sizeof(uint32_t);
732772
if ((currentTokenOffset + tokenSize) > buffer.size()) {
733773
emitError(getLocation(), "unexpected end of file");
734774
return failure();
@@ -1531,6 +1571,110 @@ class Parser {
15311571
loc);
15321572
}
15331573

1574+
/// Byte range of one instruction in `buffer`, expressed as a start offset
/// and a token count, plus the reason to report if the range ends up being
/// emitted as `dxsa.unknown`. Produced by sliceInstruction(). The fields are
/// initialized positionally, so their order must not change.
struct InstructionSlice {
1575+
size_t beginOffset; // absolute byte offset in `buffer`
1576+
uint32_t numTokens; // number of tokens to consume (>= 1)
1577+
StringRef hint; // short reason, included in the warning
1578+
};
1579+
1580+
std::optional<InstructionSlice> sliceInstruction() {
1581+
auto beginOffset = currentTokenOffset;
1582+
auto remainingBytes = buffer.size() - beginOffset;
1583+
if (remainingBytes < tokenSize)
1584+
return std::nullopt;
1585+
auto token0 = support::endian::read<uint32_t>(buffer.begin() + beginOffset,
1586+
endianness::little);
1587+
auto opcode = DECODE_D3D10_SB_OPCODE_TYPE(token0);
1588+
// CUSTOMDATA: total token count lives in the second token.
1589+
// The minimum legal value is 2 (the header pair itself).
1590+
if (opcode == D3D10_SB_OPCODE_CUSTOMDATA) {
1591+
if (remainingBytes < 2 * tokenSize)
1592+
return InstructionSlice{beginOffset, 1, "truncated customdata header"};
1593+
auto token1 = support::endian::read<uint32_t>(
1594+
buffer.begin() + beginOffset + tokenSize, endianness::little);
1595+
if (token1 < 2)
1596+
return InstructionSlice{beginOffset, 2, "customdata length < 2"};
1597+
auto instructionSize = static_cast<uint64_t>(token1) * tokenSize;
1598+
if (instructionSize > remainingBytes)
1599+
return InstructionSlice{
1600+
beginOffset, static_cast<uint32_t>(remainingBytes / tokenSize),
1601+
"customdata length past EOF"};
1602+
return InstructionSlice{beginOffset, token1, "customdata block"};
1603+
}
1604+
1605+
// Regular instruction
1606+
auto length = DECODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(token0);
1607+
if (length == 0)
1608+
return InstructionSlice{beginOffset, 1,
1609+
"instruction length encoded as 0"};
1610+
auto instructionSize = static_cast<uint64_t>(length) * tokenSize;
1611+
if (instructionSize > remainingBytes)
1612+
return InstructionSlice{beginOffset,
1613+
static_cast<uint32_t>(remainingBytes / tokenSize),
1614+
"instruction length past EOF"};
1615+
return InstructionSlice{beginOffset, length, "structured decode failed"};
1616+
}
1617+
1618+
/// Read `numTokens` tokens from `beginOffset` and return them as
1619+
/// i32s in little-endian order. Caller must guarantee the range is
1620+
/// in-bounds.
1621+
SmallVector<int32_t> snapshotTokens(size_t beginOffset, uint32_t numTokens) {
1622+
SmallVector<int32_t> out(numTokens);
1623+
for (uint32_t i = 0; i < numTokens; ++i)
1624+
out[i] = static_cast<int32_t>(support::endian::read<uint32_t>(
1625+
buffer.begin() + beginOffset + i * tokenSize, endianness::little));
1626+
return out;
1627+
}
1628+
1629+
/// Run the structured parser over the slice.
1630+
/// On success the produced ops are kept.
1631+
/// On failure any partial IR is rewound by guard.
1632+
bool tryParseAsStructured(const InstructionSlice &slice) {
1633+
auto expectedEnd = slice.beginOffset + slice.numTokens * tokenSize;
1634+
DXBuilder::RewindGuard guard(builder);
1635+
ScopedDiagnosticHandler suppress(name.getContext(),
1636+
[](Diagnostic &) { return success(); });
1637+
if (succeeded(parseInstruction()) && currentTokenOffset == expectedEnd) {
1638+
guard.release();
1639+
return true;
1640+
}
1641+
return false;
1642+
}
1643+
1644+
/// Emit an unknown op for the slice.
1645+
LogicalResult parseAsUnknown(const InstructionSlice &slice) {
1646+
currentTokenOffset = slice.beginOffset + slice.numTokens * tokenSize;
1647+
auto loc = FileLineColLoc::get(name, 0, slice.beginOffset);
1648+
SmallVector<int32_t> rawTokens =
1649+
snapshotTokens(slice.beginOffset, slice.numTokens);
1650+
builder.buildUnknown(rawTokens, loc);
1651+
emitWarning(loc) << "could not decode instruction (" << slice.hint
1652+
<< "); emitted dxsa.unknown with " << slice.numTokens
1653+
<< " token(s)";
1654+
return success();
1655+
}
1656+
1657+
/// Try structured parse; on any failure (or partial consumption)
1658+
/// rewind any IR built during the attempt, advance the byte cursor
1659+
/// past the slice, and emit `dxsa.unknown` covering the same range.
1660+
LogicalResult parseInstructionOrUnknown() {
1661+
auto slice = sliceInstruction();
1662+
if (!slice) {
1663+
// Nothing to wrap in an op.
1664+
auto trailingBytes = buffer.size() - currentTokenOffset;
1665+
if (trailingBytes > 0)
1666+
emitWarning(getLocation(0))
1667+
<< "ignoring " << trailingBytes << " trailing byte(s)";
1668+
currentTokenOffset = buffer.size();
1669+
return success();
1670+
}
1671+
1672+
if (tryParseAsStructured(*slice))
1673+
return success();
1674+
1675+
return parseAsUnknown(*slice);
1676+
}
1677+
15341678
FailureOr<Module> parseModule() {
15351679
FileLineColLoc loc = getLocation(0);
15361680
auto header = parseProgramHeader();
@@ -1545,10 +1689,8 @@ class Parser {
15451689
}
15461690
auto module = builder.createModule(programType, shaderVersion, loc);
15471691
while (currentTokenOffset < buffer.size()) {
1548-
FailureOr<Instruction> inst = parseInstruction();
1549-
if (failed(inst)) {
1692+
if (failed(parseInstructionOrUnknown()))
15501693
return failure();
1551-
}
15521694
}
15531695
return module;
15541696
}
64 Bytes
Binary file not shown.
20 Bytes
Binary file not shown.

mlir/test/Target/DXSA/unknown.mlir

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: mlir-translate --import-dxsa-bin %S/inputs/unknown.bin | FileCheck %s
2+
3+
// CHECK: dxsa.module {
4+
// CHECK-NEXT: dxsa.dcl_temps 1
5+
// CHECK-NEXT: dxsa.unknown <tokens = [0x030007FF, 0xDEADBEEF, 0x12345678]>
6+
// CHECK-NEXT: dxsa.dcl_temps 2
7+
// CHECK-NEXT: dxsa.unknown <tokens = [0x00000035, 0x00000004, 0x11111111, 0x22222222]>
8+
// CHECK-NEXT: dxsa.dcl_temps 3
9+
// CHECK-NEXT: dxsa.unknown <tokens = [0x03000068, 0x00000005, 0xCAFEBABE]>
10+
// CHECK-NEXT: }
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// RUN: mlir-opt %s -split-input-file -verify-diagnostics
2+
3+
// expected-error@+1 {{'dxsa.unknown' op tokens must not be empty}}
4+
dxsa.unknown <tokens = []>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// RUN: mlir-translate --import-dxsa-bin %S/inputs/unknown_past_eof.bin | FileCheck %s
2+
3+
// CHECK: dxsa.module {
4+
// CHECK-NEXT: dxsa.dcl_temps 1
5+
// CHECK-NEXT: dxsa.unknown <tokens = [0x050007FF, 0xAAAAAAAA, 0xBBBBBBBB]>
6+
// CHECK-NEXT: }

0 commit comments

Comments
 (0)