99#include " mlir/Target/DXSA/BinaryParser.h"
1010#include " mlir/Dialect/DXSA/IR/DXSA.h"
1111#include " mlir/IR/Builders.h"
12+ #include " mlir/IR/Diagnostics.h"
1213#include " mlir/IR/Location.h"
1314#include " llvm/ADT/ArrayRef.h"
1415#include " llvm/ADT/SmallVector.h"
@@ -531,6 +532,43 @@ class DXBuilder {
531532 builder.getStringAttr (name));
532533 }
533534
535+ Instruction buildUnknown (ArrayRef<int32_t > tokens, Location loc) {
536+ return dxsa::Unknown::create (
537+ builder, loc, DenseI32ArrayAttr::get (builder.getContext (), tokens));
538+ }
539+
540+ // / RAII guard for speculative IR construction. On destruction, erases
541+ // / every op appended to the active insertion block after the guard
542+ // / was created, unless release() was called first.
543+ class RewindGuard {
544+ public:
545+ explicit RewindGuard (DXBuilder &dxBuilder)
546+ : builder(dxBuilder.builder),
547+ block(dxBuilder.builder.getInsertionBlock()),
548+ numOps(block->getOperations ().size()) {}
549+
550+ ~RewindGuard () {
551+ if (released)
552+ return ;
553+ while (block->getOperations ().size () > numOps)
554+ block->back ().erase ();
555+ builder.setInsertionPointToEnd (block);
556+ }
557+
558+ RewindGuard (const RewindGuard &) = delete;
559+ RewindGuard &operator =(const RewindGuard &) = delete;
560+
561+ // / Accept the speculative work: keep all ops added since
562+ // / construction and disarm the destructor.
563+ void release () { released = true ; }
564+
565+ private:
566+ OpBuilder &builder;
567+ Block *block;
568+ size_t numOps;
569+ bool released = false ;
570+ };
571+
534572 Instruction buildDclGlobalFlags (dxsa::GlobalFlags flags, Location loc) {
535573 auto flagsAttr = dxsa::GlobalFlagsAttr::get (builder.getContext (), flags);
536574 return dxsa::DclGlobalFlags::create (builder, loc, flagsAttr);
@@ -726,9 +764,11 @@ class Parser {
726764 using Instruction = DXBuilder::Instruction;
727765 using Module = DXBuilder::Module;
728766
/// Size in bytes of a single token in the program binary stream.
static constexpr size_t tokenSize{sizeof(uint32_t)};
769+
729770 // / Parse the current token and move the cursor to the next one.
730771 Token parseToken () {
731- constexpr size_t tokenSize = sizeof (uint32_t );
732772 if ((currentTokenOffset + tokenSize) > buffer.size ()) {
733773 emitError (getLocation (), " unexpected end of file" );
734774 return failure ();
@@ -1531,6 +1571,110 @@ class Parser {
15311571 loc);
15321572 }
15331573
1574+ struct InstructionSlice {
1575+ size_t beginOffset; // absolute byte offset in `buffer`
1576+ uint32_t numTokens; // number of tokens to consume (>= 1)
1577+ StringRef hint; // short reason, included in the warning
1578+ };
1579+
1580+ std::optional<InstructionSlice> sliceInstruction () {
1581+ auto beginOffset = currentTokenOffset;
1582+ auto remainingBytes = buffer.size () - beginOffset;
1583+ if (remainingBytes < tokenSize)
1584+ return std::nullopt ;
1585+ auto token0 = support::endian::read<uint32_t >(buffer.begin () + beginOffset,
1586+ endianness::little);
1587+ auto opcode = DECODE_D3D10_SB_OPCODE_TYPE (token0);
1588+ // CUSTOMDATA: total token count lives in the second token.
1589+ // The minimum legal value is 2 (the header pair itself).
1590+ if (opcode == D3D10_SB_OPCODE_CUSTOMDATA) {
1591+ if (remainingBytes < 2 * tokenSize)
1592+ return InstructionSlice{beginOffset, 1 , " truncated customdata header" };
1593+ auto token1 = support::endian::read<uint32_t >(
1594+ buffer.begin () + beginOffset + tokenSize, endianness::little);
1595+ if (token1 < 2 )
1596+ return InstructionSlice{beginOffset, 2 , " customdata length < 2" };
1597+ auto instructionSize = static_cast <uint64_t >(token1) * tokenSize;
1598+ if (instructionSize > remainingBytes)
1599+ return InstructionSlice{
1600+ beginOffset, static_cast <uint32_t >(remainingBytes / tokenSize),
1601+ " customdata length past EOF" };
1602+ return InstructionSlice{beginOffset, token1, " customdata block" };
1603+ }
1604+
1605+ // Regular instruction
1606+ auto length = DECODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH (token0);
1607+ if (length == 0 )
1608+ return InstructionSlice{beginOffset, 1 ,
1609+ " instruction length encoded as 0" };
1610+ auto instructionSize = static_cast <uint64_t >(length) * tokenSize;
1611+ if (instructionSize > remainingBytes)
1612+ return InstructionSlice{beginOffset,
1613+ static_cast <uint32_t >(remainingBytes / tokenSize),
1614+ " instruction length past EOF" };
1615+ return InstructionSlice{beginOffset, length, " structured decode failed" };
1616+ }
1617+
1618+ // / Read `numTokens` tokens from `beginOffset` and return them as
1619+ // / i32s in little-endian order. Caller must guarantee the range is
1620+ // / in-bounds.
1621+ SmallVector<int32_t > snapshotTokens (size_t beginOffset, uint32_t numTokens) {
1622+ SmallVector<int32_t > out (numTokens);
1623+ for (uint32_t i = 0 ; i < numTokens; ++i)
1624+ out[i] = static_cast <int32_t >(support::endian::read<uint32_t >(
1625+ buffer.begin () + beginOffset + i * tokenSize, endianness::little));
1626+ return out;
1627+ }
1628+
1629+ // / Run the structured parser over the slice.
1630+ // / On success the produced ops are kept.
1631+ // / On failure any partial IR is rewound by guard.
1632+ bool tryParseAsStructured (const InstructionSlice &slice) {
1633+ auto expectedEnd = slice.beginOffset + slice.numTokens * tokenSize;
1634+ DXBuilder::RewindGuard guard (builder);
1635+ ScopedDiagnosticHandler suppress (name.getContext (),
1636+ [](Diagnostic &) { return success (); });
1637+ if (succeeded (parseInstruction ()) && currentTokenOffset == expectedEnd) {
1638+ guard.release ();
1639+ return true ;
1640+ }
1641+ return false ;
1642+ }
1643+
1644+ // / Emit an unknown op for the slice.
1645+ LogicalResult parseAsUnknown (const InstructionSlice &slice) {
1646+ currentTokenOffset = slice.beginOffset + slice.numTokens * tokenSize;
1647+ auto loc = FileLineColLoc::get (name, 0 , slice.beginOffset );
1648+ SmallVector<int32_t > rawTokens =
1649+ snapshotTokens (slice.beginOffset , slice.numTokens );
1650+ builder.buildUnknown (rawTokens, loc);
1651+ emitWarning (loc) << " could not decode instruction (" << slice.hint
1652+ << " ); emitted dxsa.unknown with " << slice.numTokens
1653+ << " token(s)" ;
1654+ return success ();
1655+ }
1656+
1657+ // / Try structured parse; on any failure (or partial consumption)
1658+ // / rewind any IR built during the attempt, advance the byte cursor
1659+ // / past the slice, and emit `dxsa.unknown` covering the same range.
1660+ LogicalResult parseInstructionOrUnknown () {
1661+ auto slice = sliceInstruction ();
1662+ if (!slice) {
1663+ // Nothing to wrap in an op.
1664+ auto trailingBytes = buffer.size () - currentTokenOffset;
1665+ if (trailingBytes > 0 )
1666+ emitWarning (getLocation (0 ))
1667+ << " ignoring " << trailingBytes << " trailing byte(s)" ;
1668+ currentTokenOffset = buffer.size ();
1669+ return success ();
1670+ }
1671+
1672+ if (tryParseAsStructured (*slice))
1673+ return success ();
1674+
1675+ return parseAsUnknown (*slice);
1676+ }
1677+
15341678 FailureOr<Module> parseModule () {
15351679 FileLineColLoc loc = getLocation (0 );
15361680 auto header = parseProgramHeader ();
@@ -1545,10 +1689,8 @@ class Parser {
15451689 }
15461690 auto module = builder.createModule (programType, shaderVersion, loc);
15471691 while (currentTokenOffset < buffer.size ()) {
1548- FailureOr<Instruction> inst = parseInstruction ();
1549- if (failed (inst)) {
1692+ if (failed (parseInstructionOrUnknown ()))
15501693 return failure ();
1551- }
15521694 }
15531695 return module ;
15541696 }
0 commit comments