|
| 1 | +--- |
| 2 | +name: secure-microsoft-word-validation |
| 3 | +description: Generate secure Microsoft Word file validation code. Enforces secure generation of code that validates a Microsoft Word file. Invoke when writing any code related to Microsoft Word file validation. |
| 4 | +allowed-tools: Read Grep Glob |
| 5 | +metadata: |
| 6 | + category: security |
| 7 | +--- |
| 8 | + |
| 9 | +# Secure Microsoft Word File Validation Code Generation Rules |
| 10 | + |
| 11 | +Apply **all** rules below when generating or reviewing any code related to validation of a Microsoft Word file. |
| 12 | + |
| 13 | +## 1. Microsoft Word file validation (CRITICAL) |
| 14 | + |
| 15 | +- ALWAYS ensure that the file is a real Microsoft Word file. |
| 16 | +- ALWAYS ensure that the file uses the `Office Open XML` standard. |
| 17 | +- ALWAYS ensure that the file uses the `DOCX` file type. |
| 18 | +- ALWAYS ensure that the file has a single extension and that this extension is `docx`. |
| 19 | +- ALWAYS ensure that the file size does not exceed 5 megabytes before opening or parsing it. |
| 20 | +- ALWAYS ensure that the file has no Visual Basic for Applications (VBA) macros. |
| 21 | +- ALWAYS ensure that the file has no Object Linking and Embedding (OLE) package. |
| 22 | + |
| 23 | +```java |
| 24 | +// BAD: No validation is applied |
| 25 | +import org.apache.poi.xwpf.usermodel.*; |
| 26 | +import java.io.*; |
| 27 | +import java.util.List; |
| 28 | + |
| 29 | +public class UnsafeReadWordFile { |
| 30 | + public static void main(String[] args) { |
| 31 | + String filePath = "document.docx"; |
| 32 | + try (FileInputStream fis = new FileInputStream(filePath); |
| 33 | + XWPFDocument document = new XWPFDocument(fis)) { |
| 34 | + // Read all paragraphs |
| 35 | + List<XWPFParagraph> paragraphs = document.getParagraphs(); |
| 36 | + for (XWPFParagraph paragraph : paragraphs) { |
| 37 | + System.out.println(paragraph.getText()); |
| 38 | + } |
| 39 | + } catch (IOException e) { |
| 40 | + e.printStackTrace(); |
| 41 | + } |
| 42 | + } |
| 43 | +} |
| 44 | + |
| 45 | +// GOOD: All points are validated |
| 46 | +import org.apache.poi.openxml4j.opc.*; |
| 47 | +import org.apache.poi.poifs.filesystem.POIFSFileSystem; |
| 48 | +import org.apache.poi.xwpf.usermodel.*; |
| 49 | +import java.io.*; |
| 50 | + |
| 51 | +public class SafeWordFileReader { |
| 52 | + |
| 53 | + public static void main(String[] args) { |
| 54 | + try { |
| 55 | + File file = new File("document.docx"); |
| 56 | + |
| 57 | + // ── CHECK 1: Single extension and must be "docx" ────────────────── |
| 58 | + String name = file.getName(); |
| 59 | + int dotCount = name.length() - name.replace(".", "").length(); |
| 60 | + if (dotCount != 1) { |
| 61 | + throw new SecurityException( |
| 62 | + "File must have exactly one extension. Found: " + name); |
| 63 | + } |
| 64 | + if (!name.toLowerCase().endsWith(".docx")) { |
| 65 | + throw new SecurityException( |
| 66 | + "File extension must be '.docx'. Found: " + name); |
| 67 | + } |
| 68 | + |
| 69 | + // ── CHECK 2: File size must not exceed 5 MB ─────────────────────── |
| 70 | + long maxSizeBytes = 5L * 1024 * 1024; |
| 71 | + if (file.length() > maxSizeBytes) { |
| 72 | + throw new SecurityException( |
| 73 | + "File size exceeds the maximum allowed size of 5 MB. " + |
| 74 | + "Found: " + file.length() + " bytes."); |
| 75 | + } |
| 76 | + |
| 77 | + // ── CHECK 3: Office Open XML magic bytes (PK\x03\x04) ───────────── |
| 78 | + try (FileInputStream fis = new FileInputStream(file)) { |
| 79 | + byte[] header = new byte[4]; |
| 80 | + int bytesRead = fis.read(header); |
| 81 | + if (bytesRead < 4 |
| 82 | + || header[0] != 0x50 |
| 83 | + || header[1] != 0x4B |
| 84 | + || header[2] != 0x03 |
| 85 | + || header[3] != 0x04) { |
| 86 | + throw new SecurityException( |
| 87 | + "File is not a valid Office Open XML (OOXML/ZIP) file. " + |
| 88 | + "Magic bytes do not match PK\\x03\\x04."); |
| 89 | + } |
| 90 | + } |
| 91 | + |
| 92 | + // ── CHECK 4: Real DOCX (contains word/document.xml) ─────────────── |
| 93 | + // ── CHECK 5: No VBA macros (no word/vbaProject.bin) ─────────────── |
| 94 | + // ── CHECK 6: No embedded OLE/ActiveX objects ────────────────────── |
| 95 | + String oleObjectUri = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"; |
| 96 | + String activeXUri = "http://schemas.microsoft.com/office/2006/relationships/activeX"; |
| 97 | + |
| 98 | + try (OPCPackage pkg = OPCPackage.open(file)) { |
| 99 | + if (pkg.getPartsByName( |
| 100 | + java.util.regex.Pattern.compile("/word/document\\.xml")) |
| 101 | + .isEmpty()) { |
| 102 | + throw new SecurityException( |
| 103 | + "File does not contain 'word/document.xml'. " + |
| 104 | + "It is not a valid DOCX file."); |
| 105 | + } |
| 106 | + |
| 107 | + if (!pkg.getPartsByName( |
| 108 | + java.util.regex.Pattern.compile("(?i)/word/vbaProject\\.bin")) |
| 109 | + .isEmpty()) { |
| 110 | + throw new SecurityException( |
| 111 | + "File contains a VBA macro project (vbaProject.bin). " + |
| 112 | + "Macro-enabled documents (DOCM) are not allowed."); |
| 113 | + } |
| 114 | + |
| 115 | + for (PackagePart part : pkg.getParts()) { |
| 116 | + for (PackageRelationship rel : part.getRelationships()) { |
| 117 | + String relType = rel.getRelationshipType(); |
| 118 | + if (relType != null) { |
| 119 | + String lower = relType.toLowerCase(); |
| 120 | + if (lower.startsWith(oleObjectUri.toLowerCase()) |
| 121 | + || lower.startsWith(activeXUri.toLowerCase())) { |
| 122 | + throw new SecurityException( |
| 123 | + "File contains an embedded OLE object in part: " + |
| 124 | + part.getPartName() + ". OLE packages are not allowed."); |
| 125 | + } |
| 126 | + } |
| 127 | + } |
| 128 | + } |
| 129 | + } |
| 130 | + |
| 131 | + // ── CHECK 7: No raw OLE2 compound file (old .doc binary) ────────── |
| 132 | + try (FileInputStream fis = new FileInputStream(file)) { |
| 133 | + byte[] header = new byte[8]; |
| 134 | + if (fis.read(header) >= 4 && POIFSFileSystem.hasPOIFSHeader(header)) { |
| 135 | + throw new SecurityException( |
| 136 | + "File appears to be an OLE2 compound document (old .doc binary format). " + |
| 137 | + "Only OOXML DOCX files are accepted."); |
| 138 | + } |
| 139 | + } |
| 140 | + |
| 141 | + // ── READ: Paragraphs and tables ──────────────────────────────────── |
| 142 | + try (FileInputStream fis = new FileInputStream(file); |
| 143 | + XWPFDocument document = new XWPFDocument(fis)) { |
| 144 | + |
| 145 | + System.out.println("=== Paragraphs ==="); |
| 146 | + for (XWPFParagraph para : document.getParagraphs()) { |
| 147 | + if (!para.getText().isBlank()) { |
| 148 | + System.out.println(para.getText()); |
| 149 | + } |
| 150 | + } |
| 151 | + |
| 152 | + System.out.println("\n=== Tables ==="); |
| 153 | + for (XWPFTable table : document.getTables()) { |
| 154 | + for (XWPFTableRow row : table.getRows()) { |
| 155 | + for (XWPFTableCell cell : row.getTableCells()) { |
| 156 | + System.out.print(cell.getText() + "\t"); |
| 157 | + } |
| 158 | + System.out.println(); |
| 159 | + } |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + } catch (SecurityException e) { |
| 164 | + System.err.println("Security validation failed: " + e.getMessage()); |
| 165 | + } catch (Exception e) { |
| 166 | + System.err.println("Error reading file: " + e.getMessage()); |
| 167 | + } |
| 168 | + } |
| 169 | +} |
| 170 | +``` |
| 171 | + |
| 172 | +## 2. Output Checklist |
| 173 | + |
| 174 | +Before finalizing generated code, verify: |
| 175 | + |
| 176 | +- [ ] The file is a real Microsoft Word file. |
| 177 | +- [ ] The file uses the `Office Open XML` standard. |
| 178 | +- [ ] The file uses the `DOCX` file type. |
| 179 | +- [ ] The file has a single extension and this extension is `docx`. |
| 180 | +- [ ] The file size does not exceed 5 megabytes. |
| 181 | +- [ ] The file has no Visual Basic for Applications (VBA) macros. |
| 182 | +- [ ] The file has no Object Linking and Embedding (OLE) package. |
| 183 | + |
| 184 | +## References |
| 185 | + |
| 186 | +- [ECMA-376 - Office Open XML file formats](https://ecma-international.org/publications-and-standards/standards/ecma-376/) |
| 187 | +- [Learn about file formats](https://support.microsoft.com/en-us/office/learn-about-file-formats-56dc3b55-7681-402e-a727-c59fa0884b30). |
| 188 | +- [Linked objects and embedded objects](https://support.microsoft.com/en-au/office/linked-objects-and-embedded-objects-0bf81db2-8aa3-4148-be4a-c8b6e55e0d7c). |
| 189 | +- [Open XML Formats and file name extensions](https://support.microsoft.com/en-us/office/open-xml-formats-and-file-name-extensions-5200d93c-3449-4380-8e11-31ef14555b18). |
| 190 | +- [OWASP File Upload Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html). |
| 191 | +- [MITRE ATT&CK T1059.005 - Visual Basic](https://attack.mitre.org/techniques/T1059/005/). |
| 192 | +- [MITRE ATT&CK T1566.001 - Spearphishing Attachment](https://attack.mitre.org/techniques/T1566/001/). |
| 193 | +- [Microsoft: Macros from the internet are blocked by default](https://learn.microsoft.com/en-us/deployoffice/security/internet-macros-blocked). |
0 commit comments