@@ -32,6 +32,28 @@ interface PDFConfig {
3232
3333// Helper function to clean text for PDF rendering
3434const cleanTextForPDF = ( text : string ) : string => {
/**
 * Removes invisible / format-control Unicode characters that commonly sneak in
 * via copy/paste.
 *
 * These characters are not visible in the editor, but the previous
 * implementation coerced them to "?", which *is* visible and looks like random
 * corruption in the generated PDF. Dropping them up front avoids that.
 *
 * Code points removed:
 * - U+00AD          soft hyphen
 * - U+200B..U+200F  zero-width space/joiners and direction marks
 * - U+202A..U+202E  bidi embedding/override marks
 * - U+2060..U+206F  word joiner and other format characters
 * - U+FEFF          byte order mark
 * - U+FFFD          replacement character
 *
 * @param value - Text to sanitize.
 * @returns `value` with every occurrence of the code points above removed;
 *          all other characters (including ordinary spaces) are untouched.
 */
const stripInvisibleChars = (value: string): string => {
  // A single character class handles every range in one pass. The literal must
  // contain no whitespace: a space inside the class would match real spaces
  // and turn `\u200B - \u200F` into three separate members instead of a range.
  return value.replace(
    /[\u00AD\u200B-\u200F\u202A-\u202E\u2060-\u206F\uFEFF\uFFFD]/g,
    '',
  );
};

const strippedText = stripInvisibleChars(text);
54+
55+ const strippedText = stripInvisibleChars ( text ) ;
56+
3557 // First, handle specific problematic characters that cause font issues
3658 const replacements : { [ key : string ] : string } = {
3759 '\u2018' : "'" , // left single quotation mark
@@ -58,14 +80,14 @@ const cleanTextForPDF = (text: string): string => {
5880 } ;
5981
6082 // Replace known problematic characters
61- let cleanedText = text ;
83+ let cleanedText = strippedText ;
6284 for ( const [ unicode , replacement ] of Object . entries ( replacements ) ) {
6385 cleanedText = cleanedText . replace ( new RegExp ( unicode , 'g' ) , replacement ) ;
6486 }
6587
6688 // For any remaining non-ASCII characters, try to preserve them first
6789 // Only replace if they cause font rendering issues
68- return cleanedText . replace ( / [ ^ \x00 - \x7F ] / g, function ( char ) {
90+ return cleanedText . replace ( / [ ^ \x00 - \x7F ] / g, function ( char ) {
6991 // Common accented characters that should work fine in most PDF fonts
7092 const safeChars = / [ à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß ] / ;
7193
@@ -89,7 +111,10 @@ const cleanTextForPDF = (text: string): string => {
89111 'Ñ' : 'N' , 'Ç' : 'C' , 'Ý' : 'Y'
90112 } ;
91113
92- return fallbacks [ char ] || '?' ; // Use ? for unknown characters
114+ // Preserve unknown characters instead of coercing to "?".
115+ // If the active PDF font can't render a glyph, viewers may show a tofu box,
116+ // but that's still preferable to inserting random "?" where the editor shows nothing.
117+ return fallbacks [ char ] ?? char ;
93118 } ) ;
94119} ;
95120
0 commit comments