Skip to content

Commit ea87136

Browse files
committed
Merge branch 'fix' of https://github.com/CleanTalk/wordpress-antispam into beta
2 parents 1e6083b + 91ba182 commit ea87136

2 files changed

Lines changed: 239 additions & 5 deletions

File tree

lib/Cleantalk/ApbctWP/ContactsEncoder/Shortcodes/EncodeContentSC.php

Lines changed: 96 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace Cleantalk\ApbctWP\ContactsEncoder\Shortcodes;
44

55
use Cleantalk\ApbctWP\ContactsEncoder\ContactsEncoder;
6+
use Cleantalk\ApbctWP\Escape;
67
use Cleantalk\ApbctWP\Variables\Cookie;
78
use Cleantalk\Common\ContactsEncoder\Dto\Params;
89
use Cleantalk\Common\ContactsEncoder\Exclusions\ExclusionsService;
@@ -102,24 +103,114 @@ public function changeContentBeforeEncoderModify($content)
102103
return $content;
103104
}
104105

106+
if ($this->isShortcodeInsideHtmlTag($content)) {
107+
return $content;
108+
}
109+
105110
// skip encoding if the content is already encoded with hook
106-
// Extract shortcode content to protect it from email encoding
107-
$shortcode_exist_pattern = sprintf('/\[%s\](.*?)\[\/%s\]/s', $this->public_name, $this->public_name);
111+
// Extract shortcode content to protect it from email encoding, supports sc attributes(!)
112+
$shortcode_exist_pattern = sprintf('/(\[%s(?:\s[^\]]*)?\])([\s\S]*?)(\[\/%s\])/s', $this->public_name, $this->public_name);
108113
$content = preg_replace_callback($shortcode_exist_pattern, function ($matches) {
109114
$placeholder = preg_replace('/EE\_\d+/', 'EE_' . (string)$this->shortcode_counter++, $this->exclusion_wrapper);
110115
if (is_null($placeholder)) {
111116
$placeholder = $this->exclusion_wrapper;
112117
}
113-
if (isset($matches[0])) {
114-
$this->shortcode_replacements[$placeholder] = $matches[0];
118+
if (isset($matches[1], $matches[2], $matches[3])) {
119+
$prefix = $matches[1];
120+
$entity = $matches[2];
121+
$suffix = $matches[3];
122+
$entity = Escape::escKsesPost($entity);
123+
$this->shortcode_replacements[$placeholder] = $prefix . $entity . $suffix;
115124
}
116125

117126
return $placeholder;
118127
}, $content);
119-
120128
return $content;
121129
}
122130

131+
/**
 * Tells whether at least one shortcode boundary sits inside an HTML tag.
 *
 * Both opening tags (with or without shortcode attributes) and closing
 * tags of the shortcode are collected together with their offsets. Each
 * offset is then checked with isOffsetInsideHtmlTag(); the first hit ends
 * the search.
 *
 * The check exists so that shortcodes placed in attribute context, e.g.
 *
 *     <a title="[apbct_encode_data]...[/apbct_encode_data]">
 *
 * are not extracted, since later content filters may mutate the
 * surrounding markup and produce malformed or injectable HTML.
 *
 * @param string $content The content to validate.
 *
 * @return bool True if any shortcode boundary is detected inside an HTML tag,
 *              false otherwise.
 */
protected function isShortcodeInsideHtmlTag($content)
{
    // Opening or closing shortcode tag; the optional group allows attributes.
    $boundary_pattern = sprintf(
        '/\[\/?%s(?:\s[^\]]*)?\]/', //supports sc attributes(!)
        preg_quote($this->public_name, '/')
    );

    preg_match_all($boundary_pattern, $content, $found, PREG_OFFSET_CAPTURE);

    if (!isset($found[0])) {
        return false;
    }

    foreach ($found[0] as $boundary) {
        // With PREG_OFFSET_CAPTURE every match is a [text, offset] pair.
        $position = $boundary[1] ?? null;

        if ($position !== null && $this->isOffsetInsideHtmlTag($content, $position)) {
            return true;
        }
    }

    return false;
}
180+
181+
182+
/**
 * Determines whether a given byte offset is located inside an HTML tag.
 *
 * Two checks are applied to the part of the content preceding the offset:
 *
 * 1. Fast path: if the last "<" appears after the last ">" (or there is
 *    no ">" at all), the offset is inside a tag.
 * 2. Quote-aware scan: the prefix is walked left to right, tracking tags,
 *    quoted attribute values and HTML comments. A ">" inside a quoted
 *    attribute value does not close the tag, so a case like
 *
 *        <a data-x="1>0" title="[OFFSET HERE]
 *
 *    is still reported as "inside", which the fast path alone would miss.
 *
 * The second check can only turn a "false" of the first check into "true";
 * it never relaxes the result.
 *
 * @param string $content The full content string.
 * @param int    $offset  Byte offset to validate (as produced by strpos()
 *                        or PREG_OFFSET_CAPTURE).
 *
 * @return bool True if the offset is located inside an HTML tag,
 *              false otherwise.
 */
public function isOffsetInsideHtmlTag($content, $offset)
{
    // substr() may return false on older PHP versions; normalise to a string.
    $before = (string) substr($content, 0, $offset);

    $last_open = strrpos($before, '<');
    if ($last_open === false) {
        // No "<" at all before the offset: cannot be inside a tag.
        return false;
    }

    $last_close = strrpos($before, '>');
    if ($last_close === false || $last_open > $last_close) {
        return true;
    }

    // The last ">" may belong to a quoted attribute value, so verify with a scan.
    $length         = strlen($before);
    $in_tag         = false;
    $quote          = null;  // the quote character of the attribute value being read
    $value_expected = false; // true right after "=", possibly followed by whitespace
    $i              = 0;

    while ($i < $length) {
        $char = $before[$i];

        if (!$in_tag) {
            if ($char === '<') {
                if (substr($before, $i, 4) === '<!--') {
                    // Skip the whole comment; quotes inside it carry no meaning.
                    $comment_end = strpos($before, '-->', $i + 2);
                    if ($comment_end === false) {
                        // The comment is still open at the offset.
                        return true;
                    }
                    $i = $comment_end + 3;
                    continue;
                }

                // A bare "<" in text (e.g. "1 < 2") does not start a tag.
                $next = $i + 1 < $length ? $before[$i + 1] : '';
                if ($next !== '' && preg_match('/[A-Za-z\/!?]/', $next) === 1) {
                    $in_tag         = true;
                    $quote          = null;
                    $value_expected = false;
                }
            }
            $i++;
            continue;
        }

        if ($quote !== null) {
            // Inside a quoted value only the matching quote is significant.
            if ($char === $quote) {
                $quote = null;
            }
        } elseif ($value_expected && ($char === '"' || $char === "'")) {
            $quote          = $char;
            $value_expected = false;
        } elseif ($char === '>') {
            $in_tag = false;
        } elseif ($char === '=') {
            $value_expected = true;
        } elseif (strpos(" \t\r\n\f", $char) === false) {
            // Any other non-whitespace character starts an unquoted value or a name.
            $value_expected = false;
        }

        $i++;
    }

    return $in_tag;
}
213+
123214
/**
124215
* Modifies the content after the encoder processes it.
125216
*

tests/ApbctWP/ContactsEncoder/TestContactsEncoderShortCodeEncode.php

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,147 @@ public function testChangeContentAfterEncoderModifyRestoresShortcodes()
6969

7070
$this->assertEquals('Test content', $result);
7171
}
72+
73+
/**
 * A shortcode located in an HTML attribute value must be returned untouched.
 */
public function testShortcodeInsideHtmlAttributeIsNotProcessed()
{
    $markup = '<a title="[apbct_encode_data]test[/apbct_encode_data]">X</a>';

    // The shortcode is inside the <a> tag, so the input is expected back verbatim.
    $this->assertEquals(
        $markup,
        $this->shortcode->changeContentBeforeEncoderModify($markup)
    );
}
82+
83+
/**
 * A standalone shortcode is swapped for the first placeholder.
 */
public function testShortcodeOutsideHtmlIsProcessed()
{
    $plain     = '[apbct_encode_data]Test content[/apbct_encode_data]';
    $processed = $this->shortcode->changeContentBeforeEncoderModify($plain);

    // The placeholder with index 0 must be present ...
    $this->assertStringContainsString('%%APBCT_SHORT_CODE_INCLUDE_EE_0%%', $processed);
    // ... and the content must no longer be the raw input.
    $this->assertNotEquals($plain, $processed);
}
96+
97+
/**
 * Two shortcodes in one content string each get their own numbered placeholder.
 */
public function testMultipleShortcodesAreHandled()
{
    $content = implode(' middle ', [
        '[apbct_encode_data]A[/apbct_encode_data]',
        '[apbct_encode_data]B[/apbct_encode_data]',
    ]);

    $processed = $this->shortcode->changeContentBeforeEncoderModify($content);

    $expected_placeholders = [
        '%%APBCT_SHORT_CODE_INCLUDE_EE_0%%',
        '%%APBCT_SHORT_CODE_INCLUDE_EE_1%%',
    ];
    foreach ($expected_placeholders as $placeholder) {
        $this->assertStringContainsString($placeholder, $processed);
    }
}
109+
110+
/**
 * An orphan closing shortcode tag inside an HTML attribute must not alter the markup.
 *
 * The closing tag sits inside the <a> tag, so the content is expected to be
 * returned unchanged and no placeholder may be injected into the attribute.
 */
public function testHtmlAttributeBreakPayloadDoesNotExplode()
{
    $content = '<a href="http://x" title="[/apbct_encode_data]">Test</a>';

    $result = $this->shortcode->changeContentBeforeEncoderModify($content);

    // must remain stable, no corruption, no placeholder injection inside tag
    $this->assertStringContainsString('<a', $result);
    $this->assertStringContainsString('</a>', $result);
    // Checking only for "<a" / "</a>" would also pass on corrupted output,
    // so pin the two properties the comment above actually promises.
    $this->assertStringNotContainsString('%%APBCT_SHORT_CODE_INCLUDE', $result);
    $this->assertEquals($content, $result);
}
120+
121+
/**
 * The offset of a shortcode placed in an attribute value is reported as inside a tag.
 */
public function testOffsetDetectionInsideHtmlTag()
{
    $html      = '<a title="[apbct_encode_data]">X</a>';
    $needle_at = strpos($html, '[apbct_encode_data]');

    $is_inside = $this->shortcode->isOffsetInsideHtmlTag($html, $needle_at);

    $this->assertTrue($is_inside);
}
131+
132+
/**
 * The offset of a shortcode in plain text (no surrounding tag) is reported as outside.
 */
public function testOffsetDetectionOutsideHtmlTag()
{
    $text      = '[apbct_encode_data]test[/apbct_encode_data]';
    $needle_at = strpos($text, '[apbct_encode_data]');

    $is_inside = $this->shortcode->isOffsetInsideHtmlTag($text, $needle_at);

    $this->assertFalse($is_inside);
}
142+
143+
/**
 * An opening shortcode tag carrying attributes is recognised and replaced too.
 */
public function testShortcodeWithAttributesIsProcessed()
{
    $with_attributes = '[apbct_encode_data mode="blur"]Test[/apbct_encode_data]';
    $processed       = $this->shortcode->changeContentBeforeEncoderModify($with_attributes);

    // First placeholder is present and the raw shortcode is gone.
    $this->assertStringContainsString('%%APBCT_SHORT_CODE_INCLUDE_EE_0%%', $processed);
    $this->assertNotEquals($with_attributes, $processed);
}
156+
157+
/**
 * A shortcode with attributes placed inside an HTML attribute must stay untouched.
 */
public function testShortcodeWithAttributesIsDetectedInHtmlContext()
{
    // In a single-quoted PHP string \" is NOT an escape sequence: it yields a
    // literal backslash followed by a quote. Use escaped single quotes so the
    // fixture is a well-formed title="..." attribute wrapping the shortcode.
    $content = '<a title="[apbct_encode_data mode=\'blur\']test[/apbct_encode_data]">X</a>';

    $result = $this->shortcode->changeContentBeforeEncoderModify($content);

    // must be blocked due to HTML attribute context
    $this->assertEquals($content, $result);
}
166+
167+
/**
 * One shortcode in attribute context disables processing for the whole content.
 */
public function testMixedShortcodesSafeAndUnsafe()
{
    $content = implode('', [
        '[apbct_encode_data]SAFE[/apbct_encode_data]',
        '<a title="[apbct_encode_data]BAD[/apbct_encode_data]">',
        'X</a>',
    ]);

    // because of current design: full block is skipped if ANY HTML-unsafe shortcode exists
    $this->assertEquals(
        $content,
        $this->shortcode->changeContentBeforeEncoderModify($content)
    );
}
179+
180+
/**
 * No placeholder may be emitted for a shortcode that lives in an HTML attribute.
 */
public function testPlaceholderNeverAppearsInsideHtmlAttribute()
{
    $markup = '<a title="[apbct_encode_data]Test[/apbct_encode_data]">X</a>';

    $this->assertStringNotContainsString(
        '%%APBCT_SHORT_CODE_INCLUDE_EE_0%%',
        $this->shortcode->changeContentBeforeEncoderModify($markup)
    );
}
188+
189+
/**
 * A script tag passed as the "replacing_text" attribute must not reach the output as-is.
 */
public function testCallbackEscapesReplacingText()
{
    $attributes = ['replacing_text' => '<script>alert(1)</script>'];

    $rendered = $this->shortcode->callback($attributes, 'content', 'apbct_encode_data');

    $this->assertStringNotContainsString('<script>', $rendered);
}
199+
200+
/**
 * Every registered placeholder must be replaced when the content is restored.
 */
public function testRestoreIntegrityWithMultiplePlaceholders()
{
    $this->shortcode->shortcode_replacements = [
        '%%APBCT_SHORT_CODE_INCLUDE_EE_0%%' => '[apbct_encode_data]A[/apbct_encode_data]',
        '%%APBCT_SHORT_CODE_INCLUDE_EE_1%%' => '[apbct_encode_data]B[/apbct_encode_data]',
    ];

    $content = '%%APBCT_SHORT_CODE_INCLUDE_EE_0%% and %%APBCT_SHORT_CODE_INCLUDE_EE_1%%';

    $result = $this->shortcode->changeContentAfterEncoderModify($content);

    // The letters "A" and "B" also occur in the placeholder text itself
    // ("APBCT"), so on their own the two checks below would pass even if
    // nothing was restored. Assert first that the placeholders are gone.
    $this->assertStringNotContainsString('%%APBCT_SHORT_CODE_INCLUDE_EE_0%%', $result);
    $this->assertStringNotContainsString('%%APBCT_SHORT_CODE_INCLUDE_EE_1%%', $result);

    $this->assertStringContainsString('A', $result);
    $this->assertStringContainsString('B', $result);
}
214+
72215
}

0 commit comments

Comments
 (0)