Skip to content
This repository was archived by the owner on Jul 26, 2024. It is now read-only.

Commit 73f41bd

Browse files
committed
Removed unrequired use of mb_ functions
These were slowing down content parsing by at least 20x. Most were checks for non-empty content; the remaining uses should not be affected by no longer being multi-byte aware.
1 parent d1978c7 commit 73f41bd

3 files changed

Lines changed: 42 additions & 15 deletions

File tree

phpunit.xml.dist

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
backupStaticAttributes="false"
55
colors="true"
66
verbose="true"
7+
enforceTimeLimit="false"
8+
defaultTimeLimit="5"
79
convertErrorsToExceptions="true"
810
convertNoticesToExceptions="true"
911
convertWarningsToExceptions="true"

src/WordSplitter.php

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
2020
$isGrouping = false;
2121
$groupingUntil = -1;
2222

23-
for ($index = 0; $index < mb_strlen($text); $index++)
23+
$length = strlen($text);
24+
for ($index = 0; $index < $length; $index++)
2425
{
25-
$character = mb_substr($text, $index, 1);
26+
$character = substr($text, $index, 1);
2627

2728
// Don't bother executing block checks if we don't have any blocks to check for!
2829
if ($isBlockCheckRequired) {
@@ -51,28 +52,28 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
5152
switch ($mode) {
5253
case Mode::CHARACTER:
5354
if (Utils::isStartOfTag($character)) {
54-
if (mb_strlen($currentWord) !== 0) {
55+
if (strlen($currentWord) !== 0) {
5556
$words[] = $currentWord;
5657
}
5758
$currentWord = "<";
5859
$mode = Mode::TAG;
5960
} else if (Utils::isStartOfEntity($character)) {
60-
if (mb_strlen($currentWord) !== 0) {
61+
if (strlen($currentWord) !== 0) {
6162
$words[] = $currentWord;
6263
}
6364
$currentWord = $character;
6465
$mode = Mode::ENTITY;
6566
} else if (Utils::isWhiteSpace($character)) {
66-
if (mb_strlen($currentWord) !== 0) {
67+
if (strlen($currentWord) !== 0) {
6768
$words[] = $currentWord;
6869
}
6970
$currentWord = $character;
7071
$mode = Mode::WHITESPACE;
7172
} else if (Utils::isWord($character) &&
72-
(mb_strlen($currentWord) === 0) || Utils::isWord(substr($currentWord, -1))) {
73+
(strlen($currentWord) === 0) || Utils::isWord(substr($currentWord, -1))) {
7374
$currentWord .= $character;
7475
} else {
75-
if (mb_strlen($currentWord) !== 0) {
76+
if (strlen($currentWord) !== 0) {
7677
$words[] = $currentWord;
7778
}
7879
$currentWord = $character;
@@ -96,21 +97,21 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
9697

9798
if (Utils::isStartOfTag($character))
9899
{
99-
if (mb_strlen($currentWord) !== 0) {
100+
if (strlen($currentWord) !== 0) {
100101
$words[] = $currentWord;
101102
}
102103
$currentWord = $character;
103104
$mode = Mode::TAG;
104105
} else if (Utils::isStartOfEntity($character)) {
105-
if (mb_strlen($currentWord) !== 0) {
106+
if (strlen($currentWord) !== 0) {
106107
$words[] = $currentWord;
107108
}
108109
$currentWord = $character;
109110
$mode = Mode::ENTITY;
110111
} else if (Utils::isWhiteSpace($character)) {
111112
$currentWord .= $character;
112113
} else {
113-
if (mb_strlen($currentWord) !== 0) {
114+
if (strlen($currentWord) !== 0) {
114115
$words[] = $currentWord;
115116
}
116117
$currentWord = $character;
@@ -122,20 +123,20 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
122123

123124
if (Utils::isStartOfTag($character))
124125
{
125-
if (mb_strlen($currentWord) !== 0) {
126+
if (strlen($currentWord) !== 0) {
126127
$words[] = $currentWord;
127128
}
128129
$currentWord = $character;
129130
$mode = Mode::TAG;
130131
} else if (Utils::isWhiteSpace($character)) {
131-
if (mb_strlen($currentWord) !== 0) {
132+
if (strlen($currentWord) !== 0) {
132133
$words[] = $currentWord;
133134
}
134135
$currentWord = $character;
135136
$mode = Mode::WHITESPACE;
136137
} else if (Utils::isEndOfEntity($character)) {
137138
$switchToNextMode = true;
138-
if (mb_strlen($currentWord) !== 0) {
139+
if (strlen($currentWord) !== 0) {
139140
$currentWord .= $character;
140141
$words[] = $currentWord;
141142

@@ -158,7 +159,7 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
158159
} else if (Utils::isWord($character)) {
159160
$currentWord .= $character;
160161
} else {
161-
if (mb_strlen($currentWord) !== 0) {
162+
if (strlen($currentWord) !== 0) {
162163
$words[] = $currentWord;
163164
}
164165
$currentWord = $character;
@@ -168,7 +169,7 @@ public static function convertHtmlToListOfWords(string $text, array $blockExpres
168169
}
169170
}
170171

171-
if (mb_strlen($currentWord) !== 0) {
172+
if (strlen($currentWord) !== 0) {
172173
$words[] = $currentWord;
173174
}
174175

tests/HeavyContentTest.php

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<?php namespace Ssddanbrown\HtmlDiff\Tests;
2+
3+
use PHPUnit\Framework\TestCase;
4+
use Ssddanbrown\HtmlDiff\Diff;
5+
6+
class HeavyContentTest extends TestCase
7+
{
8+
9+
public function test_large_attribute_content()
10+
{
11+
$start = time();
12+
$strToEncode = '';
13+
for ($i = 0; $i < 10000; $i++) {
14+
$strToEncode .= 'cattestingstring';
15+
}
16+
$a = '<p data-test="' . base64_encode($strToEncode) . '">contnent</p>';
17+
$b = '<p data-test="' . base64_encode($strToEncode) . 'cat">contnent2</p>';
18+
19+
$output = Diff::excecute($a, $b);
20+
$this->assertNotEmpty($output);
21+
$this->assertLessThan(3, time() - $start);
22+
}
23+
24+
}

0 commit comments

Comments
 (0)