Skip to content

Commit a34cf16

Browse files
committed
Email: Add unit tests covering email validation and sanitization.
In preparation for later work to allow non-US-ASCII email addresses, this change extends the unit test suite for `is_email()` and adds new tests covering `antispambot()` and `sanitize_email()`.

This work was done collaboratively during WordCamp Vienna, 2026 as a Contributor Challenge, in cooperation with, and with support from, ICANN and the GeoTLDs Universal Acceptance Local Initiative.

Developed in: #11552
Discussed in: https://core.trac.wordpress.org/ticket/31992

Props agulbra, akirk, benniledl, dmsnell.
See #31992.

git-svn-id: https://develop.svn.wordpress.org/trunk@62225 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 8043320 commit a34cf16

File tree

3 files changed

+193
-12
lines changed

3 files changed

+193
-12
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
<?php
2+
/**
3+
* Tests for the antispambot() function.
4+
*
5+
* @group formatting
6+
* @covers ::antispambot
7+
*/
8+
class Tests_Formatting_Antispambot extends WP_UnitTestCase {
	/**
	 * Ensures that antispambot will not produce invalid UTF-8 when hiding email addresses.
	 *
	 * Were a non-US-ASCII email address to be sent into `antispambot()`, then a naive approach
	 * to obfuscation could break apart multibyte characters and leave invalid UTF-8 as a
	 * result.
	 *
	 * @ticket 31992
	 *
	 * @dataProvider data_returns_valid_utf8
	 *
	 * @param string $email The email address to obfuscate.
	 */
	public function test_returns_valid_utf8( $email ) {
		$this->assertTrue(
			wp_is_valid_utf8( antispambot( $email ) ),
			'Should have produced valid UTF-8 when obfuscating the email address.'
		);
	}

	/**
	 * Data provider.
	 *
	 * Declared static so that it can be invoked without a test case instance,
	 * matching the static providers used by the is_email() tests.
	 *
	 * @return array[] Named datasets, each holding a single email address.
	 */
	public static function data_returns_valid_utf8() {
		return array(
			'plain'                => array( 'bob@example.com' ),
			'plain with ip'        => array( 'ace@204.32.222.14' ),
			'deep subdomain'       => array( 'kevin@many.subdomains.make.a.happy.man.edu' ),
			'short address'        => array( 'a@b.co' ),
			'weird but legal dots' => array( '..@example.com' ),
		);
	}

	/**
	 * This tests that antispambot performs some sort of obfuscation
	 * and that the obfuscation maps back to the original value.
	 *
	 * The obfuscated output is read back as a single token through the HTML API
	 * and its text is then passed through `rawurldecode()` before being compared
	 * against the address that was provided.
	 *
	 * @ticket 31992
	 *
	 * @dataProvider data_antispambot_obfuscates
	 *
	 * @param string $provided The email address to obfuscate.
	 */
	public function test_antispambot_obfuscates( $provided ) {
		$obfuscated = antispambot( $provided );

		// The only token should be the email address, so advance once and treat as a text node.
		$processor = new WP_HTML_Tag_Processor( $obfuscated );

		// Fail early with a clear message if no token is found, instead of a confusing mismatch below.
		$this->assertTrue(
			$processor->next_token(),
			'Should have found a token in the obfuscated output.'
		);

		$decoded = rawurldecode( $processor->get_modifiable_text() );

		$this->assertNotSame( $provided, $obfuscated, 'Should have produced an obfuscated representation.' );
		$this->assertSame( $provided, $decoded, 'Should have decoded to the original email after restoring.' );
	}

	/**
	 * Data provider.
	 *
	 * Declared static so that it can be invoked without a test case instance,
	 * matching the static providers used by the is_email() tests.
	 *
	 * @return array[] Named datasets, each holding a single email address.
	 */
	public static function data_antispambot_obfuscates() {
		return array(
			'plain'           => array( 'example@example.com' ),
			'hash local part' => array( '#@example.com' ),
		);
	}
}

tests/phpunit/tests/formatting/isEmail.php

Lines changed: 77 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,44 @@
11
<?php
2-
32
/**
3+
* Tests for the is_email() function.
4+
*
45
* @group formatting
56
*
67
* @covers ::is_email
78
*/
89
class Tests_Formatting_IsEmail extends WP_UnitTestCase {
9-
1010
/**
11-
* @dataProvider valid_email_provider
11+
* Ensures that valid emails are returned unchanged.
12+
*
13+
* @ticket 31992
14+
*
15+
* @dataProvider data_valid_email_provider
16+
*
17+
* @param string $email Valid email address.
1218
*/
1319
public function test_returns_the_email_address_if_it_is_valid( $email ) {
14-
$this->assertSame( $email, is_email( $email ), "is_email() should return the email address for $email." );
20+
$this->assertSame(
21+
$email,
22+
is_email( $email ),
23+
'Should return the given email address unchanged when valid.'
24+
);
1525
}
1626

1727
/**
18-
* Data provider for valid email addresses.
28+
* Data provider.
1929
*
20-
* @return array
30+
* @return Generator
2131
*/
22-
public static function valid_email_provider() {
32+
public static function data_valid_email_provider() {
2333
$valid_emails = array(
2434
'bob@example.com',
2535
'phil@example.info',
36+
'phil@TLA.example',
2637
'ace@204.32.222.14',
2738
'kevin@many.subdomains.make.a.happy.man.edu',
2839
'a@b.co',
2940
'bill+ted@example.com',
41+
'..@example.com',
3042
);
3143

3244
foreach ( $valid_emails as $email ) {
@@ -35,25 +47,78 @@ public static function valid_email_provider() {
3547
}
3648

3749
/**
38-
* @dataProvider invalid_email_provider
50+
* Ensures that unrecognized email addresses are rejected.
51+
*
52+
* @ticket 31992
53+
*
54+
* @dataProvider data_invalid_email_provider
55+
*
56+
* @param string $email Invalid or unrecognized-to-WordPress email address.
3957
*/
4058
public function test_returns_false_if_given_an_invalid_email_address( $email ) {
41-
$this->assertFalse( is_email( $email ), "is_email() should return false for $email." );
59+
$this->assertFalse(
60+
is_email( $email ),
61+
'Should have rejected the email as invalid.'
62+
);
4263
}
4364

4465
/**
45-
* Data provider for invalid email addresses.
66+
* Data provider.
4667
*
47-
* @return array
68+
* @return Generator
4869
*/
49-
public static function invalid_email_provider() {
70+
public static function data_invalid_email_provider() {
5071
$invalid_emails = array(
5172
'khaaaaaaaaaaaaaaan!',
5273
'http://bob.example.com/',
5374
"sif i'd give u it, spamer!1",
5475
'com.exampleNOSPAMbob',
5576
'bob@your mom',
5677
'a@b.c',
78+
'" "@b.c',
79+
'"@"@b.c',
80+
'a@route.org@b.c',
81+
'h(aj@couc.ou', // bad comment.
82+
'hi@',
83+
'hi@hi@couc.ou', // double @.
84+
85+
/*
86+
* The next address is not deliverable as described,
87+
* SMTP servers should strip the (ab), so it is very
88+
* likely a source of confusion or a typo.
89+
* Best rejected.
90+
*/
91+
'(ab)cd@couc.ou',
92+
93+
/*
94+
* The next address is not globally deliverable,
95+
* so it may work with PHPMailer and break with
96+
* mail sending services. Best not allow users
97+
* to paint themselves into that corner. This also
98+
* avoids security problems like those that were
99+
* used to probe the WordPress server's local
100+
* network.
101+
*/
102+
'toto@to',
103+
104+
/*
105+
* Several addresses are best rejected because
106+
* we don't want to allow sending to fe80::, 192.168
107+
* and other special addresses; that too might
108+
* be used to probe the WordPress server's local
109+
* network.
110+
*/
111+
'to@[2001:db8::1]',
112+
'to@[IPv6:2001:db8::1]',
113+
'to@[192.168.1.1]',
114+
115+
/*
116+
* Ill-formed UTF-8 byte sequences must be rejected.
117+
* A lone continuation byte (0x80) is not valid UTF-8
118+
* whether it appears in the local part or the domain.
119+
*/
120+
"a\x80b@example.com", // invalid UTF-8 in local part.
121+
"abc@\x80.org", // invalid UTF-8 in domain subdomain.
57122
);
58123

59124
foreach ( $invalid_emails as $email ) {
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<?php
2+
/**
3+
* Tests for the sanitize_email() function.
4+
*
5+
* @group formatting
6+
* @covers ::sanitize_email
7+
*/
8+
class Tests_Formatting_SanitizeEmail extends WP_UnitTestCase {
	/**
	 * This test checks that email addresses are properly sanitized.
	 *
	 * Each dataset pairs an input address with the exact string that
	 * `sanitize_email()` is expected to return for it; an empty string
	 * is the expected result for the addresses listed as unusable.
	 *
	 * @ticket 31992
	 *
	 * @dataProvider data_sanitized_email_pairs
	 *
	 * @param string $address  The email address to sanitize.
	 * @param string $expected The expected sanitized email address.
	 */
	public function test_returns_stripped_email_address( $address, $expected ) {
		$this->assertSame(
			$expected,
			sanitize_email( $address ),
			'Should have produced the known sanitized form of the email.'
		);
	}

	/**
	 * Data provider.
	 *
	 * Declared static so that it can be invoked without a test case instance,
	 * matching the static providers used by the is_email() tests.
	 *
	 * @return array[] Named datasets of `array( $address, $expected )` pairs.
	 */
	public static function data_sanitized_email_pairs() {
		return array(
			'shorter than 6 characters'      => array( 'a@b', '' ),
			'contains no @'                  => array( 'ab', '' ),
			'just a TLD'                     => array( 'abc@com', '' ),
			'plain'                          => array( 'abc@example.com', 'abc@example.com' ),
			'invalid utf8 in local'          => array( "a\x80b@example.com", '' ),
			'invalid utf8 subdomain dropped' => array( "abc@sub.\x80.org", 'abc@sub.org' ),
			'all subdomains invalid utf8'    => array( "abc@\x80.org", '' ),
		);
	}
}

0 commit comments

Comments
 (0)