-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathtest_tokenize_urls.rb
More file actions
62 lines (49 loc) · 2.03 KB
/
test_tokenize_urls.rb
File metadata and controls
62 lines (49 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# coding: utf-8
require 'minitest/autorun'
require 'minitest/spec'
require 'tokenizer'
# Checks how Tokenizer::WhitespaceTokenizer splits text that contains URLs.
#
# Every case feeds one sentence to the tokenizer and expects the URL to come
# back as a single token, with a sentence-final full stop split off as its own
# token.
#
# assert_equal is always called as (expected, actual) so that Minitest labels
# the two values correctly in its failure output.
class TestTokenizerUrls < Minitest::Test
  def setup
    # NOTE(review): :de is presumably a language code (German) -- confirm
    # against the Tokenizer documentation.
    @t = Tokenizer::WhitespaceTokenizer.new(:de)
  end

  # Bare "www." host directly followed by the sentence-final full stop.
  def test_url_tokenization_001
    assert_equal(['test', 'url', 'www.google.com', '.'],
                 @t.tokenize('test url www.google.com.'))
  end

  # Host with a two-part suffix (".com.au") followed by a full stop.
  def test_url_tokenization_002
    assert_equal(['test', 'url', 'www.google.com.au', '.'],
                 @t.tokenize('test url www.google.com.au.'))
  end

  # Same host with an explicit "http://" scheme.
  def test_url_tokenization_003
    assert_equal(['test', 'url', 'http://www.google.com.au', '.'],
                 @t.tokenize('test url http://www.google.com.au.'))
  end

  # Same host with an explicit "https://" scheme.
  def test_url_tokenization_004
    assert_equal(['test', 'url', 'https://www.google.com.au', '.'],
                 @t.tokenize('test url https://www.google.com.au.'))
  end

  # Same host with an explicit "ftp://" scheme.
  def test_url_tokenization_005
    assert_equal(['test', 'url', 'ftp://www.google.com.au', '.'],
                 @t.tokenize('test url ftp://www.google.com.au.'))
  end

  # Capitalised host without a "www." prefix or scheme.
  def test_url_tokenization_006
    assert_equal(['test', 'url', 'Google.com', '.'],
                 @t.tokenize('test url Google.com.'))
  end

  # Short capitalised dotted name ("Au.ac") followed by a full stop.
  def test_url_tokenization_007
    assert_equal(['test', 'url', 'Au.ac', '.'],
                 @t.tokenize('test url Au.ac.'))
  end

  # URL ends the first sentence; a second sentence follows.
  def test_url_tokenization_008
    assert_equal(['test', 'url', 'google.com', '.', 'Another', 'sentence', '.'],
                 @t.tokenize('test url google.com. Another sentence.'))
  end

  # URL with a path and query string in mid-sentence; only the final full
  # stop of the sentence is split off.
  def test_url_tokenization_009
    assert_equal(['test', 'url', 'www.culture.gov.uk/heritage/search_frame.asp?name=/heritage/lib1.html', 'another', 'word', '.'],
                 @t.tokenize('test url www.culture.gov.uk/heritage/search_frame.asp?name=/heritage/lib1.html another word.'))
  end

  # URL with a path and query string (itself ending in ".html") followed by a
  # full stop and a second sentence.
  def test_url_tokenization_010
    assert_equal(['test', 'url', 'www.culture.gov.uk/heritage/search_frame.asp?name=/heritage/lib1.html', '.', 'Another', 'sentence', '.'],
                 @t.tokenize('test url www.culture.gov.uk/heritage/search_frame.asp?name=/heritage/lib1.html. Another sentence.'))
  end
end