mirror of https://github.com/crawler-commons/crawler-commons synced 2024-09-22 09:10:42 +02:00

Added test to validate proper handling of user agent crawler names that consist of multiple words.
kkrugler_lists@transpac.com 2012-08-15 14:00:24 +00:00
parent bf3666d2b2
commit 78e4ae5e9e


@@ -532,6 +532,19 @@ public class SimpleRobotRulesParserTest {
         assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
     }

+    @Test
+    public void testMultiWordAgentName() throws MalformedURLException {
+        // When the user agent name has a space in it.
+        final String simpleRobotsTxt = "User-agent: Download Ninja" + CRLF
+                        + "Disallow: /index.html" + CRLF
+                        + "Allow: /";
+
+        BaseRobotRules rules = createRobotRules("Download Ninja", simpleRobotsTxt.getBytes());
+        assertFalse(rules.isAllowed("http://www.domain.com/index.html"));
+        assertTrue(rules.isAllowed("http://www.domain.com/anypage.html"));
+    }
+
     @Test
     public void testUnsupportedFields() throws MalformedURLException {
         // When we have a new field type that we don't know about.
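
For readers outside the test harness, here is a minimal sketch of how a crawler might check these rules for a multi-word agent name, assuming the crawlercommons.robots.SimpleRobotRulesParser.parseContent(url, content, contentType, robotNames) API; the robots.txt URL, content type, and expected results mirror the illustrative values asserted in the test above.

import crawlercommons.robots.BaseRobotRules;
import crawlercommons.robots.SimpleRobotRulesParser;

public class MultiWordAgentExample {
    public static void main(String[] args) {
        // robots.txt naming a crawler whose user agent contains a space
        String robotsTxt = "User-agent: Download Ninja\r\n"
                + "Disallow: /index.html\r\n"
                + "Allow: /";

        SimpleRobotRulesParser parser = new SimpleRobotRulesParser();

        // parseContent takes the robots.txt URL, its raw bytes, the content type,
        // and the user agent name to match against the User-agent lines
        BaseRobotRules rules = parser.parseContent(
                "http://www.domain.com/robots.txt",
                robotsTxt.getBytes(),
                "text/plain",
                "Download Ninja");

        System.out.println(rules.isAllowed("http://www.domain.com/index.html"));   // false, per the test
        System.out.println(rules.isAllowed("http://www.domain.com/anypage.html")); // true, per the test
    }
}

The createRobotRules helper used in the test is presumably a thin wrapper around a call like this; the exact parseContent signature may differ between crawler-commons releases.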