Mirror of https://github.com/crawler-commons/crawler-commons (synced 2024-09-20 08:42:15 +02:00)
Make RobotRules accessible #134
- Makes the SimpleRobotRules._rules property protected and adds getters for SimpleRobotRules._rules and RobotRule's properties
- Changes the SimpleRobotRulesParser return types from BaseRobotRules to SimpleRobotRules, allowing access to the concrete class without nasty type casts while still obeying the superclass contract (see the sketch below)
parent e25309d26c
commit 7bef14d386
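Both parser methods changed below narrow their return type from BaseRobotRules to SimpleRobotRules; Java's covariant return types keep the @Override legal against BaseRobotsParser. A minimal sketch of what this enables for callers, assuming the crawlercommons.robots package and placeholder inputs (URL, agent name, and bytes are illustrative, not from the commit):

import crawlercommons.robots.SimpleRobotRules;
import crawlercommons.robots.SimpleRobotRulesParser;

public class NoCastExample {
    static SimpleRobotRules parse(byte[] robotsTxtBytes) {
        SimpleRobotRulesParser parser = new SimpleRobotRulesParser();
        // Before this commit the result had to be downcast:
        //   SimpleRobotRules rules = (SimpleRobotRules) parser.parseContent(...);
        // With the covariant return type, the cast disappears:
        return parser.parseContent("https://example.com/robots.txt",
                robotsTxtBytes, "text/plain", "mycrawler");
    }
}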
@@ -13,6 +13,7 @@ Current Development 0.10-SNAPSHOT (yyyy-mm-dd)
 - Handle new suffixes in PaidLevelDomain (kkrugler) #183
 - Remove Tika dependency (kkrugler) #199
 - Improve MIME detection for sitemaps (sebastian-nagel) #200
+- Make RobotRules accessible #134

 Release 0.9 (2017-10-27)
 - [Sitemaps] Removed DOM-based sitemap parser (jnioche) #177
@@ -21,6 +21,7 @@ import java.net.URL;
 import java.net.URLDecoder;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.List;

 /**
  * Result from parsing a single robots.txt file - which means we get a set of
@@ -38,6 +39,7 @@ public class SimpleRobotRules extends BaseRobotRules {
      * Single rule that maps from a path prefix to an allow flag.
      */
     public static class RobotRule implements Comparable<RobotRule>, Serializable {

         String _prefix;
         boolean _allow;
@@ -46,6 +48,14 @@ public class SimpleRobotRules extends BaseRobotRules {
             _allow = allow;
         }

+        public boolean isAllow() {
+            return this._allow;
+        }
+
+        public String getPrefix() {
+            return this._prefix;
+        }
+
         // Sort from longest to shortest rules.
         @Override
         public int compareTo(RobotRule o) {
@@ -103,8 +113,8 @@ public class SimpleRobotRules extends BaseRobotRules {

     }

-    private ArrayList<RobotRule> _rules;
-    private RobotRulesMode _mode;
+    protected ArrayList<RobotRule> _rules;
+    protected RobotRulesMode _mode;

     public SimpleRobotRules() {
         this(RobotRulesMode.ALLOW_SOME);
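With _rules and _mode protected rather than private, subclasses of SimpleRobotRules can read the parsed state directly. A sketch with a hypothetical subclass (CountingRobotRules is illustrative, not part of the commit):

import crawlercommons.robots.SimpleRobotRules;

public class CountingRobotRules extends SimpleRobotRules {
    // _rules is accessible here now that it is protected.
    public int ruleCount() {
        return _rules.size();
    }
}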
@@ -131,6 +141,10 @@ public class SimpleRobotRules extends BaseRobotRules {
         _rules.add(new RobotRule(prefix, allow));
     }

+    public List<RobotRule> getRobotRules() {
+        return this._rules;
+    }
+
     public boolean isAllowed(String url) {
         if (_mode == RobotRulesMode.ALLOW_NONE) {
             return false;
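The new getRobotRules() getter, combined with the RobotRule getters added above, lets callers enumerate the parsed rules, e.g. for logging or debugging. A sketch assuming a SimpleRobotRules instance obtained from the parser:

import crawlercommons.robots.SimpleRobotRules;
import crawlercommons.robots.SimpleRobotRules.RobotRule;

public class RuleDumpExample {
    // Print every parsed rule; `rules` would come from parseContent().
    static void dump(SimpleRobotRules rules) {
        for (RobotRule rule : rules.getRobotRules()) {
            System.out.println((rule.isAllow() ? "Allow: " : "Disallow: ") + rule.getPrefix());
        }
    }
}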
@@ -351,7 +351,7 @@ public class SimpleRobotRulesParser extends BaseRobotsParser {
     private int _numWarnings;

     @Override
-    public BaseRobotRules failedFetch(int httpStatusCode) {
+    public SimpleRobotRules failedFetch(int httpStatusCode) {
         SimpleRobotRules result;

         if ((httpStatusCode >= 200) && (httpStatusCode < 300)) {
@@ -386,7 +386,7 @@ public class SimpleRobotRulesParser extends BaseRobotsParser {
      * byte[], java.lang.String, java.lang.String)
      */
     @Override
-    public BaseRobotRules parseContent(String url, byte[] content, String contentType, String robotNames) {
+    public SimpleRobotRules parseContent(String url, byte[] content, String contentType, String robotNames) {
         _numWarnings = 0;

         // If there's nothing there, treat it like we have no restrictions.