1
0
Fork 0
mirror of https://github.com/crawler-commons/crawler-commons synced 2024-05-20 02:26:03 +02:00

Upgrade to JDK 1.8

This commit is contained in:
Lewis John McGibbney 2016-09-29 21:39:24 -07:00
parent fc3378cb95
commit 18bbae908c
9 changed files with 75 additions and 40 deletions

View File

@ -1,7 +1,9 @@
language: java language: java
jdk:
- oraclejdk8
script: script:
- jdk_switcher use oraclejdk8
- mvn install javadoc:aggregate - mvn install javadoc:aggregate
notifications: notifications:

View File

@ -108,9 +108,11 @@ public class EffectiveTldFinder {
} }
/** /**
* @param hostname the hostname for which to find the * @param hostname
* {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD} * the hostname for which to find the
* @return the {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD} * {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD}
* @return the
* {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD}
*/ */
public static EffectiveTLD getEffectiveTLD(String hostname) { public static EffectiveTLD getEffectiveTLD(String hostname) {
if (getInstance().domains.containsKey(hostname)) { if (getInstance().domains.containsKey(hostname)) {
@ -145,7 +147,8 @@ public class EffectiveTldFinder {
* This method uses the effective TLD to determine which component of a FQDN * This method uses the effective TLD to determine which component of a FQDN
* is the NIC-assigned domain name. * is the NIC-assigned domain name.
* *
* @param hostname a string for which to obtain a NIC-assigned domain name * @param hostname
* a string for which to obtain a NIC-assigned domain name
* @return the NIC-assigned domain name * @return the NIC-assigned domain name
*/ */
public static String getAssignedDomain(String hostname) { public static String getAssignedDomain(String hostname) {

View File

@ -54,9 +54,12 @@ public class UserAgent implements Serializable {
/** /**
* Set user agent characteristics * Set user agent characteristics
* *
* @param agentName an agent name string to associate with the crawler * @param agentName
* @param emailAddress an agent email address string to associate with the crawler * an agent name string to associate with the crawler
* @param webAddress a Web address string to associate with the crawler * @param emailAddress
* an agent email address string to associate with the crawler
* @param webAddress
* a Web address string to associate with the crawler
*/ */
public UserAgent(String agentName, String emailAddress, String webAddress) { public UserAgent(String agentName, String emailAddress, String webAddress) {
this(agentName, emailAddress, webAddress, DEFAULT_BROWSER_VERSION); this(agentName, emailAddress, webAddress, DEFAULT_BROWSER_VERSION);
@ -65,10 +68,14 @@ public class UserAgent implements Serializable {
/** /**
* Set user agent characteristics * Set user agent characteristics
* *
* @param agentName an agent name string to associate with the crawler * @param agentName
* @param emailAddress an agent email address string to associate with the crawler * an agent name string to associate with the crawler
* @param webAddress a Web address string to associate with the crawler * @param emailAddress
* @param browserVersion a browser version to mimic * an agent email address string to associate with the crawler
* @param webAddress
* a Web address string to associate with the crawler
* @param browserVersion
* a browser version to mimic
*/ */
public UserAgent(String agentName, String emailAddress, String webAddress, String browserVersion) { public UserAgent(String agentName, String emailAddress, String webAddress, String browserVersion) {
this(agentName, emailAddress, webAddress, browserVersion, DEFAULT_CRAWLER_VERSION); this(agentName, emailAddress, webAddress, browserVersion, DEFAULT_CRAWLER_VERSION);
@ -77,11 +84,16 @@ public class UserAgent implements Serializable {
/** /**
* Set user agent characteristics * Set user agent characteristics
* *
* @param agentName an agent name string to associate with the crawler * @param agentName
* @param emailAddress an agent email address string to associate with the crawler * an agent name string to associate with the crawler
* @param webAddress a Web address string to associate with the crawler * @param emailAddress
* @param browserVersion a browser version to mimic * an agent email address string to associate with the crawler
* @param crawlerVersion the version of your crawler/crawl agent * @param webAddress
* a Web address string to associate with the crawler
* @param browserVersion
* a browser version to mimic
* @param crawlerVersion
* the version of your crawler/crawl agent
*/ */
public UserAgent(String agentName, String emailAddress, String webAddress, String browserVersion, String crawlerVersion) { public UserAgent(String agentName, String emailAddress, String webAddress, String browserVersion, String crawlerVersion) {
this.agentName = agentName; this.agentName = agentName;
@ -106,7 +118,8 @@ public class UserAgent implements Serializable {
* @return User Agent String * @return User Agent String
*/ */
public String getUserAgentString() { public String getUserAgentString() {
// Mozilla/5.0 (compatible; mycrawler/1.0; +http://www.mydomain.com; mycrawler@mydomain.com) // Mozilla/5.0 (compatible; mycrawler/1.0; +http://www.mydomain.com;
// mycrawler@mydomain.com)
return String.format(Locale.getDefault(), "%s (compatible; %s%s; +%s; %s)", browserVersion, getAgentName(), crawlerConfiguration, webAddress, emailAddress); return String.format(Locale.getDefault(), "%s (compatible; %s%s; +%s; %s)", browserVersion, getAgentName(), crawlerConfiguration, webAddress, emailAddress);
} }
} }

View File

@ -21,7 +21,9 @@ public abstract class URLFilter {
/** /**
* Returns a modified version of the input URL or null if the URL should be * Returns a modified version of the input URL or null if the URL should be
* removed * removed
* @param urlString a URL string to check against filter(s) *
* @param urlString
* a URL string to check against filter(s)
* @return a filtered URL * @return a filtered URL
**/ **/
public abstract String filter(String urlString); public abstract String filter(String urlString);

View File

@ -22,11 +22,11 @@ import java.io.Serializable;
public abstract class BaseRobotsParser implements Serializable { public abstract class BaseRobotsParser implements Serializable {
/** /**
* Parse the robots.txt file in <i>content</i>, and return rules appropriate for * Parse the robots.txt file in <i>content</i>, and return rules appropriate
* processing paths by <i>userAgent</i>. Note that multiple agent names may be * for processing paths by <i>userAgent</i>. Note that multiple agent names
* provided as comma-separated values; the order of these shouldn't matter, * may be provided as comma-separated values; the order of these shouldn't
* as the file is parsed in order, and each agent name found in the file * matter, as the file is parsed in order, and each agent name found in the
* will be compared to every agent name found in robotNames. * file will be compared to every agent name found in robotNames.
* *
* Also note that names are lower-cased before comparison, and that any * Also note that names are lower-cased before comparison, and that any
* robot name you pass shouldn't contain commas or spaces; if the name has * robot name you pass shouldn't contain commas or spaces; if the name has

View File

@ -86,7 +86,8 @@ public class RobotUtils {
* @param fetcher * @param fetcher
* Fetcher for downloading robots.txt file * Fetcher for downloading robots.txt file
* @param parser * @param parser
* a {@link crawlercommons.robots.BaseRobotsParser} to use for obtaining appropriate rules * a {@link crawlercommons.robots.BaseRobotsParser} to use for
* obtaining appropriate rules
* @param robotsUrl * @param robotsUrl
* URL to robots.txt file * URL to robots.txt file
* @return Robot rules * @return Robot rules

View File

@ -65,7 +65,7 @@ public class SimpleRobotRules extends BaseRobotRules {
/* /*
* (non-Javadoc) * (non-Javadoc)
* *
* @see java.lang.Object#hashCode() * @see java.lang.Object#hashCode()
*/ */
@Override @Override
@ -79,7 +79,7 @@ public class SimpleRobotRules extends BaseRobotRules {
/* /*
* (non-Javadoc) * (non-Javadoc)
* *
* @see java.lang.Object#equals(java.lang.Object) * @see java.lang.Object#equals(java.lang.Object)
*/ */
@Override @Override
@ -275,7 +275,7 @@ public class SimpleRobotRules extends BaseRobotRules {
/** /**
* Is our ruleset set up to allow all access? * Is our ruleset set up to allow all access?
* *
* @return true if all URLs are allowed. * @return true if all URLs are allowed.
*/ */
@Override @Override
@ -285,7 +285,7 @@ public class SimpleRobotRules extends BaseRobotRules {
/** /**
* Is our ruleset set up to disallow all access? * Is our ruleset set up to disallow all access?
* *
* @return true if no URLs are allowed. * @return true if no URLs are allowed.
*/ */
@Override @Override

View File

@ -104,7 +104,8 @@ public class SiteMapURL {
/** /**
* Set the URL. * Set the URL.
* *
* @param url of the sitemap * @param url
* of the sitemap
*/ */
public void setUrl(URL url) { public void setUrl(URL url) {
this.url = url; this.url = url;
@ -138,7 +139,8 @@ public class SiteMapURL {
/** /**
* Set when this URL was last modified. * Set when this URL was last modified.
* *
* @param lastModified the last time the sitemap was modified * @param lastModified
* the last time the sitemap was modified
*/ */
public void setLastModified(String lastModified) { public void setLastModified(String lastModified) {
this.lastModified = SiteMap.convertToDate(lastModified); this.lastModified = SiteMap.convertToDate(lastModified);
@ -147,7 +149,8 @@ public class SiteMapURL {
/** /**
* Set when this URL was last modified. * Set when this URL was last modified.
* *
* @param lastModified the last time the sitemap was modified * @param lastModified
* the last time the sitemap was modified
*/ */
public void setLastModified(Date lastModified) { public void setLastModified(Date lastModified) {
this.lastModified = lastModified; this.lastModified = lastModified;
@ -166,7 +169,8 @@ public class SiteMapURL {
* Set the URL's priority to a value between [0.0 - 1.0] (Default Priority * Set the URL's priority to a value between [0.0 - 1.0] (Default Priority
* is used if the given priority is out of range). * is used if the given priority is out of range).
* *
* @param priority a value between [0.0 - 1.0] * @param priority
* a value between [0.0 - 1.0]
*/ */
public void setPriority(double priority) { public void setPriority(double priority) {
@ -183,7 +187,8 @@ public class SiteMapURL {
* Set the URL's priority to a value between [0.0 - 1.0] (Default Priority * Set the URL's priority to a value between [0.0 - 1.0] (Default Priority
* is used if the given priority is missing or out of range). * is used if the given priority is missing or out of range).
* *
* @param priorityStr a value between [0.0 - 1.0] * @param priorityStr
* a value between [0.0 - 1.0]
*/ */
public void setPriority(String priorityStr) { public void setPriority(String priorityStr) {
try { try {
@ -211,8 +216,9 @@ public class SiteMapURL {
/** /**
* Set the URL's change frequency * Set the URL's change frequency
* *
* @param changeFreq a {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency} * @param changeFreq
* for this sitemap * a {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency}
* for this sitemap
*/ */
public void setChangeFrequency(ChangeFrequency changeFreq) { public void setChangeFrequency(ChangeFrequency changeFreq) {
this.changeFreq = changeFreq; this.changeFreq = changeFreq;
@ -222,8 +228,10 @@ public class SiteMapURL {
* Set the URL's change frequency. In case of a bad ChangeFrequency, the * Set the URL's change frequency. In case of a bad ChangeFrequency, the
* current frequency in this instance will be set to NULL * current frequency in this instance will be set to NULL
* *
* @param changeFreq a string representing a * @param changeFreq
* {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency} for this sitemap * a string representing a
* {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency} for
* this sitemap
*/ */
public void setChangeFrequency(String changeFreq) { public void setChangeFrequency(String changeFreq) {
@ -253,7 +261,9 @@ public class SiteMapURL {
/** /**
* Valid means that it follows the official guidelines that the siteMapURL * Valid means that it follows the official guidelines that the siteMapURL
* must be under the base url * must be under the base url
* @param valid whether the Sitemap is valid syntax or not *
* @param valid
* whether the Sitemap is valid syntax or not
*/ */
public void setValid(boolean valid) { public void setValid(boolean valid) {
this.valid = valid; this.valid = valid;
@ -261,6 +271,7 @@ public class SiteMapURL {
/** /**
* Is the siteMapURL under the base url ? * Is the siteMapURL under the base url ?
*
* @return true if the syntax is valid, false otherwise * @return true if the syntax is valid, false otherwise
*/ */
public boolean isValid() { public boolean isValid() {

View File

@ -30,7 +30,9 @@ public class UnknownFormatException extends Exception {
/** /**
* Constructor receives some kind of message that is saved in an instance * Constructor receives some kind of message that is saved in an instance
* variable. * variable.
* @param err a String object to use within the Exception *
* @param err
* a String object to use within the Exception
*/ */
public UnknownFormatException(String err) { public UnknownFormatException(String err) {
super(err); super(err);
@ -40,6 +42,7 @@ public class UnknownFormatException extends Exception {
/** /**
* public method, callable by exception catcher. It returns the error * public method, callable by exception catcher. It returns the error
* message. * message.
*
* @return a populated Exception as a String * @return a populated Exception as a String
*/ */
public String getError() { public String getError() {