1
0
Fork 0
mirror of https://github.com/crawler-commons/crawler-commons synced 2024-05-08 15:36:04 +02:00

Upgrade to JDK 1.8

This commit is contained in:
Lewis John McGibbney 2016-09-29 21:39:24 -07:00
parent fc3378cb95
commit 18bbae908c
9 changed files with 75 additions and 40 deletions

View File

@ -1,7 +1,9 @@
language: java
jdk:
- oraclejdk8
script:
- jdk_switcher use oraclejdk8
- mvn install javadoc:aggregate
notifications:

View File

@ -108,9 +108,11 @@ public class EffectiveTldFinder {
}
/**
* @param hostname the hostname for which to find the
* {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD}
* @return the {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD}
* @param hostname
* the hostname for which to find the
* {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD}
* @return the
* {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD}
*/
public static EffectiveTLD getEffectiveTLD(String hostname) {
if (getInstance().domains.containsKey(hostname)) {
@ -145,7 +147,8 @@ public class EffectiveTldFinder {
* This method uses the effective TLD to determine which component of a FQDN
* is the NIC-assigned domain name.
*
* @param hostname a string for which to obtain a NIC-assigned domain name
* @param hostname
* a string for which to obtain a NIC-assigned domain name
* @return the NIC-assigned domain name
*/
public static String getAssignedDomain(String hostname) {

View File

@ -54,9 +54,12 @@ public class UserAgent implements Serializable {
/**
* Set user agent characteristics
*
* @param agentName an agent name string to associate with the crawler
* @param emailAddress an agent email address string to associate with the crawler
* @param webAddress a Web address string to associate with the crawler
* @param agentName
* an agent name string to associate with the crawler
* @param emailAddress
* an agent email address string to associate with the crawler
* @param webAddress
* a Web address string to associate with the crawler
*/
public UserAgent(String agentName, String emailAddress, String webAddress) {
this(agentName, emailAddress, webAddress, DEFAULT_BROWSER_VERSION);
@ -65,10 +68,14 @@ public class UserAgent implements Serializable {
/**
* Set user agent characteristics
*
* @param agentName an agent name string to associate with the crawler
* @param emailAddress an agent email address string to associate with the crawler
* @param webAddress a Web address string to associate with the crawler
* @param browserVersion a browser version to mimic
* @param agentName
* an agent name string to associate with the crawler
* @param emailAddress
* an agent email address string to associate with the crawler
* @param webAddress
* a Web address string to associate with the crawler
* @param browserVersion
* a browser version to mimic
*/
public UserAgent(String agentName, String emailAddress, String webAddress, String browserVersion) {
this(agentName, emailAddress, webAddress, browserVersion, DEFAULT_CRAWLER_VERSION);
@ -77,11 +84,16 @@ public class UserAgent implements Serializable {
/**
* Set user agent characteristics
*
* @param agentName an agent name string to associate with the crawler
* @param emailAddress an agent email address string to associate with the crawler
* @param webAddress a Web address string to associate with the crawler
* @param browserVersion a browser version to mimic
* @param crawlerVersion the version of your crawler/crawl agent
* @param agentName
* an agent name string to associate with the crawler
* @param emailAddress
* an agent email address string to associate with the crawler
* @param webAddress
* a Web address string to associate with the crawler
* @param browserVersion
* a browser version to mimic
* @param crawlerVersion
* the version of your crawler/crawl agent
*/
public UserAgent(String agentName, String emailAddress, String webAddress, String browserVersion, String crawlerVersion) {
this.agentName = agentName;
@ -106,7 +118,8 @@ public class UserAgent implements Serializable {
* @return User Agent String
*/
public String getUserAgentString() {
// Mozilla/5.0 (compatible; mycrawler/1.0; +http://www.mydomain.com; mycrawler@mydomain.com)
// Mozilla/5.0 (compatible; mycrawler/1.0; +http://www.mydomain.com;
// mycrawler@mydomain.com)
return String.format(Locale.getDefault(), "%s (compatible; %s%s; +%s; %s)", browserVersion, getAgentName(), crawlerConfiguration, webAddress, emailAddress);
}
}

View File

@ -21,7 +21,9 @@ public abstract class URLFilter {
/**
* Returns a modified version of the input URL or null if the URL should be
* removed
* @param urlString a URL string to check against filter(s)
*
* @param urlString
* a URL string to check against filter(s)
* @return a filtered URL
**/
public abstract String filter(String urlString);

View File

@ -22,11 +22,11 @@ import java.io.Serializable;
public abstract class BaseRobotsParser implements Serializable {
/**
* Parse the robots.txt file in <i>content</i>, and return rules appropriate for
* processing paths by <i>userAgent</i>. Note that multiple agent names may be
* provided as comma-separated values; the order of these shouldn't matter,
* as the file is parsed in order, and each agent name found in the file
* will be compared to every agent name found in robotNames.
* Parse the robots.txt file in <i>content</i>, and return rules appropriate
* for processing paths by <i>userAgent</i>. Note that multiple agent names
* may be provided as comma-separated values; the order of these shouldn't
* matter, as the file is parsed in order, and each agent name found in the
* file will be compared to every agent name found in robotNames.
*
* Also note that names are lower-cased before comparison, and that any
* robot name you pass shouldn't contain commas or spaces; if the name has

View File

@ -86,7 +86,8 @@ public class RobotUtils {
* @param fetcher
* Fetcher for downloading robots.txt file
* @param parser
* a {@link crawlercommons.robots.BaseRobotsParser} to use for obtaining appropriate rules
* a {@link crawlercommons.robots.BaseRobotsParser} to use for
* obtaining appropriate rules
* @param robotsUrl
* URL to robots.txt file
* @return Robot rules

View File

@ -65,7 +65,7 @@ public class SimpleRobotRules extends BaseRobotRules {
/*
* (non-Javadoc)
*
*
* @see java.lang.Object#hashCode()
*/
@Override
@ -79,7 +79,7 @@ public class SimpleRobotRules extends BaseRobotRules {
/*
* (non-Javadoc)
*
*
* @see java.lang.Object#equals(java.lang.Object)
*/
@Override
@ -275,7 +275,7 @@ public class SimpleRobotRules extends BaseRobotRules {
/**
* Is our ruleset set up to allow all access?
*
*
* @return true if all URLs are allowed.
*/
@Override
@ -285,7 +285,7 @@ public class SimpleRobotRules extends BaseRobotRules {
/**
* Is our ruleset set up to disallow all access?
*
*
* @return true if no URLs are allowed.
*/
@Override

View File

@ -104,7 +104,8 @@ public class SiteMapURL {
/**
* Set the URL.
*
* @param url of the sitemap
* @param url
* of the sitemap
*/
public void setUrl(URL url) {
this.url = url;
@ -138,7 +139,8 @@ public class SiteMapURL {
/**
* Set when this URL was last modified.
*
* @param lastModified the last time the sitemap was modified
* @param lastModified
* the last time the sitemap was modified
*/
public void setLastModified(String lastModified) {
this.lastModified = SiteMap.convertToDate(lastModified);
@ -147,7 +149,8 @@ public class SiteMapURL {
/**
* Set when this URL was last modified.
*
* @param lastModified the last time the sitemap was modified
* @param lastModified
* the last time the sitemap was modified
*/
public void setLastModified(Date lastModified) {
this.lastModified = lastModified;
@ -166,7 +169,8 @@ public class SiteMapURL {
* Set the URL's priority to a value between [0.0 - 1.0] (Default Priority
* is used if the given priority is out of range).
*
* @param priority a value between [0.0 - 1.0]
* @param priority
* a value between [0.0 - 1.0]
*/
public void setPriority(double priority) {
@ -183,7 +187,8 @@ public class SiteMapURL {
* Set the URL's priority to a value between [0.0 - 1.0] (Default Priority
* is used if the given priority is missing or is out of range).
*
* @param priorityStr a value between [0.0 - 1.0]
* @param priorityStr
* a value between [0.0 - 1.0]
*/
public void setPriority(String priorityStr) {
try {
@ -211,8 +216,9 @@ public class SiteMapURL {
/**
* Set the URL's change frequency
*
* @param changeFreq a {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency}
* for this sitemap
* @param changeFreq
* a {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency}
* for this sitemap
*/
public void setChangeFrequency(ChangeFrequency changeFreq) {
this.changeFreq = changeFreq;
@ -222,8 +228,10 @@ public class SiteMapURL {
* Set the URL's change frequency. In case of a bad ChangeFrequency, the
* current frequency in this instance will be set to NULL
*
* @param changeFreq a string representing a
* {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency} for this sitemap
* @param changeFreq
* a string representing a
* {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency} for
* this sitemap
*/
public void setChangeFrequency(String changeFreq) {
@ -253,7 +261,9 @@ public class SiteMapURL {
/**
* Valid means that it follows the official guidelines that the siteMapURL
* must be under the base url
* @param valid whether the Sitemap is valid syntax or not
*
* @param valid
* whether the Sitemap is valid syntax or not
*/
public void setValid(boolean valid) {
this.valid = valid;
@ -261,6 +271,7 @@ public class SiteMapURL {
/**
* Is the siteMapURL under the base url ?
*
* @return true if the syntax is valid, false otherwise
*/
public boolean isValid() {

View File

@ -30,7 +30,9 @@ public class UnknownFormatException extends Exception {
/**
* Constructor receives some kind of message that is saved in an instance
* variable.
* @param err a String object to use within the Exception
*
* @param err
* a String object to use within the Exception
*/
public UnknownFormatException(String err) {
super(err);
@ -40,6 +42,7 @@ public class UnknownFormatException extends Exception {
/**
* public method, callable by exception catcher. It returns the error
* message.
*
* @return a populated Exception as a String
*/
public String getError() {