diff --git a/doc/javadoc/allclasses-frame.html b/doc/javadoc/allclasses-frame.html new file mode 100644 index 0000000..e3f731d --- /dev/null +++ b/doc/javadoc/allclasses-frame.html @@ -0,0 +1,101 @@ + + + + + + +All Classes (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + +All Classes +
+ + + + + +
AbortedFetchException +
+AbortedFetchReason +
+AbstractSiteMap +
+AbstractSiteMap.SitemapType +
+BadProtocolFetchException +
+BaseFetcher +
+BaseFetchException +
+BaseHttpFetcher +
+BaseHttpFetcher.RedirectMode +
+BaseRobotRules +
+BaseRobotsParser +
+EffectiveTldFinder +
+EffectiveTldFinder.EffectiveTLD +
+EncodingUtils +
+EncodingUtils.ExpandedResult +
+FetchedResult +
+HttpFetchException +
+IOFetchException +
+PaidLevelDomain +
+Payload +
+RedirectFetchException +
+RedirectFetchException.RedirectExceptionReason +
+RobotUtils +
+SimpleFileFetcher +
+SimpleHttpFetcher +
+SimpleRobotRules +
+SimpleRobotRules.RobotRulesMode +
+SimpleRobotRulesParser +
+SiteMap +
+SiteMapIndex +
+SiteMapParser +
+SiteMapURL +
+SiteMapURL.ChangeFrequency +
+UnknownFormatException +
+UrlFetchException +
+UserAgent +
+
+ + + diff --git a/doc/javadoc/allclasses-noframe.html b/doc/javadoc/allclasses-noframe.html new file mode 100644 index 0000000..aecab4c --- /dev/null +++ b/doc/javadoc/allclasses-noframe.html @@ -0,0 +1,101 @@ + + + + + + +All Classes (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + +All Classes +
+ + + + + +
AbortedFetchException +
+AbortedFetchReason +
+AbstractSiteMap +
+AbstractSiteMap.SitemapType +
+BadProtocolFetchException +
+BaseFetcher +
+BaseFetchException +
+BaseHttpFetcher +
+BaseHttpFetcher.RedirectMode +
+BaseRobotRules +
+BaseRobotsParser +
+EffectiveTldFinder +
+EffectiveTldFinder.EffectiveTLD +
+EncodingUtils +
+EncodingUtils.ExpandedResult +
+FetchedResult +
+HttpFetchException +
+IOFetchException +
+PaidLevelDomain +
+Payload +
+RedirectFetchException +
+RedirectFetchException.RedirectExceptionReason +
+RobotUtils +
+SimpleFileFetcher +
+SimpleHttpFetcher +
+SimpleRobotRules +
+SimpleRobotRules.RobotRulesMode +
+SimpleRobotRulesParser +
+SiteMap +
+SiteMapIndex +
+SiteMapParser +
+SiteMapURL +
+SiteMapURL.ChangeFrequency +
+UnknownFormatException +
+UrlFetchException +
+UserAgent +
+
+ + + diff --git a/doc/javadoc/constant-values.html b/doc/javadoc/constant-values.html new file mode 100644 index 0000000..13141e9 --- /dev/null +++ b/doc/javadoc/constant-values.html @@ -0,0 +1,326 @@ + + + + + + +Constant Field Values (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Constant Field Values

+
+
+Contents + + + + + + +
+crawlercommons.fetcher.*
+ +

+ + + + + + + + + + + + +
crawlercommons.fetcher.BaseFetcher
+public static final intDEFAULT_MAX_CONTENT_SIZE65536
+ +

+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
crawlercommons.fetcher.http.BaseHttpFetcher
+public static final java.lang.StringDEFAULT_ACCEPT_LANGUAGE"en-us,en-gb,en;q=0.7,*;q=0.3"
+public static final intDEFAULT_MAX_CONNECTIONS_PER_HOST2
+public static final intDEFAULT_MAX_REDIRECTS20
+public static final intDEFAULT_MIN_RESPONSE_RATE-2147483648
+public static final intNO_MIN_RESPONSE_RATE-2147483648
+public static final intNO_REDIRECTS0
+ +

+ +

+ + + + + + + + + + + + +
crawlercommons.fetcher.http.UserAgent
+public static final java.lang.StringDEFAULT_BROWSER_VERSION"Mozilla/5.0"
+ +

+ +

+ + + + + +
+crawlercommons.robots.*
+ +

+ + + + + + + + + + + + +
crawlercommons.robots.BaseRobotRules
+public static final longUNSET_CRAWL_DELAY-9223372036854775808L
+ +

+ +

+ + + + + +
+crawlercommons.url.*
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
crawlercommons.url.EffectiveTldFinder
+public static final java.lang.StringCOMMENT"//"
+public static final charDOT46
+public static final java.lang.StringDOT_REGEX"\\."
+public static final java.lang.StringETLD_DATA"/effective_tld_names.dat"
+public static final java.lang.StringEXCEPTION"!"
+public static final java.lang.StringWILD_CARD"*."
+ +

+ +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/AbortedFetchException.html b/doc/javadoc/crawlercommons/fetcher/AbortedFetchException.html new file mode 100644 index 0000000..6f66ef8 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/AbortedFetchException.html @@ -0,0 +1,312 @@ + + + + + + +AbortedFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class AbortedFetchException

+
+java.lang.Object
+  extended by java.lang.Throwable
+      extended by java.lang.Exception
+          extended by crawlercommons.fetcher.BaseFetchException
+              extended by crawlercommons.fetcher.AbortedFetchException
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class AbortedFetchException
extends BaseFetchException
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + + + + +
+Constructor Summary
AbortedFetchException() + +
+           
AbortedFetchException(java.lang.String url, + AbortedFetchReason abortReason) + +
+           
AbortedFetchException(java.lang.String url, + java.lang.String msg, + AbortedFetchReason abortReason) + +
+           
+  + + + + + + + + + + + +
+Method Summary
+ AbortedFetchReasongetAbortReason() + +
+           
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.BaseFetchException
compareToBase, equals, getCause, getLocalizedMessage, getMessage, getStackTrace, getUrl, hashCode, initCause, printStackTrace, printStackTrace, printStackTrace, readBaseFields, setStackTrace, toString, writeBaseFields
+ + + + + + + +
Methods inherited from class java.lang.Throwable
fillInStackTrace
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+AbortedFetchException

+
+public AbortedFetchException()
+
+
+
+ +

+AbortedFetchException

+
+public AbortedFetchException(java.lang.String url,
+                             AbortedFetchReason abortReason)
+
+
+
+ +

+AbortedFetchException

+
+public AbortedFetchException(java.lang.String url,
+                             java.lang.String msg,
+                             AbortedFetchReason abortReason)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getAbortReason

+
+public AbortedFetchReason getAbortReason()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/AbortedFetchReason.html b/doc/javadoc/crawlercommons/fetcher/AbortedFetchReason.html new file mode 100644 index 0000000..675468a --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/AbortedFetchReason.html @@ -0,0 +1,354 @@ + + + + + + +AbortedFetchReason (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Enum AbortedFetchReason

+
+java.lang.Object
+  extended by java.lang.Enum<AbortedFetchReason>
+      extended by crawlercommons.fetcher.AbortedFetchReason
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Comparable<AbortedFetchReason>
+
+
+
+
public enum AbortedFetchReason
extends java.lang.Enum<AbortedFetchReason>
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + +
+Enum Constant Summary
CONTENT_SIZE + +
+           
INTERRUPTED + +
+           
INVALID_MIMETYPE + +
+           
SLOW_RESPONSE_RATE + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static AbortedFetchReasonvalueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static AbortedFetchReason[]values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+ + + + + + + +
Methods inherited from class java.lang.Enum
clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+ + + + + + + +
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Enum Constant Detail
+ +

+SLOW_RESPONSE_RATE

+
+public static final AbortedFetchReason SLOW_RESPONSE_RATE
+
+
+
+
+
+ +

+INVALID_MIMETYPE

+
+public static final AbortedFetchReason INVALID_MIMETYPE
+
+
+
+
+
+ +

+INTERRUPTED

+
+public static final AbortedFetchReason INTERRUPTED
+
+
+
+
+
+ +

+CONTENT_SIZE

+
+public static final AbortedFetchReason CONTENT_SIZE
+
+
+
+
+ + + + + + + + +
+Method Detail
+ +

+values

+
+public static AbortedFetchReason[] values()
+
+
Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
+for (AbortedFetchReason c : AbortedFetchReason.values())
+    System.out.println(c);
+
+

+

+ +
Returns:
an array containing the constants of this enum type, in +the order they are declared
+
+
+
+ +

+valueOf

+
+public static AbortedFetchReason valueOf(java.lang.String name)
+
+
Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.) +

+

+
Parameters:
name - the name of the enum constant to be returned. +
Returns:
the enum constant with the specified name +
Throws: +
java.lang.IllegalArgumentException - if this enum type has no constant +with the specified name +
java.lang.NullPointerException - if the argument is null
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/BadProtocolFetchException.html b/doc/javadoc/crawlercommons/fetcher/BadProtocolFetchException.html new file mode 100644 index 0000000..10efa46 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/BadProtocolFetchException.html @@ -0,0 +1,264 @@ + + + + + + +BadProtocolFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class BadProtocolFetchException

+
+java.lang.Object
+  extended by java.lang.Throwable
+      extended by java.lang.Exception
+          extended by crawlercommons.fetcher.BaseFetchException
+              extended by crawlercommons.fetcher.BadProtocolFetchException
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class BadProtocolFetchException
extends BaseFetchException
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + +
+Constructor Summary
BadProtocolFetchException() + +
+           
BadProtocolFetchException(java.lang.String url) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.BaseFetchException
compareToBase, equals, getCause, getLocalizedMessage, getMessage, getStackTrace, getUrl, hashCode, initCause, printStackTrace, printStackTrace, printStackTrace, readBaseFields, setStackTrace, toString, writeBaseFields
+ + + + + + + +
Methods inherited from class java.lang.Throwable
fillInStackTrace
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+BadProtocolFetchException

+
+public BadProtocolFetchException()
+
+
+
+ +

+BadProtocolFetchException

+
+public BadProtocolFetchException(java.lang.String url)
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/BaseFetchException.html b/doc/javadoc/crawlercommons/fetcher/BaseFetchException.html new file mode 100644 index 0000000..a75b347 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/BaseFetchException.html @@ -0,0 +1,672 @@ + + + + + + +BaseFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class BaseFetchException

+
+java.lang.Object
+  extended by java.lang.Throwable
+      extended by java.lang.Exception
+          extended by crawlercommons.fetcher.BaseFetchException
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
Direct Known Subclasses:
AbortedFetchException, BadProtocolFetchException, HttpFetchException, IOFetchException, RedirectFetchException, UrlFetchException
+
+
+
+
public abstract class BaseFetchException
extends java.lang.Exception
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Constructor Summary
+protected BaseFetchException() + +
+           
+protected BaseFetchException(java.lang.String url) + +
+           
+protected BaseFetchException(java.lang.String url, + java.lang.Exception e) + +
+           
+protected BaseFetchException(java.lang.String url, + java.lang.String msg) + +
+           
+protected BaseFetchException(java.lang.String url, + java.lang.String msg, + java.lang.Exception e) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected  intcompareToBase(BaseFetchException e) + +
+           
+ booleanequals(java.lang.Object obj) + +
+           
+ java.lang.ThrowablegetCause() + +
+           
+ java.lang.StringgetLocalizedMessage() + +
+           
+ java.lang.StringgetMessage() + +
+           
+ java.lang.StackTraceElement[]getStackTrace() + +
+           
+ java.lang.StringgetUrl() + +
+           
+ inthashCode() + +
+           
+ java.lang.ThrowableinitCause(java.lang.Throwable cause) + +
+           
+ voidprintStackTrace() + +
+           
+ voidprintStackTrace(java.io.PrintStream s) + +
+           
+ voidprintStackTrace(java.io.PrintWriter s) + +
+           
+protected  voidreadBaseFields(java.io.DataInput input) + +
+           
+ voidsetStackTrace(java.lang.StackTraceElement[] stackTrace) + +
+           
+ java.lang.StringtoString() + +
+           
+protected  voidwriteBaseFields(java.io.DataOutput output) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Throwable
fillInStackTrace
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+BaseFetchException

+
+protected BaseFetchException()
+
+
+
+ +

+BaseFetchException

+
+protected BaseFetchException(java.lang.String url)
+
+
+
+ +

+BaseFetchException

+
+protected BaseFetchException(java.lang.String url,
+                             java.lang.String msg)
+
+
+
+ +

+BaseFetchException

+
+protected BaseFetchException(java.lang.String url,
+                             java.lang.Exception e)
+
+
+
+ +

+BaseFetchException

+
+protected BaseFetchException(java.lang.String url,
+                             java.lang.String msg,
+                             java.lang.Exception e)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getUrl

+
+public java.lang.String getUrl()
+
+
+
+
+
+
+ +

+compareToBase

+
+protected int compareToBase(BaseFetchException e)
+
+
+
+
+
+
+ +

+equals

+
+public boolean equals(java.lang.Object obj)
+
+
+
Overrides:
equals in class java.lang.Object
+
+
+
+
+
+
+ +

+getCause

+
+public java.lang.Throwable getCause()
+
+
+
Overrides:
getCause in class java.lang.Throwable
+
+
+
+
+
+
+ +

+getLocalizedMessage

+
+public java.lang.String getLocalizedMessage()
+
+
+
Overrides:
getLocalizedMessage in class java.lang.Throwable
+
+
+
+
+
+
+ +

+getMessage

+
+public java.lang.String getMessage()
+
+
+
Overrides:
getMessage in class java.lang.Throwable
+
+
+
+
+
+
+ +

+getStackTrace

+
+public java.lang.StackTraceElement[] getStackTrace()
+
+
+
Overrides:
getStackTrace in class java.lang.Throwable
+
+
+
+
+
+
+ +

+hashCode

+
+public int hashCode()
+
+
+
Overrides:
hashCode in class java.lang.Object
+
+
+
+
+
+
+ +

+initCause

+
+public java.lang.Throwable initCause(java.lang.Throwable cause)
+
+
+
Overrides:
initCause in class java.lang.Throwable
+
+
+
+
+
+
+ +

+printStackTrace

+
+public void printStackTrace()
+
+
+
Overrides:
printStackTrace in class java.lang.Throwable
+
+
+
+
+
+
+ +

+printStackTrace

+
+public void printStackTrace(java.io.PrintStream s)
+
+
+
Overrides:
printStackTrace in class java.lang.Throwable
+
+
+
+
+
+
+ +

+printStackTrace

+
+public void printStackTrace(java.io.PrintWriter s)
+
+
+
Overrides:
printStackTrace in class java.lang.Throwable
+
+
+
+
+
+
+ +

+setStackTrace

+
+public void setStackTrace(java.lang.StackTraceElement[] stackTrace)
+
+
+
Overrides:
setStackTrace in class java.lang.Throwable
+
+
+
+
+
+
+ +

+toString

+
+public java.lang.String toString()
+
+
+
Overrides:
toString in class java.lang.Throwable
+
+
+
+
+
+
+ +

+readBaseFields

+
+protected void readBaseFields(java.io.DataInput input)
+                       throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+writeBaseFields

+
+protected void writeBaseFields(java.io.DataOutput output)
+                        throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/BaseFetcher.html b/doc/javadoc/crawlercommons/fetcher/BaseFetcher.html new file mode 100644 index 0000000..73232df --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/BaseFetcher.html @@ -0,0 +1,610 @@ + + + + + + +BaseFetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class BaseFetcher

+
+java.lang.Object
+  extended by crawlercommons.fetcher.BaseFetcher
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
Direct Known Subclasses:
BaseHttpFetcher, SimpleFileFetcher
+
+
+
+
public abstract class BaseFetcher
extends java.lang.Object
implements java.io.Serializable
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+protected  int_defaultMaxContentSize + +
+           
+protected  java.util.Map<java.lang.String,java.lang.Integer>_maxContentSizes + +
+           
+protected  java.util.Set<java.lang.String>_validMimeTypes + +
+           
+static intDEFAULT_MAX_CONTENT_SIZE + +
+           
+  + + + + + + + + + + +
+Constructor Summary
BaseFetcher() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+abstract  voidabort() + +
+          Terminate any async request being processed.
+ voidaddValidMimeType(java.lang.String validMimeType) + +
+           
+ voidaddValidMimeTypes(java.util.Set<java.lang.String> validMimeTypes) + +
+           
+ FetchedResultget(java.lang.String url) + +
+           
+abstract  FetchedResultget(java.lang.String url, + Payload payload) + +
+          Get the content stored in the resource referenced by the url parameter.
+ intgetDefaultMaxContentSize() + +
+           
+ intgetMaxContentSize(java.lang.String mimeType) + +
+           
+protected static java.lang.StringgetMimeTypeFromContentType(java.lang.String contentType) + +
+           
+ java.util.Set<java.lang.String>getValidMimeTypes() + +
+           
+ voidsetDefaultMaxContentSize(int defaultMaxContentSize) + +
+           
+ voidsetMaxContentSize(java.lang.String mimeType, + int maxContentSize) + +
+           
+ voidsetValidMimeTypes(java.util.Set<java.lang.String> validMimeTypes) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+DEFAULT_MAX_CONTENT_SIZE

+
+public static final int DEFAULT_MAX_CONTENT_SIZE
+
+
+
See Also:
Constant Field Values
+
+
+ +

+_maxContentSizes

+
+protected java.util.Map<java.lang.String,java.lang.Integer> _maxContentSizes
+
+
+
+
+
+ +

+_defaultMaxContentSize

+
+protected int _defaultMaxContentSize
+
+
+
+
+
+ +

+_validMimeTypes

+
+protected java.util.Set<java.lang.String> _validMimeTypes
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+BaseFetcher

+
+public BaseFetcher()
+
+
+ + + + + + + + +
+Method Detail
+ +

+setDefaultMaxContentSize

+
+public void setDefaultMaxContentSize(int defaultMaxContentSize)
+
+
+
+
+
+
+
+
+
+ +

+getDefaultMaxContentSize

+
+public int getDefaultMaxContentSize()
+
+
+
+
+
+
+
+
+
+ +

+setMaxContentSize

+
+public void setMaxContentSize(java.lang.String mimeType,
+                              int maxContentSize)
+
+
+
+
+
+
+
+
+
+ +

+getMaxContentSize

+
+public int getMaxContentSize(java.lang.String mimeType)
+
+
+
+
+
+
+
+
+
+ +

+getValidMimeTypes

+
+public java.util.Set<java.lang.String> getValidMimeTypes()
+
+
+
+
+
+
+
+
+
+ +

+setValidMimeTypes

+
+public void setValidMimeTypes(java.util.Set<java.lang.String> validMimeTypes)
+
+
+
+
+
+
+
+
+
+ +

+addValidMimeTypes

+
+public void addValidMimeTypes(java.util.Set<java.lang.String> validMimeTypes)
+
+
+
+
+
+
+
+
+
+ +

+addValidMimeType

+
+public void addValidMimeType(java.lang.String validMimeType)
+
+
+
+
+
+
+
+
+
+ +

+get

+
+public FetchedResult get(java.lang.String url)
+                  throws BaseFetchException
+
+
+
+
+
+ +
Throws: +
BaseFetchException
+
+
+
+ +

+getMimeTypeFromContentType

+
+protected static java.lang.String getMimeTypeFromContentType(java.lang.String contentType)
+
+
+
+
+
+
+
+
+
+ +

+get

+
+public abstract FetchedResult get(java.lang.String url,
+                                  Payload payload)
+                           throws BaseFetchException
+
+
+Get the content stored in the resource referenced by the url parameter. +

+

+
+
+
+
Parameters:
url -
payload - +
Returns:
+
Throws: +
BaseFetchException
+
+
+
+ +

+abort

+
+public abstract void abort()
+
+
Terminate any async request being processed. +

+

+
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/EncodingUtils.ExpandedResult.html b/doc/javadoc/crawlercommons/fetcher/EncodingUtils.ExpandedResult.html new file mode 100644 index 0000000..92d1410 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/EncodingUtils.ExpandedResult.html @@ -0,0 +1,314 @@ + + + + + + +EncodingUtils.ExpandedResult (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class EncodingUtils.ExpandedResult

+
+java.lang.Object
+  extended by crawlercommons.fetcher.EncodingUtils.ExpandedResult
+
+
+
Enclosing class:
EncodingUtils
+
+
+
+
public static class EncodingUtils.ExpandedResult
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
EncodingUtils.ExpandedResult(byte[] expanded, + boolean isTruncated) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ byte[]getExpanded() + +
+           
+ booleanisTruncated() + +
+           
+ voidsetExpanded(byte[] expanded) + +
+           
+ voidsetTruncated(boolean isTruncated) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+EncodingUtils.ExpandedResult

+
+public EncodingUtils.ExpandedResult(byte[] expanded,
+                                    boolean isTruncated)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getExpanded

+
+public byte[] getExpanded()
+
+
+
+
+
+
+ +

+setExpanded

+
+public void setExpanded(byte[] expanded)
+
+
+
+
+
+
+ +

+isTruncated

+
+public boolean isTruncated()
+
+
+
+
+
+
+ +

+setTruncated

+
+public void setTruncated(boolean isTruncated)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/EncodingUtils.html b/doc/javadoc/crawlercommons/fetcher/EncodingUtils.html new file mode 100644 index 0000000..1daf7ce --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/EncodingUtils.html @@ -0,0 +1,342 @@ + + + + + + +EncodingUtils (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class EncodingUtils

+
+java.lang.Object
+  extended by crawlercommons.fetcher.EncodingUtils
+
+
+
+
public class EncodingUtils
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Nested Class Summary
+static classEncodingUtils.ExpandedResult + +
+           
+  + + + + + + + + + + +
+Constructor Summary
EncodingUtils() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static byte[]processDeflateEncoded(byte[] content) + +
+           
+static byte[]processDeflateEncoded(byte[] compressed, + int sizeLimit) + +
+           
+static byte[]processGzipEncoded(byte[] compressed) + +
+           
+static EncodingUtils.ExpandedResultprocessGzipEncoded(byte[] compressed, + int sizeLimit) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+EncodingUtils

+
+public EncodingUtils()
+
+
+ + + + + + + + +
+Method Detail
+ +

+processGzipEncoded

+
+public static byte[] processGzipEncoded(byte[] compressed)
+                                 throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+processGzipEncoded

+
+public static EncodingUtils.ExpandedResult processGzipEncoded(byte[] compressed,
+                                                              int sizeLimit)
+                                                       throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+processDeflateEncoded

+
+public static byte[] processDeflateEncoded(byte[] content)
+                                    throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+processDeflateEncoded

+
+public static byte[] processDeflateEncoded(byte[] compressed,
+                                           int sizeLimit)
+                                    throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/FetchedResult.html b/doc/javadoc/crawlercommons/fetcher/FetchedResult.html new file mode 100644 index 0000000..3589ab3 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/FetchedResult.html @@ -0,0 +1,500 @@ + + + + + + +FetchedResult (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class FetchedResult

+
+java.lang.Object
+  extended by crawlercommons.fetcher.FetchedResult
+
+
+
+
public class FetchedResult
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
FetchedResult(java.lang.String baseUrl, + java.lang.String redirectedUrl, + long fetchTime, + org.apache.tika.metadata.Metadata headers, + byte[] content, + java.lang.String contentType, + int responseRate, + Payload payload, + java.lang.String newBaseUrl, + int numRedirects, + java.lang.String hostAddress) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ java.lang.StringgetBaseUrl() + +
+           
+ byte[]getContent() + +
+           
+ intgetContentLength() + +
+           
+ java.lang.StringgetContentType() + +
+           
+ java.lang.StringgetFetchedUrl() + +
+           
+ longgetFetchTime() + +
+           
+ org.apache.tika.metadata.MetadatagetHeaders() + +
+           
+ java.lang.StringgetHostAddress() + +
+           
+ java.lang.StringgetNewBaseUrl() + +
+           
+ intgetNumRedirects() + +
+           
+ PayloadgetPayload() + +
+           
+ intgetResponseRate() + +
+           
+ voidsetPayload(Payload payload) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+FetchedResult

+
+public FetchedResult(java.lang.String baseUrl,
+                     java.lang.String redirectedUrl,
+                     long fetchTime,
+                     org.apache.tika.metadata.Metadata headers,
+                     byte[] content,
+                     java.lang.String contentType,
+                     int responseRate,
+                     Payload payload,
+                     java.lang.String newBaseUrl,
+                     int numRedirects,
+                     java.lang.String hostAddress)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getPayload

+
+public Payload getPayload()
+
+
+
+
+
+
+ +

+setPayload

+
+public void setPayload(Payload payload)
+
+
+
+
+
+
+ +

+getBaseUrl

+
+public java.lang.String getBaseUrl()
+
+
+
+
+
+
+ +

+getFetchedUrl

+
+public java.lang.String getFetchedUrl()
+
+
+
+
+
+
+ +

+getFetchTime

+
+public long getFetchTime()
+
+
+
+
+
+
+ +

+getContent

+
+public byte[] getContent()
+
+
+
+
+
+
+ +

+getContentLength

+
+public int getContentLength()
+
+
+
+
+
+
+ +

+getContentType

+
+public java.lang.String getContentType()
+
+
+
+
+
+
+ +

+getResponseRate

+
+public int getResponseRate()
+
+
+
+
+
+
+ +

+getHeaders

+
+public org.apache.tika.metadata.Metadata getHeaders()
+
+
+
+
+
+
+ +

+getNewBaseUrl

+
+public java.lang.String getNewBaseUrl()
+
+
+
+
+
+
+ +

+getNumRedirects

+
+public int getNumRedirects()
+
+
+
+
+
+
+ +

+getHostAddress

+
+public java.lang.String getHostAddress()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/HttpFetchException.html b/doc/javadoc/crawlercommons/fetcher/HttpFetchException.html new file mode 100644 index 0000000..9b31771 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/HttpFetchException.html @@ -0,0 +1,339 @@ + + + + + + +HttpFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class HttpFetchException

+
+java.lang.Object
+  extended by java.lang.Throwable
+      extended by java.lang.Exception
+          extended by crawlercommons.fetcher.BaseFetchException
+              extended by crawlercommons.fetcher.HttpFetchException
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class HttpFetchException
extends BaseFetchException
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + +
+Constructor Summary
HttpFetchException() + +
+           
HttpFetchException(java.lang.String url, + java.lang.String msg, + int httpStatus, + org.apache.tika.metadata.Metadata httpHeaders) + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ org.apache.tika.metadata.MetadatagetHttpHeaders() + +
+           
+ intgetHttpStatus() + +
+           
+ java.lang.StringgetMessage() + +
+           
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.BaseFetchException
compareToBase, equals, getCause, getLocalizedMessage, getStackTrace, getUrl, hashCode, initCause, printStackTrace, printStackTrace, printStackTrace, readBaseFields, setStackTrace, toString, writeBaseFields
+ + + + + + + +
Methods inherited from class java.lang.Throwable
fillInStackTrace
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+HttpFetchException

+
+public HttpFetchException()
+
+
+
+ +

+HttpFetchException

+
+public HttpFetchException(java.lang.String url,
+                          java.lang.String msg,
+                          int httpStatus,
+                          org.apache.tika.metadata.Metadata httpHeaders)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getHttpStatus

+
+public int getHttpStatus()
+
+
+
+
+
+
+ +

+getHttpHeaders

+
+public org.apache.tika.metadata.Metadata getHttpHeaders()
+
+
+
+
+
+
+ +

+getMessage

+
+public java.lang.String getMessage()
+
+
+
Overrides:
getMessage in class BaseFetchException
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/IOFetchException.html b/doc/javadoc/crawlercommons/fetcher/IOFetchException.html new file mode 100644 index 0000000..e2cb6ee --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/IOFetchException.html @@ -0,0 +1,266 @@ + + + + + + +IOFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class IOFetchException

+
+java.lang.Object
+  extended by java.lang.Throwable
+      extended by java.lang.Exception
+          extended by crawlercommons.fetcher.BaseFetchException
+              extended by crawlercommons.fetcher.IOFetchException
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class IOFetchException
extends BaseFetchException
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + +
+Constructor Summary
IOFetchException() + +
+           
IOFetchException(java.lang.String url, + java.io.IOException e) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.BaseFetchException
compareToBase, equals, getCause, getLocalizedMessage, getMessage, getStackTrace, getUrl, hashCode, initCause, printStackTrace, printStackTrace, printStackTrace, readBaseFields, setStackTrace, toString, writeBaseFields
+ + + + + + + +
Methods inherited from class java.lang.Throwable
fillInStackTrace
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+IOFetchException

+
+public IOFetchException()
+
+
+
+ +

+IOFetchException

+
+public IOFetchException(java.lang.String url,
+                        java.io.IOException e)
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/Payload.html b/doc/javadoc/crawlercommons/fetcher/Payload.html new file mode 100644 index 0000000..5db6f03 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/Payload.html @@ -0,0 +1,565 @@ + + + + + + +Payload (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class Payload

+
+java.lang.Object
+  extended by crawlercommons.fetcher.Payload
+
+
+
All Implemented Interfaces:
java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
public class Payload
extends java.lang.Object
implements java.util.Map<java.lang.String,java.lang.Object>
+ + +

+


+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from interface java.util.Map
java.util.Map.Entry<K,V>
+  + + + + + + + + + + + +
+Constructor Summary
Payload() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidclear() + +
+           
+ booleancontainsKey(java.lang.Object key) + +
+           
+ booleancontainsValue(java.lang.Object value) + +
+           
+ java.util.Set<java.util.Map.Entry<java.lang.String,java.lang.Object>>entrySet() + +
+           
+ booleanequals(java.lang.Object o) + +
+           
+ java.lang.Objectget(java.lang.Object key) + +
+           
+ inthashCode() + +
+           
+ booleanisEmpty() + +
+           
+ java.util.Set<java.lang.String>keySet() + +
+           
+ java.lang.Objectput(java.lang.String key, + java.lang.Object value) + +
+           
+ voidputAll(java.util.Map<? extends java.lang.String,? extends java.lang.Object> m) + +
+           
+ java.lang.Objectremove(java.lang.Object key) + +
+           
+ intsize() + +
+           
+ java.util.Collection<java.lang.Object>values() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+Payload

+
+public Payload()
+
+
+ + + + + + + + +
+Method Detail
+ +

+clear

+
+public void clear()
+
+
+
Specified by:
clear in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+containsKey

+
+public boolean containsKey(java.lang.Object key)
+
+
+
Specified by:
containsKey in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+containsValue

+
+public boolean containsValue(java.lang.Object value)
+
+
+
Specified by:
containsValue in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+entrySet

+
+public java.util.Set<java.util.Map.Entry<java.lang.String,java.lang.Object>> entrySet()
+
+
+
Specified by:
entrySet in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+equals

+
+public boolean equals(java.lang.Object o)
+
+
+
Specified by:
equals in interface java.util.Map<java.lang.String,java.lang.Object>
Overrides:
equals in class java.lang.Object
+
+
+
+
+
+
+ +

+get

+
+public java.lang.Object get(java.lang.Object key)
+
+
+
Specified by:
get in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+hashCode

+
+public int hashCode()
+
+
+
Specified by:
hashCode in interface java.util.Map<java.lang.String,java.lang.Object>
Overrides:
hashCode in class java.lang.Object
+
+
+
+
+
+
+ +

+isEmpty

+
+public boolean isEmpty()
+
+
+
Specified by:
isEmpty in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+keySet

+
+public java.util.Set<java.lang.String> keySet()
+
+
+
Specified by:
keySet in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+put

+
+public java.lang.Object put(java.lang.String key,
+                            java.lang.Object value)
+
+
+
Specified by:
put in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+putAll

+
+public void putAll(java.util.Map<? extends java.lang.String,? extends java.lang.Object> m)
+
+
+
Specified by:
putAll in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+remove

+
+public java.lang.Object remove(java.lang.Object key)
+
+
+
Specified by:
remove in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+size

+
+public int size()
+
+
+
Specified by:
size in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+ +

+values

+
+public java.util.Collection<java.lang.Object> values()
+
+
+
Specified by:
values in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.RedirectExceptionReason.html b/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.RedirectExceptionReason.html new file mode 100644 index 0000000..199b966 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.RedirectExceptionReason.html @@ -0,0 +1,341 @@ + + + + + + +RedirectFetchException.RedirectExceptionReason (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Enum RedirectFetchException.RedirectExceptionReason

+
+java.lang.Object
+  extended by java.lang.Enum<RedirectFetchException.RedirectExceptionReason>
+      extended by crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Comparable<RedirectFetchException.RedirectExceptionReason>
+
+
+
Enclosing class:
RedirectFetchException
+
+
+
+
public static enum RedirectFetchException.RedirectExceptionReason
extends java.lang.Enum<RedirectFetchException.RedirectExceptionReason>
+ + +

+


+ +

+ + + + + + + + + + + + + + + + +
+Enum Constant Summary
PERM_REDIRECT_DISALLOWED + +
+           
TEMP_REDIRECT_DISALLOWED + +
+           
TOO_MANY_REDIRECTS + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static RedirectFetchException.RedirectExceptionReasonvalueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static RedirectFetchException.RedirectExceptionReason[]values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+ + + + + + + +
Methods inherited from class java.lang.Enum
clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+ + + + + + + +
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Enum Constant Detail
+ +

+TOO_MANY_REDIRECTS

+
+public static final RedirectFetchException.RedirectExceptionReason TOO_MANY_REDIRECTS
+
+
+
+
+
+ +

+PERM_REDIRECT_DISALLOWED

+
+public static final RedirectFetchException.RedirectExceptionReason PERM_REDIRECT_DISALLOWED
+
+
+
+
+
+ +

+TEMP_REDIRECT_DISALLOWED

+
+public static final RedirectFetchException.RedirectExceptionReason TEMP_REDIRECT_DISALLOWED
+
+
+
+
+ + + + + + + + +
+Method Detail
+ +

+values

+
+public static RedirectFetchException.RedirectExceptionReason[] values()
+
+
Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
+for (RedirectFetchException.RedirectExceptionReason c : RedirectFetchException.RedirectExceptionReason.values())
+    System.out.println(c);
+
+

+

+ +
Returns:
an array containing the constants of this enum type, in +the order they are declared
+
+
+
+ +

+valueOf

+
+public static RedirectFetchException.RedirectExceptionReason valueOf(java.lang.String name)
+
+
Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.) +

+

+
Parameters:
name - the name of the enum constant to be returned. +
Returns:
the enum constant with the specified name +
Throws: +
java.lang.IllegalArgumentException - if this enum type has no constant +with the specified name +
java.lang.NullPointerException - if the argument is null
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.html b/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.html new file mode 100644 index 0000000..ef95987 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.html @@ -0,0 +1,332 @@ + + + + + + +RedirectFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class RedirectFetchException

+
+java.lang.Object
+  extended by java.lang.Throwable
+      extended by java.lang.Exception
+          extended by crawlercommons.fetcher.BaseFetchException
+              extended by crawlercommons.fetcher.RedirectFetchException
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class RedirectFetchException
extends BaseFetchException
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + +
+Nested Class Summary
+static classRedirectFetchException.RedirectExceptionReason + +
+           
+  + + + + + + + + + + + + + +
+Constructor Summary
RedirectFetchException() + +
+           
RedirectFetchException(java.lang.String url, + java.lang.String redirectedUrl, + RedirectFetchException.RedirectExceptionReason reason) + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+ RedirectFetchException.RedirectExceptionReasongetReason() + +
+           
+ java.lang.StringgetRedirectedUrl() + +
+           
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.BaseFetchException
compareToBase, equals, getCause, getLocalizedMessage, getMessage, getStackTrace, getUrl, hashCode, initCause, printStackTrace, printStackTrace, printStackTrace, readBaseFields, setStackTrace, toString, writeBaseFields
+ + + + + + + +
Methods inherited from class java.lang.Throwable
fillInStackTrace
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+RedirectFetchException

+
+public RedirectFetchException()
+
+
+
+ +

+RedirectFetchException

+
+public RedirectFetchException(java.lang.String url,
+                              java.lang.String redirectedUrl,
+                              RedirectFetchException.RedirectExceptionReason reason)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getRedirectedUrl

+
+public java.lang.String getRedirectedUrl()
+
+
+
+
+
+
+ +

+getReason

+
+public RedirectFetchException.RedirectExceptionReason getReason()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/UrlFetchException.html b/doc/javadoc/crawlercommons/fetcher/UrlFetchException.html new file mode 100644 index 0000000..6621651 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/UrlFetchException.html @@ -0,0 +1,266 @@ + + + + + + +UrlFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher +
+Class UrlFetchException

+
+java.lang.Object
+  extended by java.lang.Throwable
+      extended by java.lang.Exception
+          extended by crawlercommons.fetcher.BaseFetchException
+              extended by crawlercommons.fetcher.UrlFetchException
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class UrlFetchException
extends BaseFetchException
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + +
+Constructor Summary
UrlFetchException() + +
+           
UrlFetchException(java.lang.String url, + java.lang.String msg) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.BaseFetchException
compareToBase, equals, getCause, getLocalizedMessage, getMessage, getStackTrace, getUrl, hashCode, initCause, printStackTrace, printStackTrace, printStackTrace, readBaseFields, setStackTrace, toString, writeBaseFields
+ + + + + + + +
Methods inherited from class java.lang.Throwable
fillInStackTrace
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+UrlFetchException

+
+public UrlFetchException()
+
+
+
+ +

+UrlFetchException

+
+public UrlFetchException(java.lang.String url,
+                         java.lang.String msg)
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchException.html new file mode 100644 index 0000000..b9bb7c7 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchException.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.fetcher.AbortedFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.AbortedFetchException

+
+No usage of crawlercommons.fetcher.AbortedFetchException +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchReason.html b/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchReason.html new file mode 100644 index 0000000..6b6bf57 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchReason.html @@ -0,0 +1,220 @@ + + + + + + +Uses of Class crawlercommons.fetcher.AbortedFetchReason (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.AbortedFetchReason

+
+ + + + + + + + + +
+Packages that use AbortedFetchReason
crawlercommons.fetcher  
+  +

+ + + + + +
+Uses of AbortedFetchReason in crawlercommons.fetcher
+  +

+ + + + + + + + + + + + + + + + + +
Methods in crawlercommons.fetcher that return AbortedFetchReason
+ AbortedFetchReasonAbortedFetchException.getAbortReason() + +
+           
+static AbortedFetchReasonAbortedFetchReason.valueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static AbortedFetchReason[]AbortedFetchReason.values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+  +

+ + + + + + + + + + + +
Constructors in crawlercommons.fetcher with parameters of type AbortedFetchReason
AbortedFetchException(java.lang.String url, + AbortedFetchReason abortReason) + +
+           
AbortedFetchException(java.lang.String url, + java.lang.String msg, + AbortedFetchReason abortReason) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/BadProtocolFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/BadProtocolFetchException.html new file mode 100644 index 0000000..4e9a442 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/BadProtocolFetchException.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.fetcher.BadProtocolFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.BadProtocolFetchException

+
+No usage of crawlercommons.fetcher.BadProtocolFetchException +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetchException.html new file mode 100644 index 0000000..3d3a337 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetchException.html @@ -0,0 +1,339 @@ + + + + + + +Uses of Class crawlercommons.fetcher.BaseFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.BaseFetchException

+
+ + + + + + + + + + + + + + + + + +
+Packages that use BaseFetchException
crawlercommons.fetcher  
crawlercommons.fetcher.file  
crawlercommons.fetcher.http  
+  +

+ + + + + +
+Uses of BaseFetchException in crawlercommons.fetcher
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Subclasses of BaseFetchException in crawlercommons.fetcher
+ classAbortedFetchException + +
+           
+ classBadProtocolFetchException + +
+           
+ classHttpFetchException + +
+           
+ classIOFetchException + +
+           
+ classRedirectFetchException + +
+           
+ classUrlFetchException + +
+           
+  +

+ + + + + + + + + +
Methods in crawlercommons.fetcher with parameters of type BaseFetchException
+protected  intBaseFetchException.compareToBase(BaseFetchException e) + +
+           
+  +

+ + + + + + + + + + + + + +
Methods in crawlercommons.fetcher that throw BaseFetchException
+ FetchedResultBaseFetcher.get(java.lang.String url) + +
+           
+abstract  FetchedResultBaseFetcher.get(java.lang.String url, + Payload payload) + +
+          Get the content stored in the resource referenced by
+  +

+ + + + + +
+Uses of BaseFetchException in crawlercommons.fetcher.file
+  +

+ + + + + + + + + +
Methods in crawlercommons.fetcher.file that throw BaseFetchException
+ FetchedResultSimpleFileFetcher.get(java.lang.String url, + Payload payload) + +
+           
+  +

+ + + + + +
+Uses of BaseFetchException in crawlercommons.fetcher.http
+  +

+ + + + + + + + + + + + + + + + + +
Methods in crawlercommons.fetcher.http that throw BaseFetchException
+ FetchedResultSimpleHttpFetcher.fetch(org.apache.http.client.methods.HttpRequestBase request, + java.lang.String url, + Payload payload) + +
+           
+ FetchedResultSimpleHttpFetcher.fetch(java.lang.String url) + +
+           
+ FetchedResultSimpleHttpFetcher.get(java.lang.String url, + Payload payload) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetcher.html b/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetcher.html new file mode 100644 index 0000000..aa9f90f --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetcher.html @@ -0,0 +1,217 @@ + + + + + + +Uses of Class crawlercommons.fetcher.BaseFetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.BaseFetcher

+
+ + + + + + + + + + + + + +
+Packages that use BaseFetcher
crawlercommons.fetcher.file  
crawlercommons.fetcher.http  
+  +

+ + + + + +
+Uses of BaseFetcher in crawlercommons.fetcher.file
+  +

+ + + + + + + + + +
Subclasses of BaseFetcher in crawlercommons.fetcher.file
+ classSimpleFileFetcher + +
+           
+  +

+ + + + + +
+Uses of BaseFetcher in crawlercommons.fetcher.http
+  +

+ + + + + + + + + + + + + +
Subclasses of BaseFetcher in crawlercommons.fetcher.http
+ classBaseHttpFetcher + +
+           
+ classSimpleHttpFetcher + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.ExpandedResult.html b/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.ExpandedResult.html new file mode 100644 index 0000000..5377c00 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.ExpandedResult.html @@ -0,0 +1,181 @@ + + + + + + +Uses of Class crawlercommons.fetcher.EncodingUtils.ExpandedResult (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.EncodingUtils.ExpandedResult

+
+ + + + + + + + + +
+Packages that use EncodingUtils.ExpandedResult
crawlercommons.fetcher  
+  +

+ + + + + +
+Uses of EncodingUtils.ExpandedResult in crawlercommons.fetcher
+  +

+ + + + + + + + + +
Methods in crawlercommons.fetcher that return EncodingUtils.ExpandedResult
+static EncodingUtils.ExpandedResultEncodingUtils.processGzipEncoded(byte[] compressed, + int sizeLimit) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.html b/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.html new file mode 100644 index 0000000..b907d90 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.fetcher.EncodingUtils (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.EncodingUtils

+
+No usage of crawlercommons.fetcher.EncodingUtils +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/FetchedResult.html b/doc/javadoc/crawlercommons/fetcher/class-use/FetchedResult.html new file mode 100644 index 0000000..26da49d --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/FetchedResult.html @@ -0,0 +1,267 @@ + + + + + + +Uses of Class crawlercommons.fetcher.FetchedResult (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.FetchedResult

+
+ + + + + + + + + + + + + + + + + +
+Packages that use FetchedResult
crawlercommons.fetcher  
crawlercommons.fetcher.file  
crawlercommons.fetcher.http  
+  +

+ + + + + +
+Uses of FetchedResult in crawlercommons.fetcher
+  +

+ + + + + + + + + + + + + +
Methods in crawlercommons.fetcher that return FetchedResult
+ FetchedResultBaseFetcher.get(java.lang.String url) + +
+           
+abstract  FetchedResultBaseFetcher.get(java.lang.String url, + Payload payload) + +
+          Get the content stored in the resource referenced by
+  +

+ + + + + +
+Uses of FetchedResult in crawlercommons.fetcher.file
+  +

+ + + + + + + + + +
Methods in crawlercommons.fetcher.file that return FetchedResult
+ FetchedResultSimpleFileFetcher.get(java.lang.String url, + Payload payload) + +
+           
+  +

+ + + + + +
+Uses of FetchedResult in crawlercommons.fetcher.http
+  +

+ + + + + + + + + + + + + + + + + +
Methods in crawlercommons.fetcher.http that return FetchedResult
+ FetchedResultSimpleHttpFetcher.fetch(org.apache.http.client.methods.HttpRequestBase request, + java.lang.String url, + Payload payload) + +
+           
+ FetchedResultSimpleHttpFetcher.fetch(java.lang.String url) + +
+           
+ FetchedResultSimpleHttpFetcher.get(java.lang.String url, + Payload payload) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/HttpFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/HttpFetchException.html new file mode 100644 index 0000000..2b0bc7b --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/HttpFetchException.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.fetcher.HttpFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.HttpFetchException

+
+No usage of crawlercommons.fetcher.HttpFetchException +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/IOFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/IOFetchException.html new file mode 100644 index 0000000..3f2e2cc --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/IOFetchException.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.fetcher.IOFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.IOFetchException

+
+No usage of crawlercommons.fetcher.IOFetchException +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/Payload.html b/doc/javadoc/crawlercommons/fetcher/class-use/Payload.html new file mode 100644 index 0000000..d8e67b4 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/Payload.html @@ -0,0 +1,299 @@ + + + + + + +Uses of Class crawlercommons.fetcher.Payload (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.Payload

+
+ + + + + + + + + + + + + + + + + +
+Packages that use Payload
crawlercommons.fetcher  
crawlercommons.fetcher.file  
crawlercommons.fetcher.http  
+  +

+ + + + + +
+Uses of Payload in crawlercommons.fetcher
+  +

+ + + + + + + + + +
Methods in crawlercommons.fetcher that return Payload
+ PayloadFetchedResult.getPayload() + +
+           
+  +

+ + + + + + + + + + + + + +
Methods in crawlercommons.fetcher with parameters of type Payload
+abstract  FetchedResultBaseFetcher.get(java.lang.String url, + Payload payload) + +
+          Get the content stored in the resource referenced by
+ voidFetchedResult.setPayload(Payload payload) + +
+           
+  +

+ + + + + + + + +
Constructors in crawlercommons.fetcher with parameters of type Payload
FetchedResult(java.lang.String baseUrl, + java.lang.String redirectedUrl, + long fetchTime, + org.apache.tika.metadata.Metadata headers, + byte[] content, + java.lang.String contentType, + int responseRate, + Payload payload, + java.lang.String newBaseUrl, + int numRedirects, + java.lang.String hostAddress) + +
+           
+  +

+ + + + + +
+Uses of Payload in crawlercommons.fetcher.file
+  +

+ + + + + + + + + +
Methods in crawlercommons.fetcher.file with parameters of type Payload
+ FetchedResultSimpleFileFetcher.get(java.lang.String url, + Payload payload) + +
+           
+  +

+ + + + + +
+Uses of Payload in crawlercommons.fetcher.http
+  +

+ + + + + + + + + + + + + +
Methods in crawlercommons.fetcher.http with parameters of type Payload
+ FetchedResultSimpleHttpFetcher.fetch(org.apache.http.client.methods.HttpRequestBase request, + java.lang.String url, + Payload payload) + +
+           
+ FetchedResultSimpleHttpFetcher.get(java.lang.String url, + Payload payload) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.RedirectExceptionReason.html b/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.RedirectExceptionReason.html new file mode 100644 index 0000000..b60c83a --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.RedirectExceptionReason.html @@ -0,0 +1,213 @@ + + + + + + +Uses of Class crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason

+
+ + + + + + + + + +
+Packages that use RedirectFetchException.RedirectExceptionReason
crawlercommons.fetcher  
+  +

+ + + + + +
+Uses of RedirectFetchException.RedirectExceptionReason in crawlercommons.fetcher
+  +

+ + + + + + + + + + + + + + + + + +
Methods in crawlercommons.fetcher that return RedirectFetchException.RedirectExceptionReason
+ RedirectFetchException.RedirectExceptionReasonRedirectFetchException.getReason() + +
+           
+static RedirectFetchException.RedirectExceptionReasonRedirectFetchException.RedirectExceptionReason.valueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static RedirectFetchException.RedirectExceptionReason[]RedirectFetchException.RedirectExceptionReason.values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+  +

+ + + + + + + + +
Constructors in crawlercommons.fetcher with parameters of type RedirectFetchException.RedirectExceptionReason
RedirectFetchException(java.lang.String url, + java.lang.String redirectedUrl, + RedirectFetchException.RedirectExceptionReason reason) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.html new file mode 100644 index 0000000..35cd4c0 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.fetcher.RedirectFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.RedirectFetchException

+
+No usage of crawlercommons.fetcher.RedirectFetchException +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/UrlFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/UrlFetchException.html new file mode 100644 index 0000000..61fd4df --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/class-use/UrlFetchException.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.fetcher.UrlFetchException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.UrlFetchException

+
+No usage of crawlercommons.fetcher.UrlFetchException +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/file/SimpleFileFetcher.html b/doc/javadoc/crawlercommons/fetcher/file/SimpleFileFetcher.html new file mode 100644 index 0000000..769fe39 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/file/SimpleFileFetcher.html @@ -0,0 +1,322 @@ + + + + + + +SimpleFileFetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher.file +
+Class SimpleFileFetcher

+
+java.lang.Object
+  extended by crawlercommons.fetcher.BaseFetcher
+      extended by crawlercommons.fetcher.file.SimpleFileFetcher
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class SimpleFileFetcher
extends BaseFetcher
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class crawlercommons.fetcher.BaseFetcher
_defaultMaxContentSize, _maxContentSizes, _validMimeTypes, DEFAULT_MAX_CONTENT_SIZE
+  + + + + + + + + + + +
+Constructor Summary
SimpleFileFetcher() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+ voidabort() + +
+          Terminate any async request being processed.
+ FetchedResultget(java.lang.String url, + Payload payload) + +
+          Get the content stored in the resource referenced by
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.BaseFetcher
addValidMimeType, addValidMimeTypes, get, getDefaultMaxContentSize, getMaxContentSize, getMimeTypeFromContentType, getValidMimeTypes, setDefaultMaxContentSize, setMaxContentSize, setValidMimeTypes
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+SimpleFileFetcher

+
+public SimpleFileFetcher()
+
+
+ + + + + + + + +
+Method Detail
+ +

+get

+
+public FetchedResult get(java.lang.String url,
+                         Payload payload)
+                  throws BaseFetchException
+
+
Description copied from class: BaseFetcher
+
Get the content stored in the resource referenced by the url parameter. +

+

+
Specified by:
get in class BaseFetcher
+
+
+ +
Returns:
+
Throws: +
BaseFetchException
+
+
+
+ +

+abort

+
+public void abort()
+
+
Description copied from class: BaseFetcher
+
Terminate any async request being processed. +

+

+
Specified by:
abort in class BaseFetcher
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/file/class-use/SimpleFileFetcher.html b/doc/javadoc/crawlercommons/fetcher/file/class-use/SimpleFileFetcher.html new file mode 100644 index 0000000..fc757e0 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/file/class-use/SimpleFileFetcher.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.fetcher.file.SimpleFileFetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.file.SimpleFileFetcher

+
+No usage of crawlercommons.fetcher.file.SimpleFileFetcher +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/file/package-frame.html b/doc/javadoc/crawlercommons/fetcher/file/package-frame.html new file mode 100644 index 0000000..74cac2d --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/file/package-frame.html @@ -0,0 +1,32 @@ + + + + + + +crawlercommons.fetcher.file (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + +crawlercommons.fetcher.file + + + + +
+Classes  + +
+SimpleFileFetcher
+ + + + diff --git a/doc/javadoc/crawlercommons/fetcher/file/package-summary.html b/doc/javadoc/crawlercommons/fetcher/file/package-summary.html new file mode 100644 index 0000000..2c7781f --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/file/package-summary.html @@ -0,0 +1,157 @@ + + + + + + +crawlercommons.fetcher.file (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package crawlercommons.fetcher.file +

+ + + + + + + + + +
+Class Summary
SimpleFileFetcher 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/file/package-tree.html b/doc/javadoc/crawlercommons/fetcher/file/package-tree.html new file mode 100644 index 0000000..fb0f400 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/file/package-tree.html @@ -0,0 +1,156 @@ + + + + + + +crawlercommons.fetcher.file Class Hierarchy (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package crawlercommons.fetcher.file +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/file/package-use.html b/doc/javadoc/crawlercommons/fetcher/file/package-use.html new file mode 100644 index 0000000..e17fd25 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/file/package-use.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Package crawlercommons.fetcher.file (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
crawlercommons.fetcher.file

+
+No usage of crawlercommons.fetcher.file +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.RedirectMode.html b/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.RedirectMode.html new file mode 100644 index 0000000..eeeeea1 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.RedirectMode.html @@ -0,0 +1,341 @@ + + + + + + +BaseHttpFetcher.RedirectMode (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher.http +
+Enum BaseHttpFetcher.RedirectMode

+
+java.lang.Object
+  extended by java.lang.Enum<BaseHttpFetcher.RedirectMode>
+      extended by crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Comparable<BaseHttpFetcher.RedirectMode>
+
+
+
Enclosing class:
BaseHttpFetcher
+
+
+
+
public static enum BaseHttpFetcher.RedirectMode
extends java.lang.Enum<BaseHttpFetcher.RedirectMode>
+ + +

+


+ +

+ + + + + + + + + + + + + + + + +
+Enum Constant Summary
FOLLOW_ALL + +
+           
FOLLOW_NONE + +
+           
FOLLOW_TEMP + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static BaseHttpFetcher.RedirectModevalueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static BaseHttpFetcher.RedirectMode[]values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+ + + + + + + +
Methods inherited from class java.lang.Enum
clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+ + + + + + + +
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Enum Constant Detail
+ +

+FOLLOW_ALL

+
+public static final BaseHttpFetcher.RedirectMode FOLLOW_ALL
+
+
+
+
+
+ +

+FOLLOW_TEMP

+
+public static final BaseHttpFetcher.RedirectMode FOLLOW_TEMP
+
+
+
+
+
+ +

+FOLLOW_NONE

+
+public static final BaseHttpFetcher.RedirectMode FOLLOW_NONE
+
+
+
+
+ + + + + + + + +
+Method Detail
+ +

+values

+
+public static BaseHttpFetcher.RedirectMode[] values()
+
+
Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
+for (BaseHttpFetcher.RedirectMode c : BaseHttpFetcher.RedirectMode.values())
+    System.out.println(c);
+
+

+

+ +
Returns:
an array containing the constants of this enum type, in +the order they are declared
+
+
+
+ +

+valueOf

+
+public static BaseHttpFetcher.RedirectMode valueOf(java.lang.String name)
+
+
Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.) +

+

+
Parameters:
name - the name of the enum constant to be returned. +
Returns:
the enum constant with the specified name +
Throws: +
java.lang.IllegalArgumentException - if this enum type has no constant +with the specified name +
java.lang.NullPointerException - if the argument is null
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.html b/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.html new file mode 100644 index 0000000..6141bd6 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.html @@ -0,0 +1,781 @@ + + + + + + +BaseHttpFetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher.http +
+Class BaseHttpFetcher

+
+java.lang.Object
+  extended by crawlercommons.fetcher.BaseFetcher
+      extended by crawlercommons.fetcher.http.BaseHttpFetcher
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
Direct Known Subclasses:
SimpleHttpFetcher
+
+
+
+
public abstract class BaseHttpFetcher
extends BaseFetcher
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + +
+Nested Class Summary
+static classBaseHttpFetcher.RedirectMode + +
+           
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+protected  java.lang.String_acceptLanguage + +
+           
+protected  int_maxConnectionsPerHost + +
+           
+protected  int_maxRedirects + +
+           
+protected  int_maxThreads + +
+           
+protected  int_minResponseRate + +
+           
+protected  BaseHttpFetcher.RedirectMode_redirectMode + +
+           
+protected  UserAgent_userAgent + +
+           
+static java.lang.StringDEFAULT_ACCEPT_LANGUAGE + +
+           
+static intDEFAULT_MAX_CONNECTIONS_PER_HOST + +
+           
+static intDEFAULT_MAX_REDIRECTS + +
+           
+static intDEFAULT_MIN_RESPONSE_RATE + +
+           
+static BaseHttpFetcher.RedirectModeDEFAULT_REDIRECT_MODE + +
+           
+static intNO_MIN_RESPONSE_RATE + +
+           
+static intNO_REDIRECTS + +
+           
+ + + + + + + +
Fields inherited from class crawlercommons.fetcher.BaseFetcher
_defaultMaxContentSize, _maxContentSizes, _validMimeTypes, DEFAULT_MAX_CONTENT_SIZE
+  + + + + + + + + + + +
+Constructor Summary
BaseHttpFetcher(int maxThreads, + UserAgent userAgent) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ java.lang.StringgetAcceptLanguage() + +
+           
+ intgetMaxConnectionsPerHost() + +
+           
+ intgetMaxRedirects() + +
+           
+ intgetMaxThreads() + +
+           
+ intgetMinResponseRate() + +
+          Return the minimum response rate.
+ BaseHttpFetcher.RedirectModegetRedirectMode() + +
+           
+ UserAgentgetUserAgent() + +
+           
+ voidsetAcceptLanguage(java.lang.String acceptLanguage) + +
+           
+ voidsetMaxConnectionsPerHost(int maxConnectionsPerHost) + +
+           
+ voidsetMaxRedirects(int maxRedirects) + +
+           
+ voidsetMinResponseRate(int minResponseRate) + +
+           
+ voidsetRedirectMode(BaseHttpFetcher.RedirectMode mode) + +
+           
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.BaseFetcher
abort, addValidMimeType, addValidMimeTypes, get, get, getDefaultMaxContentSize, getMaxContentSize, getMimeTypeFromContentType, getValidMimeTypes, setDefaultMaxContentSize, setMaxContentSize, setValidMimeTypes
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+NO_MIN_RESPONSE_RATE

+
+public static final int NO_MIN_RESPONSE_RATE
+
+
+
See Also:
Constant Field Values
+
+
+ +

+NO_REDIRECTS

+
+public static final int NO_REDIRECTS
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DEFAULT_MIN_RESPONSE_RATE

+
+public static final int DEFAULT_MIN_RESPONSE_RATE
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DEFAULT_MAX_CONNECTIONS_PER_HOST

+
+public static final int DEFAULT_MAX_CONNECTIONS_PER_HOST
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DEFAULT_MAX_REDIRECTS

+
+public static final int DEFAULT_MAX_REDIRECTS
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DEFAULT_ACCEPT_LANGUAGE

+
+public static final java.lang.String DEFAULT_ACCEPT_LANGUAGE
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DEFAULT_REDIRECT_MODE

+
+public static final BaseHttpFetcher.RedirectMode DEFAULT_REDIRECT_MODE
+
+
+
+
+
+ +

+_maxThreads

+
+protected int _maxThreads
+
+
+
+
+
+ +

+_userAgent

+
+protected UserAgent _userAgent
+
+
+
+
+
+ +

+_maxRedirects

+
+protected int _maxRedirects
+
+
+
+
+
+ +

+_maxConnectionsPerHost

+
+protected int _maxConnectionsPerHost
+
+
+
+
+
+ +

+_minResponseRate

+
+protected int _minResponseRate
+
+
+
+
+
+ +

+_acceptLanguage

+
+protected java.lang.String _acceptLanguage
+
+
+
+
+
+ +

+_redirectMode

+
+protected BaseHttpFetcher.RedirectMode _redirectMode
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+BaseHttpFetcher

+
+public BaseHttpFetcher(int maxThreads,
+                       UserAgent userAgent)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getMaxThreads

+
+public int getMaxThreads()
+
+
+
+
+
+
+ +

+getUserAgent

+
+public UserAgent getUserAgent()
+
+
+
+
+
+
+ +

+setMaxConnectionsPerHost

+
+public void setMaxConnectionsPerHost(int maxConnectionsPerHost)
+
+
+
+
+
+
+ +

+getMaxConnectionsPerHost

+
+public int getMaxConnectionsPerHost()
+
+
+
+
+
+
+ +

+setMinResponseRate

+
+public void setMinResponseRate(int minResponseRate)
+
+
+
+
+
+
+ +

+getMinResponseRate

+
+public int getMinResponseRate()
+
+
Return the minimum response rate. If the speed at which bytes are being returned + from the server drops below this, the fetch of that page will be aborted. +

+

+ +
Returns:
bytes/second
+
+
+
+ +

+setAcceptLanguage

+
+public void setAcceptLanguage(java.lang.String acceptLanguage)
+
+
+
+
+
+
+ +

+getAcceptLanguage

+
+public java.lang.String getAcceptLanguage()
+
+
+
+
+
+
+ +

+setMaxRedirects

+
+public void setMaxRedirects(int maxRedirects)
+
+
+
+
+
+
+ +

+getMaxRedirects

+
+public int getMaxRedirects()
+
+
+
+
+
+
+ +

+setRedirectMode

+
+public void setRedirectMode(BaseHttpFetcher.RedirectMode mode)
+
+
+
+
+
+
+ +

+getRedirectMode

+
+public BaseHttpFetcher.RedirectMode getRedirectMode()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/SimpleHttpFetcher.html b/doc/javadoc/crawlercommons/fetcher/http/SimpleHttpFetcher.html new file mode 100644 index 0000000..2b4346f --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/SimpleHttpFetcher.html @@ -0,0 +1,576 @@ + + + + + + +SimpleHttpFetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher.http +
+Class SimpleHttpFetcher

+
+java.lang.Object
+  extended by crawlercommons.fetcher.BaseFetcher
+      extended by crawlercommons.fetcher.http.BaseHttpFetcher
+          extended by crawlercommons.fetcher.http.SimpleHttpFetcher
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class SimpleHttpFetcher
extends BaseHttpFetcher
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class crawlercommons.fetcher.http.BaseHttpFetcher
BaseHttpFetcher.RedirectMode
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class crawlercommons.fetcher.http.BaseHttpFetcher
_acceptLanguage, _maxConnectionsPerHost, _maxRedirects, _maxThreads, _minResponseRate, _redirectMode, _userAgent, DEFAULT_ACCEPT_LANGUAGE, DEFAULT_MAX_CONNECTIONS_PER_HOST, DEFAULT_MAX_REDIRECTS, DEFAULT_MIN_RESPONSE_RATE, DEFAULT_REDIRECT_MODE, NO_MIN_RESPONSE_RATE, NO_REDIRECTS
+ + + + + + + +
Fields inherited from class crawlercommons.fetcher.BaseFetcher
_defaultMaxContentSize, _maxContentSizes, _validMimeTypes, DEFAULT_MAX_CONTENT_SIZE
+  + + + + + + + + + + + + + +
+Constructor Summary
SimpleHttpFetcher(int maxThreads, + UserAgent userAgent) + +
+           
SimpleHttpFetcher(UserAgent userAgent) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidabort() + +
+          Terminate any async request being processed.
+ FetchedResultfetch(org.apache.http.client.methods.HttpRequestBase request, + java.lang.String url, + Payload payload) + +
+           
+ FetchedResultfetch(java.lang.String url) + +
+           
+ FetchedResultget(java.lang.String url, + Payload payload) + +
+          Get the content stored in the resource referenced by
+ intgetConnectionTimeout() + +
+           
+ org.apache.http.HttpVersiongetHttpVersion() + +
+           
+ intgetMaxRetryCount() + +
+           
+ intgetSocketTimeout() + +
+           
+ voidsetConnectionTimeout(int connectionTimeoutInMs) + +
+           
+ voidsetHttpVersion(org.apache.http.HttpVersion httpVersion) + +
+           
+ voidsetMaxRetryCount(int maxRetryCount) + +
+           
+ voidsetSocketTimeout(int socketTimeoutInMs) + +
+           
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.http.BaseHttpFetcher
getAcceptLanguage, getMaxConnectionsPerHost, getMaxRedirects, getMaxThreads, getMinResponseRate, getRedirectMode, getUserAgent, setAcceptLanguage, setMaxConnectionsPerHost, setMaxRedirects, setMinResponseRate, setRedirectMode
+ + + + + + + +
Methods inherited from class crawlercommons.fetcher.BaseFetcher
addValidMimeType, addValidMimeTypes, get, getDefaultMaxContentSize, getMaxContentSize, getMimeTypeFromContentType, getValidMimeTypes, setDefaultMaxContentSize, setMaxContentSize, setValidMimeTypes
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+SimpleHttpFetcher

+
+public SimpleHttpFetcher(UserAgent userAgent)
+
+
+
+ +

+SimpleHttpFetcher

+
+public SimpleHttpFetcher(int maxThreads,
+                         UserAgent userAgent)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getHttpVersion

+
+public org.apache.http.HttpVersion getHttpVersion()
+
+
+
+
+
+
+ +

+setHttpVersion

+
+public void setHttpVersion(org.apache.http.HttpVersion httpVersion)
+
+
+
+
+
+
+ +

+getSocketTimeout

+
+public int getSocketTimeout()
+
+
+
+
+
+
+ +

+setSocketTimeout

+
+public void setSocketTimeout(int socketTimeoutInMs)
+
+
+
+
+
+
+ +

+getConnectionTimeout

+
+public int getConnectionTimeout()
+
+
+
+
+
+
+ +

+setConnectionTimeout

+
+public void setConnectionTimeout(int connectionTimeoutInMs)
+
+
+
+
+
+
+ +

+getMaxRetryCount

+
+public int getMaxRetryCount()
+
+
+
+
+
+
+ +

+setMaxRetryCount

+
+public void setMaxRetryCount(int maxRetryCount)
+
+
+
+
+
+
+ +

+get

+
+public FetchedResult get(java.lang.String url,
+                         Payload payload)
+                  throws BaseFetchException
+
+
Description copied from class: BaseFetcher
+
Get the content stored in the resource referenced by the url parameter. +

+

+
Specified by:
get in class BaseFetcher
+
+
+ +
Returns:
+
Throws: +
BaseFetchException
+
+
+
+ +

+fetch

+
+public FetchedResult fetch(java.lang.String url)
+                    throws BaseFetchException
+
+
+ +
Throws: +
BaseFetchException
+
+
+
+ +

+fetch

+
+public FetchedResult fetch(org.apache.http.client.methods.HttpRequestBase request,
+                           java.lang.String url,
+                           Payload payload)
+                    throws BaseFetchException
+
+
+ +
Throws: +
BaseFetchException
+
+
+
+ +

+abort

+
+public void abort()
+
+
Description copied from class: BaseFetcher
+
Terminate any async request being processed. +

+

+
Specified by:
abort in class BaseFetcher
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/UserAgent.html b/doc/javadoc/crawlercommons/fetcher/http/UserAgent.html new file mode 100644 index 0000000..dce416b --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/UserAgent.html @@ -0,0 +1,364 @@ + + + + + + +UserAgent (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.fetcher.http +
+Class UserAgent

+
+java.lang.Object
+  extended by crawlercommons.fetcher.http.UserAgent
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class UserAgent
extends java.lang.Object
implements java.io.Serializable
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + +
+Field Summary
+static java.lang.StringDEFAULT_BROWSER_VERSION + +
+           
+  + + + + + + + + + + + + + + + + +
+Constructor Summary
UserAgent(java.lang.String agentName, + java.lang.String emailAddress, + java.lang.String webAddress) + +
+           
UserAgent(java.lang.String agentName, + java.lang.String emailAddress, + java.lang.String webAddress, + java.lang.String browserVersion) + +
+           
UserAgent(java.lang.String agentName, + java.lang.String emailAddress, + java.lang.String webAddress, + java.lang.String browserVersion, + java.lang.String crawlerVersion) + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+ java.lang.StringgetAgentName() + +
+           
+ java.lang.StringgetUserAgentString() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+DEFAULT_BROWSER_VERSION

+
+public static final java.lang.String DEFAULT_BROWSER_VERSION
+
+
+
See Also:
Constant Field Values
+
+ + + + + + + + +
+Constructor Detail
+ +

+UserAgent

+
+public UserAgent(java.lang.String agentName,
+                 java.lang.String emailAddress,
+                 java.lang.String webAddress)
+
+
+
+ +

+UserAgent

+
+public UserAgent(java.lang.String agentName,
+                 java.lang.String emailAddress,
+                 java.lang.String webAddress,
+                 java.lang.String browserVersion)
+
+
+
+ +

+UserAgent

+
+public UserAgent(java.lang.String agentName,
+                 java.lang.String emailAddress,
+                 java.lang.String webAddress,
+                 java.lang.String browserVersion,
+                 java.lang.String crawlerVersion)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getAgentName

+
+public java.lang.String getAgentName()
+
+
+
+
+
+
+
+
+
+ +

+getUserAgentString

+
+public java.lang.String getUserAgentString()
+
+
+
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.RedirectMode.html b/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.RedirectMode.html new file mode 100644 index 0000000..2d71275 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.RedirectMode.html @@ -0,0 +1,237 @@ + + + + + + +Uses of Class crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode

+
+ + + + + + + + + +
+Packages that use BaseHttpFetcher.RedirectMode
crawlercommons.fetcher.http  
+  +

+ + + + + +
+Uses of BaseHttpFetcher.RedirectMode in crawlercommons.fetcher.http
+  +

+ + + + + + + + + + + + + +
Fields in crawlercommons.fetcher.http declared as BaseHttpFetcher.RedirectMode
+protected  BaseHttpFetcher.RedirectModeBaseHttpFetcher._redirectMode + +
+           
+static BaseHttpFetcher.RedirectModeBaseHttpFetcher.DEFAULT_REDIRECT_MODE + +
+           
+  +

+ + + + + + + + + + + + + + + + + +
Methods in crawlercommons.fetcher.http that return BaseHttpFetcher.RedirectMode
+ BaseHttpFetcher.RedirectModeBaseHttpFetcher.getRedirectMode() + +
+           
+static BaseHttpFetcher.RedirectModeBaseHttpFetcher.RedirectMode.valueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static BaseHttpFetcher.RedirectMode[]BaseHttpFetcher.RedirectMode.values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+  +

+ + + + + + + + + +
Methods in crawlercommons.fetcher.http with parameters of type BaseHttpFetcher.RedirectMode
+ voidBaseHttpFetcher.setRedirectMode(BaseHttpFetcher.RedirectMode mode) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.html b/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.html new file mode 100644 index 0000000..2833c3d --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.html @@ -0,0 +1,244 @@ + + + + + + +Uses of Class crawlercommons.fetcher.http.BaseHttpFetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.http.BaseHttpFetcher

+
+ + + + + + + + + + + + + +
+Packages that use BaseHttpFetcher
crawlercommons.fetcher.http  
crawlercommons.robots  
+  +

+ + + + + +
+Uses of BaseHttpFetcher in crawlercommons.fetcher.http
+  +

+ + + + + + + + + +
Subclasses of BaseHttpFetcher in crawlercommons.fetcher.http
+ classSimpleHttpFetcher + +
+           
+  +

+ + + + + +
+Uses of BaseHttpFetcher in crawlercommons.robots
+  +

+ + + + + + + + + + + + + +
Methods in crawlercommons.robots that return BaseHttpFetcher
+static BaseHttpFetcherRobotUtils.createFetcher(BaseHttpFetcher fetcher) + +
+           
+static BaseHttpFetcherRobotUtils.createFetcher(UserAgent userAgent, + int maxThreads) + +
+           
+  +

+ + + + + + + + + + + + + +
Methods in crawlercommons.robots with parameters of type BaseHttpFetcher
+static BaseHttpFetcherRobotUtils.createFetcher(BaseHttpFetcher fetcher) + +
+           
+static BaseRobotRulesRobotUtils.getRobotRules(BaseHttpFetcher fetcher, + BaseRobotsParser parser, + java.net.URL robotsUrl) + +
+          Externally visible, static method for use in tools and for testing.
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/class-use/SimpleHttpFetcher.html b/doc/javadoc/crawlercommons/fetcher/http/class-use/SimpleHttpFetcher.html new file mode 100644 index 0000000..9977655 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/class-use/SimpleHttpFetcher.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.fetcher.http.SimpleHttpFetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.http.SimpleHttpFetcher

+
+No usage of crawlercommons.fetcher.http.SimpleHttpFetcher +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/class-use/UserAgent.html b/doc/javadoc/crawlercommons/fetcher/http/class-use/UserAgent.html new file mode 100644 index 0000000..97b0f42 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/class-use/UserAgent.html @@ -0,0 +1,254 @@ + + + + + + +Uses of Class crawlercommons.fetcher.http.UserAgent (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.fetcher.http.UserAgent

+
+ + + + + + + + + + + + + +
+Packages that use UserAgent
crawlercommons.fetcher.http  
crawlercommons.robots  
+  +

+ + + + + +
+Uses of UserAgent in crawlercommons.fetcher.http
+  +

+ + + + + + + + + +
Fields in crawlercommons.fetcher.http declared as UserAgent
+protected  UserAgentBaseHttpFetcher._userAgent + +
+           
+  +

+ + + + + + + + + +
Methods in crawlercommons.fetcher.http that return UserAgent
+ UserAgentBaseHttpFetcher.getUserAgent() + +
+           
+  +

+ + + + + + + + + + + + + + +
Constructors in crawlercommons.fetcher.http with parameters of type UserAgent
BaseHttpFetcher(int maxThreads, + UserAgent userAgent) + +
+           
SimpleHttpFetcher(int maxThreads, + UserAgent userAgent) + +
+           
SimpleHttpFetcher(UserAgent userAgent) + +
+           
+  +

+ + + + + +
+Uses of UserAgent in crawlercommons.robots
+  +

+ + + + + + + + + +
Methods in crawlercommons.robots with parameters of type UserAgent
+static BaseHttpFetcherRobotUtils.createFetcher(UserAgent userAgent, + int maxThreads) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/package-frame.html b/doc/javadoc/crawlercommons/fetcher/http/package-frame.html new file mode 100644 index 0000000..0c89c02 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/package-frame.html @@ -0,0 +1,47 @@ + + + + + + +crawlercommons.fetcher.http (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + +crawlercommons.fetcher.http + + + + +
+Classes  + +
+BaseHttpFetcher +
+SimpleHttpFetcher +
+UserAgent
+ + + + + + +
+Enums  + +
+BaseHttpFetcher.RedirectMode
+ + + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/package-summary.html b/doc/javadoc/crawlercommons/fetcher/http/package-summary.html new file mode 100644 index 0000000..d2cfe75 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/package-summary.html @@ -0,0 +1,179 @@ + + + + + + +crawlercommons.fetcher.http (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package crawlercommons.fetcher.http +

+ + + + + + + + + + + + + + + + + +
+Class Summary
BaseHttpFetcher 
SimpleHttpFetcher 
UserAgent 
+  + +

+ + + + + + + + + +
+Enum Summary
BaseHttpFetcher.RedirectMode 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/package-tree.html b/doc/javadoc/crawlercommons/fetcher/http/package-tree.html new file mode 100644 index 0000000..2414eca --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/package-tree.html @@ -0,0 +1,169 @@ + + + + + + +crawlercommons.fetcher.http Class Hierarchy (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package crawlercommons.fetcher.http +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +

+Enum Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/http/package-use.html b/doc/javadoc/crawlercommons/fetcher/http/package-use.html new file mode 100644 index 0000000..4041eb5 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/http/package-use.html @@ -0,0 +1,207 @@ + + + + + + +Uses of Package crawlercommons.fetcher.http (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
crawlercommons.fetcher.http

+
+ + + + + + + + + + + + + +
+Packages that use crawlercommons.fetcher.http
crawlercommons.fetcher.http  
crawlercommons.robots  
+  +

+ + + + + + + + + + + + + + +
+Classes in crawlercommons.fetcher.http used by crawlercommons.fetcher.http
BaseHttpFetcher + +
+           
BaseHttpFetcher.RedirectMode + +
+           
UserAgent + +
+           
+  +

+ + + + + + + + + + + +
+Classes in crawlercommons.fetcher.http used by crawlercommons.robots
BaseHttpFetcher + +
+           
UserAgent + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/package-frame.html b/doc/javadoc/crawlercommons/fetcher/package-frame.html new file mode 100644 index 0000000..55535af --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/package-frame.html @@ -0,0 +1,76 @@ + + + + + + +crawlercommons.fetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + +crawlercommons.fetcher + + + + +
+Classes  + +
+BaseFetcher +
+EncodingUtils +
+EncodingUtils.ExpandedResult +
+FetchedResult +
+Payload
+ + + + + + +
+Enums  + +
+AbortedFetchReason +
+RedirectFetchException.RedirectExceptionReason
+ + + + + + +
+Exceptions  + +
+AbortedFetchException +
+BadProtocolFetchException +
+BaseFetchException +
+HttpFetchException +
+IOFetchException +
+RedirectFetchException +
+UrlFetchException
+ + + + diff --git a/doc/javadoc/crawlercommons/fetcher/package-summary.html b/doc/javadoc/crawlercommons/fetcher/package-summary.html new file mode 100644 index 0000000..08fdd62 --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/package-summary.html @@ -0,0 +1,229 @@ + + + + + + +crawlercommons.fetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package crawlercommons.fetcher +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+Class Summary
BaseFetcher 
EncodingUtils 
EncodingUtils.ExpandedResult 
FetchedResult 
Payload 
+  + +

+ + + + + + + + + + + + + +
+Enum Summary
AbortedFetchReason 
RedirectFetchException.RedirectExceptionReason 
+  + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Exception Summary
AbortedFetchException 
BadProtocolFetchException 
BaseFetchException 
HttpFetchException 
IOFetchException 
RedirectFetchException 
UrlFetchException 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/package-tree.html b/doc/javadoc/crawlercommons/fetcher/package-tree.html new file mode 100644 index 0000000..203e63e --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/package-tree.html @@ -0,0 +1,172 @@ + + + + + + +crawlercommons.fetcher Class Hierarchy (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package crawlercommons.fetcher +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +

+Enum Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/fetcher/package-use.html b/doc/javadoc/crawlercommons/fetcher/package-use.html new file mode 100644 index 0000000..42264bb --- /dev/null +++ b/doc/javadoc/crawlercommons/fetcher/package-use.html @@ -0,0 +1,274 @@ + + + + + + +Uses of Package crawlercommons.fetcher (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
crawlercommons.fetcher

+
+ + + + + + + + + + + + + + + + + +
+Packages that use crawlercommons.fetcher
crawlercommons.fetcher  
crawlercommons.fetcher.file  
crawlercommons.fetcher.http  
+  +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Classes in crawlercommons.fetcher used by crawlercommons.fetcher
AbortedFetchReason + +
+           
BaseFetchException + +
+           
EncodingUtils.ExpandedResult + +
+           
FetchedResult + +
+           
Payload + +
+           
RedirectFetchException.RedirectExceptionReason + +
+           
+  +

+ + + + + + + + + + + + + + + + + +
+Classes in crawlercommons.fetcher used by crawlercommons.fetcher.file
BaseFetcher + +
+           
BaseFetchException + +
+           
FetchedResult + +
+           
Payload + +
+           
+  +

+ + + + + + + + + + + + + + + + + +
+Classes in crawlercommons.fetcher used by crawlercommons.fetcher.http
BaseFetcher + +
+           
BaseFetchException + +
+           
FetchedResult + +
+           
Payload + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/BaseRobotRules.html b/doc/javadoc/crawlercommons/robots/BaseRobotRules.html new file mode 100644 index 0000000..08c99c6 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/BaseRobotRules.html @@ -0,0 +1,448 @@ + + + + + + +BaseRobotRules (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.robots +
+Class BaseRobotRules

+
+java.lang.Object
+  extended by crawlercommons.robots.BaseRobotRules
+
+
+
Direct Known Subclasses:
SimpleRobotRules
+
+
+
+
public abstract class BaseRobotRules
extends java.lang.Object
+ + +

+Result from parsing a single robots.txt file - which means we + get a set of rules, and a crawl-delay. +

+ +

+


+ +

+ + + + + + + + + + + +
+Field Summary
+static longUNSET_CRAWL_DELAY + +
+           
+  + + + + + + + + + + +
+Constructor Summary
BaseRobotRules() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidaddSitemap(java.lang.String sitemap) + +
+           
+ longgetCrawlDelay() + +
+           
+ java.util.List<java.lang.String>getSitemaps() + +
+           
+abstract  booleanisAllowAll() + +
+           
+abstract  booleanisAllowed(java.lang.String url) + +
+           
+abstract  booleanisAllowNone() + +
+           
+ booleanisDeferVisits() + +
+           
+ voidsetCrawlDelay(long crawlDelay) + +
+           
+ voidsetDeferVisits(boolean deferVisits) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+UNSET_CRAWL_DELAY

+
+public static final long UNSET_CRAWL_DELAY
+
+
+
See Also:
Constant Field Values
+
+ + + + + + + + +
+Constructor Detail
+ +

+BaseRobotRules

+
+public BaseRobotRules()
+
+
+ + + + + + + + +
+Method Detail
+ +

+isAllowed

+
+public abstract boolean isAllowed(java.lang.String url)
+
+
+
+
+
+
+ +

+isAllowAll

+
+public abstract boolean isAllowAll()
+
+
+
+
+
+
+ +

+isAllowNone

+
+public abstract boolean isAllowNone()
+
+
+
+
+
+
+ +

+getCrawlDelay

+
+public long getCrawlDelay()
+
+
+
+
+
+
+ +

+setCrawlDelay

+
+public void setCrawlDelay(long crawlDelay)
+
+
+
+
+
+
+ +

+isDeferVisits

+
+public boolean isDeferVisits()
+
+
+
+
+
+
+ +

+setDeferVisits

+
+public void setDeferVisits(boolean deferVisits)
+
+
+
+
+
+
+ +

+addSitemap

+
+public void addSitemap(java.lang.String sitemap)
+
+
+
+
+
+
+ +

+getSitemaps

+
+public java.util.List<java.lang.String> getSitemaps()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/BaseRobotsParser.html b/doc/javadoc/crawlercommons/robots/BaseRobotsParser.html new file mode 100644 index 0000000..15a5b09 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/BaseRobotsParser.html @@ -0,0 +1,302 @@ + + + + + + +BaseRobotsParser (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.robots +
+Class BaseRobotsParser

+
+java.lang.Object
+  extended by crawlercommons.robots.BaseRobotsParser
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
Direct Known Subclasses:
SimpleRobotRulesParser
+
+
+
+
public abstract class BaseRobotsParser
extends java.lang.Object
implements java.io.Serializable
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + +
+Constructor Summary
BaseRobotsParser() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+abstract  BaseRobotRulesfailedFetch(int httpStatusCode) + +
+          The fetch of robots.txt failed, so return rules appropriate give the + HTTP status code.
+abstract  BaseRobotRulesparseContent(java.lang.String url, + byte[] content, + java.lang.String contentType, + java.lang.String robotName) + +
+          Parse the robots.txt file in , and return rules appropriate for + processing paths by
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+BaseRobotsParser

+
+public BaseRobotsParser()
+
+
+ + + + + + + + +
+Method Detail
+ +

+parseContent

+
+public abstract BaseRobotRules parseContent(java.lang.String url,
+                                            byte[] content,
+                                            java.lang.String contentType,
+                                            java.lang.String robotName)
+
+
Parse the robots.txt file in content, and return rules appropriate for + processing paths by robotName. +

+

+
+
+
+
Parameters:
url - URL that content was fetched from (for reporting purposes)
content - raw bytes from the site's robots.txt file
contentType - HTTP response header (mime-type)
robotName - name of crawler, to be used when processing file contents + (just the name portion, w/o version or other details) +
Returns:
robot rules.
+
+
+
+ +

+failedFetch

+
+public abstract BaseRobotRules failedFetch(int httpStatusCode)
+
+
The fetch of robots.txt failed, so return rules appropriate given the + HTTP status code. +

+

+
+
+
+
Parameters:
httpStatusCode - a failure status code (NOT 2xx) +
Returns:
robot rules
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/RobotUtils.html b/doc/javadoc/crawlercommons/robots/RobotUtils.html new file mode 100644 index 0000000..3827c36 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/RobotUtils.html @@ -0,0 +1,319 @@ + + + + + + +RobotUtils (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.robots +
+Class RobotUtils

+
+java.lang.Object
+  extended by crawlercommons.robots.RobotUtils
+
+
+
+
public class RobotUtils
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
RobotUtils() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static BaseHttpFetchercreateFetcher(BaseHttpFetcher fetcher) + +
+           
+static BaseHttpFetchercreateFetcher(UserAgent userAgent, + int maxThreads) + +
+           
+static longgetMaxFetchTime() + +
+           
+static BaseRobotRulesgetRobotRules(BaseHttpFetcher fetcher, + BaseRobotsParser parser, + java.net.URL robotsUrl) + +
+          Externally visible, static method for use in tools and for testing.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+RobotUtils

+
+public RobotUtils()
+
+
+ + + + + + + + +
+Method Detail
+ +

+createFetcher

+
+public static BaseHttpFetcher createFetcher(BaseHttpFetcher fetcher)
+
+
+
+
+
+
+ +

+createFetcher

+
+public static BaseHttpFetcher createFetcher(UserAgent userAgent,
+                                            int maxThreads)
+
+
+
+
+
+
+ +

+getMaxFetchTime

+
+public static long getMaxFetchTime()
+
+
+
+
+
+
+ +

+getRobotRules

+
+public static BaseRobotRules getRobotRules(BaseHttpFetcher fetcher,
+                                           BaseRobotsParser parser,
+                                           java.net.URL robotsUrl)
+
+
Externally visible, static method for use in tools and for testing. + Fetch the indicated robots.txt file, parse it, and generate rules. +

+

+
Parameters:
fetcher - Fetcher for downloading robots.txt file
robotsUrl - URL to robots.txt file +
Returns:
Robot rules
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRule.html b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRule.html new file mode 100644 index 0000000..01c43e9 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRule.html @@ -0,0 +1,249 @@ + + + + + + +SimpleRobotRules.RobotRule (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.robots +
+Class SimpleRobotRules.RobotRule

+
+java.lang.Object
+  extended by crawlercommons.robots.SimpleRobotRules.RobotRule
+
+
+
Enclosing class:
SimpleRobotRules
+
+
+
+
protected class SimpleRobotRules.RobotRule
extends java.lang.Object
+ + +

+Single rule that maps from a path prefix to an allow flag. +

+ +

+


+ +

+ + + + + + + + + + + + + + +
+Constructor Summary
SimpleRobotRules.RobotRule(java.util.regex.Pattern pattern, + boolean allow) + +
+           
SimpleRobotRules.RobotRule(java.lang.String prefix, + boolean allow) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+SimpleRobotRules.RobotRule

+
+public SimpleRobotRules.RobotRule(java.lang.String prefix,
+                                  boolean allow)
+
+
+
+ +

+SimpleRobotRules.RobotRule

+
+public SimpleRobotRules.RobotRule(java.util.regex.Pattern pattern,
+                                  boolean allow)
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRulesMode.html b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRulesMode.html new file mode 100644 index 0000000..99e2b1e --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRulesMode.html @@ -0,0 +1,341 @@ + + + + + + +SimpleRobotRules.RobotRulesMode (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.robots +
+Enum SimpleRobotRules.RobotRulesMode

+
+java.lang.Object
+  extended by java.lang.Enum<SimpleRobotRules.RobotRulesMode>
+      extended by crawlercommons.robots.SimpleRobotRules.RobotRulesMode
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Comparable<SimpleRobotRules.RobotRulesMode>
+
+
+
Enclosing class:
SimpleRobotRules
+
+
+
+
public static enum SimpleRobotRules.RobotRulesMode
extends java.lang.Enum<SimpleRobotRules.RobotRulesMode>
+ + +

+


+ +

+ + + + + + + + + + + + + + + + +
+Enum Constant Summary
ALLOW_ALL + +
+           
ALLOW_NONE + +
+           
ALLOW_SOME + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static SimpleRobotRules.RobotRulesModevalueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static SimpleRobotRules.RobotRulesMode[]values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+ + + + + + + +
Methods inherited from class java.lang.Enum
clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+ + + + + + + +
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Enum Constant Detail
+ +

+ALLOW_ALL

+
+public static final SimpleRobotRules.RobotRulesMode ALLOW_ALL
+
+
+
+
+
+ +

+ALLOW_NONE

+
+public static final SimpleRobotRules.RobotRulesMode ALLOW_NONE
+
+
+
+
+
+ +

+ALLOW_SOME

+
+public static final SimpleRobotRules.RobotRulesMode ALLOW_SOME
+
+
+
+
+ + + + + + + + +
+Method Detail
+ +

+values

+
+public static SimpleRobotRules.RobotRulesMode[] values()
+
+
Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
+for (SimpleRobotRules.RobotRulesMode c : SimpleRobotRules.RobotRulesMode.values())
+    System.out.println(c);
+
+

+

+ +
Returns:
an array containing the constants of this enum type, in +the order they are declared
+
+
+
+ +

+valueOf

+
+public static SimpleRobotRules.RobotRulesMode valueOf(java.lang.String name)
+
+
Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.) +

+

+
Parameters:
name - the name of the enum constant to be returned. +
Returns:
the enum constant with the specified name +
Throws: +
java.lang.IllegalArgumentException - if this enum type has no constant +with the specified name +
java.lang.NullPointerException - if the argument is null
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/SimpleRobotRules.html b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.html new file mode 100644 index 0000000..8e6e326 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.html @@ -0,0 +1,417 @@ + + + + + + +SimpleRobotRules (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.robots +
+Class SimpleRobotRules

+
+java.lang.Object
+  extended by crawlercommons.robots.BaseRobotRules
+      extended by crawlercommons.robots.SimpleRobotRules
+
+
+
+
public class SimpleRobotRules
extends BaseRobotRules
+ + +

+Result from parsing a single robots.txt file - which means we + get a set of rules, and a crawl-delay. +

+ +

+


+ +

+ + + + + + + + + + + + + + + +
+Nested Class Summary
+protected  classSimpleRobotRules.RobotRule + +
+          Single rule that maps from a path prefix to an allow flag.
+static classSimpleRobotRules.RobotRulesMode + +
+           
+ + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class crawlercommons.robots.BaseRobotRules
UNSET_CRAWL_DELAY
+  + + + + + + + + + + + + + +
+Constructor Summary
SimpleRobotRules() + +
+           
SimpleRobotRules(SimpleRobotRules.RobotRulesMode mode) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidaddRule(java.lang.String prefix, + boolean allow) + +
+           
+ voidclearRules() + +
+           
+ booleanisAllowAll() + +
+          Is our ruleset set up to allow all access?
+ booleanisAllowed(java.lang.String url) + +
+           
+ booleanisAllowNone() + +
+          Is our ruleset set up to disallow all access?
+ + + + + + + +
Methods inherited from class crawlercommons.robots.BaseRobotRules
addSitemap, getCrawlDelay, getSitemaps, isDeferVisits, setCrawlDelay, setDeferVisits
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+SimpleRobotRules

+
+public SimpleRobotRules()
+
+
+
+ +

+SimpleRobotRules

+
+public SimpleRobotRules(SimpleRobotRules.RobotRulesMode mode)
+
+
+ + + + + + + + +
+Method Detail
+ +

+clearRules

+
+public void clearRules()
+
+
+
+
+
+
+ +

+addRule

+
+public void addRule(java.lang.String prefix,
+                    boolean allow)
+
+
+
+
+
+
+ +

+isAllowed

+
+public boolean isAllowed(java.lang.String url)
+
+
+
Specified by:
isAllowed in class BaseRobotRules
+
+
+
+
+
+
+ +

+isAllowAll

+
+public boolean isAllowAll()
+
+
Is our ruleset set up to allow all access? +

+

+
Specified by:
isAllowAll in class BaseRobotRules
+
+
+ +
Returns:
true if all URLs are allowed.
+
+
+
+ +

+isAllowNone

+
+public boolean isAllowNone()
+
+
Is our ruleset set up to disallow all access? +

+

+
Specified by:
isAllowNone in class BaseRobotRules
+
+
+ +
Returns:
true if no URLs are allowed.
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/SimpleRobotRulesParser.html b/doc/javadoc/crawlercommons/robots/SimpleRobotRulesParser.html new file mode 100644 index 0000000..4659242 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/SimpleRobotRulesParser.html @@ -0,0 +1,321 @@ + + + + + + +SimpleRobotRulesParser (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.robots +
+Class SimpleRobotRulesParser

+
+java.lang.Object
+  extended by crawlercommons.robots.BaseRobotsParser
+      extended by crawlercommons.robots.SimpleRobotRulesParser
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class SimpleRobotRulesParser
extends BaseRobotsParser
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + +
+Constructor Summary
SimpleRobotRulesParser() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ BaseRobotRulesfailedFetch(int httpStatusCode) + +
+          The fetch of robots.txt failed, so return rules appropriate give the + HTTP status code.
+ intgetNumWarnings() + +
+           
+ BaseRobotRulesparseContent(java.lang.String url, + byte[] content, + java.lang.String contentType, + java.lang.String robotName) + +
+          Parse the robots.txt file in , and return rules appropriate for + processing paths by
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+SimpleRobotRulesParser

+
+public SimpleRobotRulesParser()
+
+
+ + + + + + + + +
+Method Detail
+ +

+failedFetch

+
+public BaseRobotRules failedFetch(int httpStatusCode)
+
+
Description copied from class: BaseRobotsParser
+
The fetch of robots.txt failed, so return rules appropriate given the + HTTP status code. +

+

+
Specified by:
failedFetch in class BaseRobotsParser
+
+
+
Parameters:
httpStatusCode - a failure status code (NOT 2xx) +
Returns:
robot rules
+
+
+
+ +

+parseContent

+
+public BaseRobotRules parseContent(java.lang.String url,
+                                   byte[] content,
+                                   java.lang.String contentType,
+                                   java.lang.String robotName)
+
+
Description copied from class: BaseRobotsParser
+
Parse the robots.txt file in content, and return rules appropriate for + processing paths by robotName. +

+

+
Specified by:
parseContent in class BaseRobotsParser
+
+
+
Parameters:
url - URL that content was fetched from (for reporting purposes)
content - raw bytes from the site's robots.txt file
contentType - HTTP response header (mime-type)
robotName - name of crawler, to be used when processing file contents + (just the name portion, w/o version or other details) +
Returns:
robot rules.
+
+
+
+ +

+getNumWarnings

+
+public int getNumWarnings()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/class-use/BaseRobotRules.html b/doc/javadoc/crawlercommons/robots/class-use/BaseRobotRules.html new file mode 100644 index 0000000..01648e0 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/class-use/BaseRobotRules.html @@ -0,0 +1,239 @@ + + + + + + +Uses of Class crawlercommons.robots.BaseRobotRules (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.robots.BaseRobotRules

+
+ + + + + + + + + +
+Packages that use BaseRobotRules
crawlercommons.robots  
+  +

+ + + + + +
+Uses of BaseRobotRules in crawlercommons.robots
+  +

+ + + + + + + + + +
Subclasses of BaseRobotRules in crawlercommons.robots
+ classSimpleRobotRules + +
+          Result from parsing a single robots.txt file - which means we + get a set of rules, and a crawl-delay.
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Methods in crawlercommons.robots that return BaseRobotRules
+ BaseRobotRulesSimpleRobotRulesParser.failedFetch(int httpStatusCode) + +
+           
+abstract  BaseRobotRulesBaseRobotsParser.failedFetch(int httpStatusCode) + +
+          The fetch of robots.txt failed, so return rules appropriate give the + HTTP status code.
+static BaseRobotRulesRobotUtils.getRobotRules(BaseHttpFetcher fetcher, + BaseRobotsParser parser, + java.net.URL robotsUrl) + +
+          Externally visible, static method for use in tools and for testing.
+ BaseRobotRulesSimpleRobotRulesParser.parseContent(java.lang.String url, + byte[] content, + java.lang.String contentType, + java.lang.String robotName) + +
+           
+abstract  BaseRobotRulesBaseRobotsParser.parseContent(java.lang.String url, + byte[] content, + java.lang.String contentType, + java.lang.String robotName) + +
+          Parse the robots.txt file in , and return rules appropriate for + processing paths by
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/class-use/BaseRobotsParser.html b/doc/javadoc/crawlercommons/robots/class-use/BaseRobotsParser.html new file mode 100644 index 0000000..992995d --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/class-use/BaseRobotsParser.html @@ -0,0 +1,198 @@ + + + + + + +Uses of Class crawlercommons.robots.BaseRobotsParser (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.robots.BaseRobotsParser

+
+ + + + + + + + + +
+Packages that use BaseRobotsParser
crawlercommons.robots  
+  +

+ + + + + +
+Uses of BaseRobotsParser in crawlercommons.robots
+  +

+ + + + + + + + + +
Subclasses of BaseRobotsParser in crawlercommons.robots
+ classSimpleRobotRulesParser + +
+           
+  +

+ + + + + + + + + +
Methods in crawlercommons.robots with parameters of type BaseRobotsParser
+static BaseRobotRulesRobotUtils.getRobotRules(BaseHttpFetcher fetcher, + BaseRobotsParser parser, + java.net.URL robotsUrl) + +
+          Externally visible, static method for use in tools and for testing.
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/class-use/RobotUtils.html b/doc/javadoc/crawlercommons/robots/class-use/RobotUtils.html new file mode 100644 index 0000000..45a870b --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/class-use/RobotUtils.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.robots.RobotUtils (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.robots.RobotUtils

+
+No usage of crawlercommons.robots.RobotUtils +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRule.html b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRule.html new file mode 100644 index 0000000..dd5dad8 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRule.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.robots.SimpleRobotRules.RobotRule (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.robots.SimpleRobotRules.RobotRule

+
+No usage of crawlercommons.robots.SimpleRobotRules.RobotRule +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRulesMode.html b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRulesMode.html new file mode 100644 index 0000000..419adfe --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRulesMode.html @@ -0,0 +1,203 @@ + + + + + + +Uses of Class crawlercommons.robots.SimpleRobotRules.RobotRulesMode (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.robots.SimpleRobotRules.RobotRulesMode

+
+ + + + + + + + + +
+Packages that use SimpleRobotRules.RobotRulesMode
crawlercommons.robots  
+  +

+ + + + + +
+Uses of SimpleRobotRules.RobotRulesMode in crawlercommons.robots
+  +

+ + + + + + + + + + + + + +
Methods in crawlercommons.robots that return SimpleRobotRules.RobotRulesMode
+static SimpleRobotRules.RobotRulesModeSimpleRobotRules.RobotRulesMode.valueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static SimpleRobotRules.RobotRulesMode[]SimpleRobotRules.RobotRulesMode.values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+  +

+ + + + + + + + +
Constructors in crawlercommons.robots with parameters of type SimpleRobotRules.RobotRulesMode
SimpleRobotRules(SimpleRobotRules.RobotRulesMode mode) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.html b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.html new file mode 100644 index 0000000..6355a6b --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.robots.SimpleRobotRules (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.robots.SimpleRobotRules

+
+No usage of crawlercommons.robots.SimpleRobotRules +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRulesParser.html b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRulesParser.html new file mode 100644 index 0000000..bacdb12 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRulesParser.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.robots.SimpleRobotRulesParser (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.robots.SimpleRobotRulesParser

+
+No usage of crawlercommons.robots.SimpleRobotRulesParser +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/package-frame.html b/doc/javadoc/crawlercommons/robots/package-frame.html new file mode 100644 index 0000000..adf4f38 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/package-frame.html @@ -0,0 +1,51 @@ + + + + + + +crawlercommons.robots (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + +crawlercommons.robots + + + + +
+Classes  + +
+BaseRobotRules +
+BaseRobotsParser +
+RobotUtils +
+SimpleRobotRules +
+SimpleRobotRulesParser
+ + + + + + +
+Enums  + +
+SimpleRobotRules.RobotRulesMode
+ + + + diff --git a/doc/javadoc/crawlercommons/robots/package-summary.html b/doc/javadoc/crawlercommons/robots/package-summary.html new file mode 100644 index 0000000..7d7abe0 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/package-summary.html @@ -0,0 +1,189 @@ + + + + + + +crawlercommons.robots (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package crawlercommons.robots +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+Class Summary
BaseRobotRulesResult from parsing a single robots.txt file - which means we + get a set of rules, and a crawl-delay.
BaseRobotsParser 
RobotUtils 
SimpleRobotRulesResult from parsing a single robots.txt file - which means we + get a set of rules, and a crawl-delay.
SimpleRobotRulesParser 
+  + +

+ + + + + + + + + +
+Enum Summary
SimpleRobotRules.RobotRulesMode 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/package-tree.html b/doc/javadoc/crawlercommons/robots/package-tree.html new file mode 100644 index 0000000..ff1ead3 --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/package-tree.html @@ -0,0 +1,168 @@ + + + + + + +crawlercommons.robots Class Hierarchy (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package crawlercommons.robots +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +

+Enum Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/robots/package-use.html b/doc/javadoc/crawlercommons/robots/package-use.html new file mode 100644 index 0000000..0ece2eb --- /dev/null +++ b/doc/javadoc/crawlercommons/robots/package-use.html @@ -0,0 +1,183 @@ + + + + + + +Uses of Package crawlercommons.robots (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
crawlercommons.robots

+
+ + + + + + + + + +
+Packages that use crawlercommons.robots
crawlercommons.robots  
+  +

+ + + + + + + + + + + + + + +
+Classes in crawlercommons.robots used by crawlercommons.robots
BaseRobotRules + +
+          Result from parsing a single robots.txt file - which means we + get a set of rules, and a crawl-delay.
BaseRobotsParser + +
+           
SimpleRobotRules.RobotRulesMode + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.SitemapType.html b/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.SitemapType.html new file mode 100644 index 0000000..fc21bb9 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.SitemapType.html @@ -0,0 +1,377 @@ + + + + + + +AbstractSiteMap.SitemapType (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.sitemaps +
+Enum AbstractSiteMap.SitemapType

+
+java.lang.Object
+  extended by java.lang.Enum<AbstractSiteMap.SitemapType>
+      extended by crawlercommons.sitemaps.AbstractSiteMap.SitemapType
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Comparable<AbstractSiteMap.SitemapType>
+
+
+
Enclosing class:
AbstractSiteMap
+
+
+
+
public static enum AbstractSiteMap.SitemapType
extends java.lang.Enum<AbstractSiteMap.SitemapType>
+ + +

+Various Sitemap types +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + +
+Enum Constant Summary
ATOM + +
+           
INDEX + +
+           
RSS + +
+           
TEXT + +
+           
XML + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static AbstractSiteMap.SitemapTypevalueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static AbstractSiteMap.SitemapType[]values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+ + + + + + + +
Methods inherited from class java.lang.Enum
clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+ + + + + + + +
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Enum Constant Detail
+ +

+INDEX

+
+public static final AbstractSiteMap.SitemapType INDEX
+
+
+
+
+
+ +

+XML

+
+public static final AbstractSiteMap.SitemapType XML
+
+
+
+
+
+ +

+ATOM

+
+public static final AbstractSiteMap.SitemapType ATOM
+
+
+
+
+
+ +

+RSS

+
+public static final AbstractSiteMap.SitemapType RSS
+
+
+
+
+
+ +

+TEXT

+
+public static final AbstractSiteMap.SitemapType TEXT
+
+
+
+
+ + + + + + + + +
+Method Detail
+ +

+values

+
+public static AbstractSiteMap.SitemapType[] values()
+
+
Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
+for (AbstractSiteMap.SitemapType c : AbstractSiteMap.SitemapType.values())
+    System.out.println(c);
+
+

+

+ +
Returns:
an array containing the constants of this enum type, in +the order they are declared
+
+
+
+ +

+valueOf

+
+public static AbstractSiteMap.SitemapType valueOf(java.lang.String name)
+
+
Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.) +

+

+
Parameters:
name - the name of the enum constant to be returned. +
Returns:
the enum constant with the specified name +
Throws: +
java.lang.IllegalArgumentException - if this enum type has no constant +with the specified name +
java.lang.NullPointerException - if the argument is null
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.html b/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.html new file mode 100644 index 0000000..5cf3824 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.html @@ -0,0 +1,438 @@ + + + + + + +AbstractSiteMap (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.sitemaps +
+Class AbstractSiteMap

+
+java.lang.Object
+  extended by crawlercommons.sitemaps.AbstractSiteMap
+
+
+
Direct Known Subclasses:
SiteMap, SiteMapIndex
+
+
+
+
public abstract class AbstractSiteMap
extends java.lang.Object
+ + +

+SiteMap or SiteMapIndex +

+ +

+


+ +

+ + + + + + + + + + + +
+Nested Class Summary
+static classAbstractSiteMap.SitemapType + +
+          Various Sitemap types
+  + + + + + + + + + + +
+Constructor Summary
AbstractSiteMap() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static java.util.DateconvertToDate(java.lang.String date) + +
+          Convert the given date (given in an acceptable DateFormat), null if the + date is not in the correct format.
+static java.text.DateFormatgetFullDateFormat() + +
+           
+ java.util.DategetLastModified() + +
+           
+ AbstractSiteMap.SitemapTypegetType() + +
+           
+ java.net.URLgetUrl() + +
+           
+ booleanisIndex() + +
+           
+ booleanisProcessed() + +
+           
+ voidsetLastModified(java.util.Date lastModified) + +
+           
+ voidsetLastModified(java.lang.String lastModified) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+AbstractSiteMap

+
+public AbstractSiteMap()
+
+
+ + + + + + + + +
+Method Detail
+ +

+getFullDateFormat

+
+public static java.text.DateFormat getFullDateFormat()
+
+
+
+
+
+
+ +

+isIndex

+
+public boolean isIndex()
+
+
+
+
+
+
+ +

+getUrl

+
+public java.net.URL getUrl()
+
+
+ +
Returns:
the URL of the Sitemap
+
+
+
+ +

+getType

+
+public AbstractSiteMap.SitemapType getType()
+
+
+ +
Returns:
the Sitemap type
+
+
+
+ +

+isProcessed

+
+public boolean isProcessed()
+
+
+ +
Returns:
true if the Sitemap has been processed, i.e. it contains at least + one SiteMapURL
+
+
+
+ +

+setLastModified

+
+public void setLastModified(java.util.Date lastModified)
+
+
+
Parameters:
lastModified - the lastModified to set
+
+
+
+ +

+setLastModified

+
+public void setLastModified(java.lang.String lastModified)
+
+
+
Parameters:
lastModified - the lastModified to set
+
+
+
+ +

+getLastModified

+
+public java.util.Date getLastModified()
+
+
+ +
Returns:
the lastModified date of the Sitemap
+
+
+
+ +

+convertToDate

+
+public static java.util.Date convertToDate(java.lang.String date)
+
+
Convert the given date (given in an acceptable DateFormat); returns null if the + date is not in the correct format. +

+

+
Parameters:
date - the date to be parsed +
Returns:
the Date equivalent
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMap.html b/doc/javadoc/crawlercommons/sitemaps/SiteMap.html new file mode 100644 index 0000000..be3f97d --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/SiteMap.html @@ -0,0 +1,425 @@ + + + + + + +SiteMap (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.sitemaps +
+Class SiteMap

+
+java.lang.Object
+  extended by crawlercommons.sitemaps.AbstractSiteMap
+      extended by crawlercommons.sitemaps.SiteMap
+
+
+
+
public class SiteMap
extends AbstractSiteMap
+ + +

+


+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class crawlercommons.sitemaps.AbstractSiteMap
AbstractSiteMap.SitemapType
+  + + + + + + + + + + + + + + + + + + + + + + + +
+Constructor Summary
SiteMap() + +
+           
SiteMap(java.lang.String url) + +
+           
SiteMap(java.lang.String url, + java.lang.String lastModified) + +
+           
SiteMap(java.net.URL url) + +
+           
SiteMap(java.net.URL url, + java.util.Date lastModified) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidaddSiteMapUrl(SiteMapURL url) + +
+           
+ java.lang.StringgetBaseUrl() + +
+           
+ java.util.Collection<SiteMapURL>getSiteMapUrls() + +
+           
+ booleanisIndex() + +
+           
+ java.lang.StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class crawlercommons.sitemaps.AbstractSiteMap
convertToDate, getFullDateFormat, getLastModified, getType, getUrl, isProcessed, setLastModified, setLastModified
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+SiteMap

+
+public SiteMap()
+
+
+
+ +

+SiteMap

+
+public SiteMap(java.net.URL url)
+
+
+
+ +

+SiteMap

+
+public SiteMap(java.lang.String url)
+
+
+
+ +

+SiteMap

+
+public SiteMap(java.net.URL url,
+               java.util.Date lastModified)
+
+
+
+ +

+SiteMap

+
+public SiteMap(java.lang.String url,
+               java.lang.String lastModified)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getSiteMapUrls

+
+public java.util.Collection<SiteMapURL> getSiteMapUrls()
+
+
+ +
Returns:
the Collection of SitemapUrls in this Sitemap.
+
+
+
+ +

+toString

+
+public java.lang.String toString()
+
+
+
Overrides:
toString in class java.lang.Object
+
+
+
+
+
+
+ +

+getBaseUrl

+
+public java.lang.String getBaseUrl()
+
+
+ +
Returns:
the baseUrl for this Sitemap.
+
+
+
+ +

+addSiteMapUrl

+
+public void addSiteMapUrl(SiteMapURL url)
+
+
+
Parameters:
url - The SitemapUrl to be added to the Sitemap.
+
+
+
+ +

+isIndex

+
+public boolean isIndex()
+
+
+
Overrides:
isIndex in class AbstractSiteMap
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMapIndex.html b/doc/javadoc/crawlercommons/sitemaps/SiteMapIndex.html new file mode 100644 index 0000000..33f1b4d --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/SiteMapIndex.html @@ -0,0 +1,405 @@ + + + + + + +SiteMapIndex (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.sitemaps +
+Class SiteMapIndex

+
+java.lang.Object
+  extended by crawlercommons.sitemaps.AbstractSiteMap
+      extended by crawlercommons.sitemaps.SiteMapIndex
+
+
+
+
public class SiteMapIndex
extends AbstractSiteMap
+ + +

+


+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class crawlercommons.sitemaps.AbstractSiteMap
AbstractSiteMap.SitemapType
+  + + + + + + + + + + + + + + +
+Constructor Summary
SiteMapIndex() + +
+           
SiteMapIndex(java.net.URL url) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ AbstractSiteMapgetSitemap(java.net.URL url) + +
+          Returns the Sitemap that has the given URL.
+ java.util.Collection<AbstractSiteMap>getSitemaps() + +
+           
+ booleanhasUnprocessedSitemap() + +
+           
+ booleanisIndex() + +
+           
+ AbstractSiteMapnextUnprocessedSitemap() + +
+           
+ java.lang.StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class crawlercommons.sitemaps.AbstractSiteMap
convertToDate, getFullDateFormat, getLastModified, getType, getUrl, isProcessed, setLastModified, setLastModified
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+SiteMapIndex

+
+public SiteMapIndex()
+
+
+
+ +

+SiteMapIndex

+
+public SiteMapIndex(java.net.URL url)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getSitemaps

+
+public java.util.Collection<AbstractSiteMap> getSitemaps()
+
+
+ +
Returns:
a Collection of Sitemaps in this Sitemap Index.
+
+
+
+ +

+getSitemap

+
+public AbstractSiteMap getSitemap(java.net.URL url)
+
+
Returns the Sitemap that has the given URL. Returns null if the URL + cannot be found. +

+

+
Parameters:
url - The Sitemap's URL +
Returns:
SiteMap corresponding to the URL or null
+
+
+
+ +

+hasUnprocessedSitemap

+
+public boolean hasUnprocessedSitemap()
+
+
+ +
Returns:
true if there are Sitemaps in this index that have not been + processed yet, false otherwise.
+
+
+
+ +

+nextUnprocessedSitemap

+
+public AbstractSiteMap nextUnprocessedSitemap()
+
+
+ +
Returns:
an unprocessed Sitemap or null if no unprocessed Sitemaps could + be found.
+
+
+
+ +

+toString

+
+public java.lang.String toString()
+
+
+
Overrides:
toString in class java.lang.Object
+
+
+
+
+
+
+ +

+isIndex

+
+public boolean isIndex()
+
+
+
Overrides:
isIndex in class AbstractSiteMap
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMapParser.html b/doc/javadoc/crawlercommons/sitemaps/SiteMapParser.html new file mode 100644 index 0000000..efe3e30 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/SiteMapParser.html @@ -0,0 +1,352 @@ + + + + + + +SiteMapParser (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.sitemaps +
+Class SiteMapParser

+
+java.lang.Object
+  extended by crawlercommons.sitemaps.SiteMapParser
+
+
+
+
public class SiteMapParser
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + + + + + +
+Field Summary
+static org.slf4j.LoggerLOG + +
+           
+static intMAX_BYTES_ALLOWED + +
+          Sitemap docs must be limited to 10MB (10,485,760 bytes)
+  + + + + + + + + + + +
+Constructor Summary
SiteMapParser() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+ AbstractSiteMapparseSiteMap(java.lang.String contentType, + byte[] content, + AbstractSiteMap sitemap) + +
+          Returned a processed copy of an unprocessed sitemap object, i.e.
+ AbstractSiteMapparseSiteMap(java.lang.String contentType, + byte[] content, + java.net.URL url) + +
+          Returns a SiteMap or SiteMapIndex given a content type, byte content and + the URL of a sitemap
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+LOG

+
+public static final org.slf4j.Logger LOG
+
+
+
+
+
+ +

+MAX_BYTES_ALLOWED

+
+public static int MAX_BYTES_ALLOWED
+
+
Sitemap docs must be limited to 10MB (10,485,760 bytes) +

+

+
+
+ + + + + + + + +
+Constructor Detail
+ +

+SiteMapParser

+
+public SiteMapParser()
+
+
+ + + + + + + + +
+Method Detail
+ +

+parseSiteMap

+
+public AbstractSiteMap parseSiteMap(java.lang.String contentType,
+                                    byte[] content,
+                                    AbstractSiteMap sitemap)
+                             throws UnknownFormatException,
+                                    java.io.IOException
+
+
Returns a processed copy of an unprocessed sitemap object, i.e. transfers the value of + getLastModified and sets the original sitemap to processed. +

+

+ +
Throws: +
UnknownFormatException +
java.io.IOException
+
+
+
+ +

+parseSiteMap

+
+public AbstractSiteMap parseSiteMap(java.lang.String contentType,
+                                    byte[] content,
+                                    java.net.URL url)
+                             throws UnknownFormatException,
+                                    java.io.IOException
+
+
Returns a SiteMap or SiteMapIndex given a content type, byte content and + the URL of a sitemap +

+

+ +
Throws: +
UnknownFormatException +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.ChangeFrequency.html b/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.ChangeFrequency.html new file mode 100644 index 0000000..40aa22d --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.ChangeFrequency.html @@ -0,0 +1,409 @@ + + + + + + +SiteMapURL.ChangeFrequency (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.sitemaps +
+Enum SiteMapURL.ChangeFrequency

+
+java.lang.Object
+  extended by java.lang.Enum<SiteMapURL.ChangeFrequency>
+      extended by crawlercommons.sitemaps.SiteMapURL.ChangeFrequency
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Comparable<SiteMapURL.ChangeFrequency>
+
+
+
Enclosing class:
SiteMapURL
+
+
+
+
public static enum SiteMapURL.ChangeFrequency
extends java.lang.Enum<SiteMapURL.ChangeFrequency>
+ + +

+Allowed change frequencies +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Enum Constant Summary
ALWAYS + +
+           
DAILY + +
+           
HOURLY + +
+           
MONTHLY + +
+           
NEVER + +
+           
WEEKLY + +
+           
YEARLY + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static SiteMapURL.ChangeFrequencyvalueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static SiteMapURL.ChangeFrequency[]values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+ + + + + + + +
Methods inherited from class java.lang.Enum
clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+ + + + + + + +
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Enum Constant Detail
+ +

+ALWAYS

+
+public static final SiteMapURL.ChangeFrequency ALWAYS
+
+
+
+
+
+ +

+HOURLY

+
+public static final SiteMapURL.ChangeFrequency HOURLY
+
+
+
+
+
+ +

+DAILY

+
+public static final SiteMapURL.ChangeFrequency DAILY
+
+
+
+
+
+ +

+WEEKLY

+
+public static final SiteMapURL.ChangeFrequency WEEKLY
+
+
+
+
+
+ +

+MONTHLY

+
+public static final SiteMapURL.ChangeFrequency MONTHLY
+
+
+
+
+
+ +

+YEARLY

+
+public static final SiteMapURL.ChangeFrequency YEARLY
+
+
+
+
+
+ +

+NEVER

+
+public static final SiteMapURL.ChangeFrequency NEVER
+
+
+
+
+ + + + + + + + +
+Method Detail
+ +

+values

+
+public static SiteMapURL.ChangeFrequency[] values()
+
+
Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
+for (SiteMapURL.ChangeFrequency c : SiteMapURL.ChangeFrequency.values())
+    System.out.println(c);
+
+

+

+ +
Returns:
an array containing the constants of this enum type, in +the order they are declared
+
+
+
+ +

+valueOf

+
+public static SiteMapURL.ChangeFrequency valueOf(java.lang.String name)
+
+
Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.) +

+

+
Parameters:
name - the name of the enum constant to be returned. +
Returns:
the enum constant with the specified name +
Throws: +
java.lang.IllegalArgumentException - if this enum type has no constant +with the specified name +
java.lang.NullPointerException - if the argument is null
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.html b/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.html new file mode 100644 index 0000000..5b0bd1f --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.html @@ -0,0 +1,594 @@ + + + + + + +SiteMapURL (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.sitemaps +
+Class SiteMapURL

+
+java.lang.Object
+  extended by crawlercommons.sitemaps.SiteMapURL
+
+
+
+
public class SiteMapURL
extends java.lang.Object
+ + +

+The SiteMapURL class represents a URL found in a Sitemap. +

+ +

+

+
Author:
+
fmccown
+
+
+ +

+ + + + + + + + + + + +
+Nested Class Summary
+static classSiteMapURL.ChangeFrequency + +
+          Allowed change frequencies
+  + + + + + + + + + + + + + + + + + + + +
+Constructor Summary
SiteMapURL(java.lang.String url) + +
+           
SiteMapURL(java.lang.String url, + java.lang.String lastModified, + java.lang.String changeFreq, + java.lang.String priority) + +
+           
SiteMapURL(java.net.URL url) + +
+           
SiteMapURL(java.net.URL url, + java.util.Date lastModified, + SiteMapURL.ChangeFrequency changeFreq, + double priority) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ SiteMapURL.ChangeFrequencygetChangeFrequency() + +
+          Return the URL's change frequency
+ java.util.DategetLastModified() + +
+          Return when this URL was last modified.
+ doublegetPriority() + +
+          Return this URL's priority (a value between [0.0 - 1.0]).
+ java.net.URLgetUrl() + +
+          Return the URL.
+ voidsetChangeFrequency(SiteMapURL.ChangeFrequency changeFreq) + +
+          Set the URL's change frequency
+ voidsetChangeFrequency(java.lang.String changeFreq) + +
+          Set the URL's change frequency
+ voidsetLastModified(java.util.Date lastModified) + +
+          Set when this URL was last modified.
+ voidsetLastModified(java.lang.String lastModified) + +
+          Set when this URL was last modified.
+ voidsetPriority(double priority) + +
+          Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the + given priority is out of range).
+ voidsetPriority(java.lang.String priority) + +
+          Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the + given priority is out of range).
+ voidsetUrl(java.lang.String url) + +
+          Set the URL.
+ voidsetUrl(java.net.URL url) + +
+          Set the URL.
+ java.lang.StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+SiteMapURL

+
+public SiteMapURL(java.lang.String url)
+
+
+
+ +

+SiteMapURL

+
+public SiteMapURL(java.net.URL url)
+
+
+
+ +

+SiteMapURL

+
+public SiteMapURL(java.lang.String url,
+                  java.lang.String lastModified,
+                  java.lang.String changeFreq,
+                  java.lang.String priority)
+
+
+
+ +

+SiteMapURL

+
+public SiteMapURL(java.net.URL url,
+                  java.util.Date lastModified,
+                  SiteMapURL.ChangeFrequency changeFreq,
+                  double priority)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getUrl

+
+public java.net.URL getUrl()
+
+
Return the URL. +

+

+ +
Returns:
URL
+
+
+
+ +

+setUrl

+
+public void setUrl(java.net.URL url)
+
+
Set the URL. +

+

+
Parameters:
url -
+
+
+
+ +

+setUrl

+
+public void setUrl(java.lang.String url)
+
+
Set the URL. +

+

+
Parameters:
url -
+
+
+
+ +

+getLastModified

+
+public java.util.Date getLastModified()
+
+
Return when this URL was last modified. +

+

+ +
Returns:
last modified date
+
+
+
+ +

+setLastModified

+
+public void setLastModified(java.lang.String lastModified)
+
+
Set when this URL was last modified. +

+

+
Parameters:
lastModified -
+
+
+
+ +

+setLastModified

+
+public void setLastModified(java.util.Date lastModified)
+
+
Set when this URL was last modified. +

+

+
Parameters:
lastModified -
+
+
+
+ +

+getPriority

+
+public double getPriority()
+
+
Return this URL's priority (a value between [0.0 - 1.0]). +

+

+ +
Returns:
URL's priority (a value between [0.0 - 1.0])
+
+
+
+ +

+setPriority

+
+public void setPriority(double priority)
+
+
Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the + given priority is out of range). +

+

+
Parameters:
priority -
+
+
+
+ +

+setPriority

+
+public void setPriority(java.lang.String priority)
+
+
Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the + given priority is out of range). +

+

+
Parameters:
priority -
+
+
+
+ +

+getChangeFrequency

+
+public SiteMapURL.ChangeFrequency getChangeFrequency()
+
+
Return the URL's change frequency +

+

+ +
Returns:
the URL's change frequency
+
+
+
+ +

+setChangeFrequency

+
+public void setChangeFrequency(SiteMapURL.ChangeFrequency changeFreq)
+
+
Set the URL's change frequency +

+

+
Parameters:
changeFreq -
+
+
+
+ +

+setChangeFrequency

+
+public void setChangeFrequency(java.lang.String changeFreq)
+
+
Set the URL's change frequency +

+

+
Parameters:
changeFreq -
+
+
+
+ +

+toString

+
+public java.lang.String toString()
+
+
+
Overrides:
toString in class java.lang.Object
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/UnknownFormatException.html b/doc/javadoc/crawlercommons/sitemaps/UnknownFormatException.html new file mode 100644 index 0000000..db79670 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/UnknownFormatException.html @@ -0,0 +1,282 @@ + + + + + + +UnknownFormatException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.sitemaps +
+Class UnknownFormatException

+
+java.lang.Object
+  extended by java.lang.Throwable
+      extended by java.lang.Exception
+          extended by crawlercommons.sitemaps.UnknownFormatException
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class UnknownFormatException
extends java.lang.Exception
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + +
+Constructor Summary
UnknownFormatException() + +
+           
UnknownFormatException(java.lang.String err) + +
+           
+  + + + + + + + + + + + +
+Method Summary
+ java.lang.StringgetError() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Throwable
fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+UnknownFormatException

+
+public UnknownFormatException()
+
+
+
+ +

+UnknownFormatException

+
+public UnknownFormatException(java.lang.String err)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getError

+
+public java.lang.String getError()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.SitemapType.html b/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.SitemapType.html new file mode 100644 index 0000000..4db7d87 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.SitemapType.html @@ -0,0 +1,197 @@ + + + + + + +Uses of Class crawlercommons.sitemaps.AbstractSiteMap.SitemapType (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.sitemaps.AbstractSiteMap.SitemapType

+
+ + + + + + + + + +
+Packages that use AbstractSiteMap.SitemapType
crawlercommons.sitemaps  
+  +

+ + + + + +
+Uses of AbstractSiteMap.SitemapType in crawlercommons.sitemaps
+  +

+ + + + + + + + + + + + + + + + + +
Methods in crawlercommons.sitemaps that return AbstractSiteMap.SitemapType
+ AbstractSiteMap.SitemapTypeAbstractSiteMap.getType() + +
+           
+static AbstractSiteMap.SitemapTypeAbstractSiteMap.SitemapType.valueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static AbstractSiteMap.SitemapType[]AbstractSiteMap.SitemapType.values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.html b/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.html new file mode 100644 index 0000000..928aeaf --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.html @@ -0,0 +1,267 @@ + + + + + + +Uses of Class crawlercommons.sitemaps.AbstractSiteMap (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.sitemaps.AbstractSiteMap

+
+ + + + + + + + + +
+Packages that use AbstractSiteMap
crawlercommons.sitemaps  
+  +

+ + + + + +
+Uses of AbstractSiteMap in crawlercommons.sitemaps
+  +

+ + + + + + + + + + + + + +
Subclasses of AbstractSiteMap in crawlercommons.sitemaps
+ classSiteMap + +
+           
+ classSiteMapIndex + +
+           
+  +

+ + + + + + + + + + + + + + + + + + + + + +
Methods in crawlercommons.sitemaps that return AbstractSiteMap
+ AbstractSiteMapSiteMapIndex.getSitemap(java.net.URL url) + +
+          Returns the Sitemap that has the given URL.
+ AbstractSiteMapSiteMapIndex.nextUnprocessedSitemap() + +
+           
+ AbstractSiteMapSiteMapParser.parseSiteMap(java.lang.String contentType, + byte[] content, + AbstractSiteMap sitemap) + +
+          Returned a processed copy of an unprocessed sitemap object, i.e.
+ AbstractSiteMapSiteMapParser.parseSiteMap(java.lang.String contentType, + byte[] content, + java.net.URL url) + +
+          Returns a SiteMap or SiteMapIndex given a content type, byte content and + the URL of a sitemap
+  +

+ + + + + + + + + +
Methods in crawlercommons.sitemaps that return types with arguments of type AbstractSiteMap
+ java.util.Collection<AbstractSiteMap>SiteMapIndex.getSitemaps() + +
+           
+  +

+ + + + + + + + + +
Methods in crawlercommons.sitemaps with parameters of type AbstractSiteMap
+ AbstractSiteMapSiteMapParser.parseSiteMap(java.lang.String contentType, + byte[] content, + AbstractSiteMap sitemap) + +
+          Returned a processed copy of an unprocessed sitemap object, i.e.
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMap.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMap.html new file mode 100644 index 0000000..f457601 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMap.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.sitemaps.SiteMap (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.sitemaps.SiteMap

+
+No usage of crawlercommons.sitemaps.SiteMap +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapIndex.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapIndex.html new file mode 100644 index 0000000..9d07cc3 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapIndex.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.sitemaps.SiteMapIndex (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.sitemaps.SiteMapIndex

+
+No usage of crawlercommons.sitemaps.SiteMapIndex +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapParser.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapParser.html new file mode 100644 index 0000000..3d99635 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapParser.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.sitemaps.SiteMapParser (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.sitemaps.SiteMapParser

+
+No usage of crawlercommons.sitemaps.SiteMapParser +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.ChangeFrequency.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.ChangeFrequency.html new file mode 100644 index 0000000..f13f2fe --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.ChangeFrequency.html @@ -0,0 +1,230 @@ + + + + + + +Uses of Class crawlercommons.sitemaps.SiteMapURL.ChangeFrequency (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.sitemaps.SiteMapURL.ChangeFrequency

+
+ + + + + + + + + +
+Packages that use SiteMapURL.ChangeFrequency
crawlercommons.sitemaps  
+  +

+ + + + + +
+Uses of SiteMapURL.ChangeFrequency in crawlercommons.sitemaps
+  +

+ + + + + + + + + + + + + + + + + +
Methods in crawlercommons.sitemaps that return SiteMapURL.ChangeFrequency
+ SiteMapURL.ChangeFrequencySiteMapURL.getChangeFrequency() + +
+          Return the URL's change frequency
+static SiteMapURL.ChangeFrequencySiteMapURL.ChangeFrequency.valueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static SiteMapURL.ChangeFrequency[]SiteMapURL.ChangeFrequency.values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+  +

+ + + + + + + + + +
Methods in crawlercommons.sitemaps with parameters of type SiteMapURL.ChangeFrequency
+ voidSiteMapURL.setChangeFrequency(SiteMapURL.ChangeFrequency changeFreq) + +
+          Set the URL's change frequency
+  +

+ + + + + + + + +
Constructors in crawlercommons.sitemaps with parameters of type SiteMapURL.ChangeFrequency
SiteMapURL(java.net.URL url, + java.util.Date lastModified, + SiteMapURL.ChangeFrequency changeFreq, + double priority) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.html new file mode 100644 index 0000000..691739e --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.html @@ -0,0 +1,196 @@ + + + + + + +Uses of Class crawlercommons.sitemaps.SiteMapURL (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.sitemaps.SiteMapURL

+
+ + + + + + + + + +
+Packages that use SiteMapURL
crawlercommons.sitemaps  
+  +

+ + + + + +
+Uses of SiteMapURL in crawlercommons.sitemaps
+  +

+ + + + + + + + + +
Methods in crawlercommons.sitemaps that return types with arguments of type SiteMapURL
+ java.util.Collection<SiteMapURL>SiteMap.getSiteMapUrls() + +
+           
+  +

+ + + + + + + + + +
Methods in crawlercommons.sitemaps with parameters of type SiteMapURL
+ voidSiteMap.addSiteMapUrl(SiteMapURL url) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/UnknownFormatException.html b/doc/javadoc/crawlercommons/sitemaps/class-use/UnknownFormatException.html new file mode 100644 index 0000000..a2d5f2c --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/class-use/UnknownFormatException.html @@ -0,0 +1,193 @@ + + + + + + +Uses of Class crawlercommons.sitemaps.UnknownFormatException (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.sitemaps.UnknownFormatException

+
+ + + + + + + + + +
+Packages that use UnknownFormatException
crawlercommons.sitemaps  
+  +

+ + + + + +
+Uses of UnknownFormatException in crawlercommons.sitemaps
+  +

+ + + + + + + + + + + + + +
Methods in crawlercommons.sitemaps that throw UnknownFormatException
+ AbstractSiteMapSiteMapParser.parseSiteMap(java.lang.String contentType, + byte[] content, + AbstractSiteMap sitemap) + +
+          Returned a processed copy of an unprocessed sitemap object, i.e.
+ AbstractSiteMapSiteMapParser.parseSiteMap(java.lang.String contentType, + byte[] content, + java.net.URL url) + +
+          Returns a SiteMap or SiteMapIndex given a content type, byte content and + the URL of a sitemap
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/package-frame.html b/doc/javadoc/crawlercommons/sitemaps/package-frame.html new file mode 100644 index 0000000..284935a --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/package-frame.html @@ -0,0 +1,64 @@ + + + + + + +crawlercommons.sitemaps (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + +crawlercommons.sitemaps + + + + +
+Classes  + +
+AbstractSiteMap +
+SiteMap +
+SiteMapIndex +
+SiteMapParser +
+SiteMapURL
+ + + + + + +
+Enums  + +
+AbstractSiteMap.SitemapType +
+SiteMapURL.ChangeFrequency
+ + + + + + +
+Exceptions  + +
+UnknownFormatException
+ + + + diff --git a/doc/javadoc/crawlercommons/sitemaps/package-summary.html b/doc/javadoc/crawlercommons/sitemaps/package-summary.html new file mode 100644 index 0000000..1cc290e --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/package-summary.html @@ -0,0 +1,205 @@ + + + + + + +crawlercommons.sitemaps (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package crawlercommons.sitemaps +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+Class Summary
AbstractSiteMapSiteMap or SiteMapIndex
SiteMap 
SiteMapIndex 
SiteMapParser 
SiteMapURLThe SitemapUrl class represents a URL found in a Sitemap.
+  + +

+ + + + + + + + + + + + + +
+Enum Summary
AbstractSiteMap.SitemapTypeVarious Sitemap types
SiteMapURL.ChangeFrequencyAllowed change frequencies
+  + +

+ + + + + + + + + +
+Exception Summary
UnknownFormatException 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/package-tree.html b/doc/javadoc/crawlercommons/sitemaps/package-tree.html new file mode 100644 index 0000000..36c1fb7 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/package-tree.html @@ -0,0 +1,170 @@ + + + + + + +crawlercommons.sitemaps Class Hierarchy (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package crawlercommons.sitemaps +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +

+Enum Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/sitemaps/package-use.html b/doc/javadoc/crawlercommons/sitemaps/package-use.html new file mode 100644 index 0000000..1a4f802 --- /dev/null +++ b/doc/javadoc/crawlercommons/sitemaps/package-use.html @@ -0,0 +1,194 @@ + + + + + + +Uses of Package crawlercommons.sitemaps (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
crawlercommons.sitemaps

+
+ + + + + + + + + +
+Packages that use crawlercommons.sitemaps
crawlercommons.sitemaps  
+  +

+ + + + + + + + + + + + + + + + + + + + +
+Classes in crawlercommons.sitemaps used by crawlercommons.sitemaps
AbstractSiteMap + +
+          SiteMap or SiteMapIndex
AbstractSiteMap.SitemapType + +
+          Various Sitemap types
SiteMapURL + +
+          The SitemapUrl class represents a URL found in a Sitemap.
SiteMapURL.ChangeFrequency + +
+          Allowed change frequencies
UnknownFormatException + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/url/EffectiveTldFinder.EffectiveTLD.html b/doc/javadoc/crawlercommons/url/EffectiveTldFinder.EffectiveTLD.html new file mode 100644 index 0000000..3b9f873 --- /dev/null +++ b/doc/javadoc/crawlercommons/url/EffectiveTldFinder.EffectiveTLD.html @@ -0,0 +1,315 @@ + + + + + + +EffectiveTldFinder.EffectiveTLD (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.url +
+Class EffectiveTldFinder.EffectiveTLD

+
+java.lang.Object
+  extended by crawlercommons.url.EffectiveTldFinder.EffectiveTLD
+
+
+
Enclosing class:
EffectiveTldFinder
+
+
+
+
public static class EffectiveTldFinder.EffectiveTLD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
EffectiveTldFinder.EffectiveTLD(java.lang.String line) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ java.lang.StringgetDomain() + +
+           
+ booleanisException() + +
+           
+ booleanisWild() + +
+           
+ java.lang.StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+EffectiveTldFinder.EffectiveTLD

+
+public EffectiveTldFinder.EffectiveTLD(java.lang.String line)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getDomain

+
+public java.lang.String getDomain()
+
+
+
+
+
+
+ +

+isWild

+
+public boolean isWild()
+
+
+
+
+
+
+ +

+isException

+
+public boolean isException()
+
+
+
+
+
+
+ +

+toString

+
+public java.lang.String toString()
+
+
+
Overrides:
toString in class java.lang.Object
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/url/EffectiveTldFinder.html b/doc/javadoc/crawlercommons/url/EffectiveTldFinder.html new file mode 100644 index 0000000..650cd62 --- /dev/null +++ b/doc/javadoc/crawlercommons/url/EffectiveTldFinder.html @@ -0,0 +1,478 @@ + + + + + + +EffectiveTldFinder (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.url +
+Class EffectiveTldFinder

+
+java.lang.Object
+  extended by crawlercommons.url.EffectiveTldFinder
+
+
+
+
public class EffectiveTldFinder
extends java.lang.Object
+ + +

+Given a URL's hostname, determining the actual domain requires + knowledge of the various domain registrars and their assignment policies. + The best publicly available knowledge of this is maintained by the Mozilla + developers; this class uses their data file format. For more information, see +

+ + This class just needs "effective_tld_names.dat" in the classpath. If you want + to configure it with other data, call EffectiveTldFinder.getInstance().initialize(is) + and have at it. +

+ +

+


+ +

+ + + + + + + + + + + +
+Nested Class Summary
+static classEffectiveTldFinder.EffectiveTLD + +
+           
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+static java.lang.StringCOMMENT + +
+           
+static charDOT + +
+           
+static java.lang.StringDOT_REGEX + +
+           
+static java.lang.StringETLD_DATA + +
+           
+static java.lang.StringEXCEPTION + +
+           
+static java.lang.StringWILD_CARD + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static java.lang.StringgetAssignedDomain(java.lang.String hostname) + +
+          This method uses the effective TLD to determine which component of + a FQDN is the NIC-assigned domain name.
+static EffectiveTldFinder.EffectiveTLDgetEffectiveTLD(java.lang.String hostname) + +
+           
+static java.util.Map<java.lang.String,EffectiveTldFinder.EffectiveTLD>getEffectiveTLDs() + +
+           
+static EffectiveTldFindergetInstance() + +
+           
+ booleaninitialize(java.io.InputStream effective_tld_data_stream) + +
+           
+ booleanisConfigured() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+ETLD_DATA

+
+public static final java.lang.String ETLD_DATA
+
+
+
See Also:
Constant Field Values
+
+
+ +

+COMMENT

+
+public static final java.lang.String COMMENT
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DOT_REGEX

+
+public static final java.lang.String DOT_REGEX
+
+
+
See Also:
Constant Field Values
+
+
+ +

+EXCEPTION

+
+public static final java.lang.String EXCEPTION
+
+
+
See Also:
Constant Field Values
+
+
+ +

+WILD_CARD

+
+public static final java.lang.String WILD_CARD
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DOT

+
+public static final char DOT
+
+
+
See Also:
Constant Field Values
+
+ + + + + + + + +
+Method Detail
+ +

+getInstance

+
+public static EffectiveTldFinder getInstance()
+
+
+
+
+
+
+ +

+initialize

+
+public boolean initialize(java.io.InputStream effective_tld_data_stream)
+
+
+
+
+
+
+ +

+getEffectiveTLDs

+
+public static java.util.Map<java.lang.String,EffectiveTldFinder.EffectiveTLD> getEffectiveTLDs()
+
+
+
+
+
+
+ +

+getEffectiveTLD

+
+public static EffectiveTldFinder.EffectiveTLD getEffectiveTLD(java.lang.String hostname)
+
+
+
Parameters:
hostname - +
Returns:
the Effective TLD
+
+
+
+ +

+getAssignedDomain

+
+public static java.lang.String getAssignedDomain(java.lang.String hostname)
+
+
This method uses the effective TLD to determine which component of + a FQDN is the NIC-assigned domain name. +

+

+
Parameters:
hostname - +
Returns:
the NIC-assigned domain name
+
+
+
+ +

+isConfigured

+
+public boolean isConfigured()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/url/PaidLevelDomain.html b/doc/javadoc/crawlercommons/url/PaidLevelDomain.html new file mode 100644 index 0000000..8887ea0 --- /dev/null +++ b/doc/javadoc/crawlercommons/url/PaidLevelDomain.html @@ -0,0 +1,282 @@ + + + + + + +PaidLevelDomain (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +crawlercommons.url +
+Class PaidLevelDomain

+
+java.lang.Object
+  extended by crawlercommons.url.PaidLevelDomain
+
+
+
+
public class PaidLevelDomain
extends java.lang.Object
+ + +

+Routines to extract the PLD (paid-level domain, as per the IRLbot paper) from a hostname or URL. +

+ +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
PaidLevelDomain() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static java.lang.StringgetPLD(java.lang.String hostname) + +
+          Extract the PLD (paid-level domain) from the hostname.
+static java.lang.StringgetPLD(java.net.URL url) + +
+          Extract the PLD (paid-level domain) from the URL.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+PaidLevelDomain

+
+public PaidLevelDomain()
+
+
+ + + + + + + + +
+Method Detail
+ +

+getPLD

+
+public static java.lang.String getPLD(java.lang.String hostname)
+
+
Extract the PLD (paid-level domain) from the hostname. If the format isn't recognized, + the original hostname is returned. +

+

+
Parameters:
hostname - hostname from URL, e.g. www.domain.com.it +
Returns:
PLD, e.g. domain.com.it
+
+
+
+ +

+getPLD

+
+public static java.lang.String getPLD(java.net.URL url)
+
+
Extract the PLD (paid-level domain) from the URL. +

+

+
Parameters:
url - Valid URL, e.g. http://www.domain.com.it +
Returns:
PLD, e.g. domain.com.it
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.EffectiveTLD.html b/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.EffectiveTLD.html new file mode 100644 index 0000000..7fdb951 --- /dev/null +++ b/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.EffectiveTLD.html @@ -0,0 +1,196 @@ + + + + + + +Uses of Class crawlercommons.url.EffectiveTldFinder.EffectiveTLD (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.url.EffectiveTldFinder.EffectiveTLD

+
+ + + + + + + + + +
+Packages that use EffectiveTldFinder.EffectiveTLD
crawlercommons.url  
+  +

+ + + + + +
+Uses of EffectiveTldFinder.EffectiveTLD in crawlercommons.url
+  +

+ + + + + + + + + +
Methods in crawlercommons.url that return EffectiveTldFinder.EffectiveTLD
+static EffectiveTldFinder.EffectiveTLDEffectiveTldFinder.getEffectiveTLD(java.lang.String hostname) + +
+           
+  +

+ + + + + + + + + +
Methods in crawlercommons.url that return types with arguments of type EffectiveTldFinder.EffectiveTLD
+static java.util.Map<java.lang.String,EffectiveTldFinder.EffectiveTLD>EffectiveTldFinder.getEffectiveTLDs() + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.html b/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.html new file mode 100644 index 0000000..65addb1 --- /dev/null +++ b/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.html @@ -0,0 +1,180 @@ + + + + + + +Uses of Class crawlercommons.url.EffectiveTldFinder (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.url.EffectiveTldFinder

+
+ + + + + + + + + +
+Packages that use EffectiveTldFinder
crawlercommons.url  
+  +

+ + + + + +
+Uses of EffectiveTldFinder in crawlercommons.url
+  +

+ + + + + + + + + +
Methods in crawlercommons.url that return EffectiveTldFinder
+static EffectiveTldFinderEffectiveTldFinder.getInstance() + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/url/class-use/PaidLevelDomain.html b/doc/javadoc/crawlercommons/url/class-use/PaidLevelDomain.html new file mode 100644 index 0000000..84a42b5 --- /dev/null +++ b/doc/javadoc/crawlercommons/url/class-use/PaidLevelDomain.html @@ -0,0 +1,144 @@ + + + + + + +Uses of Class crawlercommons.url.PaidLevelDomain (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
crawlercommons.url.PaidLevelDomain

+
+No usage of crawlercommons.url.PaidLevelDomain +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/url/package-frame.html b/doc/javadoc/crawlercommons/url/package-frame.html new file mode 100644 index 0000000..3722d2d --- /dev/null +++ b/doc/javadoc/crawlercommons/url/package-frame.html @@ -0,0 +1,36 @@ + + + + + + +crawlercommons.url (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + +crawlercommons.url + + + + +
+Classes  + +
+EffectiveTldFinder +
+EffectiveTldFinder.EffectiveTLD +
+PaidLevelDomain
+ + + + diff --git a/doc/javadoc/crawlercommons/url/package-summary.html b/doc/javadoc/crawlercommons/url/package-summary.html new file mode 100644 index 0000000..92a08b2 --- /dev/null +++ b/doc/javadoc/crawlercommons/url/package-summary.html @@ -0,0 +1,166 @@ + + + + + + +crawlercommons.url (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package crawlercommons.url +

+ + + + + + + + + + + + + + + + + +
+Class Summary
EffectiveTldFinderGiven a URL's hostname, determining the actual domain requires + knowledge of the various domain registrars and their assignment policies.
EffectiveTldFinder.EffectiveTLD 
PaidLevelDomainRoutines to extract the PLD (paid-level domain, as per the IRLbot paper) from a hostname or URL.
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/url/package-tree.html b/doc/javadoc/crawlercommons/url/package-tree.html new file mode 100644 index 0000000..e75ed47 --- /dev/null +++ b/doc/javadoc/crawlercommons/url/package-tree.html @@ -0,0 +1,153 @@ + + + + + + +crawlercommons.url Class Hierarchy (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package crawlercommons.url +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/crawlercommons/url/package-use.html b/doc/javadoc/crawlercommons/url/package-use.html new file mode 100644 index 0000000..1332d72 --- /dev/null +++ b/doc/javadoc/crawlercommons/url/package-use.html @@ -0,0 +1,177 @@ + + + + + + +Uses of Package crawlercommons.url (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
crawlercommons.url

+
+ + + + + + + + + +
+Packages that use crawlercommons.url
crawlercommons.url  
+  +

+ + + + + + + + + + + +
+Classes in crawlercommons.url used by crawlercommons.url
EffectiveTldFinder + +
+          Given a URL's hostname, there are determining the actual domain requires + knowledge of the various domain registrars and their assignment policies.
EffectiveTldFinder.EffectiveTLD + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/deprecated-list.html b/doc/javadoc/deprecated-list.html new file mode 100644 index 0000000..4631eaf --- /dev/null +++ b/doc/javadoc/deprecated-list.html @@ -0,0 +1,146 @@ + + + + + + +Deprecated List (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Deprecated API

+
+
+Contents + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/help-doc.html b/doc/javadoc/help-doc.html new file mode 100644 index 0000000..3175885 --- /dev/null +++ b/doc/javadoc/help-doc.html @@ -0,0 +1,223 @@ + + + + + + +API Help (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+How This API Document Is Organized

+
+This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.

+Overview

+
+ +

+The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

+

+Package

+
+ +

+Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain four categories:

+
+

+Class/Interface

+
+ +

+Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

+Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.
+ +

+Annotation Type

+
+ +

+Each annotation type has its own separate page with the following sections:

+
+ +

+Enum

+
+ +

+Each enum has its own separate page with the following sections:

+
+

+Use

+
+Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.
+

+Tree (Class Hierarchy)

+
+There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object. +
+

+Deprecated API

+
+The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.
+

+Index

+
+The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.
+

+Prev/Next

+These links take you to the next or previous class, interface, package, or related page.

+Frames/No Frames

+These links show and hide the HTML frames. All pages are available with or without frames. +

+

+Serialized Form

+Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description. +

+

+Constant Field Values

+The Constant Field Values page lists the static final fields and their values. +

+ + +This help file applies to API documentation generated using the standard doclet. + +
+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/index-all.html b/doc/javadoc/index-all.html new file mode 100644 index 0000000..9f52bab --- /dev/null +++ b/doc/javadoc/index-all.html @@ -0,0 +1,1047 @@ + + + + + + +Index (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I K L M N P R S T U V W _
+

+A

+
+
abort() - +Method in class crawlercommons.fetcher.BaseFetcher +
Terminate any async request being processed. +
abort() - +Method in class crawlercommons.fetcher.file.SimpleFileFetcher +
  +
abort() - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
AbortedFetchException - Exception in crawlercommons.fetcher
 
AbortedFetchException() - +Constructor for exception crawlercommons.fetcher.AbortedFetchException +
  +
AbortedFetchException(String, AbortedFetchReason) - +Constructor for exception crawlercommons.fetcher.AbortedFetchException +
  +
AbortedFetchException(String, String, AbortedFetchReason) - +Constructor for exception crawlercommons.fetcher.AbortedFetchException +
  +
AbortedFetchReason - Enum in crawlercommons.fetcher
 
AbstractSiteMap - Class in crawlercommons.sitemaps
SiteMap or SiteMapIndex
AbstractSiteMap() - +Constructor for class crawlercommons.sitemaps.AbstractSiteMap +
  +
AbstractSiteMap.SitemapType - Enum in crawlercommons.sitemaps
Various Sitemap types
addRule(String, boolean) - +Method in class crawlercommons.robots.SimpleRobotRules +
  +
addSitemap(String) - +Method in class crawlercommons.robots.BaseRobotRules +
  +
addSiteMapUrl(SiteMapURL) - +Method in class crawlercommons.sitemaps.SiteMap +
  +
addValidMimeType(String) - +Method in class crawlercommons.fetcher.BaseFetcher +
  +
addValidMimeTypes(Set<String>) - +Method in class crawlercommons.fetcher.BaseFetcher +
  +
+
+

+B

+
+
BadProtocolFetchException - Exception in crawlercommons.fetcher
 
BadProtocolFetchException() - +Constructor for exception crawlercommons.fetcher.BadProtocolFetchException +
  +
BadProtocolFetchException(String) - +Constructor for exception crawlercommons.fetcher.BadProtocolFetchException +
  +
BaseFetcher - Class in crawlercommons.fetcher
 
BaseFetcher() - +Constructor for class crawlercommons.fetcher.BaseFetcher +
  +
BaseFetchException - Exception in crawlercommons.fetcher
 
BaseFetchException() - +Constructor for exception crawlercommons.fetcher.BaseFetchException +
  +
BaseFetchException(String) - +Constructor for exception crawlercommons.fetcher.BaseFetchException +
  +
BaseFetchException(String, String) - +Constructor for exception crawlercommons.fetcher.BaseFetchException +
  +
BaseFetchException(String, Exception) - +Constructor for exception crawlercommons.fetcher.BaseFetchException +
  +
BaseFetchException(String, String, Exception) - +Constructor for exception crawlercommons.fetcher.BaseFetchException +
  +
BaseHttpFetcher - Class in crawlercommons.fetcher.http
 
BaseHttpFetcher(int, UserAgent) - +Constructor for class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
BaseHttpFetcher.RedirectMode - Enum in crawlercommons.fetcher.http
 
BaseRobotRules - Class in crawlercommons.robots
Result from parsing a single robots.txt file - which means we + get a set of rules, and a crawl-delay.
BaseRobotRules() - +Constructor for class crawlercommons.robots.BaseRobotRules +
  +
BaseRobotsParser - Class in crawlercommons.robots
 
BaseRobotsParser() - +Constructor for class crawlercommons.robots.BaseRobotsParser +
  +
+
+

+C

+
+
clear() - +Method in class crawlercommons.fetcher.Payload +
  +
clearRules() - +Method in class crawlercommons.robots.SimpleRobotRules +
  +
COMMENT - +Static variable in class crawlercommons.url.EffectiveTldFinder +
  +
compareToBase(BaseFetchException) - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
containsKey(Object) - +Method in class crawlercommons.fetcher.Payload +
  +
containsValue(Object) - +Method in class crawlercommons.fetcher.Payload +
  +
convertToDate(String) - +Static method in class crawlercommons.sitemaps.AbstractSiteMap +
Convert the given date (given in an acceptable DateFormat); returns null if the + date is not in the correct format. +
crawlercommons.fetcher - package crawlercommons.fetcher
 
crawlercommons.fetcher.file - package crawlercommons.fetcher.file
 
crawlercommons.fetcher.http - package crawlercommons.fetcher.http
 
crawlercommons.robots - package crawlercommons.robots
 
crawlercommons.sitemaps - package crawlercommons.sitemaps
 
crawlercommons.url - package crawlercommons.url
 
createFetcher(BaseHttpFetcher) - +Static method in class crawlercommons.robots.RobotUtils +
  +
createFetcher(UserAgent, int) - +Static method in class crawlercommons.robots.RobotUtils +
  +
+
+

+D

+
+
DEFAULT_ACCEPT_LANGUAGE - +Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
DEFAULT_BROWSER_VERSION - +Static variable in class crawlercommons.fetcher.http.UserAgent +
  +
DEFAULT_MAX_CONNECTIONS_PER_HOST - +Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
DEFAULT_MAX_CONTENT_SIZE - +Static variable in class crawlercommons.fetcher.BaseFetcher +
  +
DEFAULT_MAX_REDIRECTS - +Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
DEFAULT_MIN_RESPONSE_RATE - +Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
DEFAULT_REDIRECT_MODE - +Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
DOT - +Static variable in class crawlercommons.url.EffectiveTldFinder +
  +
DOT_REGEX - +Static variable in class crawlercommons.url.EffectiveTldFinder +
  +
+
+

+E

+
+
EffectiveTldFinder - Class in crawlercommons.url
Given a URL's hostname, determining the actual domain requires + knowledge of the various domain registrars and their assignment policies.
EffectiveTldFinder.EffectiveTLD - Class in crawlercommons.url
 
EffectiveTldFinder.EffectiveTLD(String) - +Constructor for class crawlercommons.url.EffectiveTldFinder.EffectiveTLD +
  +
EncodingUtils - Class in crawlercommons.fetcher
 
EncodingUtils() - +Constructor for class crawlercommons.fetcher.EncodingUtils +
  +
EncodingUtils.ExpandedResult - Class in crawlercommons.fetcher
 
EncodingUtils.ExpandedResult(byte[], boolean) - +Constructor for class crawlercommons.fetcher.EncodingUtils.ExpandedResult +
  +
entrySet() - +Method in class crawlercommons.fetcher.Payload +
  +
equals(Object) - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
equals(Object) - +Method in class crawlercommons.fetcher.Payload +
  +
ETLD_DATA - +Static variable in class crawlercommons.url.EffectiveTldFinder +
  +
EXCEPTION - +Static variable in class crawlercommons.url.EffectiveTldFinder +
  +
+
+

+F

+
+
failedFetch(int) - +Method in class crawlercommons.robots.BaseRobotsParser +
The fetch of robots.txt failed, so return rules appropriate given the + HTTP status code. +
failedFetch(int) - +Method in class crawlercommons.robots.SimpleRobotRulesParser +
  +
fetch(String) - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
fetch(HttpRequestBase, String, Payload) - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
FetchedResult - Class in crawlercommons.fetcher
 
FetchedResult(String, String, long, Metadata, byte[], String, int, Payload, String, int, String) - +Constructor for class crawlercommons.fetcher.FetchedResult +
  +
+
+

+G

+
+
get(String) - +Method in class crawlercommons.fetcher.BaseFetcher +
  +
get(String, Payload) - +Method in class crawlercommons.fetcher.BaseFetcher +
Get the content stored in the resource referenced by +
get(String, Payload) - +Method in class crawlercommons.fetcher.file.SimpleFileFetcher +
  +
get(String, Payload) - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
get(Object) - +Method in class crawlercommons.fetcher.Payload +
  +
getAbortReason() - +Method in exception crawlercommons.fetcher.AbortedFetchException +
  +
getAcceptLanguage() - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
getAgentName() - +Method in class crawlercommons.fetcher.http.UserAgent +
  +
getAssignedDomain(String) - +Static method in class crawlercommons.url.EffectiveTldFinder +
This method uses the effective TLD to determine which component of + a FQDN is the NIC-assigned domain name. +
getBaseUrl() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getBaseUrl() - +Method in class crawlercommons.sitemaps.SiteMap +
  +
getCause() - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
getChangeFrequency() - +Method in class crawlercommons.sitemaps.SiteMapURL +
Return the URL's change frequency +
getConnectionTimeout() - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
getContent() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getContentLength() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getContentType() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getCrawlDelay() - +Method in class crawlercommons.robots.BaseRobotRules +
  +
getDefaultMaxContentSize() - +Method in class crawlercommons.fetcher.BaseFetcher +
  +
getDomain() - +Method in class crawlercommons.url.EffectiveTldFinder.EffectiveTLD +
  +
getEffectiveTLD(String) - +Static method in class crawlercommons.url.EffectiveTldFinder +
  +
getEffectiveTLDs() - +Static method in class crawlercommons.url.EffectiveTldFinder +
  +
getError() - +Method in exception crawlercommons.sitemaps.UnknownFormatException +
  +
getExpanded() - +Method in class crawlercommons.fetcher.EncodingUtils.ExpandedResult +
  +
getFetchedUrl() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getFetchTime() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getFullDateFormat() - +Static method in class crawlercommons.sitemaps.AbstractSiteMap +
  +
getHeaders() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getHostAddress() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getHttpHeaders() - +Method in exception crawlercommons.fetcher.HttpFetchException +
  +
getHttpStatus() - +Method in exception crawlercommons.fetcher.HttpFetchException +
  +
getHttpVersion() - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
getInstance() - +Static method in class crawlercommons.url.EffectiveTldFinder +
  +
getLastModified() - +Method in class crawlercommons.sitemaps.AbstractSiteMap +
  +
getLastModified() - +Method in class crawlercommons.sitemaps.SiteMapURL +
Return when this URL was last modified. +
getLocalizedMessage() - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
getMaxConnectionsPerHost() - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
getMaxContentSize(String) - +Method in class crawlercommons.fetcher.BaseFetcher +
  +
getMaxFetchTime() - +Static method in class crawlercommons.robots.RobotUtils +
  +
getMaxRedirects() - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
getMaxRetryCount() - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
getMaxThreads() - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
getMessage() - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
getMessage() - +Method in exception crawlercommons.fetcher.HttpFetchException +
  +
getMimeTypeFromContentType(String) - +Static method in class crawlercommons.fetcher.BaseFetcher +
  +
getMinResponseRate() - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
Return the minimum response rate. +
getNewBaseUrl() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getNumRedirects() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getNumWarnings() - +Method in class crawlercommons.robots.SimpleRobotRulesParser +
  +
getPayload() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getPLD(String) - +Static method in class crawlercommons.url.PaidLevelDomain +
Extract the PLD (paid-level domain) from the hostname. +
getPLD(URL) - +Static method in class crawlercommons.url.PaidLevelDomain +
Extract the PLD (paid-level domain) from the URL. +
getPriority() - +Method in class crawlercommons.sitemaps.SiteMapURL +
Return this URL's priority (a value between [0.0 - 1.0]). +
getReason() - +Method in exception crawlercommons.fetcher.RedirectFetchException +
  +
getRedirectedUrl() - +Method in exception crawlercommons.fetcher.RedirectFetchException +
  +
getRedirectMode() - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
getResponseRate() - +Method in class crawlercommons.fetcher.FetchedResult +
  +
getRobotRules(BaseHttpFetcher, BaseRobotsParser, URL) - +Static method in class crawlercommons.robots.RobotUtils +
Externally visible, static method for use in tools and for testing. +
getSitemap(URL) - +Method in class crawlercommons.sitemaps.SiteMapIndex +
Returns the Sitemap that has the given URL. +
getSitemaps() - +Method in class crawlercommons.robots.BaseRobotRules +
  +
getSitemaps() - +Method in class crawlercommons.sitemaps.SiteMapIndex +
  +
getSiteMapUrls() - +Method in class crawlercommons.sitemaps.SiteMap +
  +
getSocketTimeout() - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
getStackTrace() - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
getType() - +Method in class crawlercommons.sitemaps.AbstractSiteMap +
  +
getUrl() - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
getUrl() - +Method in class crawlercommons.sitemaps.AbstractSiteMap +
  +
getUrl() - +Method in class crawlercommons.sitemaps.SiteMapURL +
Return the URL. +
getUserAgent() - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
getUserAgentString() - +Method in class crawlercommons.fetcher.http.UserAgent +
  +
getValidMimeTypes() - +Method in class crawlercommons.fetcher.BaseFetcher +
  +
+
+

+H

+
+
hashCode() - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
hashCode() - +Method in class crawlercommons.fetcher.Payload +
  +
hasUnprocessedSitemap() - +Method in class crawlercommons.sitemaps.SiteMapIndex +
  +
HttpFetchException - Exception in crawlercommons.fetcher
 
HttpFetchException() - +Constructor for exception crawlercommons.fetcher.HttpFetchException +
  +
HttpFetchException(String, String, int, Metadata) - +Constructor for exception crawlercommons.fetcher.HttpFetchException +
  +
+
+

+I

+
+
initCause(Throwable) - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
initialize(InputStream) - +Method in class crawlercommons.url.EffectiveTldFinder +
  +
IOFetchException - Exception in crawlercommons.fetcher
 
IOFetchException() - +Constructor for exception crawlercommons.fetcher.IOFetchException +
  +
IOFetchException(String, IOException) - +Constructor for exception crawlercommons.fetcher.IOFetchException +
  +
isAllowAll() - +Method in class crawlercommons.robots.BaseRobotRules +
  +
isAllowAll() - +Method in class crawlercommons.robots.SimpleRobotRules +
Is our ruleset set up to allow all access? +
isAllowed(String) - +Method in class crawlercommons.robots.BaseRobotRules +
  +
isAllowed(String) - +Method in class crawlercommons.robots.SimpleRobotRules +
  +
isAllowNone() - +Method in class crawlercommons.robots.BaseRobotRules +
  +
isAllowNone() - +Method in class crawlercommons.robots.SimpleRobotRules +
Is our ruleset set up to disallow all access? +
isConfigured() - +Method in class crawlercommons.url.EffectiveTldFinder +
  +
isDeferVisits() - +Method in class crawlercommons.robots.BaseRobotRules +
  +
isEmpty() - +Method in class crawlercommons.fetcher.Payload +
  +
isException() - +Method in class crawlercommons.url.EffectiveTldFinder.EffectiveTLD +
  +
isIndex() - +Method in class crawlercommons.sitemaps.AbstractSiteMap +
  +
isIndex() - +Method in class crawlercommons.sitemaps.SiteMap +
  +
isIndex() - +Method in class crawlercommons.sitemaps.SiteMapIndex +
  +
isProcessed() - +Method in class crawlercommons.sitemaps.AbstractSiteMap +
  +
isTruncated() - +Method in class crawlercommons.fetcher.EncodingUtils.ExpandedResult +
  +
isWild() - +Method in class crawlercommons.url.EffectiveTldFinder.EffectiveTLD +
  +
+
+

+K

+
+
keySet() - +Method in class crawlercommons.fetcher.Payload +
  +
+
+

+L

+
+
LOG - +Static variable in class crawlercommons.sitemaps.SiteMapParser +
  +
+
+

+M

+
+
MAX_BYTES_ALLOWED - +Static variable in class crawlercommons.sitemaps.SiteMapParser +
Sitemap docs must be limited to 10MB (10,485,760 bytes) +
+
+

+N

+
+
nextUnprocessedSitemap() - +Method in class crawlercommons.sitemaps.SiteMapIndex +
  +
NO_MIN_RESPONSE_RATE - +Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
NO_REDIRECTS - +Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
+
+

+P

+
+
PaidLevelDomain - Class in crawlercommons.url
Routines to extract the PLD (paid-level domain, as per the IRLbot paper) from a hostname or URL.
PaidLevelDomain() - +Constructor for class crawlercommons.url.PaidLevelDomain +
  +
parseContent(String, byte[], String, String) - +Method in class crawlercommons.robots.BaseRobotsParser +
Parse the robots.txt file in the given content, and return rules appropriate for + processing paths by the given user agent. +
parseContent(String, byte[], String, String) - +Method in class crawlercommons.robots.SimpleRobotRulesParser +
  +
parseSiteMap(String, byte[], AbstractSiteMap) - +Method in class crawlercommons.sitemaps.SiteMapParser +
Returns a processed copy of an unprocessed sitemap object. +
parseSiteMap(String, byte[], URL) - +Method in class crawlercommons.sitemaps.SiteMapParser +
Returns a SiteMap or SiteMapIndex given a content type, byte content and + the URL of a sitemap +
Payload - Class in crawlercommons.fetcher
 
Payload() - +Constructor for class crawlercommons.fetcher.Payload +
  +
printStackTrace() - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
printStackTrace(PrintStream) - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
printStackTrace(PrintWriter) - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
processDeflateEncoded(byte[]) - +Static method in class crawlercommons.fetcher.EncodingUtils +
  +
processDeflateEncoded(byte[], int) - +Static method in class crawlercommons.fetcher.EncodingUtils +
  +
processGzipEncoded(byte[]) - +Static method in class crawlercommons.fetcher.EncodingUtils +
  +
processGzipEncoded(byte[], int) - +Static method in class crawlercommons.fetcher.EncodingUtils +
  +
put(String, Object) - +Method in class crawlercommons.fetcher.Payload +
  +
putAll(Map<? extends String, ? extends Object>) - +Method in class crawlercommons.fetcher.Payload +
  +
+
+

+R

+
+
readBaseFields(DataInput) - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
RedirectFetchException - Exception in crawlercommons.fetcher
 
RedirectFetchException() - +Constructor for exception crawlercommons.fetcher.RedirectFetchException +
  +
RedirectFetchException(String, String, RedirectFetchException.RedirectExceptionReason) - +Constructor for exception crawlercommons.fetcher.RedirectFetchException +
  +
RedirectFetchException.RedirectExceptionReason - Enum in crawlercommons.fetcher
 
remove(Object) - +Method in class crawlercommons.fetcher.Payload +
  +
RobotUtils - Class in crawlercommons.robots
 
RobotUtils() - +Constructor for class crawlercommons.robots.RobotUtils +
  +
+
+

+S

+
+
setAcceptLanguage(String) - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
setChangeFrequency(SiteMapURL.ChangeFrequency) - +Method in class crawlercommons.sitemaps.SiteMapURL +
Set the URL's change frequency +
setChangeFrequency(String) - +Method in class crawlercommons.sitemaps.SiteMapURL +
Set the URL's change frequency +
setConnectionTimeout(int) - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
setCrawlDelay(long) - +Method in class crawlercommons.robots.BaseRobotRules +
  +
setDefaultMaxContentSize(int) - +Method in class crawlercommons.fetcher.BaseFetcher +
  +
setDeferVisits(boolean) - +Method in class crawlercommons.robots.BaseRobotRules +
  +
setExpanded(byte[]) - +Method in class crawlercommons.fetcher.EncodingUtils.ExpandedResult +
  +
setHttpVersion(HttpVersion) - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
setLastModified(Date) - +Method in class crawlercommons.sitemaps.AbstractSiteMap +
  +
setLastModified(String) - +Method in class crawlercommons.sitemaps.AbstractSiteMap +
  +
setLastModified(String) - +Method in class crawlercommons.sitemaps.SiteMapURL +
Set when this URL was last modified. +
setLastModified(Date) - +Method in class crawlercommons.sitemaps.SiteMapURL +
Set when this URL was last modified. +
setMaxConnectionsPerHost(int) - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
setMaxContentSize(String, int) - +Method in class crawlercommons.fetcher.BaseFetcher +
  +
setMaxRedirects(int) - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
setMaxRetryCount(int) - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
setMinResponseRate(int) - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
setPayload(Payload) - +Method in class crawlercommons.fetcher.FetchedResult +
  +
setPriority(double) - +Method in class crawlercommons.sitemaps.SiteMapURL +
Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the + given priority is out of range). +
setPriority(String) - +Method in class crawlercommons.sitemaps.SiteMapURL +
Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the + given priority is out of range). +
setRedirectMode(BaseHttpFetcher.RedirectMode) - +Method in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
setSocketTimeout(int) - +Method in class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
setStackTrace(StackTraceElement[]) - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
setTruncated(boolean) - +Method in class crawlercommons.fetcher.EncodingUtils.ExpandedResult +
  +
setUrl(URL) - +Method in class crawlercommons.sitemaps.SiteMapURL +
Set the URL. +
setUrl(String) - +Method in class crawlercommons.sitemaps.SiteMapURL +
Set the URL. +
setValidMimeTypes(Set<String>) - +Method in class crawlercommons.fetcher.BaseFetcher +
  +
SimpleFileFetcher - Class in crawlercommons.fetcher.file
 
SimpleFileFetcher() - +Constructor for class crawlercommons.fetcher.file.SimpleFileFetcher +
  +
SimpleHttpFetcher - Class in crawlercommons.fetcher.http
 
SimpleHttpFetcher(UserAgent) - +Constructor for class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
SimpleHttpFetcher(int, UserAgent) - +Constructor for class crawlercommons.fetcher.http.SimpleHttpFetcher +
  +
SimpleRobotRules - Class in crawlercommons.robots
Result from parsing a single robots.txt file - which means we + get a set of rules, and a crawl-delay.
SimpleRobotRules() - +Constructor for class crawlercommons.robots.SimpleRobotRules +
  +
SimpleRobotRules(SimpleRobotRules.RobotRulesMode) - +Constructor for class crawlercommons.robots.SimpleRobotRules +
  +
SimpleRobotRules.RobotRule - Class in crawlercommons.robots
Single rule that maps from a path prefix to an allow flag.
SimpleRobotRules.RobotRule(String, boolean) - +Constructor for class crawlercommons.robots.SimpleRobotRules.RobotRule +
  +
SimpleRobotRules.RobotRule(Pattern, boolean) - +Constructor for class crawlercommons.robots.SimpleRobotRules.RobotRule +
  +
SimpleRobotRules.RobotRulesMode - Enum in crawlercommons.robots
 
SimpleRobotRulesParser - Class in crawlercommons.robots
 
SimpleRobotRulesParser() - +Constructor for class crawlercommons.robots.SimpleRobotRulesParser +
  +
SiteMap - Class in crawlercommons.sitemaps
 
SiteMap() - +Constructor for class crawlercommons.sitemaps.SiteMap +
  +
SiteMap(URL) - +Constructor for class crawlercommons.sitemaps.SiteMap +
  +
SiteMap(String) - +Constructor for class crawlercommons.sitemaps.SiteMap +
  +
SiteMap(URL, Date) - +Constructor for class crawlercommons.sitemaps.SiteMap +
  +
SiteMap(String, String) - +Constructor for class crawlercommons.sitemaps.SiteMap +
  +
SiteMapIndex - Class in crawlercommons.sitemaps
 
SiteMapIndex() - +Constructor for class crawlercommons.sitemaps.SiteMapIndex +
  +
SiteMapIndex(URL) - +Constructor for class crawlercommons.sitemaps.SiteMapIndex +
  +
SiteMapParser - Class in crawlercommons.sitemaps
 
SiteMapParser() - +Constructor for class crawlercommons.sitemaps.SiteMapParser +
  +
SiteMapURL - Class in crawlercommons.sitemaps
The SiteMapURL class represents a URL found in a Sitemap.
SiteMapURL(String) - +Constructor for class crawlercommons.sitemaps.SiteMapURL +
  +
SiteMapURL(URL) - +Constructor for class crawlercommons.sitemaps.SiteMapURL +
  +
SiteMapURL(String, String, String, String) - +Constructor for class crawlercommons.sitemaps.SiteMapURL +
  +
SiteMapURL(URL, Date, SiteMapURL.ChangeFrequency, double) - +Constructor for class crawlercommons.sitemaps.SiteMapURL +
  +
SiteMapURL.ChangeFrequency - Enum in crawlercommons.sitemaps
Allowed change frequencies
size() - +Method in class crawlercommons.fetcher.Payload +
  +
+
+

+T

+
+
toString() - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
toString() - +Method in class crawlercommons.sitemaps.SiteMap +
  +
toString() - +Method in class crawlercommons.sitemaps.SiteMapIndex +
  +
toString() - +Method in class crawlercommons.sitemaps.SiteMapURL +
  +
toString() - +Method in class crawlercommons.url.EffectiveTldFinder.EffectiveTLD +
  +
+
+

+U

+
+
UnknownFormatException - Exception in crawlercommons.sitemaps
 
UnknownFormatException() - +Constructor for exception crawlercommons.sitemaps.UnknownFormatException +
  +
UnknownFormatException(String) - +Constructor for exception crawlercommons.sitemaps.UnknownFormatException +
  +
UNSET_CRAWL_DELAY - +Static variable in class crawlercommons.robots.BaseRobotRules +
  +
UrlFetchException - Exception in crawlercommons.fetcher
 
UrlFetchException() - +Constructor for exception crawlercommons.fetcher.UrlFetchException +
  +
UrlFetchException(String, String) - +Constructor for exception crawlercommons.fetcher.UrlFetchException +
  +
UserAgent - Class in crawlercommons.fetcher.http
 
UserAgent(String, String, String) - +Constructor for class crawlercommons.fetcher.http.UserAgent +
  +
UserAgent(String, String, String, String) - +Constructor for class crawlercommons.fetcher.http.UserAgent +
  +
UserAgent(String, String, String, String, String) - +Constructor for class crawlercommons.fetcher.http.UserAgent +
  +
+
+

+V

+
+
valueOf(String) - +Static method in enum crawlercommons.fetcher.AbortedFetchReason +
Returns the enum constant of this type with the specified name. +
valueOf(String) - +Static method in enum crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode +
Returns the enum constant of this type with the specified name. +
valueOf(String) - +Static method in enum crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason +
Returns the enum constant of this type with the specified name. +
valueOf(String) - +Static method in enum crawlercommons.robots.SimpleRobotRules.RobotRulesMode +
Returns the enum constant of this type with the specified name. +
valueOf(String) - +Static method in enum crawlercommons.sitemaps.AbstractSiteMap.SitemapType +
Returns the enum constant of this type with the specified name. +
valueOf(String) - +Static method in enum crawlercommons.sitemaps.SiteMapURL.ChangeFrequency +
Returns the enum constant of this type with the specified name. +
values() - +Static method in enum crawlercommons.fetcher.AbortedFetchReason +
Returns an array containing the constants of this enum type, in +the order they are declared. +
values() - +Static method in enum crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode +
Returns an array containing the constants of this enum type, in +the order they are declared. +
values() - +Method in class crawlercommons.fetcher.Payload +
  +
values() - +Static method in enum crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason +
Returns an array containing the constants of this enum type, in +the order they are declared. +
values() - +Static method in enum crawlercommons.robots.SimpleRobotRules.RobotRulesMode +
Returns an array containing the constants of this enum type, in +the order they are declared. +
values() - +Static method in enum crawlercommons.sitemaps.AbstractSiteMap.SitemapType +
Returns an array containing the constants of this enum type, in +the order they are declared. +
values() - +Static method in enum crawlercommons.sitemaps.SiteMapURL.ChangeFrequency +
Returns an array containing the constants of this enum type, in +the order they are declared. +
+
+

+W

+
+
WILD_CARD - +Static variable in class crawlercommons.url.EffectiveTldFinder +
  +
writeBaseFields(DataOutput) - +Method in exception crawlercommons.fetcher.BaseFetchException +
  +
+
+

+_

+
+
_acceptLanguage - +Variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
_defaultMaxContentSize - +Variable in class crawlercommons.fetcher.BaseFetcher +
  +
_maxConnectionsPerHost - +Variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
_maxContentSizes - +Variable in class crawlercommons.fetcher.BaseFetcher +
  +
_maxRedirects - +Variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
_maxThreads - +Variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
_minResponseRate - +Variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
_redirectMode - +Variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
_userAgent - +Variable in class crawlercommons.fetcher.http.BaseHttpFetcher +
  +
_validMimeTypes - +Variable in class crawlercommons.fetcher.BaseFetcher +
  +
+
+A B C D E F G H I K L M N P R S T U V W _ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/index.html b/doc/javadoc/index.html new file mode 100644 index 0000000..5d8354d --- /dev/null +++ b/doc/javadoc/index.html @@ -0,0 +1,39 @@ + + + + + + +crawlercommons 0.2-SNAPSHOT API + + + + + + + + + + + +<H2> +Frame Alert</H2> + +<P> +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. +<BR> +Link to<A HREF="overview-summary.html">Non-frame version.</A> + + + diff --git a/doc/javadoc/overview-frame.html b/doc/javadoc/overview-frame.html new file mode 100644 index 0000000..a59de77 --- /dev/null +++ b/doc/javadoc/overview-frame.html @@ -0,0 +1,52 @@ + + + + + + +Overview List (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + + + + +
+
+ + + + + +
All Classes +

+ +Packages +
+crawlercommons.fetcher +
+crawlercommons.fetcher.file +
+crawlercommons.fetcher.http +
+crawlercommons.robots +
+crawlercommons.sitemaps +
+crawlercommons.url +
+

+ +

+  + + diff --git a/doc/javadoc/overview-summary.html b/doc/javadoc/overview-summary.html new file mode 100644 index 0000000..ab2f352 --- /dev/null +++ b/doc/javadoc/overview-summary.html @@ -0,0 +1,176 @@ + + + + + + +Overview (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+crawlercommons 0.2-SNAPSHOT API +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Crawler-commons
crawlercommons.fetcher 
crawlercommons.fetcher.file 
crawlercommons.fetcher.http 
crawlercommons.robots 
crawlercommons.sitemaps 
crawlercommons.url 
+ +


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/overview-tree.html b/doc/javadoc/overview-tree.html new file mode 100644 index 0000000..9a010a5 --- /dev/null +++ b/doc/javadoc/overview-tree.html @@ -0,0 +1,183 @@ + + + + + + +Class Hierarchy (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For All Packages

+
+
+
Package Hierarchies:
crawlercommons.fetcher, crawlercommons.fetcher.file, crawlercommons.fetcher.http, crawlercommons.robots, crawlercommons.sitemaps, crawlercommons.url
+
+

+Class Hierarchy +

+ +

+Enum Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/package-list b/doc/javadoc/package-list new file mode 100644 index 0000000..b45f53c --- /dev/null +++ b/doc/javadoc/package-list @@ -0,0 +1,6 @@ +crawlercommons.fetcher +crawlercommons.fetcher.file +crawlercommons.fetcher.http +crawlercommons.robots +crawlercommons.sitemaps +crawlercommons.url diff --git a/doc/javadoc/resources/inherit.gif b/doc/javadoc/resources/inherit.gif new file mode 100644 index 0000000..c814867 Binary files /dev/null and b/doc/javadoc/resources/inherit.gif differ diff --git a/doc/javadoc/serialized-form.html b/doc/javadoc/serialized-form.html new file mode 100644 index 0000000..911e1de --- /dev/null +++ b/doc/javadoc/serialized-form.html @@ -0,0 +1,671 @@ + + + + + + +Serialized Form (crawlercommons 0.2-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Serialized Form

+
+
+ + + + + +
+Package crawlercommons.fetcher
+ +

+ + + + + +
+Class crawlercommons.fetcher.AbortedFetchException extends BaseFetchException implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+_abortReason

+
+AbortedFetchReason _abortReason
+
+
+
+
+ +

+ + + + + +
+Class crawlercommons.fetcher.BadProtocolFetchException extends BaseFetchException implements Serializable
+ +

+ +

+ + + + + +
+Class crawlercommons.fetcher.BaseFetcher extends java.lang.Object implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+_maxContentSizes

+
+java.util.Map<K,V> _maxContentSizes
+
+
+
+
+
+

+_defaultMaxContentSize

+
+int _defaultMaxContentSize
+
+
+
+
+
+

+_validMimeTypes

+
+java.util.Set<E> _validMimeTypes
+
+
+
+
+ +

+ + + + + +
+Class crawlercommons.fetcher.BaseFetchException extends java.lang.Exception implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+_url

+
+java.lang.String _url
+
+
+
+
+
+

+_exception

+
+java.lang.Exception _exception
+
+
+
+
+ +

+ + + + + +
+Class crawlercommons.fetcher.HttpFetchException extends BaseFetchException implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+_httpStatus

+
+int _httpStatus
+
+
+
+
+
+

+_httpHeaders

+
+org.apache.tika.metadata.Metadata _httpHeaders
+
+
+
+
+ +

+ + + + + +
+Class crawlercommons.fetcher.IOFetchException extends BaseFetchException implements Serializable
+ +

+ +

+ + + + + +
+Class crawlercommons.fetcher.RedirectFetchException extends BaseFetchException implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+_redirectedUrl

+
+java.lang.String _redirectedUrl
+
+
+
+
+
+

+_reason

+
+RedirectFetchException.RedirectExceptionReason _reason
+
+
+
+
+ +

+ + + + + +
+Class crawlercommons.fetcher.UrlFetchException extends BaseFetchException implements Serializable
+ +

+


+ + + + + +
+Package crawlercommons.fetcher.file
+ +

+ + + + + +
+Class crawlercommons.fetcher.file.SimpleFileFetcher extends BaseFetcher implements Serializable
+ +

+


+ + + + + +
+Package crawlercommons.fetcher.http
+ +

+ + + + + +
+Class crawlercommons.fetcher.http.BaseHttpFetcher extends BaseFetcher implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+_maxThreads

+
+int _maxThreads
+
+
+
+
+
+

+_userAgent

+
+UserAgent _userAgent
+
+
+
+
+
+

+_maxRedirects

+
+int _maxRedirects
+
+
+
+
+
+

+_maxConnectionsPerHost

+
+int _maxConnectionsPerHost
+
+
+
+
+
+

+_minResponseRate

+
+int _minResponseRate
+
+
+
+
+
+

+_acceptLanguage

+
+java.lang.String _acceptLanguage
+
+
+
+
+
+

+_redirectMode

+
+BaseHttpFetcher.RedirectMode _redirectMode
+
+
+
+
+ +

+ + + + + +
+Class crawlercommons.fetcher.http.SimpleHttpFetcher extends BaseHttpFetcher implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+_httpVersion

+
+org.apache.http.HttpVersion _httpVersion
+
+
+
+
+
+

+_socketTimeout

+
+int _socketTimeout
+
+
+
+
+
+

+_connectionTimeout

+
+int _connectionTimeout
+
+
+
+
+
+

+_maxRetryCount

+
+int _maxRetryCount
+
+
+
+
+ +

+ + + + + +
+Class crawlercommons.fetcher.http.UserAgent extends java.lang.Object implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+_agentName

+
+java.lang.String _agentName
+
+
+
+
+
+

+_emailAddress

+
+java.lang.String _emailAddress
+
+
+
+
+
+

+_webAddress

+
+java.lang.String _webAddress
+
+
+
+
+
+

+_browserVersion

+
+java.lang.String _browserVersion
+
+
+
+
+
+

+_crawlerVersion

+
+java.lang.String _crawlerVersion
+
+
+
+
+
+ + + + + +
+Package crawlercommons.robots
+ +

+ + + + + +
+Class crawlercommons.robots.BaseRobotsParser extends java.lang.Object implements Serializable
+ +

+ +

+ + + + + +
+Class crawlercommons.robots.SimpleRobotRulesParser extends BaseRobotsParser implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+_numWarnings

+
+int _numWarnings
+
+
+
+
+
+ + + + + +
+Package crawlercommons.sitemaps
+ +

+ + + + + +
+Class crawlercommons.sitemaps.UnknownFormatException extends java.lang.Exception implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+error

+
+java.lang.String error
+
+
+
+
+ +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/javadoc/stylesheet.css b/doc/javadoc/stylesheet.css new file mode 100644 index 0000000..6ea9e51 --- /dev/null +++ b/doc/javadoc/stylesheet.css @@ -0,0 +1,29 @@ +/* Javadoc style sheet */ + +/* Define colors, fonts and other style attributes here to override the defaults */ + +/* Page background color */ +body { background-color: #FFFFFF; color:#000000 } + +/* Headings */ +h1 { font-size: 145% } + +/* Table colors */ +.TableHeadingColor { background: #CCCCFF; color:#000000 } /* Dark mauve */ +.TableSubHeadingColor { background: #EEEEFF; color:#000000 } /* Light mauve */ +.TableRowColor { background: #FFFFFF; color:#000000 } /* White */ + +/* Font used in left-hand frame lists */ +.FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } + +/* Navigation bar fonts and colors */ +.NavBarCell1 { background-color:#EEEEFF; color:#000000} /* Light mauve */ +.NavBarCell1Rev { background-color:#00008B; color:#FFFFFF} /* Dark Blue */ +.NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;color:#000000;} +.NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;color:#FFFFFF;} + +.NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} +.NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} +