diff --git a/doc/javadoc/allclasses-frame.html b/doc/javadoc/allclasses-frame.html
new file mode 100644
index 0000000..e3f731d
--- /dev/null
+++ b/doc/javadoc/allclasses-frame.html
@@ -0,0 +1,101 @@
+
+
+
+
+
+
+All Classes (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+All Classes
+
+
+
+
+
+
diff --git a/doc/javadoc/allclasses-noframe.html b/doc/javadoc/allclasses-noframe.html
new file mode 100644
index 0000000..aecab4c
--- /dev/null
+++ b/doc/javadoc/allclasses-noframe.html
@@ -0,0 +1,101 @@
+
+
+
+
+
+
+All Classes (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+All Classes
+
+
+
+
+
+
diff --git a/doc/javadoc/constant-values.html b/doc/javadoc/constant-values.html
new file mode 100644
index 0000000..13141e9
--- /dev/null
+++ b/doc/javadoc/constant-values.html
@@ -0,0 +1,326 @@
+
+
+
+
+
+
+Constant Field Values (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Constant Field Values
+
+
+Contents
+
+
+
+
+
+crawlercommons.fetcher.*
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.robots.*
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.url.*
+
+
+
+
+
+
+
+crawlercommons.url.EffectiveTldFinder
+
+
+
+public static final java.lang.String
+COMMENT
+"//"
+
+
+
+public static final char
+DOT
+46
+
+
+
+public static final java.lang.String
+DOT_REGEX
+"\\."
+
+
+
+public static final java.lang.String
+ETLD_DATA
+"/effective_tld_names.dat"
+
+
+
+public static final java.lang.String
+EXCEPTION
+"!"
+
+
+
+public static final java.lang.String
+WILD_CARD
+"*."
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/AbortedFetchException.html b/doc/javadoc/crawlercommons/fetcher/AbortedFetchException.html
new file mode 100644
index 0000000..6f66ef8
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/AbortedFetchException.html
@@ -0,0 +1,312 @@
+
+
+
+
+
+
+AbortedFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class AbortedFetchException
+
+java.lang.Object
+ java.lang.Throwable
+ java.lang.Exception
+ crawlercommons.fetcher.BaseFetchException
+ crawlercommons.fetcher.AbortedFetchException
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class AbortedFetchException extends BaseFetchException
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class crawlercommons.fetcher.BaseFetchException
+
+
+compareToBase , equals , getCause , getLocalizedMessage , getMessage , getStackTrace , getUrl , hashCode , initCause , printStackTrace , printStackTrace , printStackTrace , readBaseFields , setStackTrace , toString , writeBaseFields
+
+
+
+
+
+Methods inherited from class java.lang.Throwable
+
+
+fillInStackTrace
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+AbortedFetchException
+
+public AbortedFetchException ()
+
+
+
+
+
+AbortedFetchException
+
+public AbortedFetchException (java.lang.String url,
+ AbortedFetchReason abortReason)
+
+
+
+
+
+AbortedFetchException
+
+public AbortedFetchException (java.lang.String url,
+ java.lang.String msg,
+ AbortedFetchReason abortReason)
+
+
+
+
+
+
+
+
+
+getAbortReason
+
+public AbortedFetchReason getAbortReason ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/AbortedFetchReason.html b/doc/javadoc/crawlercommons/fetcher/AbortedFetchReason.html
new file mode 100644
index 0000000..675468a
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/AbortedFetchReason.html
@@ -0,0 +1,354 @@
+
+
+
+
+
+
+AbortedFetchReason (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Enum AbortedFetchReason
+
+java.lang.Object
+ java.lang.Enum<AbortedFetchReason >
+ crawlercommons.fetcher.AbortedFetchReason
+
+
+All Implemented Interfaces: java.io.Serializable, java.lang.Comparable<AbortedFetchReason >
+
+
+
+public enum AbortedFetchReason extends java.lang.Enum<AbortedFetchReason >
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+static AbortedFetchReason
+valueOf (java.lang.String name)
+
+
+ Returns the enum constant of this type with the specified name.
+
+
+
+static AbortedFetchReason []
+values ()
+
+
+ Returns an array containing the constants of this enum type, in
+the order they are declared.
+
+
+
+
+
+Methods inherited from class java.lang.Enum
+
+
+clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Enum Constant Detail
+
+
+
+
+SLOW_RESPONSE_RATE
+
+public static final AbortedFetchReason SLOW_RESPONSE_RATE
+
+
+
+
+
+
+
+INVALID_MIMETYPE
+
+public static final AbortedFetchReason INVALID_MIMETYPE
+
+
+
+
+
+
+
+INTERRUPTED
+
+public static final AbortedFetchReason INTERRUPTED
+
+
+
+
+
+
+
+CONTENT_SIZE
+
+public static final AbortedFetchReason CONTENT_SIZE
+
+
+
+
+
+
+
+
+
+
+
+values
+
+public static AbortedFetchReason [] values ()
+
+Returns an array containing the constants of this enum type, in
+the order they are declared. This method may be used to iterate
+over the constants as follows:
+
+for (AbortedFetchReason c : AbortedFetchReason.values())
+ System.out.println(c);
+
+
+
+
+Returns: an array containing the constants of this enum type, in
+the order they are declared
+
+
+
+
+
+valueOf
+
+public static AbortedFetchReason valueOf (java.lang.String name)
+
+Returns the enum constant of this type with the specified name.
+The string must match exactly an identifier used to declare an
+enum constant in this type. (Extraneous whitespace characters are
+not permitted.)
+
+
+Parameters: name - the name of the enum constant to be returned.
+Returns: the enum constant with the specified name
+ Throws:
+java.lang.IllegalArgumentException - if this enum type has no constant
+with the specified name
+java.lang.NullPointerException - if the argument is null
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/BadProtocolFetchException.html b/doc/javadoc/crawlercommons/fetcher/BadProtocolFetchException.html
new file mode 100644
index 0000000..10efa46
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/BadProtocolFetchException.html
@@ -0,0 +1,264 @@
+
+
+
+
+
+
+BadProtocolFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class BadProtocolFetchException
+
+java.lang.Object
+ java.lang.Throwable
+ java.lang.Exception
+ crawlercommons.fetcher.BaseFetchException
+ crawlercommons.fetcher.BadProtocolFetchException
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class BadProtocolFetchException extends BaseFetchException
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class crawlercommons.fetcher.BaseFetchException
+
+
+compareToBase , equals , getCause , getLocalizedMessage , getMessage , getStackTrace , getUrl , hashCode , initCause , printStackTrace , printStackTrace , printStackTrace , readBaseFields , setStackTrace , toString , writeBaseFields
+
+
+
+
+
+Methods inherited from class java.lang.Throwable
+
+
+fillInStackTrace
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+BadProtocolFetchException
+
+public BadProtocolFetchException ()
+
+
+
+
+
+BadProtocolFetchException
+
+public BadProtocolFetchException (java.lang.String url)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/BaseFetchException.html b/doc/javadoc/crawlercommons/fetcher/BaseFetchException.html
new file mode 100644
index 0000000..a75b347
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/BaseFetchException.html
@@ -0,0 +1,672 @@
+
+
+
+
+
+
+BaseFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class BaseFetchException
+
+java.lang.Object
+ java.lang.Throwable
+ java.lang.Exception
+ crawlercommons.fetcher.BaseFetchException
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+Direct Known Subclasses: AbortedFetchException , BadProtocolFetchException , HttpFetchException , IOFetchException , RedirectFetchException , UrlFetchException
+
+
+
+public abstract class BaseFetchException extends java.lang.Exception
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Throwable
+
+
+fillInStackTrace
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+BaseFetchException
+
+protected BaseFetchException ()
+
+
+
+
+
+BaseFetchException
+
+protected BaseFetchException (java.lang.String url)
+
+
+
+
+
+BaseFetchException
+
+protected BaseFetchException (java.lang.String url,
+ java.lang.String msg)
+
+
+
+
+
+BaseFetchException
+
+protected BaseFetchException (java.lang.String url,
+ java.lang.Exception e)
+
+
+
+
+
+BaseFetchException
+
+protected BaseFetchException (java.lang.String url,
+ java.lang.String msg,
+ java.lang.Exception e)
+
+
+
+
+
+
+
+
+
+getUrl
+
+public java.lang.String getUrl ()
+
+
+
+
+
+
+
+
+compareToBase
+
+protected int compareToBase (BaseFetchException e)
+
+
+
+
+
+
+
+
+equals
+
+public boolean equals (java.lang.Object obj)
+
+
+Overrides: equals
in class java.lang.Object
+
+
+
+
+
+
+
+
+getCause
+
+public java.lang.Throwable getCause ()
+
+
+Overrides: getCause
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+getLocalizedMessage
+
+public java.lang.String getLocalizedMessage ()
+
+
+Overrides: getLocalizedMessage
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+getMessage
+
+public java.lang.String getMessage ()
+
+
+Overrides: getMessage
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+getStackTrace
+
+public java.lang.StackTraceElement[] getStackTrace ()
+
+
+Overrides: getStackTrace
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+hashCode
+
+public int hashCode ()
+
+
+Overrides: hashCode
in class java.lang.Object
+
+
+
+
+
+
+
+
+initCause
+
+public java.lang.Throwable initCause (java.lang.Throwable cause)
+
+
+Overrides: initCause
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+printStackTrace
+
+public void printStackTrace ()
+
+
+Overrides: printStackTrace
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+printStackTrace
+
+public void printStackTrace (java.io.PrintStream s)
+
+
+Overrides: printStackTrace
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+printStackTrace
+
+public void printStackTrace (java.io.PrintWriter s)
+
+
+Overrides: printStackTrace
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+setStackTrace
+
+public void setStackTrace (java.lang.StackTraceElement[] stackTrace)
+
+
+Overrides: setStackTrace
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+toString
+
+public java.lang.String toString ()
+
+
+Overrides: toString
in class java.lang.Throwable
+
+
+
+
+
+
+
+
+readBaseFields
+
+protected void readBaseFields (java.io.DataInput input)
+ throws java.io.IOException
+
+
+
+Throws:
+java.io.IOException
+
+
+
+
+
+writeBaseFields
+
+protected void writeBaseFields (java.io.DataOutput output)
+ throws java.io.IOException
+
+
+
+Throws:
+java.io.IOException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/BaseFetcher.html b/doc/javadoc/crawlercommons/fetcher/BaseFetcher.html
new file mode 100644
index 0000000..73232df
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/BaseFetcher.html
@@ -0,0 +1,610 @@
+
+
+
+
+
+
+BaseFetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class BaseFetcher
+
+java.lang.Object
+ crawlercommons.fetcher.BaseFetcher
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+Direct Known Subclasses: BaseHttpFetcher , SimpleFileFetcher
+
+
+
+public abstract class BaseFetcher extends java.lang.Object implements java.io.Serializable
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+DEFAULT_MAX_CONTENT_SIZE
+
+public static final int DEFAULT_MAX_CONTENT_SIZE
+
+
+See Also: Constant Field Values
+
+
+
+
+_maxContentSizes
+
+protected java.util.Map<java.lang.String,java.lang.Integer> _maxContentSizes
+
+
+
+
+
+
+
+_defaultMaxContentSize
+
+protected int _defaultMaxContentSize
+
+
+
+
+
+
+
+_validMimeTypes
+
+protected java.util.Set<java.lang.String> _validMimeTypes
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+BaseFetcher
+
+public BaseFetcher ()
+
+
+
+
+
+
+
+
+
+setDefaultMaxContentSize
+
+public void setDefaultMaxContentSize (int defaultMaxContentSize)
+
+
+
+
+
+
+
+
+
+
+
+getDefaultMaxContentSize
+
+public int getDefaultMaxContentSize ()
+
+
+
+
+
+
+
+
+
+
+
+setMaxContentSize
+
+public void setMaxContentSize (java.lang.String mimeType,
+ int maxContentSize)
+
+
+
+
+
+
+
+
+
+
+
+getMaxContentSize
+
+public int getMaxContentSize (java.lang.String mimeType)
+
+
+
+
+
+
+
+
+
+
+
+getValidMimeTypes
+
+public java.util.Set<java.lang.String> getValidMimeTypes ()
+
+
+
+
+
+
+
+
+
+
+
+setValidMimeTypes
+
+public void setValidMimeTypes (java.util.Set<java.lang.String> validMimeTypes)
+
+
+
+
+
+
+
+
+
+
+
+addValidMimeTypes
+
+public void addValidMimeTypes (java.util.Set<java.lang.String> validMimeTypes)
+
+
+
+
+
+
+
+
+
+
+
+addValidMimeType
+
+public void addValidMimeType (java.lang.String validMimeType)
+
+
+
+
+
+
+
+
+
+
+
+get
+
+public FetchedResult get (java.lang.String url)
+ throws BaseFetchException
+
+
+
+
+
+
+Throws:
+BaseFetchException
+
+
+
+
+
+getMimeTypeFromContentType
+
+protected static java.lang.String getMimeTypeFromContentType (java.lang.String contentType)
+
+
+
+
+
+
+
+
+
+
+
+get
+
+public abstract FetchedResult get (java.lang.String url,
+ Payload payload)
+ throws BaseFetchException
+
+Get the content stored in the resource referenced by url.
+
+
+
+
+
+Parameters: url
- payload
-
+Returns:
+ Throws:
+BaseFetchException
+
+
+
+
+
+abort
+
+public abstract void abort ()
+
+Terminate any async request being processed.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/EncodingUtils.ExpandedResult.html b/doc/javadoc/crawlercommons/fetcher/EncodingUtils.ExpandedResult.html
new file mode 100644
index 0000000..92d1410
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/EncodingUtils.ExpandedResult.html
@@ -0,0 +1,314 @@
+
+
+
+
+
+
+EncodingUtils.ExpandedResult (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class EncodingUtils.ExpandedResult
+
+java.lang.Object
+ crawlercommons.fetcher.EncodingUtils.ExpandedResult
+
+
+Enclosing class: EncodingUtils
+
+
+
+public static class EncodingUtils.ExpandedResult extends java.lang.Object
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+EncodingUtils.ExpandedResult
+
+public EncodingUtils.ExpandedResult (byte[] expanded,
+ boolean isTruncated)
+
+
+
+
+
+
+
+
+
+getExpanded
+
+public byte[] getExpanded ()
+
+
+
+
+
+
+
+
+setExpanded
+
+public void setExpanded (byte[] expanded)
+
+
+
+
+
+
+
+
+isTruncated
+
+public boolean isTruncated ()
+
+
+
+
+
+
+
+
+setTruncated
+
+public void setTruncated (boolean isTruncated)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/EncodingUtils.html b/doc/javadoc/crawlercommons/fetcher/EncodingUtils.html
new file mode 100644
index 0000000..1daf7ce
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/EncodingUtils.html
@@ -0,0 +1,342 @@
+
+
+
+
+
+
+EncodingUtils (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class EncodingUtils
+
+java.lang.Object
+ crawlercommons.fetcher.EncodingUtils
+
+
+
+public class EncodingUtils extends java.lang.Object
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+EncodingUtils
+
+public EncodingUtils ()
+
+
+
+
+
+
+
+
+
+processGzipEncoded
+
+public static byte[] processGzipEncoded (byte[] compressed)
+ throws java.io.IOException
+
+
+
+Throws:
+java.io.IOException
+
+
+
+
+
+processGzipEncoded
+
+public static EncodingUtils.ExpandedResult processGzipEncoded (byte[] compressed,
+ int sizeLimit)
+ throws java.io.IOException
+
+
+
+Throws:
+java.io.IOException
+
+
+
+
+
+processDeflateEncoded
+
+public static byte[] processDeflateEncoded (byte[] content)
+ throws java.io.IOException
+
+
+
+Throws:
+java.io.IOException
+
+
+
+
+
+processDeflateEncoded
+
+public static byte[] processDeflateEncoded (byte[] compressed,
+ int sizeLimit)
+ throws java.io.IOException
+
+
+
+Throws:
+java.io.IOException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/FetchedResult.html b/doc/javadoc/crawlercommons/fetcher/FetchedResult.html
new file mode 100644
index 0000000..3589ab3
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/FetchedResult.html
@@ -0,0 +1,500 @@
+
+
+
+
+
+
+FetchedResult (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class FetchedResult
+
+java.lang.Object
+ crawlercommons.fetcher.FetchedResult
+
+
+
+public class FetchedResult extends java.lang.Object
+
+
+
+
+
+
+
+
+
+
+
+
+
+Constructor Summary
+
+
+FetchedResult (java.lang.String baseUrl,
+ java.lang.String redirectedUrl,
+ long fetchTime,
+ org.apache.tika.metadata.Metadata headers,
+ byte[] content,
+ java.lang.String contentType,
+ int responseRate,
+ Payload payload,
+ java.lang.String newBaseUrl,
+ int numRedirects,
+ java.lang.String hostAddress)
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+FetchedResult
+
+public FetchedResult (java.lang.String baseUrl,
+ java.lang.String redirectedUrl,
+ long fetchTime,
+ org.apache.tika.metadata.Metadata headers,
+ byte[] content,
+ java.lang.String contentType,
+ int responseRate,
+ Payload payload,
+ java.lang.String newBaseUrl,
+ int numRedirects,
+ java.lang.String hostAddress)
+
+
+
+
+
+
+
+
+
+getPayload
+
+public Payload getPayload ()
+
+
+
+
+
+
+
+
+setPayload
+
+public void setPayload (Payload payload)
+
+
+
+
+
+
+
+
+getBaseUrl
+
+public java.lang.String getBaseUrl ()
+
+
+
+
+
+
+
+
+getFetchedUrl
+
+public java.lang.String getFetchedUrl ()
+
+
+
+
+
+
+
+
+getFetchTime
+
+public long getFetchTime ()
+
+
+
+
+
+
+
+
+getContent
+
+public byte[] getContent ()
+
+
+
+
+
+
+
+
+getContentLength
+
+public int getContentLength ()
+
+
+
+
+
+
+
+
+getContentType
+
+public java.lang.String getContentType ()
+
+
+
+
+
+
+
+
+getResponseRate
+
+public int getResponseRate ()
+
+
+
+
+
+
+
+
+getHeaders
+
+public org.apache.tika.metadata.Metadata getHeaders ()
+
+
+
+
+
+
+
+
+getNewBaseUrl
+
+public java.lang.String getNewBaseUrl ()
+
+
+
+
+
+
+
+
+getNumRedirects
+
+public int getNumRedirects ()
+
+
+
+
+
+
+
+
+getHostAddress
+
+public java.lang.String getHostAddress ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/HttpFetchException.html b/doc/javadoc/crawlercommons/fetcher/HttpFetchException.html
new file mode 100644
index 0000000..9b31771
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/HttpFetchException.html
@@ -0,0 +1,339 @@
+
+
+
+
+
+
+HttpFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class HttpFetchException
+
+java.lang.Object
+ java.lang.Throwable
+ java.lang.Exception
+ crawlercommons.fetcher.BaseFetchException
+ crawlercommons.fetcher.HttpFetchException
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class HttpFetchException extends BaseFetchException
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+Constructor Summary
+
+
+HttpFetchException ()
+
+
+
+
+
+HttpFetchException (java.lang.String url,
+ java.lang.String msg,
+ int httpStatus,
+ org.apache.tika.metadata.Metadata httpHeaders)
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class crawlercommons.fetcher.BaseFetchException
+
+
+compareToBase , equals , getCause , getLocalizedMessage , getStackTrace , getUrl , hashCode , initCause , printStackTrace , printStackTrace , printStackTrace , readBaseFields , setStackTrace , toString , writeBaseFields
+
+
+
+
+
+Methods inherited from class java.lang.Throwable
+
+
+fillInStackTrace
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+HttpFetchException
+
+public HttpFetchException ()
+
+
+
+
+
+HttpFetchException
+
+public HttpFetchException (java.lang.String url,
+ java.lang.String msg,
+ int httpStatus,
+ org.apache.tika.metadata.Metadata httpHeaders)
+
+
+
+
+
+
+
+
+
+getHttpStatus
+
+public int getHttpStatus ()
+
+
+
+
+
+
+
+
+getHttpHeaders
+
+public org.apache.tika.metadata.Metadata getHttpHeaders ()
+
+
+
+
+
+
+
+
+getMessage
+
+public java.lang.String getMessage ()
+
+
+Overrides: getMessage
in class BaseFetchException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/IOFetchException.html b/doc/javadoc/crawlercommons/fetcher/IOFetchException.html
new file mode 100644
index 0000000..e2cb6ee
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/IOFetchException.html
@@ -0,0 +1,266 @@
+
+
+
+
+
+
+IOFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class IOFetchException
+
+java.lang.Object
+ java.lang.Throwable
+ java.lang.Exception
+ crawlercommons.fetcher.BaseFetchException
+ crawlercommons.fetcher.IOFetchException
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class IOFetchException extends BaseFetchException
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class crawlercommons.fetcher.BaseFetchException
+
+
+compareToBase , equals , getCause , getLocalizedMessage , getMessage , getStackTrace , getUrl , hashCode , initCause , printStackTrace , printStackTrace , printStackTrace , readBaseFields , setStackTrace , toString , writeBaseFields
+
+
+
+
+
+Methods inherited from class java.lang.Throwable
+
+
+fillInStackTrace
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+IOFetchException
+
+public IOFetchException ()
+
+
+
+
+
+IOFetchException
+
+public IOFetchException (java.lang.String url,
+ java.io.IOException e)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/Payload.html b/doc/javadoc/crawlercommons/fetcher/Payload.html
new file mode 100644
index 0000000..5db6f03
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/Payload.html
@@ -0,0 +1,565 @@
+
+
+
+
+
+
+Payload (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class Payload
+
+java.lang.Object
+ crawlercommons.fetcher.Payload
+
+
+All Implemented Interfaces: java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+public class Payload extends java.lang.Object implements java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+
+
+
+
+Nested Class Summary
+
+
+
+
+
+Nested classes/interfaces inherited from interface java.util.Map
+
+
+java.util.Map.Entry<K,V>
+
+
+
+
+
+
+
+
+
+
+Constructor Summary
+
+
+Payload ()
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+ void
+clear ()
+
+
+
+
+
+
+ boolean
+containsKey (java.lang.Object key)
+
+
+
+
+
+
+ boolean
+containsValue (java.lang.Object value)
+
+
+
+
+
+
+ java.util.Set<java.util.Map.Entry<java.lang.String,java.lang.Object>>
+entrySet ()
+
+
+
+
+
+
+ boolean
+equals (java.lang.Object o)
+
+
+
+
+
+
+ java.lang.Object
+get (java.lang.Object key)
+
+
+
+
+
+
+ int
+hashCode ()
+
+
+
+
+
+
+ boolean
+isEmpty ()
+
+
+
+
+
+
+ java.util.Set<java.lang.String>
+keySet ()
+
+
+
+
+
+
+ java.lang.Object
+put (java.lang.String key,
+ java.lang.Object value)
+
+
+
+
+
+
+ void
+putAll (java.util.Map<? extends java.lang.String,? extends java.lang.Object> m)
+
+
+
+
+
+
+ java.lang.Object
+remove (java.lang.Object key)
+
+
+
+
+
+
+ int
+size ()
+
+
+
+
+
+
+ java.util.Collection<java.lang.Object>
+values ()
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, finalize, getClass, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+Payload
+
+public Payload ()
+
+
+
+
+
+
+
+
+
+clear
+
+public void clear ()
+
+
+Specified by: clear
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+containsKey
+
+public boolean containsKey (java.lang.Object key)
+
+
+Specified by: containsKey
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+containsValue
+
+public boolean containsValue (java.lang.Object value)
+
+
+Specified by: containsValue
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+entrySet
+
+public java.util.Set<java.util.Map.Entry<java.lang.String,java.lang.Object>> entrySet ()
+
+
+Specified by: entrySet
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+equals
+
+public boolean equals (java.lang.Object o)
+
+
+Specified by: equals
in interface java.util.Map<java.lang.String,java.lang.Object>
Overrides: equals
in class java.lang.Object
+
+
+
+
+
+
+
+
+get
+
+public java.lang.Object get (java.lang.Object key)
+
+
+Specified by: get
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+hashCode
+
+public int hashCode ()
+
+
+Specified by: hashCode
in interface java.util.Map<java.lang.String,java.lang.Object>
Overrides: hashCode
in class java.lang.Object
+
+
+
+
+
+
+
+
+isEmpty
+
+public boolean isEmpty ()
+
+
+Specified by: isEmpty
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+keySet
+
+public java.util.Set<java.lang.String> keySet ()
+
+
+Specified by: keySet
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+put
+
+public java.lang.Object put (java.lang.String key,
+ java.lang.Object value)
+
+
+Specified by: put
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+putAll
+
+public void putAll (java.util.Map<? extends java.lang.String,? extends java.lang.Object> m)
+
+
+Specified by: putAll
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+remove
+
+public java.lang.Object remove (java.lang.Object key)
+
+
+Specified by: remove
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+size
+
+public int size ()
+
+
+Specified by: size
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+values
+
+public java.util.Collection<java.lang.Object> values ()
+
+
+Specified by: values
in interface java.util.Map<java.lang.String,java.lang.Object>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.RedirectExceptionReason.html b/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.RedirectExceptionReason.html
new file mode 100644
index 0000000..199b966
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.RedirectExceptionReason.html
@@ -0,0 +1,341 @@
+
+
+
+
+
+
+RedirectFetchException.RedirectExceptionReason (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Enum RedirectFetchException.RedirectExceptionReason
+
+java.lang.Object
+ java.lang.Enum<RedirectFetchException.RedirectExceptionReason >
+ crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason
+
+
+All Implemented Interfaces: java.io.Serializable, java.lang.Comparable<RedirectFetchException.RedirectExceptionReason >
+
+
+Enclosing class: RedirectFetchException
+
+
+
+public static enum RedirectFetchException.RedirectExceptionReason extends java.lang.Enum<RedirectFetchException.RedirectExceptionReason >
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Enum
+
+
+clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Enum Constant Detail
+
+
+
+
+TOO_MANY_REDIRECTS
+
+public static final RedirectFetchException.RedirectExceptionReason TOO_MANY_REDIRECTS
+
+
+
+
+
+
+
+PERM_REDIRECT_DISALLOWED
+
+public static final RedirectFetchException.RedirectExceptionReason PERM_REDIRECT_DISALLOWED
+
+
+
+
+
+
+
+TEMP_REDIRECT_DISALLOWED
+
+public static final RedirectFetchException.RedirectExceptionReason TEMP_REDIRECT_DISALLOWED
+
+
+
+
+
+
+
+
+
+
+
+values
+
+public static RedirectFetchException.RedirectExceptionReason [] values ()
+
+Returns an array containing the constants of this enum type, in
+the order they are declared. This method may be used to iterate
+over the constants as follows:
+
+for (RedirectFetchException.RedirectExceptionReason c : RedirectFetchException.RedirectExceptionReason.values())
+ System.out.println(c);
+
+
+
+
+Returns: an array containing the constants of this enum type, in
+the order they are declared
+
+
+
+
+
+valueOf
+
+public static RedirectFetchException.RedirectExceptionReason valueOf (java.lang.String name)
+
+Returns the enum constant of this type with the specified name.
+The string must match exactly an identifier used to declare an
+enum constant in this type. (Extraneous whitespace characters are
+not permitted.)
+
+
+Parameters: name - the name of the enum constant to be returned.
+Returns: the enum constant with the specified name
+ Throws:
+java.lang.IllegalArgumentException - if this enum type has no constant
+with the specified name
+java.lang.NullPointerException - if the argument is null
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.html b/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.html
new file mode 100644
index 0000000..ef95987
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/RedirectFetchException.html
@@ -0,0 +1,332 @@
+
+
+
+
+
+
+RedirectFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class RedirectFetchException
+
+java.lang.Object
+ java.lang.Throwable
+ java.lang.Exception
+ crawlercommons.fetcher.BaseFetchException
+ crawlercommons.fetcher.RedirectFetchException
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class RedirectFetchException extends BaseFetchException
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class crawlercommons.fetcher.BaseFetchException
+
+
+compareToBase , equals , getCause , getLocalizedMessage , getMessage , getStackTrace , getUrl , hashCode , initCause , printStackTrace , printStackTrace , printStackTrace , readBaseFields , setStackTrace , toString , writeBaseFields
+
+
+
+
+
+Methods inherited from class java.lang.Throwable
+
+
+fillInStackTrace
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+RedirectFetchException
+
+public RedirectFetchException ()
+
+
+
+
+
+RedirectFetchException
+
+public RedirectFetchException (java.lang.String url,
+ java.lang.String redirectedUrl,
+ RedirectFetchException.RedirectExceptionReason reason)
+
+
+
+
+
+
+
+
+
+getRedirectedUrl
+
+public java.lang.String getRedirectedUrl ()
+
+
+
+
+
+
+
+
+getReason
+
+public RedirectFetchException.RedirectExceptionReason getReason ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/UrlFetchException.html b/doc/javadoc/crawlercommons/fetcher/UrlFetchException.html
new file mode 100644
index 0000000..6621651
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/UrlFetchException.html
@@ -0,0 +1,266 @@
+
+
+
+
+
+
+UrlFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+Class UrlFetchException
+
+java.lang.Object
+ java.lang.Throwable
+ java.lang.Exception
+ crawlercommons.fetcher.BaseFetchException
+ crawlercommons.fetcher.UrlFetchException
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class UrlFetchException extends BaseFetchException
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class crawlercommons.fetcher.BaseFetchException
+
+
+compareToBase , equals , getCause , getLocalizedMessage , getMessage , getStackTrace , getUrl , hashCode , initCause , printStackTrace , printStackTrace , printStackTrace , readBaseFields , setStackTrace , toString , writeBaseFields
+
+
+
+
+
+Methods inherited from class java.lang.Throwable
+
+
+fillInStackTrace
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, finalize, getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+UrlFetchException
+
+public UrlFetchException ()
+
+
+
+
+
+UrlFetchException
+
+public UrlFetchException (java.lang.String url,
+ java.lang.String msg)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchException.html
new file mode 100644
index 0000000..b9bb7c7
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchException.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.AbortedFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.AbortedFetchException
+
+No usage of crawlercommons.fetcher.AbortedFetchException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchReason.html b/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchReason.html
new file mode 100644
index 0000000..6b6bf57
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/AbortedFetchReason.html
@@ -0,0 +1,220 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.AbortedFetchReason (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.AbortedFetchReason
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/BadProtocolFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/BadProtocolFetchException.html
new file mode 100644
index 0000000..4e9a442
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/BadProtocolFetchException.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.BadProtocolFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.BadProtocolFetchException
+
+No usage of crawlercommons.fetcher.BadProtocolFetchException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetchException.html
new file mode 100644
index 0000000..3d3a337
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetchException.html
@@ -0,0 +1,339 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.BaseFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.BaseFetchException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetcher.html b/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetcher.html
new file mode 100644
index 0000000..aa9f90f
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/BaseFetcher.html
@@ -0,0 +1,217 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.BaseFetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.BaseFetcher
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.ExpandedResult.html b/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.ExpandedResult.html
new file mode 100644
index 0000000..5377c00
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.ExpandedResult.html
@@ -0,0 +1,181 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.EncodingUtils.ExpandedResult (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.EncodingUtils.ExpandedResult
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.html b/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.html
new file mode 100644
index 0000000..b907d90
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/EncodingUtils.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.EncodingUtils (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.EncodingUtils
+
+No usage of crawlercommons.fetcher.EncodingUtils
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/FetchedResult.html b/doc/javadoc/crawlercommons/fetcher/class-use/FetchedResult.html
new file mode 100644
index 0000000..26da49d
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/FetchedResult.html
@@ -0,0 +1,267 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.FetchedResult (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.FetchedResult
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/HttpFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/HttpFetchException.html
new file mode 100644
index 0000000..2b0bc7b
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/HttpFetchException.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.HttpFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.HttpFetchException
+
+No usage of crawlercommons.fetcher.HttpFetchException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/IOFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/IOFetchException.html
new file mode 100644
index 0000000..3f2e2cc
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/IOFetchException.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.IOFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.IOFetchException
+
+No usage of crawlercommons.fetcher.IOFetchException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/Payload.html b/doc/javadoc/crawlercommons/fetcher/class-use/Payload.html
new file mode 100644
index 0000000..d8e67b4
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/Payload.html
@@ -0,0 +1,299 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.Payload (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.Payload
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Constructors in crawlercommons.fetcher with parameters of type Payload
+
+
+FetchedResult (java.lang.String baseUrl,
+ java.lang.String redirectedUrl,
+ long fetchTime,
+ org.apache.tika.metadata.Metadata headers,
+ byte[] content,
+ java.lang.String contentType,
+ int responseRate,
+ Payload payload,
+ java.lang.String newBaseUrl,
+ int numRedirects,
+ java.lang.String hostAddress)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.RedirectExceptionReason.html b/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.RedirectExceptionReason.html
new file mode 100644
index 0000000..b60c83a
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.RedirectExceptionReason.html
@@ -0,0 +1,213 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.html
new file mode 100644
index 0000000..35cd4c0
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/RedirectFetchException.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.RedirectFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.RedirectFetchException
+
+No usage of crawlercommons.fetcher.RedirectFetchException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/class-use/UrlFetchException.html b/doc/javadoc/crawlercommons/fetcher/class-use/UrlFetchException.html
new file mode 100644
index 0000000..61fd4df
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/class-use/UrlFetchException.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.UrlFetchException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.UrlFetchException
+
+No usage of crawlercommons.fetcher.UrlFetchException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/file/SimpleFileFetcher.html b/doc/javadoc/crawlercommons/fetcher/file/SimpleFileFetcher.html
new file mode 100644
index 0000000..769fe39
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/file/SimpleFileFetcher.html
@@ -0,0 +1,322 @@
+
+
+
+
+
+
+SimpleFileFetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher.file
+
+Class SimpleFileFetcher
+
+java.lang.Object
+ crawlercommons.fetcher.BaseFetcher
+ crawlercommons.fetcher.file.SimpleFileFetcher
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class SimpleFileFetcher extends BaseFetcher
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+ void
+abort ()
+
+
+ Terminate any async request being processed.
+
+
+
+ FetchedResult
+get (java.lang.String url,
+ Payload payload)
+
+
+ Get the content stored in the resource referenced by
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+SimpleFileFetcher
+
+public SimpleFileFetcher ()
+
+
+
+
+
+
+
+
+
+get
+
+public FetchedResult get (java.lang.String url,
+ Payload payload)
+ throws BaseFetchException
+
+Description copied from class: BaseFetcher
+Get the content stored in the resource referenced by
+
+
+Specified by: get
in class BaseFetcher
+
+
+
+Returns:
+ Throws:
+BaseFetchException
+
+
+
+
+
+abort
+
+public void abort ()
+
+Description copied from class: BaseFetcher
+Terminate any async request being processed.
+
+
+Specified by: abort
in class BaseFetcher
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/file/class-use/SimpleFileFetcher.html b/doc/javadoc/crawlercommons/fetcher/file/class-use/SimpleFileFetcher.html
new file mode 100644
index 0000000..fc757e0
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/file/class-use/SimpleFileFetcher.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.file.SimpleFileFetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.file.SimpleFileFetcher
+
+No usage of crawlercommons.fetcher.file.SimpleFileFetcher
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/file/package-frame.html b/doc/javadoc/crawlercommons/fetcher/file/package-frame.html
new file mode 100644
index 0000000..74cac2d
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/file/package-frame.html
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+crawlercommons.fetcher.file (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher.file
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/file/package-summary.html b/doc/javadoc/crawlercommons/fetcher/file/package-summary.html
new file mode 100644
index 0000000..2c7781f
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/file/package-summary.html
@@ -0,0 +1,157 @@
+
+
+
+
+
+
+crawlercommons.fetcher.file (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Package crawlercommons.fetcher.file
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/file/package-tree.html b/doc/javadoc/crawlercommons/fetcher/file/package-tree.html
new file mode 100644
index 0000000..fb0f400
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/file/package-tree.html
@@ -0,0 +1,156 @@
+
+
+
+
+
+
+crawlercommons.fetcher.file Class Hierarchy (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hierarchy For Package crawlercommons.fetcher.file
+
+
+
+Package Hierarchies: All Packages
+
+
+Class Hierarchy
+
+
+java.lang.Object
+crawlercommons.fetcher.BaseFetcher (implements java.io.Serializable)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/file/package-use.html b/doc/javadoc/crawlercommons/fetcher/file/package-use.html
new file mode 100644
index 0000000..e17fd25
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/file/package-use.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Package crawlercommons.fetcher.file (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Package crawlercommons.fetcher.file
+
+No usage of crawlercommons.fetcher.file
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.RedirectMode.html b/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.RedirectMode.html
new file mode 100644
index 0000000..eeeeea1
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.RedirectMode.html
@@ -0,0 +1,341 @@
+
+
+
+
+
+
+BaseHttpFetcher.RedirectMode (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher.http
+
+Enum BaseHttpFetcher.RedirectMode
+
+java.lang.Object
+ java.lang.Enum<BaseHttpFetcher.RedirectMode >
+ crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode
+
+
+All Implemented Interfaces: java.io.Serializable, java.lang.Comparable<BaseHttpFetcher.RedirectMode >
+
+
+Enclosing class: BaseHttpFetcher
+
+
+
+public static enum BaseHttpFetcher.RedirectMode extends java.lang.Enum<BaseHttpFetcher.RedirectMode >
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Enum
+
+
+clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Enum Constant Detail
+
+
+
+
+FOLLOW_ALL
+
+public static final BaseHttpFetcher.RedirectMode FOLLOW_ALL
+
+
+
+
+
+
+
+FOLLOW_TEMP
+
+public static final BaseHttpFetcher.RedirectMode FOLLOW_TEMP
+
+
+
+
+
+
+
+FOLLOW_NONE
+
+public static final BaseHttpFetcher.RedirectMode FOLLOW_NONE
+
+
+
+
+
+
+
+
+
+
+
+values
+
+public static BaseHttpFetcher.RedirectMode [] values ()
+
+Returns an array containing the constants of this enum type, in
+the order they are declared. This method may be used to iterate
+over the constants as follows:
+
+for (BaseHttpFetcher.RedirectMode c : BaseHttpFetcher.RedirectMode.values())
+ System.out.println(c);
+
+
+
+
+Returns: an array containing the constants of this enum type, in
+the order they are declared
+
+
+
+
+
+valueOf
+
+public static BaseHttpFetcher.RedirectMode valueOf (java.lang.String name)
+
+Returns the enum constant of this type with the specified name.
+The string must match exactly an identifier used to declare an
+enum constant in this type. (Extraneous whitespace characters are
+not permitted.)
+
+
+Parameters: name
- the name of the enum constant to be returned.
+Returns: the enum constant with the specified name
+ Throws:
+java.lang.IllegalArgumentException
- if this enum type has no constant
+with the specified name
+java.lang.NullPointerException
- if the argument is null
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.html b/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.html
new file mode 100644
index 0000000..6141bd6
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/BaseHttpFetcher.html
@@ -0,0 +1,781 @@
+
+
+
+
+
+
+BaseHttpFetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher.http
+
+Class BaseHttpFetcher
+
+java.lang.Object
+ crawlercommons.fetcher.BaseFetcher
+ crawlercommons.fetcher.http.BaseHttpFetcher
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+Direct Known Subclasses: SimpleHttpFetcher
+
+
+
+public abstract class BaseHttpFetcher extends BaseFetcher
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class crawlercommons.fetcher.BaseFetcher
+
+
+abort , addValidMimeType , addValidMimeTypes , get , get , getDefaultMaxContentSize , getMaxContentSize , getMimeTypeFromContentType , getValidMimeTypes , setDefaultMaxContentSize , setMaxContentSize , setValidMimeTypes
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+NO_MIN_RESPONSE_RATE
+
+public static final int NO_MIN_RESPONSE_RATE
+
+
+See Also: Constant Field Values
+
+
+
+
+NO_REDIRECTS
+
+public static final int NO_REDIRECTS
+
+
+See Also: Constant Field Values
+
+
+
+
+DEFAULT_MIN_RESPONSE_RATE
+
+public static final int DEFAULT_MIN_RESPONSE_RATE
+
+
+See Also: Constant Field Values
+
+
+
+
+DEFAULT_MAX_CONNECTIONS_PER_HOST
+
+public static final int DEFAULT_MAX_CONNECTIONS_PER_HOST
+
+
+See Also: Constant Field Values
+
+
+
+
+DEFAULT_MAX_REDIRECTS
+
+public static final int DEFAULT_MAX_REDIRECTS
+
+
+See Also: Constant Field Values
+
+
+
+
+DEFAULT_ACCEPT_LANGUAGE
+
+public static final java.lang.String DEFAULT_ACCEPT_LANGUAGE
+
+
+See Also: Constant Field Values
+
+
+
+
+DEFAULT_REDIRECT_MODE
+
+public static final BaseHttpFetcher.RedirectMode DEFAULT_REDIRECT_MODE
+
+
+
+
+
+
+
+_maxThreads
+
+protected int _maxThreads
+
+
+
+
+
+
+
+_userAgent
+
+protected UserAgent _userAgent
+
+
+
+
+
+
+
+_maxRedirects
+
+protected int _maxRedirects
+
+
+
+
+
+
+
+_maxConnectionsPerHost
+
+protected int _maxConnectionsPerHost
+
+
+
+
+
+
+
+_minResponseRate
+
+protected int _minResponseRate
+
+
+
+
+
+
+
+_acceptLanguage
+
+protected java.lang.String _acceptLanguage
+
+
+
+
+
+
+
+_redirectMode
+
+protected BaseHttpFetcher.RedirectMode _redirectMode
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+BaseHttpFetcher
+
+public BaseHttpFetcher (int maxThreads,
+ UserAgent userAgent)
+
+
+
+
+
+
+
+
+
+getMaxThreads
+
+public int getMaxThreads ()
+
+
+
+
+
+
+
+
+getUserAgent
+
+public UserAgent getUserAgent ()
+
+
+
+
+
+
+
+
+setMaxConnectionsPerHost
+
+public void setMaxConnectionsPerHost (int maxConnectionsPerHost)
+
+
+
+
+
+
+
+
+getMaxConnectionsPerHost
+
+public int getMaxConnectionsPerHost ()
+
+
+
+
+
+
+
+
+setMinResponseRate
+
+public void setMinResponseRate (int minResponseRate)
+
+
+
+
+
+
+
+
+getMinResponseRate
+
+public int getMinResponseRate ()
+
+Return the minimum response rate. If the speed at which bytes are being returned
+ from the server drops below this, the fetch of that page will be aborted.
+
+
+
+Returns: bytes/second
+
+
+
+
+
+setAcceptLanguage
+
+public void setAcceptLanguage (java.lang.String acceptLanguage)
+
+
+
+
+
+
+
+
+getAcceptLanguage
+
+public java.lang.String getAcceptLanguage ()
+
+
+
+
+
+
+
+
+setMaxRedirects
+
+public void setMaxRedirects (int maxRedirects)
+
+
+
+
+
+
+
+
+getMaxRedirects
+
+public int getMaxRedirects ()
+
+
+
+
+
+
+
+
+setRedirectMode
+
+public void setRedirectMode (BaseHttpFetcher.RedirectMode mode)
+
+
+
+
+
+
+
+
+getRedirectMode
+
+public BaseHttpFetcher.RedirectMode getRedirectMode ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/SimpleHttpFetcher.html b/doc/javadoc/crawlercommons/fetcher/http/SimpleHttpFetcher.html
new file mode 100644
index 0000000..2b4346f
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/SimpleHttpFetcher.html
@@ -0,0 +1,576 @@
+
+
+
+
+
+
+SimpleHttpFetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher.http
+
+Class SimpleHttpFetcher
+
+java.lang.Object
+ crawlercommons.fetcher.BaseFetcher
+ crawlercommons.fetcher.http.BaseHttpFetcher
+ crawlercommons.fetcher.http.SimpleHttpFetcher
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class SimpleHttpFetcher extends BaseHttpFetcher
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+Nested Class Summary
+
+
+
+
+
+
+
+
+
+
+
+
+Fields inherited from class crawlercommons.fetcher.http.BaseHttpFetcher
+
+
+_acceptLanguage , _maxConnectionsPerHost , _maxRedirects , _maxThreads , _minResponseRate , _redirectMode , _userAgent , DEFAULT_ACCEPT_LANGUAGE , DEFAULT_MAX_CONNECTIONS_PER_HOST , DEFAULT_MAX_REDIRECTS , DEFAULT_MIN_RESPONSE_RATE , DEFAULT_REDIRECT_MODE , NO_MIN_RESPONSE_RATE , NO_REDIRECTS
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class crawlercommons.fetcher.http.BaseHttpFetcher
+
+
+getAcceptLanguage , getMaxConnectionsPerHost , getMaxRedirects , getMaxThreads , getMinResponseRate , getRedirectMode , getUserAgent , setAcceptLanguage , setMaxConnectionsPerHost , setMaxRedirects , setMinResponseRate , setRedirectMode
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+SimpleHttpFetcher
+
+public SimpleHttpFetcher (UserAgent userAgent)
+
+
+
+
+
+SimpleHttpFetcher
+
+public SimpleHttpFetcher (int maxThreads,
+ UserAgent userAgent)
+
+
+
+
+
+
+
+
+
+getHttpVersion
+
+public org.apache.http.HttpVersion getHttpVersion ()
+
+
+
+
+
+
+
+
+setHttpVersion
+
+public void setHttpVersion (org.apache.http.HttpVersion httpVersion)
+
+
+
+
+
+
+
+
+getSocketTimeout
+
+public int getSocketTimeout ()
+
+
+
+
+
+
+
+
+setSocketTimeout
+
+public void setSocketTimeout (int socketTimeoutInMs)
+
+
+
+
+
+
+
+
+getConnectionTimeout
+
+public int getConnectionTimeout ()
+
+
+
+
+
+
+
+
+setConnectionTimeout
+
+public void setConnectionTimeout (int connectionTimeoutInMs)
+
+
+
+
+
+
+
+
+getMaxRetryCount
+
+public int getMaxRetryCount ()
+
+
+
+
+
+
+
+
+setMaxRetryCount
+
+public void setMaxRetryCount (int maxRetryCount)
+
+
+
+
+
+
+
+
+get
+
+public FetchedResult get (java.lang.String url,
+ Payload payload)
+ throws BaseFetchException
+
+Description copied from class: BaseFetcher
+Get the content stored in the resource referenced by
+
+
+Specified by: get
in class BaseFetcher
+
+
+
+Returns:
+ Throws:
+BaseFetchException
+
+
+
+
+
+fetch
+
+public FetchedResult fetch (java.lang.String url)
+ throws BaseFetchException
+
+
+
+Throws:
+BaseFetchException
+
+
+
+
+
+fetch
+
+public FetchedResult fetch (org.apache.http.client.methods.HttpRequestBase request,
+ java.lang.String url,
+ Payload payload)
+ throws BaseFetchException
+
+
+
+Throws:
+BaseFetchException
+
+
+
+
+
+abort
+
+public void abort ()
+
+Description copied from class: BaseFetcher
+Terminate any async request being processed.
+
+
+Specified by: abort
in class BaseFetcher
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/UserAgent.html b/doc/javadoc/crawlercommons/fetcher/http/UserAgent.html
new file mode 100644
index 0000000..dce416b
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/UserAgent.html
@@ -0,0 +1,364 @@
+
+
+
+
+
+
+UserAgent (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher.http
+
+Class UserAgent
+
+java.lang.Object
+ crawlercommons.fetcher.http.UserAgent
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class UserAgent extends java.lang.Objectimplements java.io.Serializable
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Constructor Summary
+
+
+UserAgent (java.lang.String agentName,
+ java.lang.String emailAddress,
+ java.lang.String webAddress)
+
+
+
+
+
+UserAgent (java.lang.String agentName,
+ java.lang.String emailAddress,
+ java.lang.String webAddress,
+ java.lang.String browserVersion)
+
+
+
+
+
+UserAgent (java.lang.String agentName,
+ java.lang.String emailAddress,
+ java.lang.String webAddress,
+ java.lang.String browserVersion,
+ java.lang.String crawlerVersion)
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+DEFAULT_BROWSER_VERSION
+
+public static final java.lang.String DEFAULT_BROWSER_VERSION
+
+
+See Also: Constant Field Values
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+UserAgent
+
+public UserAgent (java.lang.String agentName,
+ java.lang.String emailAddress,
+ java.lang.String webAddress)
+
+
+
+
+
+UserAgent
+
+public UserAgent (java.lang.String agentName,
+ java.lang.String emailAddress,
+ java.lang.String webAddress,
+ java.lang.String browserVersion)
+
+
+
+
+
+UserAgent
+
+public UserAgent (java.lang.String agentName,
+ java.lang.String emailAddress,
+ java.lang.String webAddress,
+ java.lang.String browserVersion,
+ java.lang.String crawlerVersion)
+
+
+
+
+
+
+
+
+
+getAgentName
+
+public java.lang.String getAgentName ()
+
+
+
+
+
+
+
+
+
+
+
+getUserAgentString
+
+public java.lang.String getUserAgentString ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.RedirectMode.html b/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.RedirectMode.html
new file mode 100644
index 0000000..2d71275
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.RedirectMode.html
@@ -0,0 +1,237 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.html b/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.html
new file mode 100644
index 0000000..2833c3d
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/class-use/BaseHttpFetcher.html
@@ -0,0 +1,244 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.http.BaseHttpFetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.http.BaseHttpFetcher
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/class-use/SimpleHttpFetcher.html b/doc/javadoc/crawlercommons/fetcher/http/class-use/SimpleHttpFetcher.html
new file mode 100644
index 0000000..9977655
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/class-use/SimpleHttpFetcher.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.http.SimpleHttpFetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+No usage of crawlercommons.fetcher.http.SimpleHttpFetcher
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/class-use/UserAgent.html b/doc/javadoc/crawlercommons/fetcher/http/class-use/UserAgent.html
new file mode 100644
index 0000000..97b0f42
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/class-use/UserAgent.html
@@ -0,0 +1,254 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.http.UserAgent (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.fetcher.http.UserAgent
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/package-frame.html b/doc/javadoc/crawlercommons/fetcher/http/package-frame.html
new file mode 100644
index 0000000..0c89c02
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/package-frame.html
@@ -0,0 +1,47 @@
+
+
+
+
+
+
+crawlercommons.fetcher.http (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher.http
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/package-summary.html b/doc/javadoc/crawlercommons/fetcher/http/package-summary.html
new file mode 100644
index 0000000..d2cfe75
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/package-summary.html
@@ -0,0 +1,179 @@
+
+
+
+
+
+
+crawlercommons.fetcher.http (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Package crawlercommons.fetcher.http
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/package-tree.html b/doc/javadoc/crawlercommons/fetcher/http/package-tree.html
new file mode 100644
index 0000000..2414eca
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/package-tree.html
@@ -0,0 +1,169 @@
+
+
+
+
+
+
+crawlercommons.fetcher.http Class Hierarchy (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hierarchy For Package crawlercommons.fetcher.http
+
+
+
+Package Hierarchies: All Packages
+
+
+Class Hierarchy
+
+
+java.lang.Object
+crawlercommons.fetcher.BaseFetcher (implements java.io.Serializable)
+
+ crawlercommons.fetcher.http.UserAgent (implements java.io.Serializable)
+
+
+
+Enum Hierarchy
+
+
+java.lang.Object
+java.lang.Enum<E> (implements java.lang.Comparable<T>, java.io.Serializable)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/http/package-use.html b/doc/javadoc/crawlercommons/fetcher/http/package-use.html
new file mode 100644
index 0000000..4041eb5
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/http/package-use.html
@@ -0,0 +1,207 @@
+
+
+
+
+
+
+Uses of Package crawlercommons.fetcher.http (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Package crawlercommons.fetcher.http
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/package-frame.html b/doc/javadoc/crawlercommons/fetcher/package-frame.html
new file mode 100644
index 0000000..55535af
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/package-frame.html
@@ -0,0 +1,76 @@
+
+
+
+
+
+
+crawlercommons.fetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.fetcher
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/package-summary.html b/doc/javadoc/crawlercommons/fetcher/package-summary.html
new file mode 100644
index 0000000..08fdd62
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/package-summary.html
@@ -0,0 +1,229 @@
+
+
+
+
+
+
+crawlercommons.fetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Package crawlercommons.fetcher
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/package-tree.html b/doc/javadoc/crawlercommons/fetcher/package-tree.html
new file mode 100644
index 0000000..203e63e
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/package-tree.html
@@ -0,0 +1,172 @@
+
+
+
+
+
+
+crawlercommons.fetcher Class Hierarchy (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hierarchy For Package crawlercommons.fetcher
+
+
+
+Package Hierarchies: All Packages
+
+
+Class Hierarchy
+
+
+
+Enum Hierarchy
+
+
+java.lang.Object
+java.lang.Enum<E> (implements java.lang.Comparable<T>, java.io.Serializable)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/fetcher/package-use.html b/doc/javadoc/crawlercommons/fetcher/package-use.html
new file mode 100644
index 0000000..42264bb
--- /dev/null
+++ b/doc/javadoc/crawlercommons/fetcher/package-use.html
@@ -0,0 +1,274 @@
+
+
+
+
+
+
+Uses of Package crawlercommons.fetcher (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Package crawlercommons.fetcher
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/BaseRobotRules.html b/doc/javadoc/crawlercommons/robots/BaseRobotRules.html
new file mode 100644
index 0000000..08c99c6
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/BaseRobotRules.html
@@ -0,0 +1,448 @@
+
+
+
+
+
+
+BaseRobotRules (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.robots
+
+Class BaseRobotRules
+
+java.lang.Object
+ crawlercommons.robots.BaseRobotRules
+
+
+Direct Known Subclasses: SimpleRobotRules
+
+
+
+public abstract class BaseRobotRules extends java.lang.Object
+
+
+
+Result from parsing a single robots.txt file - which means we
+ get a set of rules, and a crawl-delay.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+UNSET_CRAWL_DELAY
+
+public static final long UNSET_CRAWL_DELAY
+
+
+See Also: Constant Field Values
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+BaseRobotRules
+
+public BaseRobotRules ()
+
+
+
+
+
+
+
+
+
+isAllowed
+
+public abstract boolean isAllowed (java.lang.String url)
+
+
+
+
+
+
+
+
+isAllowAll
+
+public abstract boolean isAllowAll ()
+
+
+
+
+
+
+
+
+isAllowNone
+
+public abstract boolean isAllowNone ()
+
+
+
+
+
+
+
+
+getCrawlDelay
+
+public long getCrawlDelay ()
+
+
+
+
+
+
+
+
+setCrawlDelay
+
+public void setCrawlDelay (long crawlDelay)
+
+
+
+
+
+
+
+
+isDeferVisits
+
+public boolean isDeferVisits ()
+
+
+
+
+
+
+
+
+setDeferVisits
+
+public void setDeferVisits (boolean deferVisits)
+
+
+
+
+
+
+
+
+addSitemap
+
+public void addSitemap (java.lang.String sitemap)
+
+
+
+
+
+
+
+
+getSitemaps
+
+public java.util.List<java.lang.String> getSitemaps ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/BaseRobotsParser.html b/doc/javadoc/crawlercommons/robots/BaseRobotsParser.html
new file mode 100644
index 0000000..15a5b09
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/BaseRobotsParser.html
@@ -0,0 +1,302 @@
+
+
+
+
+
+
+BaseRobotsParser (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.robots
+
+Class BaseRobotsParser
+
+java.lang.Object
+ crawlercommons.robots.BaseRobotsParser
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+Direct Known Subclasses: SimpleRobotRulesParser
+
+
+
+public abstract class BaseRobotsParser extends java.lang.Object implements java.io.Serializable
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+abstract BaseRobotRules
+failedFetch (int httpStatusCode)
+
+
+ The fetch of robots.txt failed, so return rules appropriate given the
+ HTTP status code.
+
+
+
+abstract BaseRobotRules
+parseContent (java.lang.String url,
+ byte[] content,
+ java.lang.String contentType,
+ java.lang.String robotName)
+
+
+ Parse the robots.txt file in content, and return rules appropriate for
+ processing paths by robotName.
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+BaseRobotsParser
+
+public BaseRobotsParser ()
+
+
+
+
+
+
+
+
+
+parseContent
+
+public abstract BaseRobotRules parseContent (java.lang.String url,
+ byte[] content,
+ java.lang.String contentType,
+ java.lang.String robotName)
+
+Parse the robots.txt file in content, and return rules appropriate for
+ processing paths by robotName.
+
+
+
+
+
+Parameters: url - URL that content was fetched from (for reporting purposes)
+content - raw bytes from the site's robots.txt file
+contentType - HTTP response header (mime-type)
+robotName - name of crawler, to be used when processing file contents
+ (just the name portion, w/o version or other details)
+Returns: robot rules.
+
+
+
+
+
+failedFetch
+
+public abstract BaseRobotRules failedFetch (int httpStatusCode)
+
+The fetch of robots.txt failed, so return rules appropriate given the
+ HTTP status code.
+
+
+
+
+
+Parameters: httpStatusCode - a failure status code (NOT 2xx)
+Returns: robot rules
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/RobotUtils.html b/doc/javadoc/crawlercommons/robots/RobotUtils.html
new file mode 100644
index 0000000..3827c36
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/RobotUtils.html
@@ -0,0 +1,319 @@
+
+
+
+
+
+
+RobotUtils (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.robots
+
+Class RobotUtils
+
+java.lang.Object
+ crawlercommons.robots.RobotUtils
+
+
+
+public class RobotUtils extends java.lang.Object
+
+
+
+
+
+
+
+
+
+
+
+
+
+Constructor Summary
+
+
+RobotUtils ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+RobotUtils
+
+public RobotUtils ()
+
+
+
+
+
+
+
+
+
+createFetcher
+
+public static BaseHttpFetcher createFetcher (BaseHttpFetcher fetcher)
+
+
+
+
+
+
+
+
+createFetcher
+
+public static BaseHttpFetcher createFetcher (UserAgent userAgent,
+ int maxThreads)
+
+
+
+
+
+
+
+
+getMaxFetchTime
+
+public static long getMaxFetchTime ()
+
+
+
+
+
+
+
+
+getRobotRules
+
+public static BaseRobotRules getRobotRules (BaseHttpFetcher fetcher,
+ BaseRobotsParser parser,
+ java.net.URL robotsUrl)
+
+Externally visible, static method for use in tools and for testing.
+ Fetch the indicated robots.txt file, parse it, and generate rules.
+
+
+Parameters: fetcher - Fetcher for downloading robots.txt file
+robotsUrl - URL to robots.txt file
+Returns: Robot rules
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRule.html b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRule.html
new file mode 100644
index 0000000..01c43e9
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRule.html
@@ -0,0 +1,249 @@
+
+
+
+
+
+
+SimpleRobotRules.RobotRule (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.robots
+
+Class SimpleRobotRules.RobotRule
+
+java.lang.Object
+ crawlercommons.robots.SimpleRobotRules.RobotRule
+
+
+Enclosing class: SimpleRobotRules
+
+
+
+protected class SimpleRobotRules.RobotRule extends java.lang.Object
+
+
+
+Single rule that maps from a path prefix to an allow flag.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+SimpleRobotRules.RobotRule
+
+public SimpleRobotRules.RobotRule (java.lang.String prefix,
+ boolean allow)
+
+
+
+
+
+SimpleRobotRules.RobotRule
+
+public SimpleRobotRules.RobotRule (java.util.regex.Pattern pattern,
+ boolean allow)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRulesMode.html b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRulesMode.html
new file mode 100644
index 0000000..99e2b1e
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.RobotRulesMode.html
@@ -0,0 +1,341 @@
+
+
+
+
+
+
+SimpleRobotRules.RobotRulesMode (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.robots
+
+Enum SimpleRobotRules.RobotRulesMode
+
+java.lang.Object
+ java.lang.Enum<SimpleRobotRules.RobotRulesMode >
+ crawlercommons.robots.SimpleRobotRules.RobotRulesMode
+
+
+All Implemented Interfaces: java.io.Serializable, java.lang.Comparable<SimpleRobotRules.RobotRulesMode >
+
+
+Enclosing class: SimpleRobotRules
+
+
+
+public static enum SimpleRobotRules.RobotRulesMode extends java.lang.Enum<SimpleRobotRules.RobotRulesMode >
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Enum
+
+
+clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Enum Constant Detail
+
+
+
+
+ALLOW_ALL
+
+public static final SimpleRobotRules.RobotRulesMode ALLOW_ALL
+
+
+
+
+
+
+
+ALLOW_NONE
+
+public static final SimpleRobotRules.RobotRulesMode ALLOW_NONE
+
+
+
+
+
+
+
+ALLOW_SOME
+
+public static final SimpleRobotRules.RobotRulesMode ALLOW_SOME
+
+
+
+
+
+
+
+
+
+
+
+values
+
+public static SimpleRobotRules.RobotRulesMode [] values ()
+
+Returns an array containing the constants of this enum type, in
+the order they are declared. This method may be used to iterate
+over the constants as follows:
+
+for (SimpleRobotRules.RobotRulesMode c : SimpleRobotRules.RobotRulesMode.values())
+ System.out.println(c);
+
+
+
+
+Returns: an array containing the constants of this enum type, in
+the order they are declared
+
+
+
+
+
+valueOf
+
+public static SimpleRobotRules.RobotRulesMode valueOf (java.lang.String name)
+
+Returns the enum constant of this type with the specified name.
+The string must match exactly an identifier used to declare an
+enum constant in this type. (Extraneous whitespace characters are
+not permitted.)
+
+
+Parameters: name
- the name of the enum constant to be returned.
+Returns: the enum constant with the specified name
+ Throws:
+java.lang.IllegalArgumentException
- if this enum type has no constant
+with the specified name
+java.lang.NullPointerException
- if the argument is null
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/SimpleRobotRules.html b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.html
new file mode 100644
index 0000000..8e6e326
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/SimpleRobotRules.html
@@ -0,0 +1,417 @@
+
+
+
+
+
+
+SimpleRobotRules (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.robots
+
+Class SimpleRobotRules
+
+java.lang.Object
+ crawlercommons.robots.BaseRobotRules
+ crawlercommons.robots.SimpleRobotRules
+
+
+
+public class SimpleRobotRules extends BaseRobotRules
+
+
+
+Result from parsing a single robots.txt file - which means we
+ get a set of rules, and a crawl-delay.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+ void
+addRule (java.lang.String prefix,
+ boolean allow)
+
+
+
+
+
+
+ void
+clearRules ()
+
+
+
+
+
+
+ boolean
+isAllowAll ()
+
+
+ Is our ruleset set up to allow all access?
+
+
+
+ boolean
+isAllowed (java.lang.String url)
+
+
+
+
+
+
+ boolean
+isAllowNone ()
+
+
+ Is our ruleset set up to disallow all access?
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+SimpleRobotRules
+
+public SimpleRobotRules ()
+
+
+
+
+
+SimpleRobotRules
+
+public SimpleRobotRules (SimpleRobotRules.RobotRulesMode mode)
+
+
+
+
+
+
+
+
+
+clearRules
+
+public void clearRules ()
+
+
+
+
+
+
+
+
+addRule
+
+public void addRule (java.lang.String prefix,
+ boolean allow)
+
+
+
+
+
+
+
+
+isAllowed
+
+public boolean isAllowed (java.lang.String url)
+
+
+Specified by: isAllowed
in class BaseRobotRules
+
+
+
+
+
+
+
+
+isAllowAll
+
+public boolean isAllowAll ()
+
+Is our ruleset set up to allow all access?
+
+
+Specified by: isAllowAll
in class BaseRobotRules
+
+
+
+Returns: true if all URLs are allowed.
+
+
+
+
+
+isAllowNone
+
+public boolean isAllowNone ()
+
+Is our ruleset set up to disallow all access?
+
+
+Specified by: isAllowNone
in class BaseRobotRules
+
+
+
+Returns: true if no URLs are allowed.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/SimpleRobotRulesParser.html b/doc/javadoc/crawlercommons/robots/SimpleRobotRulesParser.html
new file mode 100644
index 0000000..4659242
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/SimpleRobotRulesParser.html
@@ -0,0 +1,321 @@
+
+
+
+
+
+
+SimpleRobotRulesParser (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.robots
+
+Class SimpleRobotRulesParser
+
+java.lang.Object
+ crawlercommons.robots.BaseRobotsParser
+ crawlercommons.robots.SimpleRobotRulesParser
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class SimpleRobotRulesParser extends BaseRobotsParser
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+ BaseRobotRules
+failedFetch (int httpStatusCode)
+
+
+ The fetch of robots.txt failed, so return rules appropriate given the
+ HTTP status code.
+
+
+
+ int
+getNumWarnings ()
+
+
+
+
+
+
+ BaseRobotRules
+parseContent (java.lang.String url,
+ byte[] content,
+ java.lang.String contentType,
+ java.lang.String robotName)
+
+
+ Parse the robots.txt file in content, and return rules appropriate for
+ processing paths by robotName.
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+SimpleRobotRulesParser
+
+public SimpleRobotRulesParser ()
+
+
+
+
+
+
+
+
+
+failedFetch
+
+public BaseRobotRules failedFetch (int httpStatusCode)
+
+Description copied from class: BaseRobotsParser
+The fetch of robots.txt failed, so return rules appropriate given the
+ HTTP status code.
+
+
+Specified by: failedFetch
in class BaseRobotsParser
+
+
+Parameters: httpStatusCode - a failure status code (NOT 2xx)
+Returns: robot rules
+
+
+
+
+
+parseContent
+
+public BaseRobotRules parseContent (java.lang.String url,
+ byte[] content,
+ java.lang.String contentType,
+ java.lang.String robotName)
+
+Description copied from class: BaseRobotsParser
+Parse the robots.txt file in content, and return rules appropriate for
+ processing paths by robotName.
+
+
+Specified by: parseContent
in class BaseRobotsParser
+
+
+Parameters: url - URL that content was fetched from (for reporting purposes)
+content - raw bytes from the site's robots.txt file
+contentType - HTTP response header (mime-type)
+robotName - name of crawler, to be used when processing file contents
+ (just the name portion, w/o version or other details)
+Returns: robot rules.
+
+
+
+
+
+getNumWarnings
+
+public int getNumWarnings ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/class-use/BaseRobotRules.html b/doc/javadoc/crawlercommons/robots/class-use/BaseRobotRules.html
new file mode 100644
index 0000000..01648e0
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/class-use/BaseRobotRules.html
@@ -0,0 +1,239 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.BaseRobotRules (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.BaseRobotRules
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods in crawlercommons.robots that return BaseRobotRules
+
+
+
+ BaseRobotRules
+SimpleRobotRulesParser. failedFetch (int httpStatusCode)
+
+
+
+
+
+
+abstract BaseRobotRules
+BaseRobotsParser. failedFetch (int httpStatusCode)
+
+
+ The fetch of robots.txt failed, so return rules appropriate given the
+ HTTP status code.
+
+
+
+static BaseRobotRules
+RobotUtils. getRobotRules (BaseHttpFetcher fetcher,
+ BaseRobotsParser parser,
+ java.net.URL robotsUrl)
+
+
+ Externally visible, static method for use in tools and for testing.
+
+
+
+ BaseRobotRules
+SimpleRobotRulesParser. parseContent (java.lang.String url,
+ byte[] content,
+ java.lang.String contentType,
+ java.lang.String robotName)
+
+
+
+
+
+
+abstract BaseRobotRules
+BaseRobotsParser. parseContent (java.lang.String url,
+ byte[] content,
+ java.lang.String contentType,
+ java.lang.String robotName)
+
+
+ Parse the robots.txt file in content, and return rules appropriate for
+ processing paths by robotName.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/class-use/BaseRobotsParser.html b/doc/javadoc/crawlercommons/robots/class-use/BaseRobotsParser.html
new file mode 100644
index 0000000..992995d
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/class-use/BaseRobotsParser.html
@@ -0,0 +1,198 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.BaseRobotsParser (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.BaseRobotsParser
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/class-use/RobotUtils.html b/doc/javadoc/crawlercommons/robots/class-use/RobotUtils.html
new file mode 100644
index 0000000..45a870b
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/class-use/RobotUtils.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.RobotUtils (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.RobotUtils
+
+No usage of crawlercommons.robots.RobotUtils
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRule.html b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRule.html
new file mode 100644
index 0000000..dd5dad8
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRule.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.SimpleRobotRules.RobotRule (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.SimpleRobotRules.RobotRule
+
+No usage of crawlercommons.robots.SimpleRobotRules.RobotRule
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRulesMode.html b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRulesMode.html
new file mode 100644
index 0000000..419adfe
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.RobotRulesMode.html
@@ -0,0 +1,203 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.SimpleRobotRules.RobotRulesMode (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.SimpleRobotRules.RobotRulesMode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.html b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.html
new file mode 100644
index 0000000..6355a6b
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRules.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.SimpleRobotRules (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.SimpleRobotRules
+
+No usage of crawlercommons.robots.SimpleRobotRules
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRulesParser.html b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRulesParser.html
new file mode 100644
index 0000000..bacdb12
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/class-use/SimpleRobotRulesParser.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.SimpleRobotRulesParser (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.robots.SimpleRobotRulesParser
+
+No usage of crawlercommons.robots.SimpleRobotRulesParser
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/package-frame.html b/doc/javadoc/crawlercommons/robots/package-frame.html
new file mode 100644
index 0000000..adf4f38
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/package-frame.html
@@ -0,0 +1,51 @@
+
+
+
+
+
+
+crawlercommons.robots (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.robots
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/package-summary.html b/doc/javadoc/crawlercommons/robots/package-summary.html
new file mode 100644
index 0000000..7d7abe0
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/package-summary.html
@@ -0,0 +1,189 @@
+
+
+
+
+
+
+crawlercommons.robots (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Package crawlercommons.robots
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/package-tree.html b/doc/javadoc/crawlercommons/robots/package-tree.html
new file mode 100644
index 0000000..ff1ead3
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/package-tree.html
@@ -0,0 +1,168 @@
+
+
+
+
+
+
+crawlercommons.robots Class Hierarchy (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hierarchy For Package crawlercommons.robots
+
+
+
+Package Hierarchies: All Packages
+
+
+Class Hierarchy
+
+
+
+Enum Hierarchy
+
+
+java.lang.Object
+java.lang.Enum<E> (implements java.lang.Comparable<T>, java.io.Serializable)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/robots/package-use.html b/doc/javadoc/crawlercommons/robots/package-use.html
new file mode 100644
index 0000000..0ece2eb
--- /dev/null
+++ b/doc/javadoc/crawlercommons/robots/package-use.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+
+Uses of Package crawlercommons.robots (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Package crawlercommons.robots
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.SitemapType.html b/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.SitemapType.html
new file mode 100644
index 0000000..fc21bb9
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.SitemapType.html
@@ -0,0 +1,377 @@
+
+
+
+
+
+
+AbstractSiteMap.SitemapType (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.sitemaps
+
+Enum AbstractSiteMap.SitemapType
+
+java.lang.Object
+ java.lang.Enum<AbstractSiteMap.SitemapType >
+ crawlercommons.sitemaps.AbstractSiteMap.SitemapType
+
+
+All Implemented Interfaces: java.io.Serializable, java.lang.Comparable<AbstractSiteMap.SitemapType >
+
+
+Enclosing class: AbstractSiteMap
+
+
+
+public static enum AbstractSiteMap.SitemapType extends java.lang.Enum<AbstractSiteMap.SitemapType >
+
+
+
+Various Sitemap types
+
+
+
+
+
+
+
+
+
+
+
+
+Enum Constant Summary
+
+
+ATOM
+
+
+
+
+
+INDEX
+
+
+
+
+
+RSS
+
+
+
+
+
+TEXT
+
+
+
+
+
+XML
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Enum
+
+
+clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Enum Constant Detail
+
+
+
+
+INDEX
+
+public static final AbstractSiteMap.SitemapType INDEX
+
+
+
+
+
+
+
+XML
+
+public static final AbstractSiteMap.SitemapType XML
+
+
+
+
+
+
+
+ATOM
+
+public static final AbstractSiteMap.SitemapType ATOM
+
+
+
+
+
+
+
+RSS
+
+public static final AbstractSiteMap.SitemapType RSS
+
+
+
+
+
+
+
+TEXT
+
+public static final AbstractSiteMap.SitemapType TEXT
+
+
+
+
+
+
+
+
+
+
+
+values
+
+public static AbstractSiteMap.SitemapType [] values ()
+
+Returns an array containing the constants of this enum type, in
+the order they are declared. This method may be used to iterate
+over the constants as follows:
+
+for (AbstractSiteMap.SitemapType c : AbstractSiteMap.SitemapType.values())
+ System.out.println(c);
+
+
+
+
+Returns: an array containing the constants of this enum type, in
+the order they are declared
+
+
+
+
+
+valueOf
+
+public static AbstractSiteMap.SitemapType valueOf (java.lang.String name)
+
+Returns the enum constant of this type with the specified name.
+The string must match exactly an identifier used to declare an
+enum constant in this type. (Extraneous whitespace characters are
+not permitted.)
+
+
+Parameters: name
- the name of the enum constant to be returned.
+Returns: the enum constant with the specified name
+ Throws:
+java.lang.IllegalArgumentException
- if this enum type has no constant
+with the specified name
+java.lang.NullPointerException
- if the argument is null
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.html b/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.html
new file mode 100644
index 0000000..5cf3824
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/AbstractSiteMap.html
@@ -0,0 +1,438 @@
+
+
+
+
+
+
+AbstractSiteMap (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.sitemaps
+
+Class AbstractSiteMap
+
+java.lang.Object
+ crawlercommons.sitemaps.AbstractSiteMap
+
+
+Direct Known Subclasses: SiteMap , SiteMapIndex
+
+
+
+public abstract class AbstractSiteMap extends java.lang.Object
+
+
+
+SiteMap or SiteMapIndex
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+AbstractSiteMap
+
+public AbstractSiteMap ()
+
+
+
+
+
+
+
+
+
+getFullDateFormat
+
+public static java.text.DateFormat getFullDateFormat ()
+
+
+
+
+
+
+
+
+isIndex
+
+public boolean isIndex ()
+
+
+
+
+
+
+
+
+getUrl
+
+public java.net.URL getUrl ()
+
+
+
+Returns: the URL of the Sitemap
+
+
+
+
+
+getType
+
+public AbstractSiteMap.SitemapType getType ()
+
+
+
+Returns: the Sitemap type
+
+
+
+
+
+isProcessed
+
+public boolean isProcessed ()
+
+
+
+Returns: true if the Sitemap has been processed, i.e. it contains at least
+ one SiteMapURL
+
+
+
+
+
+setLastModified
+
+public void setLastModified (java.util.Date lastModified)
+
+
+Parameters: lastModified
- - the lastModified to set
+
+
+
+
+
+setLastModified
+
+public void setLastModified (java.lang.String lastModified)
+
+
+Parameters: lastModified
- - the lastModified to set
+
+
+
+
+
+getLastModified
+
+public java.util.Date getLastModified ()
+
+
+
+Returns: the lastModified date of the Sitemap
+
+
+
+
+
+convertToDate
+
+public static java.util.Date convertToDate (java.lang.String date)
+
+Converts the given date string (in an acceptable DateFormat) to a Date;
+ returns null if the date is not in the correct format.
+
+
+Parameters: date
- - the date to be parsed
+Returns: the Date equivalent
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMap.html b/doc/javadoc/crawlercommons/sitemaps/SiteMap.html
new file mode 100644
index 0000000..be3f97d
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/SiteMap.html
@@ -0,0 +1,425 @@
+
+
+
+
+
+
+SiteMap (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.sitemaps
+
+Class SiteMap
+
+java.lang.Object
+ crawlercommons.sitemaps.AbstractSiteMap
+ crawlercommons.sitemaps.SiteMap
+
+
+
+public class SiteMap extends AbstractSiteMap
+
+
+
+
+
+
+
+
+
+
+
+
+Nested Class Summary
+
+
+
+
+
+
+
+
+
+
+
+
+Constructor Summary
+
+
+SiteMap ()
+
+
+
+
+
+SiteMap (java.lang.String url)
+
+
+
+
+
+SiteMap (java.lang.String url,
+ java.lang.String lastModified)
+
+
+
+
+
+SiteMap (java.net.URL url)
+
+
+
+
+
+SiteMap (java.net.URL url,
+ java.util.Date lastModified)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+SiteMap
+
+public SiteMap ()
+
+
+
+
+
+SiteMap
+
+public SiteMap (java.net.URL url)
+
+
+
+
+
+SiteMap
+
+public SiteMap (java.lang.String url)
+
+
+
+
+
+SiteMap
+
+public SiteMap (java.net.URL url,
+ java.util.Date lastModified)
+
+
+
+
+
+SiteMap
+
+public SiteMap (java.lang.String url,
+ java.lang.String lastModified)
+
+
+
+
+
+
+
+
+
+getSiteMapUrls
+
+public java.util.Collection<SiteMapURL > getSiteMapUrls ()
+
+
+
+Returns: the Collection of SitemapUrls in this Sitemap.
+
+
+
+
+
+toString
+
+public java.lang.String toString ()
+
+
+Overrides: toString
in class java.lang.Object
+
+
+
+
+
+
+
+
+getBaseUrl
+
+public java.lang.String getBaseUrl ()
+
+
+
+Returns: the baseUrl for this Sitemap.
+
+
+
+
+
+addSiteMapUrl
+
+public void addSiteMapUrl (SiteMapURL url)
+
+
+Parameters: url
- The SitemapUrl to be added to the Sitemap.
+
+
+
+
+
+isIndex
+
+public boolean isIndex ()
+
+
+Overrides: isIndex
in class AbstractSiteMap
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMapIndex.html b/doc/javadoc/crawlercommons/sitemaps/SiteMapIndex.html
new file mode 100644
index 0000000..33f1b4d
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/SiteMapIndex.html
@@ -0,0 +1,405 @@
+
+
+
+
+
+
+SiteMapIndex (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.sitemaps
+
+Class SiteMapIndex
+
+java.lang.Object
+ crawlercommons.sitemaps.AbstractSiteMap
+ crawlercommons.sitemaps.SiteMapIndex
+
+
+
+public class SiteMapIndex extends AbstractSiteMap
+
+
+
+
+
+
+
+
+
+
+
+
+Nested Class Summary
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+SiteMapIndex
+
+public SiteMapIndex ()
+
+
+
+
+
+SiteMapIndex
+
+public SiteMapIndex (java.net.URL url)
+
+
+
+
+
+
+
+
+
+getSitemaps
+
+public java.util.Collection<AbstractSiteMap > getSitemaps ()
+
+
+
+Returns: a Collection of Sitemaps in this Sitemap Index.
+
+
+
+
+
+getSitemap
+
+public AbstractSiteMap getSitemap (java.net.URL url)
+
+Returns the Sitemap that has the given URL. Returns null if the URL
+ cannot be found.
+
+
+Parameters: url
- - The Sitemap's URL
+Returns: SiteMap corresponding to the URL or null
+
+
+
+
+
+hasUnprocessedSitemap
+
+public boolean hasUnprocessedSitemap ()
+
+
+
+Returns: true if there are Sitemaps in this index that have not been
+ processed yet, false otherwise.
+
+
+
+
+
+nextUnprocessedSitemap
+
+public AbstractSiteMap nextUnprocessedSitemap ()
+
+
+
+Returns: an unprocessed Sitemap or null if no unprocessed Sitemaps could
+ be found.
+
+
+
+
+
+toString
+
+public java.lang.String toString ()
+
+
+Overrides: toString
in class java.lang.Object
+
+
+
+
+
+
+
+
+isIndex
+
+public boolean isIndex ()
+
+
+Overrides: isIndex
in class AbstractSiteMap
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMapParser.html b/doc/javadoc/crawlercommons/sitemaps/SiteMapParser.html
new file mode 100644
index 0000000..efe3e30
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/SiteMapParser.html
@@ -0,0 +1,352 @@
+
+
+
+
+
+
+SiteMapParser (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.sitemaps
+
+Class SiteMapParser
+
+java.lang.Object
+ crawlercommons.sitemaps.SiteMapParser
+
+
+
+public class SiteMapParser extends java.lang.Object
+
+
+
+
+
+
+
+
+
+
+
+
+Field Summary
+
+
+
+static org.slf4j.Logger
+LOG
+
+
+
+
+
+
+static int
+MAX_BYTES_ALLOWED
+
+
+ Sitemap docs must be limited to 10MB (10,485,760 bytes)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+ AbstractSiteMap
+parseSiteMap (java.lang.String contentType,
+ byte[] content,
+ AbstractSiteMap sitemap)
+
+
+ Returns a processed copy of an unprocessed sitemap object, i.e.
+
+
+
+ AbstractSiteMap
+parseSiteMap (java.lang.String contentType,
+ byte[] content,
+ java.net.URL url)
+
+
+ Returns a SiteMap or SiteMapIndex given a content type, byte content and
+ the URL of a sitemap
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+LOG
+
+public static final org.slf4j.Logger LOG
+
+
+
+
+
+
+
+MAX_BYTES_ALLOWED
+
+public static int MAX_BYTES_ALLOWED
+
+Sitemap docs must be limited to 10MB (10,485,760 bytes)
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+SiteMapParser
+
+public SiteMapParser ()
+
+
+
+
+
+
+
+
+
+parseSiteMap
+
+public AbstractSiteMap parseSiteMap (java.lang.String contentType,
+ byte[] content,
+ AbstractSiteMap sitemap)
+ throws UnknownFormatException ,
+ java.io.IOException
+
+Returns a processed copy of an unprocessed sitemap object, i.e. transfers the value of
+ getLastModified and sets the original sitemap to processed.
+
+
+
+Throws:
+UnknownFormatException
+java.io.IOException
+
+
+
+
+
+parseSiteMap
+
+public AbstractSiteMap parseSiteMap (java.lang.String contentType,
+ byte[] content,
+ java.net.URL url)
+ throws UnknownFormatException ,
+ java.io.IOException
+
+Returns a SiteMap or SiteMapIndex given a content type, byte content and
+ the URL of a sitemap
+
+
+
+Throws:
+UnknownFormatException
+java.io.IOException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.ChangeFrequency.html b/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.ChangeFrequency.html
new file mode 100644
index 0000000..40aa22d
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.ChangeFrequency.html
@@ -0,0 +1,409 @@
+
+
+
+
+
+
+SiteMapURL.ChangeFrequency (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.sitemaps
+
+Enum SiteMapURL.ChangeFrequency
+
+java.lang.Object
+ java.lang.Enum<SiteMapURL.ChangeFrequency >
+ crawlercommons.sitemaps.SiteMapURL.ChangeFrequency
+
+
+All Implemented Interfaces: java.io.Serializable, java.lang.Comparable<SiteMapURL.ChangeFrequency >
+
+
+Enclosing class: SiteMapURL
+
+
+
+public static enum SiteMapURL.ChangeFrequency extends java.lang.Enum<SiteMapURL.ChangeFrequency >
+
+
+
+Allowed change frequencies
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Enum
+
+
+clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+getClass, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Enum Constant Detail
+
+
+
+
+ALWAYS
+
+public static final SiteMapURL.ChangeFrequency ALWAYS
+
+
+
+
+
+
+
+HOURLY
+
+public static final SiteMapURL.ChangeFrequency HOURLY
+
+
+
+
+
+
+
+DAILY
+
+public static final SiteMapURL.ChangeFrequency DAILY
+
+
+
+
+
+
+
+WEEKLY
+
+public static final SiteMapURL.ChangeFrequency WEEKLY
+
+
+
+
+
+
+
+MONTHLY
+
+public static final SiteMapURL.ChangeFrequency MONTHLY
+
+
+
+
+
+
+
+YEARLY
+
+public static final SiteMapURL.ChangeFrequency YEARLY
+
+
+
+
+
+
+
+NEVER
+
+public static final SiteMapURL.ChangeFrequency NEVER
+
+
+
+
+
+
+
+
+
+
+
+values
+
+public static SiteMapURL.ChangeFrequency [] values ()
+
+Returns an array containing the constants of this enum type, in
+the order they are declared. This method may be used to iterate
+over the constants as follows:
+
+for (SiteMapURL.ChangeFrequency c : SiteMapURL.ChangeFrequency.values())
+ System.out.println(c);
+
+
+
+
+Returns: an array containing the constants of this enum type, in
+the order they are declared
+
+
+
+
+
+valueOf
+
+public static SiteMapURL.ChangeFrequency valueOf (java.lang.String name)
+
+Returns the enum constant of this type with the specified name.
+The string must match exactly an identifier used to declare an
+enum constant in this type. (Extraneous whitespace characters are
+not permitted.)
+
+
+Parameters: name
- the name of the enum constant to be returned.
+Returns: the enum constant with the specified name
+ Throws:
+java.lang.IllegalArgumentException
- if this enum type has no constant
+with the specified name
+java.lang.NullPointerException
- if the argument is null
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.html b/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.html
new file mode 100644
index 0000000..5b0bd1f
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/SiteMapURL.html
@@ -0,0 +1,594 @@
+
+
+
+
+
+
+SiteMapURL (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.sitemaps
+
+Class SiteMapURL
+
+java.lang.Object
+ crawlercommons.sitemaps.SiteMapURL
+
+
+
+public class SiteMapURL extends java.lang.Object
+
+
+
+The SiteMapURL class represents a URL found in a Sitemap.
+
+
+
+
+Author:
+ fmccown
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Constructor Summary
+
+
+SiteMapURL (java.lang.String url)
+
+
+
+
+
+SiteMapURL (java.lang.String url,
+ java.lang.String lastModified,
+ java.lang.String changeFreq,
+ java.lang.String priority)
+
+
+
+
+
+SiteMapURL (java.net.URL url)
+
+
+
+
+
+SiteMapURL (java.net.URL url,
+ java.util.Date lastModified,
+ SiteMapURL.ChangeFrequency changeFreq,
+ double priority)
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+ SiteMapURL.ChangeFrequency
+getChangeFrequency ()
+
+
+ Return the URL's change frequency
+
+
+
+ java.util.Date
+getLastModified ()
+
+
+ Return when this URL was last modified.
+
+
+
+ double
+getPriority ()
+
+
+ Return this URL's priority (a value between [0.0 - 1.0]).
+
+
+
+ java.net.URL
+getUrl ()
+
+
+ Return the URL.
+
+
+
+ void
+setChangeFrequency (SiteMapURL.ChangeFrequency changeFreq)
+
+
+ Set the URL's change frequency
+
+
+
+ void
+setChangeFrequency (java.lang.String changeFreq)
+
+
+ Set the URL's change frequency
+
+
+
+ void
+setLastModified (java.util.Date lastModified)
+
+
+ Set when this URL was last modified.
+
+
+
+ void
+setLastModified (java.lang.String lastModified)
+
+
+ Set when this URL was last modified.
+
+
+
+ void
+setPriority (double priority)
+
+
+ Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the
+ given priority is out of range).
+
+
+
+ void
+setPriority (java.lang.String priority)
+
+
+ Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the
+ given priority is out of range).
+
+
+
+ void
+setUrl (java.lang.String url)
+
+
+ Set the URL.
+
+
+
+ void
+setUrl (java.net.URL url)
+
+
+ Set the URL.
+
+
+
+ java.lang.String
+toString ()
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+SiteMapURL
+
+public SiteMapURL (java.lang.String url)
+
+
+
+
+
+SiteMapURL
+
+public SiteMapURL (java.net.URL url)
+
+
+
+
+
+SiteMapURL
+
+public SiteMapURL (java.lang.String url,
+ java.lang.String lastModified,
+ java.lang.String changeFreq,
+ java.lang.String priority)
+
+
+
+
+
+SiteMapURL
+
+public SiteMapURL (java.net.URL url,
+ java.util.Date lastModified,
+ SiteMapURL.ChangeFrequency changeFreq,
+ double priority)
+
+
+
+
+
+
+
+
+
+getUrl
+
+public java.net.URL getUrl ()
+
+Return the URL.
+
+
+
+Returns: URL
+
+
+
+
+
+setUrl
+
+public void setUrl (java.net.URL url)
+
+Set the URL.
+
+
+Parameters: url
-
+
+
+
+
+
+setUrl
+
+public void setUrl (java.lang.String url)
+
+Set the URL.
+
+
+Parameters: url
-
+
+
+
+
+
+getLastModified
+
+public java.util.Date getLastModified ()
+
+Return when this URL was last modified.
+
+
+
+Returns: last modified date
+
+
+
+
+
+setLastModified
+
+public void setLastModified (java.lang.String lastModified)
+
+Set when this URL was last modified.
+
+
+Parameters: lastModified
-
+
+
+
+
+
+setLastModified
+
+public void setLastModified (java.util.Date lastModified)
+
+Set when this URL was last modified.
+
+
+Parameters: lastModified
-
+
+
+
+
+
+getPriority
+
+public double getPriority ()
+
+Return this URL's priority (a value between [0.0 - 1.0]).
+
+
+
+Returns: URL's priority (a value between [0.0 - 1.0])
+
+
+
+
+
+setPriority
+
+public void setPriority (double priority)
+
+Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the
+ given priority is out of range).
+
+
+Parameters: priority
-
+
+
+
+
+
+setPriority
+
+public void setPriority (java.lang.String priority)
+
+Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the
+ given priority is out of range).
+
+
+Parameters: priority
-
+
+
+
+
+
+getChangeFrequency
+
+public SiteMapURL.ChangeFrequency getChangeFrequency ()
+
+Return the URL's change frequency
+
+
+
+Returns: the URL's change frequency
+
+
+
+
+
+setChangeFrequency
+
+public void setChangeFrequency (SiteMapURL.ChangeFrequency changeFreq)
+
+Set the URL's change frequency
+
+
+Parameters: changeFreq
-
+
+
+
+
+
+setChangeFrequency
+
+public void setChangeFrequency (java.lang.String changeFreq)
+
+Set the URL's change frequency
+
+
+Parameters: changeFreq
-
+
+
+
+
+
+toString
+
+public java.lang.String toString ()
+
+
+Overrides: toString
in class java.lang.Object
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/UnknownFormatException.html b/doc/javadoc/crawlercommons/sitemaps/UnknownFormatException.html
new file mode 100644
index 0000000..db79670
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/UnknownFormatException.html
@@ -0,0 +1,282 @@
+
+
+
+
+
+
+UnknownFormatException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.sitemaps
+
+Class UnknownFormatException
+
+java.lang.Object
+ java.lang.Throwable
+ java.lang.Exception
+ crawlercommons.sitemaps.UnknownFormatException
+
+
+All Implemented Interfaces: java.io.Serializable
+
+
+
+public class UnknownFormatException extends java.lang.Exception
+
+
+
+
+See Also: Serialized Form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+ java.lang.String
+getError ()
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Throwable
+
+
+fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+UnknownFormatException
+
+public UnknownFormatException ()
+
+
+
+
+
+UnknownFormatException
+
+public UnknownFormatException (java.lang.String err)
+
+
+
+
+
+
+
+
+
+getError
+
+public java.lang.String getError ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.SitemapType.html b/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.SitemapType.html
new file mode 100644
index 0000000..4db7d87
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.SitemapType.html
@@ -0,0 +1,197 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.AbstractSiteMap.SitemapType (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.AbstractSiteMap.SitemapType
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.html b/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.html
new file mode 100644
index 0000000..928aeaf
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/class-use/AbstractSiteMap.html
@@ -0,0 +1,267 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.AbstractSiteMap (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.AbstractSiteMap
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMap.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMap.html
new file mode 100644
index 0000000..f457601
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMap.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMap (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMap
+
+No usage of crawlercommons.sitemaps.SiteMap
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapIndex.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapIndex.html
new file mode 100644
index 0000000..9d07cc3
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapIndex.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMapIndex (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMapIndex
+
+No usage of crawlercommons.sitemaps.SiteMapIndex
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapParser.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapParser.html
new file mode 100644
index 0000000..3d99635
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapParser.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMapParser (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMapParser
+
+No usage of crawlercommons.sitemaps.SiteMapParser
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.ChangeFrequency.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.ChangeFrequency.html
new file mode 100644
index 0000000..f13f2fe
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.ChangeFrequency.html
@@ -0,0 +1,230 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMapURL.ChangeFrequency (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMapURL.ChangeFrequency
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.html b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.html
new file mode 100644
index 0000000..691739e
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/class-use/SiteMapURL.html
@@ -0,0 +1,196 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMapURL (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.SiteMapURL
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/class-use/UnknownFormatException.html b/doc/javadoc/crawlercommons/sitemaps/class-use/UnknownFormatException.html
new file mode 100644
index 0000000..a2d5f2c
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/class-use/UnknownFormatException.html
@@ -0,0 +1,193 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.UnknownFormatException (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.sitemaps.UnknownFormatException
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/package-frame.html b/doc/javadoc/crawlercommons/sitemaps/package-frame.html
new file mode 100644
index 0000000..284935a
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/package-frame.html
@@ -0,0 +1,64 @@
+
+
+
+
+
+
+crawlercommons.sitemaps (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.sitemaps
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/package-summary.html b/doc/javadoc/crawlercommons/sitemaps/package-summary.html
new file mode 100644
index 0000000..1cc290e
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/package-summary.html
@@ -0,0 +1,205 @@
+
+
+
+
+
+
+crawlercommons.sitemaps (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Package crawlercommons.sitemaps
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/package-tree.html b/doc/javadoc/crawlercommons/sitemaps/package-tree.html
new file mode 100644
index 0000000..36c1fb7
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/package-tree.html
@@ -0,0 +1,170 @@
+
+
+
+
+
+
+crawlercommons.sitemaps Class Hierarchy (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hierarchy For Package crawlercommons.sitemaps
+
+
+
+Package Hierarchies: All Packages
+
+
+Class Hierarchy
+
+
+
+Enum Hierarchy
+
+
+java.lang.Object
+java.lang.Enum<E> (implements java.lang.Comparable<T>, java.io.Serializable)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/sitemaps/package-use.html b/doc/javadoc/crawlercommons/sitemaps/package-use.html
new file mode 100644
index 0000000..1a4f802
--- /dev/null
+++ b/doc/javadoc/crawlercommons/sitemaps/package-use.html
@@ -0,0 +1,194 @@
+
+
+
+
+
+
+Uses of Package crawlercommons.sitemaps (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Package crawlercommons.sitemaps
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/EffectiveTldFinder.EffectiveTLD.html b/doc/javadoc/crawlercommons/url/EffectiveTldFinder.EffectiveTLD.html
new file mode 100644
index 0000000..3b9f873
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/EffectiveTldFinder.EffectiveTLD.html
@@ -0,0 +1,315 @@
+
+
+
+
+
+
+EffectiveTldFinder.EffectiveTLD (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.url
+
+Class EffectiveTldFinder.EffectiveTLD
+
+java.lang.Object
+ crawlercommons.url.EffectiveTldFinder.EffectiveTLD
+
+
+Enclosing class: EffectiveTldFinder
+
+
+
+public static class EffectiveTldFinder.EffectiveTLD extends java.lang.Object
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+ java.lang.String
+getDomain ()
+
+
+
+
+
+
+ boolean
+isException ()
+
+
+
+
+
+
+ boolean
+isWild ()
+
+
+
+
+
+
+ java.lang.String
+toString ()
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+EffectiveTldFinder.EffectiveTLD
+
+public EffectiveTldFinder.EffectiveTLD (java.lang.String line)
+
+
+
+
+
+
+
+
+
+getDomain
+
+public java.lang.String getDomain ()
+
+
+
+
+
+
+
+
+isWild
+
+public boolean isWild ()
+
+
+
+
+
+
+
+
+isException
+
+public boolean isException ()
+
+
+
+
+
+
+
+
+toString
+
+public java.lang.String toString ()
+
+
+Overrides: toString
in class java.lang.Object
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/EffectiveTldFinder.html b/doc/javadoc/crawlercommons/url/EffectiveTldFinder.html
new file mode 100644
index 0000000..650cd62
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/EffectiveTldFinder.html
@@ -0,0 +1,478 @@
+
+
+
+
+
+
+EffectiveTldFinder (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.url
+
+Class EffectiveTldFinder
+
+java.lang.Object
+ crawlercommons.url.EffectiveTldFinder
+
+
+
+public class EffectiveTldFinder extends java.lang.Object
+
+
+
+Given a URL's hostname, determining the actual domain requires
+ knowledge of the various domain registrars and their assignment policies.
+ The best publicly available knowledge of this is maintained by the Mozilla
+ developers; this class uses their data file format. For more information, see
+
+
+ This class just needs "effective_tld_names.dat" in the classpath. If you want
+ to configure it with other data, call EffectiveTldFinder.getInstance().initialize(is)
+ and have at it.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Field Summary
+
+
+
+static java.lang.String
+COMMENT
+
+
+
+
+
+
+static char
+DOT
+
+
+
+
+
+
+static java.lang.String
+DOT_REGEX
+
+
+
+
+
+
+static java.lang.String
+ETLD_DATA
+
+
+
+
+
+
+static java.lang.String
+EXCEPTION
+
+
+
+
+
+
+static java.lang.String
+WILD_CARD
+
+
+
+
+
+
+
+
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+ETLD_DATA
+
+public static final java.lang.String ETLD_DATA
+
+
+See Also: Constant Field Values
+
+
+
+
+COMMENT
+
+public static final java.lang.String COMMENT
+
+
+See Also: Constant Field Values
+
+
+
+
+DOT_REGEX
+
+public static final java.lang.String DOT_REGEX
+
+
+See Also: Constant Field Values
+
+
+
+
+EXCEPTION
+
+public static final java.lang.String EXCEPTION
+
+
+See Also: Constant Field Values
+
+
+
+
+WILD_CARD
+
+public static final java.lang.String WILD_CARD
+
+
+See Also: Constant Field Values
+
+
+
+
+DOT
+
+public static final char DOT
+
+
+See Also: Constant Field Values
+
+
+
+
+
+
+
+
+getInstance
+
+public static EffectiveTldFinder getInstance ()
+
+
+
+
+
+
+
+
+initialize
+
+public boolean initialize (java.io.InputStream effective_tld_data_stream)
+
+
+
+
+
+
+
+
+getEffectiveTLDs
+
+public static java.util.Map<java.lang.String,EffectiveTldFinder.EffectiveTLD > getEffectiveTLDs ()
+
+
+
+
+
+
+
+
+getEffectiveTLD
+
+public static EffectiveTldFinder.EffectiveTLD getEffectiveTLD (java.lang.String hostname)
+
+
+Parameters: hostname
-
+Returns: the Effective TLD
+
+
+
+
+
+getAssignedDomain
+
+public static java.lang.String getAssignedDomain (java.lang.String hostname)
+
+This method uses the effective TLD to determine which component of
+ a FQDN is the NIC-assigned domain name.
+
+
+Parameters: hostname
-
+Returns: the NIC-assigned domain name
+
+
+
+
+
+isConfigured
+
+public boolean isConfigured ()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/PaidLevelDomain.html b/doc/javadoc/crawlercommons/url/PaidLevelDomain.html
new file mode 100644
index 0000000..8887ea0
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/PaidLevelDomain.html
@@ -0,0 +1,282 @@
+
+
+
+
+
+
+PaidLevelDomain (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.url
+
+Class PaidLevelDomain
+
+java.lang.Object
+ crawlercommons.url.PaidLevelDomain
+
+
+
+public class PaidLevelDomain extends java.lang.Object
+
+
+
+Routines to extract the PLD (paid-level domain, as per the IRLbot paper) from a hostname or URL.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Method Summary
+
+
+
+static java.lang.String
+getPLD (java.lang.String hostname)
+
+
+ Extract the PLD (paid-level domain) from the hostname.
+
+
+
+static java.lang.String
+getPLD (java.net.URL url)
+
+
+ Extract the PLD (paid-level domain) from the URL.
+
+
+
+
+
+Methods inherited from class java.lang.Object
+
+
+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+
+
+
+
+
+
+
+
+
+
+
+Constructor Detail
+
+
+
+
+PaidLevelDomain
+
+public PaidLevelDomain ()
+
+
+
+
+
+
+
+
+
+getPLD
+
+public static java.lang.String getPLD (java.lang.String hostname)
+
+Extract the PLD (paid-level domain) from the hostname. If the format isn't recognized,
+ the original hostname is returned.
+
+
+Parameters: hostname
- - hostname from URL, e.g. www.domain.com.it
+Returns: - PLD, e.g. domain.com.it
+
+
+
+
+
+getPLD
+
+public static java.lang.String getPLD (java.net.URL url)
+
+Extract the PLD (paid-level domain) from the URL.
+
+
+Parameters: url
- - Valid URL, e.g. http://www.domain.com.it
+Returns: - PLD e.g. domain.com.it
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.EffectiveTLD.html b/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.EffectiveTLD.html
new file mode 100644
index 0000000..7fdb951
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.EffectiveTLD.html
@@ -0,0 +1,196 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.url.EffectiveTldFinder.EffectiveTLD (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.url.EffectiveTldFinder.EffectiveTLD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.html b/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.html
new file mode 100644
index 0000000..65addb1
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/class-use/EffectiveTldFinder.html
@@ -0,0 +1,180 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.url.EffectiveTldFinder (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.url.EffectiveTldFinder
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/class-use/PaidLevelDomain.html b/doc/javadoc/crawlercommons/url/class-use/PaidLevelDomain.html
new file mode 100644
index 0000000..84a42b5
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/class-use/PaidLevelDomain.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+Uses of Class crawlercommons.url.PaidLevelDomain (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Class crawlercommons.url.PaidLevelDomain
+
+No usage of crawlercommons.url.PaidLevelDomain
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/package-frame.html b/doc/javadoc/crawlercommons/url/package-frame.html
new file mode 100644
index 0000000..3722d2d
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/package-frame.html
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+crawlercommons.url (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons.url
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/package-summary.html b/doc/javadoc/crawlercommons/url/package-summary.html
new file mode 100644
index 0000000..92a08b2
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/package-summary.html
@@ -0,0 +1,166 @@
+
+
+
+
+
+
+crawlercommons.url (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Package crawlercommons.url
+
+
+
+
+
+Class Summary
+
+
+EffectiveTldFinder
+Given a URL's hostname, determining the actual domain requires
+ knowledge of the various domain registrars and their assignment policies.
+
+
+EffectiveTldFinder.EffectiveTLD
+
+
+
+PaidLevelDomain
+Routines to extract the PLD (paid-level domain, as per the IRLbot paper) from a hostname or URL.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/package-tree.html b/doc/javadoc/crawlercommons/url/package-tree.html
new file mode 100644
index 0000000..e75ed47
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/package-tree.html
@@ -0,0 +1,153 @@
+
+
+
+
+
+
+crawlercommons.url Class Hierarchy (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hierarchy For Package crawlercommons.url
+
+
+
+Package Hierarchies: All Packages
+
+
+Class Hierarchy
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/crawlercommons/url/package-use.html b/doc/javadoc/crawlercommons/url/package-use.html
new file mode 100644
index 0000000..1332d72
--- /dev/null
+++ b/doc/javadoc/crawlercommons/url/package-use.html
@@ -0,0 +1,177 @@
+
+
+
+
+
+
+Uses of Package crawlercommons.url (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Uses of Package crawlercommons.url
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/deprecated-list.html b/doc/javadoc/deprecated-list.html
new file mode 100644
index 0000000..4631eaf
--- /dev/null
+++ b/doc/javadoc/deprecated-list.html
@@ -0,0 +1,146 @@
+
+
+
+
+
+
+Deprecated List (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deprecated API
+
+
+Contents
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/help-doc.html b/doc/javadoc/help-doc.html
new file mode 100644
index 0000000..3175885
--- /dev/null
+++ b/doc/javadoc/help-doc.html
@@ -0,0 +1,223 @@
+
+
+
+
+
+
+API Help (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+How This API Document Is Organized
+
+This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+Overview
+
+
+
+The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.
+
+Package
+
+
+
+Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:
+Interfaces (italic) Classes Enums Exceptions Errors Annotation Types
+
+
+Class/Interface
+
+
+
+Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:
+Class inheritance diagram Direct Subclasses All Known Subinterfaces All Known Implementing Classes Class/interface declaration Class/interface description
+
+
Nested Class Summary Field Summary Constructor Summary Method Summary
+
+
Field Detail Constructor Detail Method Detail
+Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.
+
+
+Annotation Type
+
+
+
+Each annotation type has its own separate page with the following sections:
+Annotation Type declaration Annotation Type description Required Element Summary Optional Element Summary Element Detail
+
+
+
+Enum
+
+
+
+Each enum has its own separate page with the following sections:
+Enum declaration Enum description Enum Constant Summary Enum Constant Detail
+
+
+Use
+
+Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.
+
+Tree (Class Hierarchy)
+
+There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object
. The interfaces do not inherit from java.lang.Object
.
+When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages. When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
+
+
+Deprecated API
+
+The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.
+
+Index
+
+The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.
+
+Prev/Next
+These links take you to the next or previous class, interface, package, or related page.
+Frames/No Frames
+These links show and hide the HTML frames. All pages are available with or without frames.
+
+
+Serialized Form
+Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.
+
+
+Constant Field Values
+The Constant Field Values page lists the static final fields and their values.
+
+
+
+This help file applies to API documentation generated using the standard doclet.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/index-all.html b/doc/javadoc/index-all.html
new file mode 100644
index 0000000..9f52bab
--- /dev/null
+++ b/doc/javadoc/index-all.html
@@ -0,0 +1,1047 @@
+
+
+
+
+
+
+Index (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+A B C D E F G H I K L M N P R S T U V W _
+
+A
+
+abort() -
+Method in class crawlercommons.fetcher.BaseFetcher
+Terminate any async request being processed.
+ abort() -
+Method in class crawlercommons.fetcher.file.SimpleFileFetcher
+
+ abort() -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ AbortedFetchException - Exception in crawlercommons.fetcher AbortedFetchException() -
+Constructor for exception crawlercommons.fetcher.AbortedFetchException
+
+ AbortedFetchException(String, AbortedFetchReason) -
+Constructor for exception crawlercommons.fetcher.AbortedFetchException
+
+ AbortedFetchException(String, String, AbortedFetchReason) -
+Constructor for exception crawlercommons.fetcher.AbortedFetchException
+
+ AbortedFetchReason - Enum in crawlercommons.fetcher AbstractSiteMap - Class in crawlercommons.sitemaps SiteMap or SiteMapIndex AbstractSiteMap() -
+Constructor for class crawlercommons.sitemaps.AbstractSiteMap
+
+ AbstractSiteMap.SitemapType - Enum in crawlercommons.sitemaps Various Sitemap types addRule(String, boolean) -
+Method in class crawlercommons.robots.SimpleRobotRules
+
+ addSitemap(String) -
+Method in class crawlercommons.robots.BaseRobotRules
+
+ addSiteMapUrl(SiteMapURL) -
+Method in class crawlercommons.sitemaps.SiteMap
+
+ addValidMimeType(String) -
+Method in class crawlercommons.fetcher.BaseFetcher
+
+ addValidMimeTypes(Set<String>) -
+Method in class crawlercommons.fetcher.BaseFetcher
+
+
+
+
+B
+
+BadProtocolFetchException - Exception in crawlercommons.fetcher BadProtocolFetchException() -
+Constructor for exception crawlercommons.fetcher.BadProtocolFetchException
+
+ BadProtocolFetchException(String) -
+Constructor for exception crawlercommons.fetcher.BadProtocolFetchException
+
+ BaseFetcher - Class in crawlercommons.fetcher BaseFetcher() -
+Constructor for class crawlercommons.fetcher.BaseFetcher
+
+ BaseFetchException - Exception in crawlercommons.fetcher BaseFetchException() -
+Constructor for exception crawlercommons.fetcher.BaseFetchException
+
+ BaseFetchException(String) -
+Constructor for exception crawlercommons.fetcher.BaseFetchException
+
+ BaseFetchException(String, String) -
+Constructor for exception crawlercommons.fetcher.BaseFetchException
+
+ BaseFetchException(String, Exception) -
+Constructor for exception crawlercommons.fetcher.BaseFetchException
+
+ BaseFetchException(String, String, Exception) -
+Constructor for exception crawlercommons.fetcher.BaseFetchException
+
+ BaseHttpFetcher - Class in crawlercommons.fetcher.http BaseHttpFetcher(int, UserAgent) -
+Constructor for class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ BaseHttpFetcher.RedirectMode - Enum in crawlercommons.fetcher.http BaseRobotRules - Class in crawlercommons.robots Result from parsing a single robots.txt file - which means we
+ get a set of rules, and a crawl-delay. BaseRobotRules() -
+Constructor for class crawlercommons.robots.BaseRobotRules
+
+ BaseRobotsParser - Class in crawlercommons.robots BaseRobotsParser() -
+Constructor for class crawlercommons.robots.BaseRobotsParser
+
+
+
+
+C
+
+clear() -
+Method in class crawlercommons.fetcher.Payload
+
+ clearRules() -
+Method in class crawlercommons.robots.SimpleRobotRules
+
+ COMMENT -
+Static variable in class crawlercommons.url.EffectiveTldFinder
+
+ compareToBase(BaseFetchException) -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ containsKey(Object) -
+Method in class crawlercommons.fetcher.Payload
+
+ containsValue(Object) -
+Method in class crawlercommons.fetcher.Payload
+
+ convertToDate(String) -
+Static method in class crawlercommons.sitemaps.AbstractSiteMap
+Convert the given date (given in an acceptable DateFormat), null if the
+ date is not in the correct format.
+ crawlercommons.fetcher - package crawlercommons.fetcher crawlercommons.fetcher.file - package crawlercommons.fetcher.file crawlercommons.fetcher.http - package crawlercommons.fetcher.http crawlercommons.robots - package crawlercommons.robots crawlercommons.sitemaps - package crawlercommons.sitemaps crawlercommons.url - package crawlercommons.url createFetcher(BaseHttpFetcher) -
+Static method in class crawlercommons.robots.RobotUtils
+
+ createFetcher(UserAgent, int) -
+Static method in class crawlercommons.robots.RobotUtils
+
+
+
+
+D
+
+DEFAULT_ACCEPT_LANGUAGE -
+Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ DEFAULT_BROWSER_VERSION -
+Static variable in class crawlercommons.fetcher.http.UserAgent
+
+ DEFAULT_MAX_CONNECTIONS_PER_HOST -
+Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ DEFAULT_MAX_CONTENT_SIZE -
+Static variable in class crawlercommons.fetcher.BaseFetcher
+
+ DEFAULT_MAX_REDIRECTS -
+Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ DEFAULT_MIN_RESPONSE_RATE -
+Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ DEFAULT_REDIRECT_MODE -
+Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ DOT -
+Static variable in class crawlercommons.url.EffectiveTldFinder
+
+ DOT_REGEX -
+Static variable in class crawlercommons.url.EffectiveTldFinder
+
+
+
+
+E
+
+EffectiveTldFinder - Class in crawlercommons.url Given a URL's hostname, determining the actual domain requires
+ knowledge of the various domain registrars and their assignment policies. EffectiveTldFinder.EffectiveTLD - Class in crawlercommons.url EffectiveTldFinder.EffectiveTLD(String) -
+Constructor for class crawlercommons.url.EffectiveTldFinder.EffectiveTLD
+
+ EncodingUtils - Class in crawlercommons.fetcher EncodingUtils() -
+Constructor for class crawlercommons.fetcher.EncodingUtils
+
+ EncodingUtils.ExpandedResult - Class in crawlercommons.fetcher EncodingUtils.ExpandedResult(byte[], boolean) -
+Constructor for class crawlercommons.fetcher.EncodingUtils.ExpandedResult
+
+ entrySet() -
+Method in class crawlercommons.fetcher.Payload
+
+ equals(Object) -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ equals(Object) -
+Method in class crawlercommons.fetcher.Payload
+
+ ETLD_DATA -
+Static variable in class crawlercommons.url.EffectiveTldFinder
+
+ EXCEPTION -
+Static variable in class crawlercommons.url.EffectiveTldFinder
+
+
+
+
+F
+
+failedFetch(int) -
+Method in class crawlercommons.robots.BaseRobotsParser
+The fetch of robots.txt failed, so return rules appropriate given the
+ HTTP status code.
+ failedFetch(int) -
+Method in class crawlercommons.robots.SimpleRobotRulesParser
+
+ fetch(String) -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ fetch(HttpRequestBase, String, Payload) -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ FetchedResult - Class in crawlercommons.fetcher FetchedResult(String, String, long, Metadata, byte[], String, int, Payload, String, int, String) -
+Constructor for class crawlercommons.fetcher.FetchedResult
+
+
+
+
+G
+
+get(String) -
+Method in class crawlercommons.fetcher.BaseFetcher
+
+ get(String, Payload) -
+Method in class crawlercommons.fetcher.BaseFetcher
+Get the content stored in the resource referenced by the given URL.
+get(String, Payload) -
+Method in class crawlercommons.fetcher.file.SimpleFileFetcher
+
+ get(String, Payload) -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ get(Object) -
+Method in class crawlercommons.fetcher.Payload
+
+ getAbortReason() -
+Method in exception crawlercommons.fetcher.AbortedFetchException
+
+ getAcceptLanguage() -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ getAgentName() -
+Method in class crawlercommons.fetcher.http.UserAgent
+
+ getAssignedDomain(String) -
+Static method in class crawlercommons.url.EffectiveTldFinder
+This method uses the effective TLD to determine which component of
+ a FQDN is the NIC-assigned domain name.
+ getBaseUrl() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getBaseUrl() -
+Method in class crawlercommons.sitemaps.SiteMap
+
+ getCause() -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ getChangeFrequency() -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Return the URL's change frequency
+ getConnectionTimeout() -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ getContent() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getContentLength() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getContentType() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getCrawlDelay() -
+Method in class crawlercommons.robots.BaseRobotRules
+
+ getDefaultMaxContentSize() -
+Method in class crawlercommons.fetcher.BaseFetcher
+
+ getDomain() -
+Method in class crawlercommons.url.EffectiveTldFinder.EffectiveTLD
+
+ getEffectiveTLD(String) -
+Static method in class crawlercommons.url.EffectiveTldFinder
+
+ getEffectiveTLDs() -
+Static method in class crawlercommons.url.EffectiveTldFinder
+
+ getError() -
+Method in exception crawlercommons.sitemaps.UnknownFormatException
+
+ getExpanded() -
+Method in class crawlercommons.fetcher.EncodingUtils.ExpandedResult
+
+ getFetchedUrl() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getFetchTime() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getFullDateFormat() -
+Static method in class crawlercommons.sitemaps.AbstractSiteMap
+
+ getHeaders() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getHostAddress() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getHttpHeaders() -
+Method in exception crawlercommons.fetcher.HttpFetchException
+
+ getHttpStatus() -
+Method in exception crawlercommons.fetcher.HttpFetchException
+
+ getHttpVersion() -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ getInstance() -
+Static method in class crawlercommons.url.EffectiveTldFinder
+
+ getLastModified() -
+Method in class crawlercommons.sitemaps.AbstractSiteMap
+
+ getLastModified() -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Return when this URL was last modified.
+ getLocalizedMessage() -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ getMaxConnectionsPerHost() -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ getMaxContentSize(String) -
+Method in class crawlercommons.fetcher.BaseFetcher
+
+ getMaxFetchTime() -
+Static method in class crawlercommons.robots.RobotUtils
+
+ getMaxRedirects() -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ getMaxRetryCount() -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ getMaxThreads() -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ getMessage() -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ getMessage() -
+Method in exception crawlercommons.fetcher.HttpFetchException
+
+ getMimeTypeFromContentType(String) -
+Static method in class crawlercommons.fetcher.BaseFetcher
+
+ getMinResponseRate() -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+Return the minimum response rate.
+ getNewBaseUrl() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getNumRedirects() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getNumWarnings() -
+Method in class crawlercommons.robots.SimpleRobotRulesParser
+
+ getPayload() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getPLD(String) -
+Static method in class crawlercommons.url.PaidLevelDomain
+Extract the PLD (paid-level domain) from the hostname.
+ getPLD(URL) -
+Static method in class crawlercommons.url.PaidLevelDomain
+Extract the PLD (paid-level domain) from the URL.
+ getPriority() -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Return this URL's priority (a value between [0.0 - 1.0]).
+ getReason() -
+Method in exception crawlercommons.fetcher.RedirectFetchException
+
+ getRedirectedUrl() -
+Method in exception crawlercommons.fetcher.RedirectFetchException
+
+ getRedirectMode() -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ getResponseRate() -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ getRobotRules(BaseHttpFetcher, BaseRobotsParser, URL) -
+Static method in class crawlercommons.robots.RobotUtils
+Externally visible, static method for use in tools and for testing.
+ getSitemap(URL) -
+Method in class crawlercommons.sitemaps.SiteMapIndex
+Returns the Sitemap that has the given URL.
+ getSitemaps() -
+Method in class crawlercommons.robots.BaseRobotRules
+
+ getSitemaps() -
+Method in class crawlercommons.sitemaps.SiteMapIndex
+
+ getSiteMapUrls() -
+Method in class crawlercommons.sitemaps.SiteMap
+
+ getSocketTimeout() -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ getStackTrace() -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ getType() -
+Method in class crawlercommons.sitemaps.AbstractSiteMap
+
+ getUrl() -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ getUrl() -
+Method in class crawlercommons.sitemaps.AbstractSiteMap
+
+ getUrl() -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Return the URL.
+ getUserAgent() -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ getUserAgentString() -
+Method in class crawlercommons.fetcher.http.UserAgent
+
+ getValidMimeTypes() -
+Method in class crawlercommons.fetcher.BaseFetcher
+
+
+
+
+H
+
+hashCode() -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ hashCode() -
+Method in class crawlercommons.fetcher.Payload
+
+ hasUnprocessedSitemap() -
+Method in class crawlercommons.sitemaps.SiteMapIndex
+
+ HttpFetchException - Exception in crawlercommons.fetcher HttpFetchException() -
+Constructor for exception crawlercommons.fetcher.HttpFetchException
+
+ HttpFetchException(String, String, int, Metadata) -
+Constructor for exception crawlercommons.fetcher.HttpFetchException
+
+
+
+
+I
+
+initCause(Throwable) -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ initialize(InputStream) -
+Method in class crawlercommons.url.EffectiveTldFinder
+
+ IOFetchException - Exception in crawlercommons.fetcher IOFetchException() -
+Constructor for exception crawlercommons.fetcher.IOFetchException
+
+ IOFetchException(String, IOException) -
+Constructor for exception crawlercommons.fetcher.IOFetchException
+
+ isAllowAll() -
+Method in class crawlercommons.robots.BaseRobotRules
+
+ isAllowAll() -
+Method in class crawlercommons.robots.SimpleRobotRules
+Is our ruleset set up to allow all access?
+ isAllowed(String) -
+Method in class crawlercommons.robots.BaseRobotRules
+
+ isAllowed(String) -
+Method in class crawlercommons.robots.SimpleRobotRules
+
+ isAllowNone() -
+Method in class crawlercommons.robots.BaseRobotRules
+
+ isAllowNone() -
+Method in class crawlercommons.robots.SimpleRobotRules
+Is our ruleset set up to disallow all access?
+ isConfigured() -
+Method in class crawlercommons.url.EffectiveTldFinder
+
+ isDeferVisits() -
+Method in class crawlercommons.robots.BaseRobotRules
+
+ isEmpty() -
+Method in class crawlercommons.fetcher.Payload
+
+ isException() -
+Method in class crawlercommons.url.EffectiveTldFinder.EffectiveTLD
+
+ isIndex() -
+Method in class crawlercommons.sitemaps.AbstractSiteMap
+
+ isIndex() -
+Method in class crawlercommons.sitemaps.SiteMap
+
+ isIndex() -
+Method in class crawlercommons.sitemaps.SiteMapIndex
+
+ isProcessed() -
+Method in class crawlercommons.sitemaps.AbstractSiteMap
+
+ isTruncated() -
+Method in class crawlercommons.fetcher.EncodingUtils.ExpandedResult
+
+ isWild() -
+Method in class crawlercommons.url.EffectiveTldFinder.EffectiveTLD
+
+
+
+
+K
+
+keySet() -
+Method in class crawlercommons.fetcher.Payload
+
+
+
+
+L
+
+LOG -
+Static variable in class crawlercommons.sitemaps.SiteMapParser
+
+
+
+
+M
+
+MAX_BYTES_ALLOWED -
+Static variable in class crawlercommons.sitemaps.SiteMapParser
+Sitemap docs must be limited to 10MB (10,485,760 bytes)
+
+
+
+N
+
+nextUnprocessedSitemap() -
+Method in class crawlercommons.sitemaps.SiteMapIndex
+
+ NO_MIN_RESPONSE_RATE -
+Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ NO_REDIRECTS -
+Static variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+
+
+
+P
+
+PaidLevelDomain - Class in crawlercommons.url Routines to extract the PLD (paid-level domain, as per the IRLbot paper) from a hostname or URL. PaidLevelDomain() -
+Constructor for class crawlercommons.url.PaidLevelDomain
+
+ parseContent(String, byte[], String, String) -
+Method in class crawlercommons.robots.BaseRobotsParser
+Parse the robots.txt file in the given content, and return rules appropriate for
+ processing paths by the given robot name(s).
+parseContent(String, byte[], String, String) -
+Method in class crawlercommons.robots.SimpleRobotRulesParser
+
+ parseSiteMap(String, byte[], AbstractSiteMap) -
+Method in class crawlercommons.sitemaps.SiteMapParser
+Returns a processed copy of an unprocessed sitemap object, i.e.
+ parseSiteMap(String, byte[], URL) -
+Method in class crawlercommons.sitemaps.SiteMapParser
+Returns a SiteMap or SiteMapIndex given a content type, byte content and
+ the URL of a sitemap
+ Payload - Class in crawlercommons.fetcher Payload() -
+Constructor for class crawlercommons.fetcher.Payload
+
+ printStackTrace() -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ printStackTrace(PrintStream) -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ printStackTrace(PrintWriter) -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ processDeflateEncoded(byte[]) -
+Static method in class crawlercommons.fetcher.EncodingUtils
+
+ processDeflateEncoded(byte[], int) -
+Static method in class crawlercommons.fetcher.EncodingUtils
+
+ processGzipEncoded(byte[]) -
+Static method in class crawlercommons.fetcher.EncodingUtils
+
+ processGzipEncoded(byte[], int) -
+Static method in class crawlercommons.fetcher.EncodingUtils
+
+ put(String, Object) -
+Method in class crawlercommons.fetcher.Payload
+
+ putAll(Map<? extends String, ? extends Object>) -
+Method in class crawlercommons.fetcher.Payload
+
+
+
+
+R
+
+readBaseFields(DataInput) -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ RedirectFetchException - Exception in crawlercommons.fetcher RedirectFetchException() -
+Constructor for exception crawlercommons.fetcher.RedirectFetchException
+
+ RedirectFetchException(String, String, RedirectFetchException.RedirectExceptionReason) -
+Constructor for exception crawlercommons.fetcher.RedirectFetchException
+
+ RedirectFetchException.RedirectExceptionReason - Enum in crawlercommons.fetcher remove(Object) -
+Method in class crawlercommons.fetcher.Payload
+
+ RobotUtils - Class in crawlercommons.robots RobotUtils() -
+Constructor for class crawlercommons.robots.RobotUtils
+
+
+
+
+S
+
+setAcceptLanguage(String) -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ setChangeFrequency(SiteMapURL.ChangeFrequency) -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Set the URL's change frequency
+ setChangeFrequency(String) -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Set the URL's change frequency
+ setConnectionTimeout(int) -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ setCrawlDelay(long) -
+Method in class crawlercommons.robots.BaseRobotRules
+
+ setDefaultMaxContentSize(int) -
+Method in class crawlercommons.fetcher.BaseFetcher
+
+ setDeferVisits(boolean) -
+Method in class crawlercommons.robots.BaseRobotRules
+
+ setExpanded(byte[]) -
+Method in class crawlercommons.fetcher.EncodingUtils.ExpandedResult
+
+ setHttpVersion(HttpVersion) -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ setLastModified(Date) -
+Method in class crawlercommons.sitemaps.AbstractSiteMap
+
+ setLastModified(String) -
+Method in class crawlercommons.sitemaps.AbstractSiteMap
+
+ setLastModified(String) -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Set when this URL was last modified.
+ setLastModified(Date) -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Set when this URL was last modified.
+ setMaxConnectionsPerHost(int) -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ setMaxContentSize(String, int) -
+Method in class crawlercommons.fetcher.BaseFetcher
+
+ setMaxRedirects(int) -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ setMaxRetryCount(int) -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ setMinResponseRate(int) -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ setPayload(Payload) -
+Method in class crawlercommons.fetcher.FetchedResult
+
+ setPriority(double) -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the
+ given priority is out of range).
+ setPriority(String) -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Set the URL's priority to a value between [0.0 - 1.0] (0.0 is used if the
+ given priority is out of range).
+ setRedirectMode(BaseHttpFetcher.RedirectMode) -
+Method in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ setSocketTimeout(int) -
+Method in class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ setStackTrace(StackTraceElement[]) -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ setTruncated(boolean) -
+Method in class crawlercommons.fetcher.EncodingUtils.ExpandedResult
+
+ setUrl(URL) -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Set the URL.
+ setUrl(String) -
+Method in class crawlercommons.sitemaps.SiteMapURL
+Set the URL.
+ setValidMimeTypes(Set<String>) -
+Method in class crawlercommons.fetcher.BaseFetcher
+
+ SimpleFileFetcher - Class in crawlercommons.fetcher.file SimpleFileFetcher() -
+Constructor for class crawlercommons.fetcher.file.SimpleFileFetcher
+
+ SimpleHttpFetcher - Class in crawlercommons.fetcher.http SimpleHttpFetcher(UserAgent) -
+Constructor for class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ SimpleHttpFetcher(int, UserAgent) -
+Constructor for class crawlercommons.fetcher.http.SimpleHttpFetcher
+
+ SimpleRobotRules - Class in crawlercommons.robots Result from parsing a single robots.txt file - which means we
+ get a set of rules, and a crawl-delay. SimpleRobotRules() -
+Constructor for class crawlercommons.robots.SimpleRobotRules
+
+ SimpleRobotRules(SimpleRobotRules.RobotRulesMode) -
+Constructor for class crawlercommons.robots.SimpleRobotRules
+
+ SimpleRobotRules.RobotRule - Class in crawlercommons.robots Single rule that maps from a path prefix to an allow flag. SimpleRobotRules.RobotRule(String, boolean) -
+Constructor for class crawlercommons.robots.SimpleRobotRules.RobotRule
+
+ SimpleRobotRules.RobotRule(Pattern, boolean) -
+Constructor for class crawlercommons.robots.SimpleRobotRules.RobotRule
+
+ SimpleRobotRules.RobotRulesMode - Enum in crawlercommons.robots SimpleRobotRulesParser - Class in crawlercommons.robots SimpleRobotRulesParser() -
+Constructor for class crawlercommons.robots.SimpleRobotRulesParser
+
+ SiteMap - Class in crawlercommons.sitemaps SiteMap() -
+Constructor for class crawlercommons.sitemaps.SiteMap
+
+ SiteMap(URL) -
+Constructor for class crawlercommons.sitemaps.SiteMap
+
+ SiteMap(String) -
+Constructor for class crawlercommons.sitemaps.SiteMap
+
+ SiteMap(URL, Date) -
+Constructor for class crawlercommons.sitemaps.SiteMap
+
+ SiteMap(String, String) -
+Constructor for class crawlercommons.sitemaps.SiteMap
+
+ SiteMapIndex - Class in crawlercommons.sitemaps SiteMapIndex() -
+Constructor for class crawlercommons.sitemaps.SiteMapIndex
+
+ SiteMapIndex(URL) -
+Constructor for class crawlercommons.sitemaps.SiteMapIndex
+
+ SiteMapParser - Class in crawlercommons.sitemaps SiteMapParser() -
+Constructor for class crawlercommons.sitemaps.SiteMapParser
+
+ SiteMapURL - Class in crawlercommons.sitemaps The SitemapUrl class represents a URL found in a Sitemap. SiteMapURL(String) -
+Constructor for class crawlercommons.sitemaps.SiteMapURL
+
+ SiteMapURL(URL) -
+Constructor for class crawlercommons.sitemaps.SiteMapURL
+
+ SiteMapURL(String, String, String, String) -
+Constructor for class crawlercommons.sitemaps.SiteMapURL
+
+ SiteMapURL(URL, Date, SiteMapURL.ChangeFrequency, double) -
+Constructor for class crawlercommons.sitemaps.SiteMapURL
+
+ SiteMapURL.ChangeFrequency - Enum in crawlercommons.sitemaps Allowed change frequencies size() -
+Method in class crawlercommons.fetcher.Payload
+
+
+
+
+T
+
+toString() -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+ toString() -
+Method in class crawlercommons.sitemaps.SiteMap
+
+ toString() -
+Method in class crawlercommons.sitemaps.SiteMapIndex
+
+ toString() -
+Method in class crawlercommons.sitemaps.SiteMapURL
+
+ toString() -
+Method in class crawlercommons.url.EffectiveTldFinder.EffectiveTLD
+
+
+
+
+U
+
+UnknownFormatException - Exception in crawlercommons.sitemaps UnknownFormatException() -
+Constructor for exception crawlercommons.sitemaps.UnknownFormatException
+
+ UnknownFormatException(String) -
+Constructor for exception crawlercommons.sitemaps.UnknownFormatException
+
+ UNSET_CRAWL_DELAY -
+Static variable in class crawlercommons.robots.BaseRobotRules
+
+ UrlFetchException - Exception in crawlercommons.fetcher UrlFetchException() -
+Constructor for exception crawlercommons.fetcher.UrlFetchException
+
+ UrlFetchException(String, String) -
+Constructor for exception crawlercommons.fetcher.UrlFetchException
+
+ UserAgent - Class in crawlercommons.fetcher.http UserAgent(String, String, String) -
+Constructor for class crawlercommons.fetcher.http.UserAgent
+
+ UserAgent(String, String, String, String) -
+Constructor for class crawlercommons.fetcher.http.UserAgent
+
+ UserAgent(String, String, String, String, String) -
+Constructor for class crawlercommons.fetcher.http.UserAgent
+
+
+
+
+V
+
+valueOf(String) -
+Static method in enum crawlercommons.fetcher.AbortedFetchReason
+Returns the enum constant of this type with the specified name.
+ valueOf(String) -
+Static method in enum crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode
+Returns the enum constant of this type with the specified name.
+ valueOf(String) -
+Static method in enum crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason
+Returns the enum constant of this type with the specified name.
+ valueOf(String) -
+Static method in enum crawlercommons.robots.SimpleRobotRules.RobotRulesMode
+Returns the enum constant of this type with the specified name.
+ valueOf(String) -
+Static method in enum crawlercommons.sitemaps.AbstractSiteMap.SitemapType
+Returns the enum constant of this type with the specified name.
+ valueOf(String) -
+Static method in enum crawlercommons.sitemaps.SiteMapURL.ChangeFrequency
+Returns the enum constant of this type with the specified name.
+ values() -
+Static method in enum crawlercommons.fetcher.AbortedFetchReason
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ values() -
+Static method in enum crawlercommons.fetcher.http.BaseHttpFetcher.RedirectMode
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ values() -
+Method in class crawlercommons.fetcher.Payload
+
+ values() -
+Static method in enum crawlercommons.fetcher.RedirectFetchException.RedirectExceptionReason
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ values() -
+Static method in enum crawlercommons.robots.SimpleRobotRules.RobotRulesMode
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ values() -
+Static method in enum crawlercommons.sitemaps.AbstractSiteMap.SitemapType
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ values() -
+Static method in enum crawlercommons.sitemaps.SiteMapURL.ChangeFrequency
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+
+
+
+W
+
+WILD_CARD -
+Static variable in class crawlercommons.url.EffectiveTldFinder
+
+ writeBaseFields(DataOutput) -
+Method in exception crawlercommons.fetcher.BaseFetchException
+
+
+
+
+_
+
+_acceptLanguage -
+Variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ _defaultMaxContentSize -
+Variable in class crawlercommons.fetcher.BaseFetcher
+
+ _maxConnectionsPerHost -
+Variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ _maxContentSizes -
+Variable in class crawlercommons.fetcher.BaseFetcher
+
+ _maxRedirects -
+Variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ _maxThreads -
+Variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ _minResponseRate -
+Variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ _redirectMode -
+Variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ _userAgent -
+Variable in class crawlercommons.fetcher.http.BaseHttpFetcher
+
+ _validMimeTypes -
+Variable in class crawlercommons.fetcher.BaseFetcher
+
+
+
+A B C D E F G H I K L M N P R S T U V W _
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/index.html b/doc/javadoc/index.html
new file mode 100644
index 0000000..5d8354d
--- /dev/null
+++ b/doc/javadoc/index.html
@@ -0,0 +1,39 @@
+
+
+
+
+
+
+crawlercommons 0.2-SNAPSHOT API
+
+
+
+
+
+
+
+
+
+
+
+
+
+Frame Alert
+
+
+This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client.
+
+Link to Non-frame version.
+
+
+
diff --git a/doc/javadoc/overview-frame.html b/doc/javadoc/overview-frame.html
new file mode 100644
index 0000000..a59de77
--- /dev/null
+++ b/doc/javadoc/overview-frame.html
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+Overview List (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/overview-summary.html b/doc/javadoc/overview-summary.html
new file mode 100644
index 0000000..ab2f352
--- /dev/null
+++ b/doc/javadoc/overview-summary.html
@@ -0,0 +1,176 @@
+
+
+
+
+
+
+Overview (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+crawlercommons 0.2-SNAPSHOT API
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/overview-tree.html b/doc/javadoc/overview-tree.html
new file mode 100644
index 0000000..9a010a5
--- /dev/null
+++ b/doc/javadoc/overview-tree.html
@@ -0,0 +1,183 @@
+
+
+
+
+
+
+Class Hierarchy (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hierarchy For All Packages
+
+
+Package Hierarchies: crawlercommons.fetcher , crawlercommons.fetcher.file , crawlercommons.fetcher.http , crawlercommons.robots , crawlercommons.sitemaps , crawlercommons.url
+
+
+Class Hierarchy
+
+
+
+Enum Hierarchy
+
+
+java.lang.Object
+java.lang.Enum<E> (implements java.lang.Comparable<T>, java.io.Serializable)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/package-list b/doc/javadoc/package-list
new file mode 100644
index 0000000..b45f53c
--- /dev/null
+++ b/doc/javadoc/package-list
@@ -0,0 +1,6 @@
+crawlercommons.fetcher
+crawlercommons.fetcher.file
+crawlercommons.fetcher.http
+crawlercommons.robots
+crawlercommons.sitemaps
+crawlercommons.url
diff --git a/doc/javadoc/resources/inherit.gif b/doc/javadoc/resources/inherit.gif
new file mode 100644
index 0000000..c814867
Binary files /dev/null and b/doc/javadoc/resources/inherit.gif differ
diff --git a/doc/javadoc/serialized-form.html b/doc/javadoc/serialized-form.html
new file mode 100644
index 0000000..911e1de
--- /dev/null
+++ b/doc/javadoc/serialized-form.html
@@ -0,0 +1,671 @@
+
+
+
+
+
+
+Serialized Form (crawlercommons 0.2-SNAPSHOT API)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Form
+
+
+
+
+
+
+Package crawlercommons.fetcher
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+_abortReason
+
+AbortedFetchReason _abortReason
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+_maxContentSizes
+
+java.util.Map<K,V> _maxContentSizes
+
+
+
+
+
+
+_defaultMaxContentSize
+
+int _defaultMaxContentSize
+
+
+
+
+
+
+_validMimeTypes
+
+java.util.Set<E> _validMimeTypes
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+_url
+
+java.lang.String _url
+
+
+
+
+
+
+_exception
+
+java.lang.Exception _exception
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+_httpStatus
+
+int _httpStatus
+
+
+
+
+
+
+_httpHeaders
+
+org.apache.tika.metadata.Metadata _httpHeaders
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+_redirectedUrl
+
+java.lang.String _redirectedUrl
+
+
+
+
+
+
+_reason
+
+RedirectFetchException.RedirectExceptionReason _reason
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Package crawlercommons.fetcher.file
+
+
+
+
+
+
+
+
+
+
+
+
+
+Package crawlercommons.fetcher.http
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+_maxThreads
+
+int _maxThreads
+
+
+
+
+
+
+_userAgent
+
+UserAgent _userAgent
+
+
+
+
+
+
+_maxRedirects
+
+int _maxRedirects
+
+
+
+
+
+
+_maxConnectionsPerHost
+
+int _maxConnectionsPerHost
+
+
+
+
+
+
+_minResponseRate
+
+int _minResponseRate
+
+
+
+
+
+
+_acceptLanguage
+
+java.lang.String _acceptLanguage
+
+
+
+
+
+
+_redirectMode
+
+BaseHttpFetcher.RedirectMode _redirectMode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+_httpVersion
+
+org.apache.http.HttpVersion _httpVersion
+
+
+
+
+
+
+_socketTimeout
+
+int _socketTimeout
+
+
+
+
+
+
+_connectionTimeout
+
+int _connectionTimeout
+
+
+
+
+
+
+_maxRetryCount
+
+int _maxRetryCount
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+_agentName
+
+java.lang.String _agentName
+
+
+
+
+
+
+_emailAddress
+
+java.lang.String _emailAddress
+
+
+
+
+
+
+_webAddress
+
+java.lang.String _webAddress
+
+
+
+
+
+
+_browserVersion
+
+java.lang.String _browserVersion
+
+
+
+
+
+
+_crawlerVersion
+
+java.lang.String _crawlerVersion
+
+
+
+
+
+
+
+
+
+Package crawlercommons.robots
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+_numWarnings
+
+int _numWarnings
+
+
+
+
+
+
+
+
+
+Package crawlercommons.sitemaps
+
+
+
+
+
+
+
+
+
+
+
+
+Serialized Fields
+
+
+
+
+error
+
+java.lang.String error
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/javadoc/stylesheet.css b/doc/javadoc/stylesheet.css
new file mode 100644
index 0000000..6ea9e51
--- /dev/null
+++ b/doc/javadoc/stylesheet.css
@@ -0,0 +1,29 @@
+/* Javadoc style sheet */
+
+/* Define colors, fonts and other style attributes here to override the defaults */
+
+/* Page background color */
+body { background-color: #FFFFFF; color:#000000 }
+
+/* Headings */
+h1 { font-size: 145% }
+
+/* Table colors */
+.TableHeadingColor { background: #CCCCFF; color:#000000 } /* Dark mauve */
+.TableSubHeadingColor { background: #EEEEFF; color:#000000 } /* Light mauve */
+.TableRowColor { background: #FFFFFF; color:#000000 } /* White */
+
+/* Font used in left-hand frame lists */
+.FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif; color:#000000 }
+.FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 }
+.FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 }
+
+/* Navigation bar fonts and colors */
+.NavBarCell1 { background-color:#EEEEFF; color:#000000} /* Light mauve */
+.NavBarCell1Rev { background-color:#00008B; color:#FFFFFF} /* Dark Blue */
+.NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;color:#000000;}
+.NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;color:#FFFFFF;}
+
+.NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000}
+.NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000}
+