mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-06-01 21:36:04 +02:00
353 lines
14 KiB
HTML
353 lines
14 KiB
HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
<!--NewPage-->
|
|
<HTML>
|
|
<HEAD>
|
|
<!-- Generated by javadoc (build 1.6.0_31) on Mon Nov 05 19:55:43 PST 2012 -->
|
|
<TITLE>
|
|
SiteMapParser (crawlercommons 0.2-SNAPSHOT API)
|
|
</TITLE>
|
|
|
|
<META NAME="date" CONTENT="2012-11-05">
|
|
|
|
<LINK REL ="stylesheet" TYPE="text/css" HREF="../../stylesheet.css" TITLE="Style">
|
|
|
|
<SCRIPT type="text/javascript">
// Copies this page's title onto the parent window's document (the frameset
// title when the page is shown inside frames), unless the page URL contains
// the 'is-external=true' marker. Called from the BODY onload handler.
function windowTitle()
{
    if (location.href.indexOf('is-external=true') == -1) {
        parent.document.title="SiteMapParser (crawlercommons 0.2-SNAPSHOT API)";
    }
}
</SCRIPT>
|
|
<NOSCRIPT>
|
|
</NOSCRIPT>
|
|
|
|
</HEAD>
|
|
|
|
<BODY BGCOLOR="white" onload="windowTitle();">
|
|
<HR>
|
|
|
|
|
|
<!-- ========= START OF TOP NAVBAR ======= -->
|
|
<A NAME="navbar_top"><!-- --></A>
|
|
<A HREF="#skip-navbar_top" title="Skip navigation links"></A>
|
|
<TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY="">
|
|
<TR>
|
|
<TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1">
|
|
<A NAME="navbar_top_firstrow"><!-- --></A>
|
|
<TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY="">
|
|
<TR ALIGN="center" VALIGN="top">
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-summary.html"><FONT CLASS="NavBarFont1"><B>Package</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> <FONT CLASS="NavBarFont1Rev"><B>Class</B></FONT> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="class-use/SiteMapParser.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
</TD>
|
|
<TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM>
|
|
</EM>
|
|
</TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
|
|
<A HREF="../../crawlercommons/sitemaps/SiteMapIndex.html" title="class in crawlercommons.sitemaps"><B>PREV CLASS</B></A>
|
|
<A HREF="../../crawlercommons/sitemaps/SiteMapURL.html" title="class in crawlercommons.sitemaps"><B>NEXT CLASS</B></A></FONT></TD>
|
|
<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
|
|
<A HREF="../../index.html?crawlercommons/sitemaps/SiteMapParser.html" target="_top"><B>FRAMES</B></A>
|
|
<A HREF="SiteMapParser.html" target="_top"><B>NO FRAMES</B></A>
|
|
<SCRIPT type="text/javascript">
  <!--
  // Write the "All Classes" link only when this page is the top-level
  // window, i.e. it is not being displayed inside a frame.
  if(window==top) {
    document.writeln('<A HREF="../../allclasses-noframe.html"><B>All Classes</B></A>');
  }
  //-->
</SCRIPT>
|
|
<NOSCRIPT>
|
|
<A HREF="../../allclasses-noframe.html"><B>All Classes</B></A>
|
|
</NOSCRIPT>
|
|
|
|
|
|
</FONT></TD>
|
|
</TR>
|
|
<TR>
|
|
<TD VALIGN="top" CLASS="NavBarCell3"><FONT SIZE="-2">
|
|
SUMMARY: NESTED | <A HREF="#field_summary">FIELD</A> | <A HREF="#constructor_summary">CONSTR</A> | <A HREF="#method_summary">METHOD</A></FONT></TD>
|
|
<TD VALIGN="top" CLASS="NavBarCell3"><FONT SIZE="-2">
|
|
DETAIL: <A HREF="#field_detail">FIELD</A> | <A HREF="#constructor_detail">CONSTR</A> | <A HREF="#method_detail">METHOD</A></FONT></TD>
|
|
</TR>
|
|
</TABLE>
|
|
<A NAME="skip-navbar_top"></A>
|
|
<!-- ========= END OF TOP NAVBAR ========= -->
|
|
|
|
<HR>
|
|
<!-- ======== START OF CLASS DATA ======== -->
|
|
<H2>
|
|
<FONT SIZE="-1">
|
|
crawlercommons.sitemaps</FONT>
|
|
<BR>
|
|
Class SiteMapParser</H2>
|
|
<PRE>
|
|
java.lang.Object
|
|
<IMG SRC="../../resources/inherit.gif" ALT="extended by "><B>crawlercommons.sitemaps.SiteMapParser</B>
|
|
</PRE>
|
|
<HR>
|
|
<DL>
|
|
<DT><PRE>public class <B>SiteMapParser</B><DT>extends java.lang.Object</DL>
|
|
</PRE>
|
|
|
|
<P>
|
|
<HR>
|
|
|
|
<P>
|
|
<!-- =========== FIELD SUMMARY =========== -->
|
|
|
|
<A NAME="field_summary"><!-- --></A>
|
|
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
|
|
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
|
|
<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
|
|
<B>Field Summary</B></FONT></TH>
|
|
</TR>
|
|
<TR BGCOLOR="white" CLASS="TableRowColor">
|
|
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
|
|
<CODE>static org.slf4j.Logger</CODE></FONT></TD>
|
|
<TD><CODE><B><A HREF="../../crawlercommons/sitemaps/SiteMapParser.html#LOG">LOG</A></B></CODE>
|
|
|
|
<BR>
|
|
</TD>
|
|
</TR>
|
|
<TR BGCOLOR="white" CLASS="TableRowColor">
|
|
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
|
|
<CODE>static int</CODE></FONT></TD>
|
|
<TD><CODE><B><A HREF="../../crawlercommons/sitemaps/SiteMapParser.html#MAX_BYTES_ALLOWED">MAX_BYTES_ALLOWED</A></B></CODE>
|
|
|
|
<BR>
|
|
Sitemap docs must be limited to 10MB (10,485,760 bytes)</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
|
|
|
|
<A NAME="constructor_summary"><!-- --></A>
|
|
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
|
|
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
|
|
<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
|
|
<B>Constructor Summary</B></FONT></TH>
|
|
</TR>
|
|
<TR BGCOLOR="white" CLASS="TableRowColor">
|
|
<TD><CODE><B><A HREF="../../crawlercommons/sitemaps/SiteMapParser.html#SiteMapParser()">SiteMapParser</A></B>()</CODE>
|
|
|
|
<BR>
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ========== METHOD SUMMARY =========== -->
|
|
|
|
<A NAME="method_summary"><!-- --></A>
|
|
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
|
|
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
|
|
<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
|
|
<B>Method Summary</B></FONT></TH>
|
|
</TR>
|
|
<TR BGCOLOR="white" CLASS="TableRowColor">
|
|
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
|
|
<CODE> <A HREF="../../crawlercommons/sitemaps/AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</A></CODE></FONT></TD>
|
|
<TD><CODE><B><A HREF="../../crawlercommons/sitemaps/SiteMapParser.html#parseSiteMap(java.lang.String, byte[], crawlercommons.sitemaps.AbstractSiteMap)">parseSiteMap</A></B>(java.lang.String contentType,
|
|
byte[] content,
|
|
<A HREF="../../crawlercommons/sitemaps/AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</A> sitemap)</CODE>
|
|
|
|
<BR>
|
|
Returns a processed copy of an unprocessed sitemap object.</TD>
|
|
</TR>
|
|
<TR BGCOLOR="white" CLASS="TableRowColor">
|
|
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
|
|
<CODE> <A HREF="../../crawlercommons/sitemaps/AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</A></CODE></FONT></TD>
|
|
<TD><CODE><B><A HREF="../../crawlercommons/sitemaps/SiteMapParser.html#parseSiteMap(java.lang.String, byte[], java.net.URL)">parseSiteMap</A></B>(java.lang.String contentType,
|
|
byte[] content,
|
|
java.net.URL url)</CODE>
|
|
|
|
<BR>
|
|
Returns a SiteMap or SiteMapIndex given a content type, byte content, and
 the URL of a sitemap.</TD>
|
|
</TR>
|
|
</TABLE>
|
|
<A NAME="methods_inherited_from_class_java.lang.Object"><!-- --></A>
|
|
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
|
|
<TR BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor">
|
|
<TH ALIGN="left"><B>Methods inherited from class java.lang.Object</B></TH>
|
|
</TR>
|
|
<TR BGCOLOR="white" CLASS="TableRowColor">
|
|
<TD><CODE>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</CODE></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<P>
|
|
|
|
<!-- ============ FIELD DETAIL =========== -->
|
|
|
|
<A NAME="field_detail"><!-- --></A>
|
|
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
|
|
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
|
|
<TH ALIGN="left" COLSPAN="1"><FONT SIZE="+2">
|
|
<B>Field Detail</B></FONT></TH>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<A NAME="LOG"><!-- --></A><H3>
|
|
LOG</H3>
|
|
<PRE>
|
|
public static final org.slf4j.Logger <B>LOG</B></PRE>
|
|
<DL>
|
|
<DL>
|
|
</DL>
|
|
</DL>
|
|
<HR>
|
|
|
|
<A NAME="MAX_BYTES_ALLOWED"><!-- --></A><H3>
|
|
MAX_BYTES_ALLOWED</H3>
|
|
<PRE>
|
|
public static int <B>MAX_BYTES_ALLOWED</B></PRE>
|
|
<DL>
|
|
<DD>Sitemap docs must be limited to 10MB (10,485,760 bytes)
|
|
<P>
|
|
<DL>
|
|
</DL>
|
|
</DL>
|
|
|
|
<!-- ========= CONSTRUCTOR DETAIL ======== -->
|
|
|
|
<A NAME="constructor_detail"><!-- --></A>
|
|
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
|
|
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
|
|
<TH ALIGN="left" COLSPAN="1"><FONT SIZE="+2">
|
|
<B>Constructor Detail</B></FONT></TH>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<A NAME="SiteMapParser()"><!-- --></A><H3>
|
|
SiteMapParser</H3>
|
|
<PRE>
|
|
public <B>SiteMapParser</B>()</PRE>
|
|
<DL>
|
|
</DL>
|
|
|
|
<!-- ============ METHOD DETAIL ========== -->
|
|
|
|
<A NAME="method_detail"><!-- --></A>
|
|
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
|
|
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
|
|
<TH ALIGN="left" COLSPAN="1"><FONT SIZE="+2">
|
|
<B>Method Detail</B></FONT></TH>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<A NAME="parseSiteMap(java.lang.String, byte[], crawlercommons.sitemaps.AbstractSiteMap)"><!-- --></A><H3>
|
|
parseSiteMap</H3>
|
|
<PRE>
|
|
public <A HREF="../../crawlercommons/sitemaps/AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</A> <B>parseSiteMap</B>(java.lang.String contentType,
|
|
byte[] content,
|
|
<A HREF="../../crawlercommons/sitemaps/AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</A> sitemap)
|
|
throws <A HREF="../../crawlercommons/sitemaps/UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</A>,
|
|
java.io.IOException</PRE>
|
|
<DL>
|
|
<DD>Returns a processed copy of an unprocessed sitemap object, i.e. transfers the value of
 getLastModified and sets the original sitemap to processed.
|
|
<P>
|
|
<DD><DL>
|
|
|
|
<DT><B>Throws:</B>
|
|
<DD><CODE><A HREF="../../crawlercommons/sitemaps/UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</A></CODE>
|
|
<DD><CODE>java.io.IOException</CODE></DL>
|
|
</DD>
|
|
</DL>
|
|
<HR>
|
|
|
|
<A NAME="parseSiteMap(java.lang.String, byte[], java.net.URL)"><!-- --></A><H3>
|
|
parseSiteMap</H3>
|
|
<PRE>
|
|
public <A HREF="../../crawlercommons/sitemaps/AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</A> <B>parseSiteMap</B>(java.lang.String contentType,
|
|
byte[] content,
|
|
java.net.URL url)
|
|
throws <A HREF="../../crawlercommons/sitemaps/UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</A>,
|
|
java.io.IOException</PRE>
|
|
<DL>
|
|
<DD>Returns a SiteMap or SiteMapIndex given a content type, byte content, and
 the URL of a sitemap.
|
|
<P>
|
|
<DD><DL>
|
|
|
|
<DT><B>Throws:</B>
|
|
<DD><CODE><A HREF="../../crawlercommons/sitemaps/UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</A></CODE>
|
|
<DD><CODE>java.io.IOException</CODE></DL>
|
|
</DD>
|
|
</DL>
|
|
<!-- ========= END OF CLASS DATA ========= -->
|
|
<HR>
|
|
|
|
|
|
<!-- ======= START OF BOTTOM NAVBAR ====== -->
|
|
<A NAME="navbar_bottom"><!-- --></A>
|
|
<A HREF="#skip-navbar_bottom" title="Skip navigation links"></A>
|
|
<TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY="">
|
|
<TR>
|
|
<TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1">
|
|
<A NAME="navbar_bottom_firstrow"><!-- --></A>
|
|
<TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY="">
|
|
<TR ALIGN="center" VALIGN="top">
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-summary.html"><FONT CLASS="NavBarFont1"><B>Package</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> <FONT CLASS="NavBarFont1Rev"><B>Class</B></FONT> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="class-use/SiteMapParser.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A> </TD>
|
|
<TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
</TD>
|
|
<TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM>
|
|
</EM>
|
|
</TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
|
|
<A HREF="../../crawlercommons/sitemaps/SiteMapIndex.html" title="class in crawlercommons.sitemaps"><B>PREV CLASS</B></A>
|
|
<A HREF="../../crawlercommons/sitemaps/SiteMapURL.html" title="class in crawlercommons.sitemaps"><B>NEXT CLASS</B></A></FONT></TD>
|
|
<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
|
|
<A HREF="../../index.html?crawlercommons/sitemaps/SiteMapParser.html" target="_top"><B>FRAMES</B></A>
|
|
<A HREF="SiteMapParser.html" target="_top"><B>NO FRAMES</B></A>
|
|
<SCRIPT type="text/javascript">
  <!--
  // Write the "All Classes" link only when this page is the top-level
  // window, i.e. it is not being displayed inside a frame.
  if(window==top) {
    document.writeln('<A HREF="../../allclasses-noframe.html"><B>All Classes</B></A>');
  }
  //-->
</SCRIPT>
|
|
<NOSCRIPT>
|
|
<A HREF="../../allclasses-noframe.html"><B>All Classes</B></A>
|
|
</NOSCRIPT>
|
|
|
|
|
|
</FONT></TD>
|
|
</TR>
|
|
<TR>
|
|
<TD VALIGN="top" CLASS="NavBarCell3"><FONT SIZE="-2">
|
|
SUMMARY: NESTED | <A HREF="#field_summary">FIELD</A> | <A HREF="#constructor_summary">CONSTR</A> | <A HREF="#method_summary">METHOD</A></FONT></TD>
|
|
<TD VALIGN="top" CLASS="NavBarCell3"><FONT SIZE="-2">
|
|
DETAIL: <A HREF="#field_detail">FIELD</A> | <A HREF="#constructor_detail">CONSTR</A> | <A HREF="#method_detail">METHOD</A></FONT></TD>
|
|
</TR>
|
|
</TABLE>
|
|
<A NAME="skip-navbar_bottom"></A>
|
|
<!-- ======== END OF BOTTOM NAVBAR ======= -->
|
|
|
|
<HR>
|
|
|
|
</BODY>
|
|
</HTML>
|