<!-- Mirror of https://github.com/crawler-commons/crawler-commons (synced 2024-05-18 18:06:05 +02:00); 1112 lines, 62 KiB, HTML -->
<!DOCTYPE HTML>
|
|
<!-- NewPage -->
|
|
<html lang="en">
|
|
<head>
|
|
<!-- Generated by javadoc (11.0.19) on Thu Jul 13 10:31:24 CEST 2023 -->
|
|
<title>SiteMapParser (Crawler-commons 1.4 API)</title>
|
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
|
<meta name="dc.created" content="2023-07-13">
|
|
<link rel="stylesheet" type="text/css" href="../../stylesheet.css" title="Style">
|
|
<link rel="stylesheet" type="text/css" href="../../jquery/jquery-ui.min.css" title="Style">
|
|
<link rel="stylesheet" type="text/css" href="../../jquery-ui.overrides.css" title="Style">
|
|
<script type="text/javascript" src="../../script.js"></script>
|
|
<script type="text/javascript" src="../../jquery/jszip/dist/jszip.min.js"></script>
|
|
<script type="text/javascript" src="../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
|
|
<!--[if IE]>
|
|
<script type="text/javascript" src="../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
|
|
<![endif]-->
|
|
<script type="text/javascript" src="../../jquery/jquery-3.6.1.min.js"></script>
|
|
<script type="text/javascript" src="../../jquery/jquery-ui.min.js"></script>
|
|
</head>
|
|
<body>
|
|
<script type="text/javascript"><!--
|
|
try {
|
|
if (location.href.indexOf('is-external=true') == -1) {
|
|
parent.document.title="SiteMapParser (Crawler-commons 1.4 API)";
|
|
}
|
|
}
|
|
catch(err) {
|
|
}
|
|
//-->
|
|
var data = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10,"i9":10,"i10":10,"i11":10,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":9,"i20":10,"i21":10};
|
|
var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
|
|
var altColor = "altColor";
|
|
var rowColor = "rowColor";
|
|
var tableTab = "tableTab";
|
|
var activeTableTab = "activeTableTab";
|
|
var pathtoroot = "../../";
|
|
var useModuleDirectories = false;
|
|
loadScripts(document, 'script');</script>
|
|
<noscript>
|
|
<div>JavaScript is disabled on your browser.</div>
|
|
</noscript>
|
|
<header role="banner">
|
|
<nav role="navigation">
|
|
<div class="fixedNav">
|
|
<!-- ========= START OF TOP NAVBAR ======= -->
|
|
<div class="topNav"><a id="navbar.top">
|
|
<!-- -->
|
|
</a>
|
|
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
|
|
<a id="navbar.top.firstrow">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="navList" title="Navigation">
|
|
<li><a href="../../index.html">Overview</a></li>
|
|
<li><a href="package-summary.html">Package</a></li>
|
|
<li class="navBarCell1Rev">Class</li>
|
|
<li><a href="class-use/SiteMapParser.html">Use</a></li>
|
|
<li><a href="package-tree.html">Tree</a></li>
|
|
<li><a href="../../deprecated-list.html">Deprecated</a></li>
|
|
<li><a href="../../index-all.html">Index</a></li>
|
|
<li><a href="../../help-doc.html">Help</a></li>
|
|
</ul>
|
|
</div>
|
|
<div class="subNav">
|
|
<ul class="navList" id="allclasses_navbar_top">
|
|
<li><a href="../../allclasses.html">All Classes</a></li>
|
|
</ul>
|
|
<ul class="navListSearch">
|
|
<li><label for="search">SEARCH:</label>
|
|
<input type="text" id="search" value="search" disabled="disabled">
|
|
<input type="reset" id="reset" value="reset" disabled="disabled">
|
|
</li>
|
|
</ul>
|
|
<div>
|
|
<script type="text/javascript"><!--
|
|
allClassesLink = document.getElementById("allclasses_navbar_top");
|
|
if(window==top) {
|
|
allClassesLink.style.display = "block";
|
|
}
|
|
else {
|
|
allClassesLink.style.display = "none";
|
|
}
|
|
//-->
|
|
</script>
|
|
<noscript>
|
|
<div>JavaScript is disabled on your browser.</div>
|
|
</noscript>
|
|
</div>
|
|
<div>
|
|
<ul class="subNavList">
|
|
<li>Summary: </li>
|
|
<li>Nested | </li>
|
|
<li><a href="#field.summary">Field</a> | </li>
|
|
<li><a href="#constructor.summary">Constr</a> | </li>
|
|
<li><a href="#method.summary">Method</a></li>
|
|
</ul>
|
|
<ul class="subNavList">
|
|
<li>Detail: </li>
|
|
<li><a href="#field.detail">Field</a> | </li>
|
|
<li><a href="#constructor.detail">Constr</a> | </li>
|
|
<li><a href="#method.detail">Method</a></li>
|
|
</ul>
|
|
</div>
|
|
<a id="skip.navbar.top">
|
|
<!-- -->
|
|
</a></div>
|
|
<!-- ========= END OF TOP NAVBAR ========= -->
|
|
</div>
|
|
<div class="navPadding"> </div>
|
|
<script type="text/javascript"><!--
|
|
$('.navPadding').css('padding-top', $('.fixedNav').css("height"));
|
|
//-->
|
|
</script>
|
|
</nav>
|
|
</header>
|
|
<!-- ======== START OF CLASS DATA ======== -->
|
|
<main role="main">
|
|
<div class="header">
|
|
<div class="subTitle"><span class="packageLabelInType">Package</span> <a href="package-summary.html">crawlercommons.sitemaps</a></div>
|
|
<h2 title="Class SiteMapParser" class="title">Class SiteMapParser</h2>
|
|
</div>
|
|
<div class="contentContainer">
|
|
<ul class="inheritance">
|
|
<li><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang" class="externalLink">java.lang.Object</a></li>
|
|
<li>
|
|
<ul class="inheritance">
|
|
<li>crawlercommons.sitemaps.SiteMapParser</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<div class="description">
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<hr>
|
|
<pre>public class <span class="typeNameLabel">SiteMapParser</span>
|
|
extends <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang" class="externalLink">Object</a></pre>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="summary">
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<!-- =========== FIELD SUMMARY =========== -->
|
|
<section>
|
|
<ul class="blockList">
|
|
<li class="blockList"><a id="field.summary">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Field Summary</h3>
|
|
<table class="memberSummary">
|
|
<caption><span>Fields</span><span class="tabEnd"> </span></caption>
|
|
<tr>
|
|
<th class="colFirst" scope="col">Modifier and Type</th>
|
|
<th class="colSecond" scope="col">Field</th>
|
|
<th class="colLast" scope="col">Description</th>
|
|
</tr>
|
|
<tr class="altColor">
|
|
<td class="colFirst"><code>protected <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/Set.html?is-external=true" title="class or interface in java.util" class="externalLink">Set</a>&lt;<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&gt;</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#acceptedNamespaces">acceptedNamespaces</a></span></code></th>
|
|
<td class="colLast">
|
|
<div class="block">Set of namespaces (if <a href="#strictNamespace"><code>strictNamespace</code></a>) accepted by the parser.</div>
|
|
</td>
|
|
</tr>
|
|
<tr class="rowColor">
|
|
<td class="colFirst"><code>protected <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/Map.html?is-external=true" title="class or interface in java.util" class="externalLink">Map</a>&lt;<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>,&#8203;<a href="extension/Extension.html" title="enum in crawlercommons.sitemaps.extension">Extension</a>&gt;</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#extensionNamespaces">extensionNamespaces</a></span></code></th>
|
|
<td class="colLast">
|
|
<div class="block">Map of sitemap extension namespaces required to find the right extension
|
|
handler.</div>
|
|
</td>
|
|
</tr>
|
|
<tr class="altColor">
|
|
<td class="colFirst"><code>static org.slf4j.Logger</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#LOG">LOG</a></span></code></th>
|
|
<td class="colLast"> </td>
|
|
</tr>
|
|
<tr class="rowColor">
|
|
<td class="colFirst"><code>static int</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_BYTES_ALLOWED">MAX_BYTES_ALLOWED</a></span></code></th>
|
|
<td class="colLast">
|
|
<div class="block">Sitemaps (including sitemap index files) "must be no larger than
|
|
50MB (52,428,800 bytes)" as specified in the
|
|
<a href="https://www.sitemaps.org/protocol.html#index">Sitemaps XML
|
|
format</a> (before Nov.</div>
|
|
</td>
|
|
</tr>
|
|
<tr class="altColor">
|
|
<td class="colFirst"><code>protected boolean</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#strict">strict</a></span></code></th>
|
|
<td class="colLast">
|
|
<div class="block">True (by default) meaning that invalid URLs should be rejected, as the
|
|
official docs allow the siteMapURLs to be only under the base url:
|
|
https://www.sitemaps.org/protocol.html#location</div>
|
|
</td>
|
|
</tr>
|
|
<tr class="rowColor">
|
|
<td class="colFirst"><code>protected boolean</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#strictNamespace">strictNamespace</a></span></code></th>
|
|
<td class="colLast">
|
|
<div class="block">Indicates whether the parser should work with the namespace from the
|
|
specifications or any namespace.</div>
|
|
</td>
|
|
</tr>
|
|
</table>
|
|
</li>
|
|
</ul>
|
|
</section>
|
|
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
|
|
<section>
|
|
<ul class="blockList">
|
|
<li class="blockList"><a id="constructor.summary">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Constructor Summary</h3>
|
|
<table class="memberSummary">
|
|
<caption><span>Constructors</span><span class="tabEnd"> </span></caption>
|
|
<tr>
|
|
<th class="colFirst" scope="col">Constructor</th>
|
|
<th class="colLast" scope="col">Description</th>
|
|
</tr>
|
|
<tr class="altColor">
|
|
<th class="colConstructorName" scope="row"><code><span class="memberNameLink"><a href="#%3Cinit%3E()">SiteMapParser</a></span>()</code></th>
|
|
<td class="colLast">
|
|
<div class="block">SiteMapParser with strict location validation (<a href="#isStrict()"><code>isStrict()</code></a>) and not
|
|
allowing partially parsed content.</div>
|
|
</td>
|
|
</tr>
|
|
<tr class="rowColor">
|
|
<th class="colConstructorName" scope="row"><code><span class="memberNameLink"><a href="#%3Cinit%3E(boolean)">SiteMapParser</a></span>​(boolean strict)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">SiteMapParser with configurable location validation, not allowing
|
|
partially parsed content.</div>
|
|
</td>
|
|
</tr>
|
|
<tr class="altColor">
|
|
<th class="colConstructorName" scope="row"><code><span class="memberNameLink"><a href="#%3Cinit%3E(boolean,boolean)">SiteMapParser</a></span>​(boolean strict,
|
|
boolean allowPartial)</code></th>
|
|
<td class="colLast"> </td>
|
|
</tr>
|
|
</table>
|
|
</li>
|
|
</ul>
|
|
</section>
|
|
<!-- ========== METHOD SUMMARY =========== -->
|
|
<section>
|
|
<ul class="blockList">
|
|
<li class="blockList"><a id="method.summary">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Method Summary</h3>
|
|
<table class="memberSummary">
|
|
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption>
|
|
<tr>
|
|
<th class="colFirst" scope="col">Modifier and Type</th>
|
|
<th class="colSecond" scope="col">Method</th>
|
|
<th class="colLast" scope="col">Description</th>
|
|
</tr>
|
|
<tr id="i0" class="altColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#addAcceptedNamespace(java.lang.String)">addAcceptedNamespace</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> namespaceUri)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Add namespace URI to set of accepted namespaces.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i1" class="rowColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#addAcceptedNamespace(java.lang.String%5B%5D)">addAcceptedNamespace</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>[] namespaceUris)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Add namespace URIs to set of accepted namespaces.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i2" class="altColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#enableExtension(crawlercommons.sitemaps.extension.Extension)">enableExtension</a></span>​(<a href="extension/Extension.html" title="enum in crawlercommons.sitemaps.extension">Extension</a> extension)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Enable a support for a sitemap extension in the parser.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i3" class="rowColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#enableExtensions()">enableExtensions</a></span>()</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Enable all supported sitemap extensions in the parser.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i4" class="altColor">
|
|
<td class="colFirst"><code>boolean</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#isStrict()">isStrict</a></span>()</code></th>
|
|
<td class="colLast"> </td>
|
|
</tr>
|
|
<tr id="i5" class="rowColor">
|
|
<td class="colFirst"><code>boolean</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#isStrictNamespace()">isStrictNamespace</a></span>()</code></th>
|
|
<td class="colLast"> </td>
|
|
</tr>
|
|
<tr id="i6" class="altColor">
|
|
<td class="colFirst"><code><a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a></code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#parseSiteMap(byte%5B%5D,java.net.URL)">parseSiteMap</a></span>​(byte[] content,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> url)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Parse a sitemap, given the content bytes and the URL.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i7" class="rowColor">
|
|
<td class="colFirst"><code><a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a></code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#parseSiteMap(java.lang.String,byte%5B%5D,crawlercommons.sitemaps.AbstractSiteMap)">parseSiteMap</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> contentType,
|
|
byte[] content,
|
|
<a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> sitemap)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Returns a processed copy of an unprocessed sitemap object, i.e.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i8" class="altColor">
|
|
<td class="colFirst"><code><a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a></code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#parseSiteMap(java.lang.String,byte%5B%5D,java.net.URL)">parseSiteMap</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> contentType,
|
|
byte[] content,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> url)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Parse a sitemap, given the MIME type, the content bytes, and the URL.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i9" class="rowColor">
|
|
<td class="colFirst"><code><a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a></code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#parseSiteMap(java.net.URL)">parseSiteMap</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> onlineSitemapUrl)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Returns a SiteMap or SiteMapIndex given an online sitemap URL
|
|
|
|
Please note that this method is a static method which goes online and
|
|
fetches the sitemap then parses it
|
|
|
|
This method is a convenience method for a user who has a sitemap URL and
|
|
wants a "Keep it simple" way to parse it.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i10" class="altColor">
|
|
<td class="colFirst"><code>protected <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a></code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#processGzippedXML(java.net.URL,byte%5B%5D)">processGzippedXML</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> url,
|
|
byte[] response)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Decompress the gzipped content and process the resulting XML Sitemap.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i11" class="rowColor">
|
|
<td class="colFirst"><code>protected <a href="SiteMap.html" title="class in crawlercommons.sitemaps">SiteMap</a></code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#processText(java.net.URL,byte%5B%5D)">processText</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> sitemapUrl,
|
|
byte[] content)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Process a text-based Sitemap.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i12" class="altColor">
|
|
<td class="colFirst"><code>protected <a href="SiteMap.html" title="class in crawlercommons.sitemaps">SiteMap</a></code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#processText(java.net.URL,java.io.InputStream)">processText</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> sitemapUrl,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/InputStream.html?is-external=true" title="class or interface in java.io" class="externalLink">InputStream</a> stream)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Process a text-based Sitemap.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i13" class="rowColor">
|
|
<td class="colFirst"><code>protected <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a></code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#processXml(java.net.URL,byte%5B%5D)">processXml</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> sitemapUrl,
|
|
byte[] xmlContent)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Parse the given XML content.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i14" class="altColor">
|
|
<td class="colFirst"><code>protected <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a></code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#processXml(java.net.URL,org.xml.sax.InputSource)">processXml</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> sitemapUrl,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/org/xml/sax/InputSource.html?is-external=true" title="class or interface in org.xml.sax" class="externalLink">InputSource</a> is)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Parse the given XML content.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i15" class="rowColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#setAllowDocTypeDefinitions(boolean)">setAllowDocTypeDefinitions</a></span>​(boolean allowDocTypeDefinitions)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Sets if the parser allows a DTD in sitemaps or feeds.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i16" class="altColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#setStrictNamespace(boolean)">setStrictNamespace</a></span>​(boolean s)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Sets the parser to allow any XML namespace or just the one from the
|
|
specification, or any accepted namespace (see
|
|
<a href="#addAcceptedNamespace(java.lang.String)"><code>addAcceptedNamespace(String)</code></a>).</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i17" class="rowColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#setURLFilter(crawlercommons.filters.URLFilter)">setURLFilter</a></span>​(<a href="../filters/URLFilter.html" title="class in crawlercommons.filters">URLFilter</a> filter)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Use <a href="../filters/URLFilter.html" title="class in crawlercommons.filters"><code>URLFilter</code></a> to filter URLs, eg.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i18" class="altColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#setURLFilter(java.util.function.Function)">setURLFilter</a></span>&#8203;(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/function/Function.html?is-external=true" title="class or interface in java.util.function" class="externalLink">Function</a>&lt;<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>,&#8203;<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&gt; filter)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Set URL filter function to normalize URLs found in sitemaps or filter
|
|
URLs away if the function returns null.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i19" class="rowColor">
|
|
<td class="colFirst"><code>static boolean</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#urlIsValid(java.lang.String,java.lang.String)">urlIsValid</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> sitemapBaseUrl,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> testUrl)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">See if testUrl is under sitemapBaseUrl.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i20" class="altColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#walkSiteMap(crawlercommons.sitemaps.AbstractSiteMap,java.util.function.Consumer)">walkSiteMap</a></span>​(<a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> sitemap,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/function/Consumer.html?is-external=true" title="class or interface in java.util.function" class="externalLink">Consumer</a>&lt;<a href="SiteMapURL.html" title="class in crawlercommons.sitemaps">SiteMapURL</a>&gt; action)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Traverse a sitemap, recursively fetching and traversing the content of
|
|
any enclosed sitemap index, and performing the specified action for each
|
|
sitemap URL until all URLs have been processed or the action throws an
|
|
exception.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i21" class="rowColor">
|
|
<td class="colFirst"><code>void</code></td>
|
|
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#walkSiteMap(java.net.URL,java.util.function.Consumer)">walkSiteMap</a></span>​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> onlineSitemapUrl,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/function/Consumer.html?is-external=true" title="class or interface in java.util.function" class="externalLink">Consumer</a>&lt;<a href="SiteMapURL.html" title="class in crawlercommons.sitemaps">SiteMapURL</a>&gt; action)</code></th>
|
|
<td class="colLast">
|
|
<div class="block">Fetch a sitemap from the specified URL, recursively fetching and
|
|
traversing the content of any enclosed sitemap index, and performing the
|
|
specified action for each sitemap URL until all URLs have been processed
|
|
or the action throws an exception.</div>
|
|
</td>
|
|
</tr>
|
|
</table>
|
|
<ul class="blockList">
|
|
<li class="blockList"><a id="methods.inherited.from.class.java.lang.Object">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Methods inherited from class java.lang.<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang" class="externalLink">Object</a></h3>
|
|
<code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#clone()" title="class or interface in java.lang" class="externalLink">clone</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#equals(java.lang.Object)" title="class or interface in java.lang" class="externalLink">equals</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#finalize()" title="class or interface in java.lang" class="externalLink">finalize</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#getClass()" title="class or interface in java.lang" class="externalLink">getClass</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#hashCode()" title="class or interface in java.lang" class="externalLink">hashCode</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#notify()" title="class or interface in java.lang" class="externalLink">notify</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#notifyAll()" title="class or interface in java.lang" class="externalLink">notifyAll</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#toString()" title="class or interface in java.lang" class="externalLink">toString</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#wait()" title="class or interface in java.lang" class="externalLink">wait</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#wait(long)" title="class or interface in java.lang" class="externalLink">wait</a>, <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/Object.html?is-external=true#wait(long,int)" title="class or interface in java.lang" class="externalLink">wait</a></code></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</section>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="details">
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<!-- ============ FIELD DETAIL =========== -->
|
|
<section>
|
|
<ul class="blockList">
|
|
<li class="blockList"><a id="field.detail">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Field Detail</h3>
|
|
<a id="LOG">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>LOG</h4>
|
|
<pre>public static final org.slf4j.Logger LOG</pre>
|
|
</li>
|
|
</ul>
|
|
<a id="MAX_BYTES_ALLOWED">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>MAX_BYTES_ALLOWED</h4>
|
|
<pre>public static final int MAX_BYTES_ALLOWED</pre>
|
|
<div class="block">Sitemaps (including sitemap index files) "must be no larger than
|
|
50MB (52,428,800 bytes)" as specified in the
|
|
<a href="https://www.sitemaps.org/protocol.html#index">Sitemaps XML
|
|
format</a> (before Nov. 2016 the limit has been 10MB).</div>
|
|
<dl>
|
|
<dt><span class="seeLabel">See Also:</span></dt>
|
|
<dd><a href="../../constant-values.html#crawlercommons.sitemaps.SiteMapParser.MAX_BYTES_ALLOWED">Constant Field Values</a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="strict">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>strict</h4>
|
|
<pre>protected boolean strict</pre>
|
|
<div class="block">True (by default) meaning that invalid URLs should be rejected, as the
|
|
official docs allow the siteMapURLs to be only under the base url:
|
|
https://www.sitemaps.org/protocol.html#location</div>
|
|
</li>
|
|
</ul>
|
|
<a id="strictNamespace">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>strictNamespace</h4>
|
|
<pre>protected boolean strictNamespace</pre>
|
|
<div class="block">Indicates whether the parser should work with the namespace from the
|
|
specifications or any namespace. Defaults to false.</div>
|
|
</li>
|
|
</ul>
|
|
<a id="acceptedNamespaces">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>acceptedNamespaces</h4>
|
|
<pre>protected <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/Set.html?is-external=true" title="class or interface in java.util" class="externalLink">Set</a>&lt;<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&gt; acceptedNamespaces</pre>
|
|
<div class="block">Set of namespaces (if <a href="#strictNamespace"><code>strictNamespace</code></a>) accepted by the parser. URLs from other namespaces are ignored.</div>
|
|
</li>
|
|
</ul>
|
|
<a id="extensionNamespaces">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockListLast">
|
|
<li class="blockList">
|
|
<h4>extensionNamespaces</h4>
|
|
<pre>protected <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/Map.html?is-external=true" title="class or interface in java.util" class="externalLink">Map</a>&lt;<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>,&#8203;<a href="extension/Extension.html" title="enum in crawlercommons.sitemaps.extension">Extension</a>&gt; extensionNamespaces</pre>
|
|
<div class="block">Map of sitemap extension namespaces required to find the right extension
|
|
handler.</div>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</section>
|
|
<!-- ========= CONSTRUCTOR DETAIL ======== -->
|
|
<section>
|
|
<ul class="blockList">
|
|
<li class="blockList"><a id="constructor.detail">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Constructor Detail</h3>
|
|
<a id="<init>()">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>SiteMapParser</h4>
|
|
<pre>public SiteMapParser()</pre>
|
|
<div class="block">SiteMapParser with strict location validation (<a href="#isStrict()"><code>isStrict()</code></a>) and not
|
|
allowing partially parsed content.</div>
|
|
</li>
|
|
</ul>
|
|
<a id="<init>(boolean)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>SiteMapParser</h4>
|
|
<pre>public SiteMapParser​(boolean strict)</pre>
|
|
<div class="block">SiteMapParser with configurable location validation, not allowing
|
|
partially parsed content.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>strict</code> - see <a href="#isStrict()"><code>isStrict()</code></a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="<init>(boolean,boolean)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockListLast">
|
|
<li class="blockList">
|
|
<h4>SiteMapParser</h4>
|
|
<pre>public SiteMapParser​(boolean strict,
|
|
boolean allowPartial)</pre>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>strict</code> - see <a href="#isStrict()"><code>isStrict()</code></a></dd>
|
|
<dd><code>allowPartial</code> - if true: allow URLs from sitemaps only partially parsed
|
|
because of format errors or truncated (incompletely fetched)
|
|
content. If false any parser error will cause an
|
|
<a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps"><code>UnknownFormatException</code></a>.</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</section>
|
|
<!-- ============ METHOD DETAIL ========== -->
|
|
<section>
|
|
<ul class="blockList">
|
|
<li class="blockList"><a id="method.detail">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Method Detail</h3>
|
|
<a id="setAllowDocTypeDefinitions(boolean)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>setAllowDocTypeDefinitions</h4>
|
|
<pre class="methodSignature">public void setAllowDocTypeDefinitions​(boolean allowDocTypeDefinitions)</pre>
|
|
<div class="block">Sets if the parser allows a DTD in sitemaps or feeds.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>allowDocTypeDefinitions</code> - true if allowed. Default is false.</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="isStrict()">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>isStrict</h4>
|
|
<pre class="methodSignature">public boolean isStrict()</pre>
|
|
<dl>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>whether invalid URLs will be rejected (where invalid means that
|
|
the URL is not under the base URL, see <a href="https://www.sitemaps.org/protocol.html#location">sitemap file
|
|
location</a>)</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="isStrictNamespace()">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>isStrictNamespace</h4>
|
|
<pre class="methodSignature">public boolean isStrictNamespace()</pre>
|
|
<dl>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>whether the parser allows any namespace or just the one from the
|
|
specification (or any namespace accepted,
|
|
<a href="#addAcceptedNamespace(java.lang.String)"><code>addAcceptedNamespace(String)</code></a>)</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="setStrictNamespace(boolean)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>setStrictNamespace</h4>
|
|
<pre class="methodSignature">public void setStrictNamespace​(boolean s)</pre>
|
|
<div class="block">Sets the parser to allow any XML namespace or just the one from the
|
|
specification, or any accepted namespace (see
|
|
<a href="#addAcceptedNamespace(java.lang.String)"><code>addAcceptedNamespace(String)</code></a>). Note enabling strict namespace
|
|
checking always adds the namespace defined by the current sitemap
|
|
specification (<a href="Namespace.html#SITEMAP"><code>Namespace.SITEMAP</code></a>) to the list of accepted
|
|
namespaces.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>s</code> - if true enable strict namespace-checking, disable if false</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="addAcceptedNamespace(java.lang.String)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>addAcceptedNamespace</h4>
|
|
<pre class="methodSignature">public void addAcceptedNamespace​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> namespaceUri)</pre>
|
|
<div class="block">Add namespace URI to set of accepted namespaces.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>namespaceUri</code> - URI of the accepted XML namespace</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="addAcceptedNamespace(java.lang.String[])">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>addAcceptedNamespace</h4>
|
|
<pre class="methodSignature">public void addAcceptedNamespace​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>[] namespaceUris)</pre>
|
|
<div class="block">Add namespace URIs to set of accepted namespaces.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>namespaceUris</code> - array of accepted XML namespace URIs</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="enableExtension(crawlercommons.sitemaps.extension.Extension)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>enableExtension</h4>
|
|
<pre class="methodSignature">public void enableExtension​(<a href="extension/Extension.html" title="enum in crawlercommons.sitemaps.extension">Extension</a> extension)</pre>
|
|
<div class="block">Enable support for a sitemap extension in the parser.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>extension</code> - sitemap extension (news, images, videos, etc.)</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="enableExtensions()">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>enableExtensions</h4>
|
|
<pre class="methodSignature">public void enableExtensions()</pre>
|
|
<div class="block">Enable all supported sitemap extensions in the parser.</div>
|
|
</li>
|
|
</ul>
|
|
<a id="setURLFilter(java.util.function.Function)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>setURLFilter</h4>
|
|
<pre class="methodSignature">public void setURLFilter&#8203;(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/function/Function.html?is-external=true" title="class or interface in java.util.function" class="externalLink">Function</a>&lt;<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>,&#8203;<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&gt; filter)</pre>
|
|
<div class="block">Set URL filter function to normalize URLs found in sitemaps or filter
|
|
URLs away if the function returns null.</div>
|
|
</li>
|
|
</ul>
|
|
<a id="setURLFilter(crawlercommons.filters.URLFilter)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>setURLFilter</h4>
|
|
<pre class="methodSignature">public void setURLFilter​(<a href="../filters/URLFilter.html" title="class in crawlercommons.filters">URLFilter</a> filter)</pre>
|
|
<div class="block">Use <a href="../filters/URLFilter.html" title="class in crawlercommons.filters"><code>URLFilter</code></a> to filter URLs, e.g. to configure that URLs found in
|
|
sitemaps are normalized by
|
|
<a href="../filters/basic/BasicURLNormalizer.html" title="class in crawlercommons.filters.basic"><code>BasicURLNormalizer</code></a>:
|
|
|
|
<pre>
|
|
sitemapParser.setURLFilter(new BasicURLNormalizer());
|
|
</pre></div>
|
|
</li>
|
|
</ul>
|
|
<a id="parseSiteMap(java.net.URL)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>parseSiteMap</h4>
|
|
<pre class="methodSignature">public <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> parseSiteMap​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> onlineSitemapUrl)
|
|
throws <a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a>,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></pre>
|
|
<div class="block">Returns a SiteMap or SiteMapIndex given an online sitemap URL.
|
|
|
|
Please note that this method goes online and
|
|
fetches the sitemap, then parses it.
|
|
|
|
This method is a convenience method for a user who has a sitemap URL and
|
|
wants a "Keep it simple" way to parse it.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>onlineSitemapUrl</code> - URL of the online sitemap</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>Extracted SiteMap/SiteMapIndex or null if the onlineSitemapUrl is
|
|
null</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></code> - if there is an error parsing the sitemap</dd>
|
|
<dd><code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></code> - if there is an error reading in the site map
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink"><code>URL</code></a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="parseSiteMap(java.lang.String,byte[],crawlercommons.sitemaps.AbstractSiteMap)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>parseSiteMap</h4>
|
|
<pre class="methodSignature">public <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> parseSiteMap​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> contentType,
|
|
byte[] content,
|
|
<a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> sitemap)
|
|
throws <a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a>,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></pre>
|
|
<div class="block">Returns a processed copy of an unprocessed sitemap object, i.e. transfer
|
|
the value of getLastModified(). Please note that the sitemap input stays
|
|
unchanged. Note that contentType is assumed to be correct; in general it
|
|
is more robust to use the method that doesn't take a contentType, but
|
|
instead detects this using Tika.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>contentType</code> - MIME type of content</dd>
|
|
<dd><code>content</code> - raw bytes of sitemap file</dd>
|
|
<dd><code>sitemap</code> - an <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps"><code>AbstractSiteMap</code></a>
|
|
implementation</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>Extracted SiteMap/SiteMapIndex</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></code> - if there is an error parsing the sitemap</dd>
|
|
<dd><code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></code> - if there is an error reading in the site map
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink"><code>URL</code></a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="parseSiteMap(byte[],java.net.URL)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>parseSiteMap</h4>
|
|
<pre class="methodSignature">public <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> parseSiteMap​(byte[] content,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> url)
|
|
throws <a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a>,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></pre>
|
|
<div class="block">Parse a sitemap, given the content bytes and the URL.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>content</code> - raw bytes of sitemap file</dd>
|
|
<dd><code>url</code> - URL to sitemap file</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>Extracted SiteMap/SiteMapIndex</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></code> - if there is an error parsing the sitemap</dd>
|
|
<dd><code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></code> - if there is an error reading in the site map
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink"><code>URL</code></a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="parseSiteMap(java.lang.String,byte[],java.net.URL)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>parseSiteMap</h4>
|
|
<pre class="methodSignature">public <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> parseSiteMap​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> contentType,
|
|
byte[] content,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> url)
|
|
throws <a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a>,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></pre>
|
|
<div class="block">Parse a sitemap, given the MIME type, the content bytes, and the URL.
|
|
Note that contentType is assumed to be correct; in general it is more
|
|
robust to use the method that doesn't take a contentType, but instead
|
|
detects this using Tika.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>contentType</code> - MIME type of content</dd>
|
|
<dd><code>content</code> - raw bytes of sitemap file</dd>
|
|
<dd><code>url</code> - URL to sitemap file</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>Extracted SiteMap/SiteMapIndex</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></code> - if there is an error parsing the sitemap</dd>
|
|
<dd><code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></code> - if there is an error reading in the site map
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink"><code>URL</code></a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="walkSiteMap(java.net.URL,java.util.function.Consumer)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>walkSiteMap</h4>
|
|
<pre class="methodSignature">public void walkSiteMap​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> onlineSitemapUrl,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/function/Consumer.html?is-external=true" title="class or interface in java.util.function" class="externalLink">Consumer</a>&lt;<a href="SiteMapURL.html" title="class in crawlercommons.sitemaps">SiteMapURL</a>&gt; action)
|
|
throws <a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a>,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></pre>
|
|
<div class="block">Fetch a sitemap from the specified URL, recursively fetching and
|
|
traversing the content of any enclosed sitemap index, and performing the
|
|
specified action for each sitemap URL until all URLs have been processed
|
|
or the action throws an exception.
|
|
<p>
|
|
This method is a convenience method for a user who has a sitemap URL and
|
|
wants a simple way to traverse it.
|
|
<p>
|
|
Exceptions thrown by the action are relayed to the caller.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>onlineSitemapUrl</code> - URL of the online sitemap</dd>
|
|
<dd><code>action</code> - The action to be performed for each element</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></code> - if there is an error parsing the sitemap</dd>
|
|
<dd><code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></code> - if there is an error fetching the content of any
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink"><code>URL</code></a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="walkSiteMap(crawlercommons.sitemaps.AbstractSiteMap,java.util.function.Consumer)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>walkSiteMap</h4>
|
|
<pre class="methodSignature">public void walkSiteMap​(<a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> sitemap,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/function/Consumer.html?is-external=true" title="class or interface in java.util.function" class="externalLink">Consumer</a>&lt;<a href="SiteMapURL.html" title="class in crawlercommons.sitemaps">SiteMapURL</a>&gt; action)
|
|
throws <a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a>,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></pre>
|
|
<div class="block">Traverse a sitemap, recursively fetching and traversing the content of
|
|
any enclosed sitemap index, and performing the specified action for each
|
|
sitemap URL until all URLs have been processed or the action throws an
|
|
exception.
|
|
<p>
|
|
This method is a convenience method for a user who has a sitemap and
|
|
wants a simple way to traverse it.
|
|
<p>
|
|
Exceptions thrown by the action are relayed to the caller.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>sitemap</code> - The sitemap to traverse</dd>
|
|
<dd><code>action</code> - The action to be performed for each element</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></code> - if there is an error parsing the sitemap</dd>
|
|
<dd><code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></code> - if there is an error fetching the content of any
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink"><code>URL</code></a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="processXml(java.net.URL,byte[])">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>processXml</h4>
|
|
<pre class="methodSignature">protected <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> processXml​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> sitemapUrl,
|
|
byte[] xmlContent)
|
|
throws <a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></pre>
|
|
<div class="block">Parse the given XML content.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>sitemapUrl</code> - URL to sitemap file</dd>
|
|
<dd><code>xmlContent</code> - the byte[] backing the sitemapUrl</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>The site map</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></code> - if there is an error parsing the sitemap</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="processText(java.net.URL,byte[])">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>processText</h4>
|
|
<pre class="methodSignature">protected <a href="SiteMap.html" title="class in crawlercommons.sitemaps">SiteMap</a> processText​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> sitemapUrl,
|
|
byte[] content)
|
|
throws <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></pre>
|
|
<div class="block">Process a text-based Sitemap. Text sitemaps only list URLs but no
|
|
priorities, last mods, etc.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>sitemapUrl</code> - URL to sitemap file</dd>
|
|
<dd><code>content</code> - the byte[] backing the sitemapUrl</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>The site map</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></code> - if there is an error reading in the site map content</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="processText(java.net.URL,java.io.InputStream)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>processText</h4>
|
|
<pre class="methodSignature">protected <a href="SiteMap.html" title="class in crawlercommons.sitemaps">SiteMap</a> processText​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> sitemapUrl,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/InputStream.html?is-external=true" title="class or interface in java.io" class="externalLink">InputStream</a> stream)
|
|
throws <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></pre>
|
|
<div class="block">Process a text-based Sitemap. Text sitemaps only list URLs but no
|
|
priorities, last mods, etc.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>sitemapUrl</code> - URL to sitemap file</dd>
|
|
<dd><code>stream</code> - content stream</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>The site map</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></code> - if there is an error reading in the site map content</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="processGzippedXML(java.net.URL,byte[])">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>processGzippedXML</h4>
|
|
<pre class="methodSignature">protected <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> processGzippedXML​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> url,
|
|
byte[] response)
|
|
throws <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a>,
|
|
<a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></pre>
|
|
<div class="block">Decompress the gzipped content and process the resulting XML Sitemap.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>url</code> - URL of the gzipped content</dd>
|
|
<dd><code>response</code> - Gzipped content</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>the site map</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></code> - if there is an error parsing the gzip</dd>
|
|
<dd><code><a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io" class="externalLink">IOException</a></code> - if there is an error reading in the gzip <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink"><code>URL</code></a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="processXml(java.net.URL,org.xml.sax.InputSource)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>processXml</h4>
|
|
<pre class="methodSignature">protected <a href="AbstractSiteMap.html" title="class in crawlercommons.sitemaps">AbstractSiteMap</a> processXml​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink">URL</a> sitemapUrl,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/org/xml/sax/InputSource.html?is-external=true" title="class or interface in org.xml.sax" class="externalLink">InputSource</a> is)
|
|
throws <a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></pre>
|
|
<div class="block">Parse the given XML content.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>sitemapUrl</code> - a sitemap <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/net/URL.html?is-external=true" title="class or interface in java.net" class="externalLink"><code>URL</code></a></dd>
|
|
<dd><code>is</code> - an <a href="https://docs.oracle.com/en/java/javase/11/docs/api/org/xml/sax/InputSource.html?is-external=true" title="class or interface in org.xml.sax" class="externalLink"><code>InputSource</code></a> backing the sitemap</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>the site map</dd>
|
|
<dt><span class="throwsLabel">Throws:</span></dt>
|
|
<dd><code><a href="UnknownFormatException.html" title="class in crawlercommons.sitemaps">UnknownFormatException</a></code> - if there is an error parsing the
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/org/xml/sax/InputSource.html?is-external=true" title="class or interface in org.xml.sax" class="externalLink"><code>InputSource</code></a></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a id="urlIsValid(java.lang.String,java.lang.String)">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockListLast">
|
|
<li class="blockList">
|
|
<h4>urlIsValid</h4>
|
|
<pre class="methodSignature">public static boolean urlIsValid​(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> sitemapBaseUrl,
|
|
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a> testUrl)</pre>
|
|
<div class="block">See if testUrl is under sitemapBaseUrl. Only URLs under sitemapBaseUrl
|
|
are valid.</div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>sitemapBaseUrl</code> - the base URL of the sitemap</dd>
|
|
<dd><code>testUrl</code> - the URL to be tested</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>true if testUrl is under sitemapBaseUrl, false otherwise</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</section>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
</main>
|
|
<!-- ========= END OF CLASS DATA ========= -->
|
|
<footer role="contentinfo">
|
|
<nav role="navigation">
|
|
<!-- ======= START OF BOTTOM NAVBAR ====== -->
|
|
<div class="bottomNav"><a id="navbar.bottom">
|
|
<!-- -->
|
|
</a>
|
|
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
|
|
<a id="navbar.bottom.firstrow">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="navList" title="Navigation">
|
|
<li><a href="../../index.html">Overview</a></li>
|
|
<li><a href="package-summary.html">Package</a></li>
|
|
<li class="navBarCell1Rev">Class</li>
|
|
<li><a href="class-use/SiteMapParser.html">Use</a></li>
|
|
<li><a href="package-tree.html">Tree</a></li>
|
|
<li><a href="../../deprecated-list.html">Deprecated</a></li>
|
|
<li><a href="../../index-all.html">Index</a></li>
|
|
<li><a href="../../help-doc.html">Help</a></li>
|
|
</ul>
|
|
</div>
|
|
<div class="subNav">
|
|
<ul class="navList" id="allclasses_navbar_bottom">
|
|
<li><a href="../../allclasses.html">All Classes</a></li>
|
|
</ul>
|
|
<div>
|
|
<script type="text/javascript"><!--
|
|
allClassesLink = document.getElementById("allclasses_navbar_bottom");
|
|
if(window==top) {
|
|
allClassesLink.style.display = "block";
|
|
}
|
|
else {
|
|
allClassesLink.style.display = "none";
|
|
}
|
|
//-->
|
|
</script>
|
|
<noscript>
|
|
<div>JavaScript is disabled on your browser.</div>
|
|
</noscript>
|
|
</div>
|
|
<div>
|
|
<ul class="subNavList">
|
|
<li>Summary: </li>
|
|
<li>Nested | </li>
|
|
<li><a href="#field.summary">Field</a> | </li>
|
|
<li><a href="#constructor.summary">Constr</a> | </li>
|
|
<li><a href="#method.summary">Method</a></li>
|
|
</ul>
|
|
<ul class="subNavList">
|
|
<li>Detail: </li>
|
|
<li><a href="#field.detail">Field</a> | </li>
|
|
<li><a href="#constructor.detail">Constr</a> | </li>
|
|
<li><a href="#method.detail">Method</a></li>
|
|
</ul>
|
|
</div>
|
|
<a id="skip.navbar.bottom">
|
|
<!-- -->
|
|
</a></div>
|
|
<!-- ======== END OF BOTTOM NAVBAR ======= -->
|
|
</nav>
|
|
<p class="legalCopy"><small>Copyright © 2009–2023 <a href="https://github.com/crawler-commons">Crawler-Commons</a>. All rights reserved.</small></p>
|
|
</footer>
|
|
</body>
|
|
</html>
|