<!-- mirror of https://github.com/crawler-commons/crawler-commons; synced 2024-05-18 18:06:05 +02:00; original file: 218 lines, 7.6 KiB, HTML -->
<!DOCTYPE HTML>
<!-- NewPage -->
<html lang="en">
<head>
  <!-- Character encoding is declared first so it applies to everything that follows. -->
  <meta charset="utf-8">
  <!-- Generated by javadoc (11.0.19) on Thu Jul 13 10:31:24 CEST 2023 -->
  <title>crawlercommons.robots (Crawler-commons 1.4 API)</title>
  <meta name="dc.created" content="2023-07-13">
  <link rel="stylesheet" href="../../stylesheet.css" title="Style">
  <link rel="stylesheet" href="../../jquery/jquery-ui.min.css" title="Style">
  <link rel="stylesheet" href="../../jquery-ui.overrides.css" title="Style">
  <!--
    These scripts are intentionally loaded synchronously and in this order:
    inline scripts in the body call loadScripts() and $() while the page is
    still being parsed, so neither "defer" nor "async" may be added here.
  -->
  <script src="../../script.js"></script>
  <script src="../../jquery/jszip/dist/jszip.min.js"></script>
  <script src="../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
  <!--[if IE]>
  <script type="text/javascript" src="../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
  <![endif]-->
  <script src="../../jquery/jquery-3.6.1.min.js"></script>
  <script src="../../jquery/jquery-ui.min.js"></script>
</head>
<body>
<script>
  // Copy this page's title onto the parent window's document (which is this
  // same document when the page is not framed), unless the URL marks the page
  // as opened via an external link.
  try {
    if (location.href.indexOf('is-external=true') === -1) {
      parent.document.title = "crawlercommons.robots (Crawler-commons 1.4 API)";
    }
  } catch (err) {
    // Deliberately ignored: the title update is best-effort, e.g. when
    // parent.document is not accessible from this frame.
  }
  // Kept as top-level "var" declarations so both names stay global.
  // NOTE(review): presumably read by the externally loaded javadoc scripts;
  // confirm before renaming or scoping them.
  var pathtoroot = "../../";
  var useModuleDirectories = false;
  loadScripts(document, 'script');
</script>
<noscript>
  <div>JavaScript is disabled on your browser.</div>
</noscript>
<header role="banner">
  <!-- Labelled because the page has a second navigation landmark in the footer. -->
  <nav role="navigation" aria-label="Top">
    <div class="fixedNav">
      <!-- ========= START OF TOP NAVBAR ======= -->
      <div class="topNav"><a id="navbar.top">
        <!-- -->
        </a>
        <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
        <a id="navbar.top.firstrow">
        <!-- -->
        </a>
        <ul class="navList" title="Navigation">
          <li><a href="../../index.html">Overview</a></li>
          <li class="navBarCell1Rev">Package</li>
          <li>Class</li>
          <li><a href="package-use.html">Use</a></li>
          <li><a href="package-tree.html">Tree</a></li>
          <li><a href="../../deprecated-list.html">Deprecated</a></li>
          <li><a href="../../index-all.html">Index</a></li>
          <li><a href="../../help-doc.html">Help</a></li>
        </ul>
      </div>
      <div class="subNav">
        <ul class="navList" id="allclasses_navbar_top">
          <li><a href="../../allclasses.html">All Classes</a></li>
        </ul>
        <ul class="navListSearch">
          <li><label for="search">SEARCH:</label>
            <input type="text" id="search" value="search" disabled>
            <input type="reset" id="reset" value="reset" disabled>
          </li>
        </ul>
        <div>
          <script>
            // Show the "All Classes" list only when this page is the top-level
            // window; hide it when the page is displayed inside a frame.
            // Declared with "var" so the name is still a window-level global,
            // as the original undeclared assignment made it.
            var allClassesLink = document.getElementById("allclasses_navbar_top");
            allClassesLink.style.display = (window === top) ? "block" : "none";
          </script>
          <noscript>
            <div>JavaScript is disabled on your browser.</div>
          </noscript>
        </div>
        <!-- Target of the "Skip navigation links" link above. -->
        <a id="skip.navbar.top">
        <!-- -->
        </a>
      </div>
      <!-- ========= END OF TOP NAVBAR ========= -->
    </div>
    <div class="navPadding">&nbsp;</div>
    <script>
      // Pad .navPadding by the computed height of .fixedNav.
      $('.navPadding').css('padding-top', $('.fixedNav').css("height"));
    </script>
  </nav>
</header>
<main role="main">
  <div class="header">
    <h1 title="Package" class="title">Package crawlercommons.robots</h1>
  </div>
  <div class="contentContainer">
    <!-- Package description; the empty anchor is the #package.description link target. -->
    <section><a id="package.description">
      <!-- -->
      </a>
      <div class="block">The robots package contains all of the robots.txt rule inference, parsing and utilities contained within Crawler Commons.</div>
    </section>
    <ul class="blockList">
      <li class="blockList">
        <!-- One row per class in the package: linked name plus summary sentence. -->
        <table class="typeSummary">
          <caption><span>Class Summary</span><span class="tabEnd">&nbsp;</span></caption>
          <thead>
            <tr>
              <th class="colFirst" scope="col">Class</th>
              <th class="colLast" scope="col">Description</th>
            </tr>
          </thead>
          <tbody>
            <tr class="altColor">
              <th class="colFirst" scope="row"><a href="BaseRobotRules.html" title="class in crawlercommons.robots">BaseRobotRules</a></th>
              <td class="colLast">
                <div class="block">Result from parsing a single robots.txt file – a set of allow/disallow rules
                  to check whether a given URL is allowed, and optionally a <a href="https://en.wikipedia.org/wiki/Robots.txt#Crawl-delay_directive">Crawl-delay</a> and <a href="https://www.sitemaps.org/protocol.html#submit_robots">Sitemap</a> URLs.</div>
              </td>
            </tr>
            <tr class="rowColor">
              <th class="colFirst" scope="row"><a href="BaseRobotsParser.html" title="class in crawlercommons.robots">BaseRobotsParser</a></th>
              <td class="colLast">
                <div class="block">Robots.txt parser definition.</div>
              </td>
            </tr>
            <tr class="altColor">
              <th class="colFirst" scope="row"><a href="SimpleRobotRules.html" title="class in crawlercommons.robots">SimpleRobotRules</a></th>
              <td class="colLast">
                <div class="block">Result from parsing a single robots.txt file – a set of allow/disallow rules
                  to check whether a given URL is allowed, and optionally a <a href="https://en.wikipedia.org/wiki/Robots.txt#Crawl-delay_directive">Crawl-delay</a> and <a href="https://www.sitemaps.org/protocol.html#submit_robots">Sitemap</a> URLs.</div>
              </td>
            </tr>
            <tr class="rowColor">
              <th class="colFirst" scope="row"><a href="SimpleRobotRules.RobotRule.html" title="class in crawlercommons.robots">SimpleRobotRules.RobotRule</a></th>
              <td class="colLast">
                <div class="block">Single rule that maps from a path prefix to an allow flag.</div>
              </td>
            </tr>
            <tr class="altColor">
              <th class="colFirst" scope="row"><a href="SimpleRobotRulesParser.html" title="class in crawlercommons.robots">SimpleRobotRulesParser</a></th>
              <td class="colLast">
                <div class="block">Robots.txt parser following RFC 9309, supporting the Sitemap and Crawl-delay
                  extensions.</div>
              </td>
            </tr>
          </tbody>
        </table>
      </li>
      <li class="blockList">
        <!-- One row per enum in the package. -->
        <table class="typeSummary">
          <caption><span>Enum Summary</span><span class="tabEnd">&nbsp;</span></caption>
          <thead>
            <tr>
              <th class="colFirst" scope="col">Enum</th>
              <th class="colLast" scope="col">Description</th>
            </tr>
          </thead>
          <tbody>
            <tr class="altColor">
              <th class="colFirst" scope="row"><a href="SimpleRobotRules.RobotRulesMode.html" title="enum in crawlercommons.robots">SimpleRobotRules.RobotRulesMode</a></th>
              <td class="colLast">&nbsp;</td>
            </tr>
          </tbody>
        </table>
      </li>
    </ul>
  </div>
</main>
<footer role="contentinfo">
  <!-- Labelled because the page has a second navigation landmark in the header. -->
  <nav role="navigation" aria-label="Bottom">
    <!-- ======= START OF BOTTOM NAVBAR ====== -->
    <div class="bottomNav"><a id="navbar.bottom">
      <!-- -->
      </a>
      <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
      <a id="navbar.bottom.firstrow">
      <!-- -->
      </a>
      <ul class="navList" title="Navigation">
        <li><a href="../../index.html">Overview</a></li>
        <li class="navBarCell1Rev">Package</li>
        <li>Class</li>
        <li><a href="package-use.html">Use</a></li>
        <li><a href="package-tree.html">Tree</a></li>
        <li><a href="../../deprecated-list.html">Deprecated</a></li>
        <li><a href="../../index-all.html">Index</a></li>
        <li><a href="../../help-doc.html">Help</a></li>
      </ul>
    </div>
    <div class="subNav">
      <ul class="navList" id="allclasses_navbar_bottom">
        <li><a href="../../allclasses.html">All Classes</a></li>
      </ul>
      <div>
        <script>
          // Show the "All Classes" list only when this page is the top-level
          // window; hide it when the page is displayed inside a frame.
          // Declared with "var" so the name is still a window-level global,
          // as the original undeclared assignment made it.
          var allClassesLink = document.getElementById("allclasses_navbar_bottom");
          allClassesLink.style.display = (window === top) ? "block" : "none";
        </script>
        <noscript>
          <div>JavaScript is disabled on your browser.</div>
        </noscript>
      </div>
      <!-- Target of the "Skip navigation links" link above. -->
      <a id="skip.navbar.bottom">
      <!-- -->
      </a>
    </div>
    <!-- ======== END OF BOTTOM NAVBAR ======= -->
  </nav>
  <p class="legalCopy"><small>Copyright © 2009–2023 <a href="https://github.com/crawler-commons">Crawler-Commons</a>. All rights reserved.</small></p>
</footer>
</body>
</html>