<!--
mirror of https://github.com/crawler-commons/crawler-commons
synced 2024-05-18 18:06:05 +02:00
221 lines, 9.7 KiB, HTML
-->
<!DOCTYPE HTML>
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (11.0.19) on Thu Jul 13 10:31:24 CEST 2023 -->
<title>Uses of Class crawlercommons.robots.SimpleRobotRules (Crawler-commons 1.4 API)</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="dc.created" content="2023-07-13">
<!-- Stylesheets: the API documentation stylesheet, then jQuery UI and its overrides.
     All paths are relative to the documentation root, three levels up. -->
<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
<link rel="stylesheet" type="text/css" href="../../../jquery/jquery-ui.min.css" title="Style">
<link rel="stylesheet" type="text/css" href="../../../jquery-ui.overrides.css" title="Style">
<!-- Scripts are loaded synchronously, in this order; the inline scripts in the
     body call loadScripts() and $(), which are not defined in this file and are
     presumably provided by script.js and jQuery (confirm against those files). -->
<script type="text/javascript" src="../../../script.js"></script>
<script type="text/javascript" src="../../../jquery/jszip/dist/jszip.min.js"></script>
<script type="text/javascript" src="../../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
<!-- Conditional comment: the script below is only seen by legacy Internet Explorer. -->
<!--[if IE]>
<script type="text/javascript" src="../../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
<![endif]-->
<script type="text/javascript" src="../../../jquery/jquery-3.6.1.min.js"></script>
<script type="text/javascript" src="../../../jquery/jquery-ui.min.js"></script>
</head>
<body>
<script type="text/javascript">
// Copy this page's title onto the parent window's document, unless the URL
// contains the 'is-external=true' marker. When the page is not framed,
// `parent` is the window itself.
try {
    if (location.href.indexOf('is-external=true') === -1) {
        parent.document.title="Uses of Class crawlercommons.robots.SimpleRobotRules (Crawler-commons 1.4 API)";
    }
}
catch(err) {
    // Deliberately ignored: updating the parent title is best effort and the
    // page must keep loading if the access throws.
}
// Page-level globals; presumably read by script.js (confirm there).
var pathtoroot = "../../../";
var useModuleDirectories = false;
// loadScripts is not defined in this file; presumably provided by script.js.
loadScripts(document, 'script');</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<header role="banner">
<nav role="navigation">
<div class="fixedNav">
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a id="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a id="navbar.top.firstrow">
<!-- -->
</a>
<!-- Primary navigation; "Use" is the current page and therefore not a link. -->
<ul class="navList" title="Navigation">
<li><a href="../../../index.html">Overview</a></li>
<li><a href="../package-summary.html">Package</a></li>
<li><a href="../SimpleRobotRules.html" title="class in crawlercommons.robots">Class</a></li>
<li class="navBarCell1Rev">Use</li>
<li><a href="../package-tree.html">Tree</a></li>
<li><a href="../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../index-all.html">Index</a></li>
<li><a href="../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../allclasses.html">All Classes</a></li>
</ul>
<!-- Search controls start out disabled; presumably enabled by the search
     script once it has loaded (confirm against the bundled scripts). -->
<ul class="navListSearch">
<li><label for="search">SEARCH:</label>
<input type="text" id="search" value="search" disabled>
<input type="reset" id="reset" value="reset" disabled>
</li>
</ul>
<div>
<script type="text/javascript">
// Show the "All Classes" list only when this page is the top-level window;
// hide it when the page is embedded in another window.
// Declared with var so the global is created explicitly, not implicitly.
var allClassesLink = document.getElementById("allclasses_navbar_top");
if (window === top) {
    allClassesLink.style.display = "block";
}
else {
    allClassesLink.style.display = "none";
}
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
</div>
<a id="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
</div>
<div class="navPadding">&nbsp;</div>
<script type="text/javascript">
// Give .navPadding a top padding equal to the computed height of .fixedNav.
$('.navPadding').css('padding-top', $('.fixedNav').css("height"));
</script>
</nav>
</header>
<main role="main">
<div class="header">
<h2 title="Uses of Class crawlercommons.robots.SimpleRobotRules" class="title">Uses of Class<br>crawlercommons.robots.SimpleRobotRules</h2>
</div>
<div class="classUseContainer">
<ul class="blockList">
<li class="blockList">
<!-- Summary table: one row per package that uses SimpleRobotRules; each row
     links to the matching section further down this page. -->
<table class="useSummary">
<caption><span>Packages that use <a href="../SimpleRobotRules.html" title="class in crawlercommons.robots">SimpleRobotRules</a></span><span class="tabEnd">&nbsp;</span></caption>
<thead>
<tr>
<th class="colFirst" scope="col">Package</th>
<th class="colLast" scope="col">Description</th>
</tr>
</thead>
<tbody>
<tr class="altColor">
<th class="colFirst" scope="row"><a href="#crawlercommons.robots">crawlercommons.robots</a></th>
<td class="colLast">
<div class="block">The robots package contains all of the robots.txt rule inference, parsing and utilities contained within Crawler Commons.</div>
</td>
</tr>
</tbody>
</table>
</li>
<li class="blockList">
<ul class="blockList">
<li class="blockList">
<section><a id="crawlercommons.robots">
<!-- -->
</a>
<h3>Uses of <a href="../SimpleRobotRules.html" title="class in crawlercommons.robots">SimpleRobotRules</a> in <a href="../package-summary.html">crawlercommons.robots</a></h3>
<!-- Detail table: methods whose return type is SimpleRobotRules. Generic type
     brackets in signatures are written as character references so they are
     never read as markup. -->
<table class="useSummary">
<caption><span>Methods in <a href="../package-summary.html">crawlercommons.robots</a> that return <a href="../SimpleRobotRules.html" title="class in crawlercommons.robots">SimpleRobotRules</a></span><span class="tabEnd">&nbsp;</span></caption>
<thead>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colSecond" scope="col">Method</th>
<th class="colLast" scope="col">Description</th>
</tr>
</thead>
<tbody>
<tr class="altColor">
<td class="colFirst"><code><a href="../SimpleRobotRules.html" title="class in crawlercommons.robots">SimpleRobotRules</a></code></td>
<th class="colSecond" scope="row"><span class="typeNameLabel">SimpleRobotRulesParser.</span><code><span class="memberNameLink"><a href="../SimpleRobotRulesParser.html#failedFetch(int)">failedFetch</a></span>&#8203;(int&nbsp;httpStatusCode)</code></th>
<td class="colLast">
<div class="block">The fetch of robots.txt failed, so return rules appropriate for the given
HTTP status code.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../SimpleRobotRules.html" title="class in crawlercommons.robots">SimpleRobotRules</a></code></td>
<th class="colSecond" scope="row"><span class="typeNameLabel">SimpleRobotRulesParser.</span><code><span class="memberNameLink"><a href="../SimpleRobotRulesParser.html#parseContent(java.lang.String,byte%5B%5D,java.lang.String,java.lang.String)">parseContent</a></span>&#8203;(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&nbsp;url,
byte[]&nbsp;content,
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&nbsp;contentType,
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&nbsp;robotNames)</code></th>
<td class="colLast">
<div class="block"><span class="deprecatedLabel">Deprecated.</span></div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../SimpleRobotRules.html" title="class in crawlercommons.robots">SimpleRobotRules</a></code></td>
<th class="colSecond" scope="row"><span class="typeNameLabel">SimpleRobotRulesParser.</span><code><span class="memberNameLink"><a href="../SimpleRobotRulesParser.html#parseContent(java.lang.String,byte%5B%5D,java.lang.String,java.util.Collection)">parseContent</a></span>&#8203;(<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&nbsp;url,
byte[]&nbsp;content,
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&nbsp;contentType,
<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/util/Collection.html?is-external=true" title="class or interface in java.util" class="externalLink">Collection</a>&lt;<a href="https://docs.oracle.com/en/java/javase/11/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang" class="externalLink">String</a>&gt;&nbsp;robotNames)</code></th>
<td class="colLast">
<div class="block">Parse the robots.txt file in <i>content</i>, and return rules appropriate
for processing paths by <i>userAgent</i>.</div>
</td>
</tr>
</tbody>
</table>
</section>
</li>
</ul>
</li>
</ul>
</div>
</main>
<footer role="contentinfo">
<nav role="navigation">
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a id="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a id="navbar.bottom.firstrow">
<!-- -->
</a>
<!-- Same link set as the top navbar; "Use" is the current page and not a link. -->
<ul class="navList" title="Navigation">
<li><a href="../../../index.html">Overview</a></li>
<li><a href="../package-summary.html">Package</a></li>
<li><a href="../SimpleRobotRules.html" title="class in crawlercommons.robots">Class</a></li>
<li class="navBarCell1Rev">Use</li>
<li><a href="../package-tree.html">Tree</a></li>
<li><a href="../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../index-all.html">Index</a></li>
<li><a href="../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../allclasses.html">All Classes</a></li>
</ul>
<div>
<script type="text/javascript">
// Show the bottom "All Classes" list only when this page is the top-level
// window; hide it when the page is embedded in another window.
// Declared with var so the global is created explicitly, not implicitly.
var allClassesLink = document.getElementById("allclasses_navbar_bottom");
if (window === top) {
    allClassesLink.style.display = "block";
}
else {
    allClassesLink.style.display = "none";
}
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
</div>
<a id="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</nav>
<p class="legalCopy"><small>Copyright © 2009–2023 <a href="https://github.com/crawler-commons">Crawler-Commons</a>. All rights reserved.</small></p>
</footer>
</body>
</html>