mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-09-21 17:01:35 +02:00
Add HTTP status code and reason to FetchedResult.
https://code.google.com/p/crawler-commons/issues/detail?id=6
This commit is contained in:
parent
4653ced854
commit
7300003e13
@ -33,7 +33,8 @@ public class FetchedResult {
|
||||
private final String _newBaseUrl;
|
||||
private final int _numRedirects;
|
||||
private final String _hostAddress;
|
||||
|
||||
private final int _statusCode; // HTTP status code
|
||||
private final String _reasonPhrase; // HTTP reason phrase, or null
|
||||
private Payload _payload;
|
||||
|
||||
public FetchedResult( String baseUrl,
|
||||
@ -46,7 +47,9 @@ public class FetchedResult {
|
||||
Payload payload,
|
||||
String newBaseUrl,
|
||||
int numRedirects,
|
||||
String hostAddress){
|
||||
String hostAddress,
|
||||
int statusCode,
|
||||
String reasonPhrase) {
|
||||
_payload = payload;
|
||||
|
||||
if (baseUrl == null) {
|
||||
@ -83,6 +86,8 @@ public class FetchedResult {
|
||||
_newBaseUrl = newBaseUrl;
|
||||
_numRedirects = numRedirects;
|
||||
_hostAddress = hostAddress;
|
||||
_statusCode = statusCode;
|
||||
_reasonPhrase = reasonPhrase;
|
||||
}
|
||||
|
||||
public Payload getPayload() {
|
||||
@ -136,4 +141,12 @@ public class FetchedResult {
|
||||
/**
 * Returns the host address stored in this result.
 * The value is the one handed to the constructor and kept in a final field.
 *
 * @return the stored host address
 */
public String getHostAddress() {
|
||||
return _hostAddress;
|
||||
}
|
||||
|
||||
/**
 * Returns the HTTP status code recorded for this fetch result.
 * The value is whatever the fetcher passed to the constructor.
 *
 * @return the HTTP status code
 */
public int getStatusCode() {
|
||||
return _statusCode;
|
||||
}
|
||||
|
||||
/**
 * Returns the HTTP reason phrase recorded for this fetch result.
 *
 * @return the HTTP reason phrase, or null if the fetcher supplied none
 */
public String getReasonPhrase() {
|
||||
return _reasonPhrase;
|
||||
}
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ public class SimpleFileFetcher extends BaseFetcher {
|
||||
long responseRate = (content.length * 1000L) / totalReadTime;
|
||||
String contentType = "application/octet-stream";
|
||||
return new FetchedResult(url, url, System.currentTimeMillis(), new Metadata(), content, contentType,
|
||||
(int)responseRate, payload, url, 0, "localhost");
|
||||
(int)responseRate, payload, url, 0, "localhost", HttpStatus.SC_OK, null);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new HttpFetchException(url, "Error fetching " + url, HttpStatus.SC_NOT_FOUND, new Metadata());
|
||||
} catch (IOException e) {
|
||||
|
@ -490,6 +490,8 @@ public class SimpleHttpFetcher extends BaseHttpFetcher {
|
||||
String contentType = "";
|
||||
String mimeType = "";
|
||||
String hostAddress = null;
|
||||
int statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR;
|
||||
String reasonPhrase = null;
|
||||
|
||||
// Create a local instance of cookie store, and bind to local context
|
||||
// Without this we get killed w/lots of threads, due to sync() on single cookie store.
|
||||
@ -513,9 +515,11 @@ public class SimpleHttpFetcher extends BaseHttpFetcher {
|
||||
headerMap.add(header.getName(), header.getValue());
|
||||
}
|
||||
|
||||
int httpStatus = response.getStatusLine().getStatusCode();
|
||||
statusCode = response.getStatusLine().getStatusCode();
|
||||
reasonPhrase = response.getStatusLine().getReasonPhrase();
|
||||
|
||||
if (LOGGER.isTraceEnabled()) {
|
||||
fetchTrace.append("; status code: " + httpStatus);
|
||||
fetchTrace.append("; status code: " + statusCode);
|
||||
if (headerMap.get(HttpHeaders.CONTENT_LENGTH) != null) {
|
||||
fetchTrace.append("; Content-Length: " + headerMap.get(HttpHeaders.CONTENT_LENGTH));
|
||||
}
|
||||
@ -525,9 +529,9 @@ public class SimpleHttpFetcher extends BaseHttpFetcher {
|
||||
}
|
||||
}
|
||||
|
||||
if ((httpStatus < 200) || (httpStatus >= 300)) {
|
||||
if ((statusCode < 200) || (statusCode >= 300)) {
|
||||
// We can't just check against SC_OK, as some wackos return 201, 202, etc
|
||||
throw new HttpFetchException(url, "Error fetching " + url, httpStatus, headerMap);
|
||||
throw new HttpFetchException(url, "Error fetching " + url + " due to \"" + reasonPhrase + "\"", statusCode, headerMap);
|
||||
}
|
||||
|
||||
redirectedUrl = extractRedirectedUrl(url, localContext);
|
||||
@ -754,7 +758,9 @@ public class SimpleHttpFetcher extends BaseHttpFetcher {
|
||||
payload,
|
||||
newBaseUrl,
|
||||
numRedirects,
|
||||
hostAddress);
|
||||
hostAddress,
|
||||
statusCode,
|
||||
reasonPhrase);
|
||||
}
|
||||
|
||||
private boolean isTextMimeType(String mimeType) {
|
||||
|
@ -40,6 +40,7 @@ public class SimpleFileFetcherTest {
|
||||
SimpleFileFetcher fetcher = new SimpleFileFetcher();
|
||||
FetchedResult result = fetcher.get(url);
|
||||
Assert.assertEquals(0, result.getNumRedirects());
|
||||
Assert.assertEquals(HttpStatus.SC_OK, result.getStatusCode());
|
||||
|
||||
String fetchedContent = new String(result.getContent(), "us-ascii");
|
||||
Assert.assertEquals("Now is the time for all good men to come to the aid of their country.", fetchedContent);
|
||||
@ -66,6 +67,7 @@ public class SimpleFileFetcherTest {
|
||||
|
||||
SimpleFileFetcher fetcher = new SimpleFileFetcher();
|
||||
FetchedResult result = fetcher.get(url);
|
||||
Assert.assertEquals(HttpStatus.SC_OK, result.getStatusCode());
|
||||
Assert.assertEquals(0, result.getContentLength());
|
||||
Assert.assertEquals(0, result.getResponseRate());
|
||||
}
|
||||
|
@ -47,6 +47,7 @@ import crawlercommons.fetcher.AbortedFetchException;
|
||||
import crawlercommons.fetcher.AbortedFetchReason;
|
||||
import crawlercommons.fetcher.BaseFetcher;
|
||||
import crawlercommons.fetcher.FetchedResult;
|
||||
import crawlercommons.fetcher.HttpFetchException;
|
||||
import crawlercommons.fetcher.IOFetchException;
|
||||
import crawlercommons.fetcher.Payload;
|
||||
import crawlercommons.fetcher.RedirectFetchException;
|
||||
@ -84,6 +85,7 @@ public class SimpleHttpFetcherTest {
|
||||
return _webServer.getServer();
|
||||
}
|
||||
|
||||
// TODO - merge this code with RedirectResponseHandler class in crawlercommons.test package.
|
||||
@SuppressWarnings("serial")
|
||||
private class RedirectResponseHandler extends AbstractHttpHandler {
|
||||
|
||||
@ -245,7 +247,7 @@ public class SimpleHttpFetcherTest {
|
||||
|
||||
String url = "http://localhost:8089/test.html";
|
||||
FetchedResult result = fetcher.get(url);
|
||||
|
||||
assertEquals(HttpStatus.SC_OK, result.getStatusCode());
|
||||
assertTrue("Content size should be truncated", result.getContent().length <= fetcher.getDefaultMaxContentSize());
|
||||
}
|
||||
|
||||
@ -259,7 +261,9 @@ public class SimpleHttpFetcherTest {
|
||||
String urlToFetch = "http://localhost:8089/karlie.html";
|
||||
|
||||
FetchedResult result1 = fetcher.get(urlToFetch);
|
||||
assertEquals(HttpStatus.SC_OK, result1.getStatusCode());
|
||||
FetchedResult result2 = fetcher.get(urlToFetch);
|
||||
assertEquals(HttpStatus.SC_OK, result2.getStatusCode());
|
||||
|
||||
// Verify that we got the same data from each fetch request.
|
||||
assertEquals(1000, result1.getContent().length);
|
||||
@ -445,4 +449,21 @@ public class SimpleHttpFetcherTest {
|
||||
assertEquals("127.0.0.1", hostAddress);
|
||||
}
|
||||
|
||||
/**
 * Verifies that fetching a URL which yields a 404 raises an
 * HttpFetchException that carries the SC_NOT_FOUND status and whose
 * message includes the "Not Found" reason text.
 *
 * NOTE(review): presumably ResourcesResponseHandler has no resource named
 * this-page-will-not-exist.html and answers with 404 - confirm against the handler.
 */
@Test
|
||||
public final void testMissingPage() throws Exception {
|
||||
startServer(new ResourcesResponseHandler(), 8089);
|
||||
BaseFetcher fetcher = new SimpleHttpFetcher(1, TestUtils.CC_TEST_AGENT);
|
||||
String url = "http://localhost:8089/this-page-will-not-exist.html";
|
||||
|
||||
try {
|
||||
fetcher.get(url);
|
||||
// Reaching this line means no exception was raised, which is a test failure.
|
||||
fail("Should have thrown exception");
|
||||
} catch (HttpFetchException e) {
|
||||
assertEquals(HttpStatus.SC_NOT_FOUND, e.getHttpStatus());
|
||||
|
||||
// Make sure the reason gets into the exception message.
|
||||
assertTrue(e.getMessage().contains("Not Found"));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user