1
0
Fork 0
mirror of https://github.com/crawler-commons/crawler-commons synced 2024-05-12 00:26:03 +02:00

Minor changes + applied formatting pre 0.10 release

This commit is contained in:
Julien Nioche 2018-06-05 11:33:27 +01:00
parent 8195140e21
commit 0da1b8b8b5
2 changed files with 11 additions and 34 deletions

View File

@@ -1,9 +1,11 @@
Crawler-Commons Change Log
Current Development 0.10-SNAPSHOT (yyyy-mm-dd)
Release 0.10 (2018-06-05)
- Add JAX-B dependencies to POM (jnioche) #207
- Add method to parse and iterate sitemap SiteMapParser#walkSiteMap(URL,Consumer) (Luc Boruta) #190
- Sitemap file location to ignore query part of URL (sebastian-nagel) #202
- [Sitemaps] Add method to parse and iterate sitemap SiteMapParser#walkSiteMap(URL,Consumer) (Luc Boruta) #190
- [Sitemaps] Sitemap file location to ignore query part of URL (sebastian-nagel) #202
- [RSS sitemaps] Link extraction from RSS feeds fails on XML entities (sebastian-nagel) #204
- [RSS sitemaps] Resolve relative links in RSS feeds (sebastian-nagel) #203
- [RSS sitemaps] Extract links from <guid> elements (sebastian-nagel) #201

View File

@@ -9,41 +9,16 @@ import java.util.Locale;
public class MimeTypeDetector {
private static String[] XML_MIMETYPES = new String[] {
"application/xml",
"application/x-xml",
"text/xml",
"application/atom+xml",
"application/rss+xml",
"text/rss",
"application/rdf+xml"
};
private static String[] XML_MIMETYPES = new String[] { "application/xml", "application/x-xml", "text/xml", "application/atom+xml", "application/rss+xml", "text/rss", "application/rdf+xml" };
private static String[] TEXT_MIMETYPES = new String[] {
"text/plain"
};
private static String[] TEXT_MIMETYPES = new String[] { "text/plain" };
private static String[] GZIP_MIMETYPES = new String[] {
"application/gzip",
"application/gzip-compressed",
"application/gzipped",
"application/x-gzip",
"application/x-gzip-compressed",
"application/x-gunzip",
"gzip/document"
};
private static String[] GZIP_MIMETYPES = new String[] { "application/gzip", "application/gzip-compressed", "application/gzipped", "application/x-gzip", "application/x-gzip-compressed",
"application/x-gunzip", "gzip/document" };
private static String[][] MIMETYPES = {
XML_MIMETYPES,
TEXT_MIMETYPES,
GZIP_MIMETYPES
};
private static String[][] MIMETYPES = { XML_MIMETYPES, TEXT_MIMETYPES, GZIP_MIMETYPES };
private static byte[] UTF8_BOM = {
(byte) 0xEF,
(byte) 0xBB,
(byte) 0xBF
};
private static byte[] UTF8_BOM = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
private static final int LEADING_WHITESPACE_MAX_SKIP = 32;
@@ -144,7 +119,7 @@ public class MimeTypeDetector {
offsetText++;
}
}
if (patternMatches(entry.getPattern(), content, offsetText, (length-offsetText))) {
if (patternMatches(entry.getPattern(), content, offsetText, (length - offsetText))) {
return entry.getMimeType();
}
} else {