mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-05-12 00:26:03 +02:00
Minor changes + applied formatting pre 0.10 release
This commit is contained in:
parent
8195140e21
commit
0da1b8b8b5
|
@ -1,9 +1,11 @@
|
|||
Crawler-Commons Change Log
|
||||
|
||||
Current Development 0.10-SNAPSHOT (yyyy-mm-dd)
|
||||
|
||||
Release 0.10 (2018-06-05)
|
||||
- Add JAX-B dependencies to POM (jnioche) #207
|
||||
- Add method to parse and iterate sitemap SiteMapParser#walkSiteMap(URL,Consumer) (Luc Boruta) #190
|
||||
- Sitemap file location to ignore query part of URL (sebastian-nagel) #202
|
||||
- [Sitemaps] Add method to parse and iterate sitemap SiteMapParser#walkSiteMap(URL,Consumer) (Luc Boruta) #190
|
||||
- [Sitemaps] Sitemap file location to ignore query part of URL (sebastian-nagel) #202
|
||||
- [RSS sitemaps] Link extraction from RSS feeds fails on XML entities (sebastian-nagel) #204
|
||||
- [RSS sitemaps] Resolve relative links in RSS feeds (sebastian-nagel) #203
|
||||
- [RSS sitemaps] Extract links from <guid> elements (sebastian-nagel) #201
|
||||
|
|
|
@ -9,41 +9,16 @@ import java.util.Locale;
|
|||
|
||||
public class MimeTypeDetector {
|
||||
|
||||
private static String[] XML_MIMETYPES = new String[] {
|
||||
"application/xml",
|
||||
"application/x-xml",
|
||||
"text/xml",
|
||||
"application/atom+xml",
|
||||
"application/rss+xml",
|
||||
"text/rss",
|
||||
"application/rdf+xml"
|
||||
};
|
||||
private static String[] XML_MIMETYPES = new String[] { "application/xml", "application/x-xml", "text/xml", "application/atom+xml", "application/rss+xml", "text/rss", "application/rdf+xml" };
|
||||
|
||||
private static String[] TEXT_MIMETYPES = new String[] {
|
||||
"text/plain"
|
||||
};
|
||||
private static String[] TEXT_MIMETYPES = new String[] { "text/plain" };
|
||||
|
||||
private static String[] GZIP_MIMETYPES = new String[] {
|
||||
"application/gzip",
|
||||
"application/gzip-compressed",
|
||||
"application/gzipped",
|
||||
"application/x-gzip",
|
||||
"application/x-gzip-compressed",
|
||||
"application/x-gunzip",
|
||||
"gzip/document"
|
||||
};
|
||||
private static String[] GZIP_MIMETYPES = new String[] { "application/gzip", "application/gzip-compressed", "application/gzipped", "application/x-gzip", "application/x-gzip-compressed",
|
||||
"application/x-gunzip", "gzip/document" };
|
||||
|
||||
private static String[][] MIMETYPES = {
|
||||
XML_MIMETYPES,
|
||||
TEXT_MIMETYPES,
|
||||
GZIP_MIMETYPES
|
||||
};
|
||||
private static String[][] MIMETYPES = { XML_MIMETYPES, TEXT_MIMETYPES, GZIP_MIMETYPES };
|
||||
|
||||
private static byte[] UTF8_BOM = {
|
||||
(byte) 0xEF,
|
||||
(byte) 0xBB,
|
||||
(byte) 0xBF
|
||||
};
|
||||
private static byte[] UTF8_BOM = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
|
||||
|
||||
private static final int LEADING_WHITESPACE_MAX_SKIP = 32;
|
||||
|
||||
|
@ -144,7 +119,7 @@ public class MimeTypeDetector {
|
|||
offsetText++;
|
||||
}
|
||||
}
|
||||
if (patternMatches(entry.getPattern(), content, offsetText, (length-offsetText))) {
|
||||
if (patternMatches(entry.getPattern(), content, offsetText, (length - offsetText))) {
|
||||
return entry.getMimeType();
|
||||
}
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue