mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-05-19 10:16:03 +02:00
Add a parser option to allow DTDs, replacing the system property setting
This commit is contained in:
parent
a2981962fc
commit
273ac6ac7e
|
@ -102,6 +102,11 @@ public class SiteMapParser {
|
|||
|
||||
private MimeTypeDetector mimeTypeDetector;
|
||||
|
||||
/**
|
||||
* Whether DOCTYPE declarations (DTDs) are allowed when parsing a sitemap. Disallowed by default.
|
||||
*/
|
||||
private boolean allowDocTypeDefinitions = false;
|
||||
|
||||
/* Function to normalize or filter URLs. Does nothing by default. */
|
||||
private Function<String, String> urlFilter = (String url) -> url;
|
||||
|
||||
|
@ -595,7 +600,7 @@ public class SiteMapParser {
|
|||
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
|
||||
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
|
||||
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
||||
if (!"true".equalsIgnoreCase(System.getProperty("crawler-commons.sitemap.allowDocTypes"))) {
|
||||
if (!this.allowDocTypeDefinitions) {
|
||||
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
@ -664,4 +669,12 @@ public class SiteMapParser {
|
|||
public static boolean urlIsValid(String sitemapBaseUrl, String testUrl) {
|
||||
// Plain string-prefix comparison: no URL parsing or normalization is done here.
return testUrl.startsWith(sitemapBaseUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets whether DOCTYPE declarations (DTDs) are allowed when parsing a sitemap.
|
||||
* @param allowDocTypeDefinitions {@code true} to allow DOCTYPE declarations; {@code false} (the default) to disallow them.
|
||||
*/
|
||||
public void setAllowDocTypeDefinitions(boolean allowDocTypeDefinitions) {
|
||||
this.allowDocTypeDefinitions = allowDocTypeDefinitions;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -113,8 +113,7 @@ public class SiteMapParserTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testSitemapXXE() throws UnknownFormatException, IOException {
|
||||
System.clearProperty("crawler-commons.sitemap.allowDocTypes");
|
||||
public void testSitemapXXE() throws IOException {
|
||||
// A file on disk that would be read if we were vulnerable to XXE
|
||||
File doNotVisit = new File("src/test/resources/sitemaps/do-not-visit.txt");
|
||||
|
||||
|
@ -145,12 +144,12 @@ public class SiteMapParserTest {
|
|||
|
||||
@Test
|
||||
public void testSitemapXXEWithDocTypeAllowed() throws UnknownFormatException, IOException {
|
||||
System.setProperty("crawler-commons.sitemap.allowDocTypes", "true");
|
||||
// A file on disk that would be read if we were vulnerable to XXE
|
||||
File doNotVisit = new File("src/test/resources/sitemaps/do-not-visit.txt");
|
||||
|
||||
// Create a sitemap with an external entity referring to the local file
|
||||
SiteMapParser parser = new SiteMapParser();
|
||||
parser.setAllowDocTypeDefinitions(true);
|
||||
String contentType = "text/xml";
|
||||
StringBuilder scontent = new StringBuilder(1024);
|
||||
scontent.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") //
|
||||
|
|
Loading…
Reference in New Issue