1
0
Fork 0
mirror of https://github.com/crawler-commons/crawler-commons synced 2024-06-04 14:16:05 +02:00

Add support for the Mobile sitemap extension

This commit is contained in:
Sebastian Nagel 2018-09-28 10:05:03 +02:00
parent b924bd0828
commit 66745b29a1
7 changed files with 168 additions and 2 deletions

View File

@ -65,6 +65,11 @@ public class Namespace {
"http://www.google.com/schemas/sitemap-news/0.84" //
};
/**
 * Namespace URIs of the Google Mobile sitemap extension; both the
 * <code>http</code> and the <code>https</code> variant are listed.
 */
public static final String[] MOBILE = { //
"http://www.google.com/schemas/sitemap-mobile/1.0", //
"https://www.google.com/schemas/sitemap-mobile/1.0" //
};
/** Namespace URI (XHTML) used for the links sitemap extension. */
public static final String LINKS = "http://www.w3.org/1999/xhtml";
/**
@ -113,6 +118,7 @@ public class Namespace {
SITEMAP_EXTENSION_NAMESPACES.put(Extension.NEWS, Arrays.asList(NEWS));
SITEMAP_EXTENSION_NAMESPACES.put(Extension.IMAGE, Arrays.asList(IMAGE));
SITEMAP_EXTENSION_NAMESPACES.put(Extension.VIDEO, Arrays.asList(VIDEO));
SITEMAP_EXTENSION_NAMESPACES.put(Extension.MOBILE, Arrays.asList(MOBILE));
SITEMAP_EXTENSION_NAMESPACES.put(Extension.LINKS, Arrays.asList(LINKS));
}
}

View File

@ -38,5 +38,11 @@ public enum Extension {
* localized page versions/variants, see
* https://support.google.com/webmasters/answer/189077
*/
LINKS
LINKS,
/**
* <cite>Mobile sitemaps just contain an empty "mobile" tag to identify a
* URL as having mobile content</cite>, cf.
* http://www.google.com/schemas/sitemap-mobile/1.0
*/
MOBILE
}

View File

@ -0,0 +1,44 @@
/**
* Copyright 2018 Crawler-Commons
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package crawlercommons.sitemaps.extension;
/**
 * Google mobile sitemap attributes, see
 * http://www.google.de/schemas/sitemap-mobile/1.0/ and
 * https://www.google.com/schemas/sitemap-mobile/1.0/sitemap-mobile.xsd:
 * <blockquote>Mobile sitemaps just contain an empty "mobile" tag to identify a
 * URL as having mobile content.</blockquote>
 *
 * <p>
 * The class declares no fields: the mere presence of an instance signals that
 * a URL has mobile content. Consequently all instances are considered equal
 * and share the same hash code.
 * </p>
 */
public class MobileAttributes extends ExtensionMetadata {

    /**
     * @return a fixed, human-readable marker stating that mobile content is
     *         available
     */
    @Override
    public String toString() {
        return "Mobile content available: yes";
    }

    /**
     * @param other
     *            object to compare with, may be <code>null</code>
     * @return <code>true</code> if <code>other</code> is a
     *         {@link MobileAttributes} instance, <code>false</code> otherwise
     *         (including for <code>null</code>)
     */
    @Override
    public boolean equals(Object other) {
        // instanceof already evaluates to false for null
        return other instanceof MobileAttributes;
    }

    /**
     * @return a constant value, consistent with {@link #equals(Object)}
     *         which treats all instances as equal
     */
    @Override
    public int hashCode() {
        return MobileAttributes.class.getName().hashCode();
    }
}

View File

@ -54,7 +54,7 @@ public abstract class ExtensionHandler extends DefaultHandler {
case LINKS:
return new LinksHandler();
case MOBILE:
return null;
return new MobileHandler();
default:
return null;
}

View File

@ -0,0 +1,66 @@
/**
* Copyright 2018 Crawler-Commons
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package crawlercommons.sitemaps.sax.extension;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import crawlercommons.sitemaps.extension.ExtensionMetadata;
import crawlercommons.sitemaps.extension.MobileAttributes;
/**
 * Handle SAX events in the Google Mobile sitemap extension namespace.
 *
 * <p>
 * The handler only records whether an element with local name "mobile" has
 * been seen since construction or the last call of {@link #reset()}. Element
 * content and end tags are ignored.
 * </p>
 */
public class MobileHandler extends ExtensionHandler {

    /** Result if no "mobile" element was seen; an empty array cannot be modified and is safe to share. */
    private static final MobileAttributes[] NO_MOBILE_ATTRIBUTES = new MobileAttributes[0];

    /** Template of the result if a "mobile" element was seen; never handed out directly. */
    private static final MobileAttributes[] MOBILE_ATTRIBUTES = { new MobileAttributes() };

    /** Set as soon as a "mobile" element is started, cleared by {@link #reset()}. */
    private boolean mobileElementFound = false;

    public MobileHandler() {
    }

    /**
     * Remember that a "mobile" element has been encountered. Elements with
     * any other local name are ignored.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if ("mobile".equals(localName)) {
            mobileElementFound = true;
        }
    }

    /** Nothing to do: the start of the element alone carries the information. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
    }

    /** Nothing to do: character content is not evaluated. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
    }

    /**
     * @return an array holding a single {@link MobileAttributes} object if a
     *         "mobile" element has been seen, an empty array otherwise
     */
    public ExtensionMetadata[] getAttributes() {
        if (!mobileElementFound) {
            return NO_MOBILE_ATTRIBUTES;
        }
        // hand out a copy so that a caller writing into the returned array
        // cannot affect the results returned for other URLs
        return MOBILE_ATTRIBUTES.clone();
    }

    /** Reset the state of the super class and forget any "mobile" element seen so far. */
    @Override
    public void reset() {
        super.reset();
        mobileElementFound = false;
    }
}

View File

@ -18,6 +18,7 @@ package crawlercommons.sitemaps;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.net.MalformedURLException;
@ -33,6 +34,7 @@ import crawlercommons.sitemaps.extension.Extension;
import crawlercommons.sitemaps.extension.ExtensionMetadata;
import crawlercommons.sitemaps.extension.ImageAttributes;
import crawlercommons.sitemaps.extension.LinkAttributes;
import crawlercommons.sitemaps.extension.MobileAttributes;
import crawlercommons.sitemaps.extension.NewsAttributes;
import crawlercommons.sitemaps.extension.VideoAttributes;
@ -185,6 +187,32 @@ public class SiteMapParserExtensionTest {
}
}
/**
 * Parse the mobile sitemap fixture with the MOBILE extension enabled and
 * verify that URLs whose path contains "mobile-friendly" carry mobile
 * attributes while all other URLs carry none.
 */
@Test
public void testMobileSitemap() throws UnknownFormatException, IOException {
    SiteMapParser mobileParser = new SiteMapParser();
    mobileParser.enableExtension(Extension.MOBILE);

    byte[] sitemapBytes = SiteMapParserTest.getResourceAsBytes("src/test/resources/sitemaps/extension/sitemap-mobile.xml");
    URL sitemapLocation = new URL("http://www.example.org/sitemap-mobile.xml");
    AbstractSiteMap parsed = mobileParser.parseSiteMap("text/xml", sitemapBytes, sitemapLocation);

    // a plain sitemap is expected, not a sitemap index
    assertEquals(false, parsed.isIndex());
    assertTrue(parsed instanceof SiteMap);

    for (SiteMapURL entry : ((SiteMap) parsed).getSiteMapUrls()) {
        String path = entry.getUrl().getPath();
        ExtensionMetadata[] mobileAttrs = entry.getAttributesForExtension(Extension.MOBILE);
        if (!path.contains("mobile-friendly")) {
            // URL without a mobile element: no attributes at all
            assertTrue(mobileAttrs == null || mobileAttrs.length == 0);
            continue;
        }
        assertNotNull(mobileAttrs);
        MobileAttributes first = (MobileAttributes) mobileAttrs[0];
        assertNotNull(first);
    }
}
@Test
public void testShinpaideshuNewsSitemap() throws UnknownFormatException, IOException {
SiteMapParser parser = new SiteMapParser();
@ -216,6 +244,7 @@ public class SiteMapParserExtensionTest {
parser.enableExtension(Extension.NEWS);
parser.enableExtension(Extension.IMAGE);
parser.enableExtension(Extension.VIDEO);
parser.enableExtension(Extension.MOBILE);
String contentType = "text/xml";
byte[] content = SiteMapParserTest.getResourceAsBytes("src/test/resources/sitemaps/extension/hebdenbridgetimes-articles-sitemap.xml");

View File

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Test fixture for the Google Mobile sitemap extension: two URLs are
     marked as having mobile content, one URL is not. -->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:mobile="http://www.google.com/schemas/sitemap-mobile/1.0">
<!-- mobile content, marked by a self-closing mobile element -->
<url>
<loc>http://www.example.com/mobile-friendly-1/</loc>
<mobile:mobile/>
</url>
<!-- mobile content, marked by an empty mobile element with separate end tag -->
<url>
<loc>http://www.example.com/mobile-friendly-2/</loc>
<mobile:mobile></mobile:mobile>
</url>
<!-- no mobile element -->
<url>
<loc>http://www.example.com/no-mobile/</loc>
</url>
</urlset>