mirror of
https://github.com/crawler-commons/crawler-commons
synced 2024-06-04 14:16:05 +02:00
Add support for the Mobile sitemap extension
This commit is contained in:
parent
b924bd0828
commit
66745b29a1
|
@ -65,6 +65,11 @@ public class Namespace {
|
|||
"http://www.google.com/schemas/sitemap-news/0.84" //
|
||||
};
|
||||
|
||||
public static final String[] MOBILE = { //
|
||||
"http://www.google.com/schemas/sitemap-mobile/1.0", //
|
||||
"https://www.google.com/schemas/sitemap-mobile/1.0" //
|
||||
};
|
||||
|
||||
public static final String LINKS = "http://www.w3.org/1999/xhtml";
|
||||
|
||||
/**
|
||||
|
@ -113,6 +118,7 @@ public class Namespace {
|
|||
SITEMAP_EXTENSION_NAMESPACES.put(Extension.NEWS, Arrays.asList(NEWS));
|
||||
SITEMAP_EXTENSION_NAMESPACES.put(Extension.IMAGE, Arrays.asList(IMAGE));
|
||||
SITEMAP_EXTENSION_NAMESPACES.put(Extension.VIDEO, Arrays.asList(VIDEO));
|
||||
SITEMAP_EXTENSION_NAMESPACES.put(Extension.MOBILE, Arrays.asList(MOBILE));
|
||||
SITEMAP_EXTENSION_NAMESPACES.put(Extension.LINKS, Arrays.asList(LINKS));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,5 +38,11 @@ public enum Extension {
|
|||
* localized page versions/variants, see
|
||||
* https://support.google.com/webmasters/answer/189077
|
||||
*/
|
||||
LINKS
|
||||
LINKS,
|
||||
/**
|
||||
* <cite>Mobile sitemaps just contain an empty "mobile" tag to identify a
|
||||
* URL as having mobile content</cite>, cf.
|
||||
* http://www.google.com/schemas/sitemap-mobile/1.0
|
||||
*/
|
||||
MOBILE
|
||||
}
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Copyright 2018 Crawler-Commons
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package crawlercommons.sitemaps.extension;
|
||||
|
||||
/**
|
||||
* Google mobile sitemap attributes, see
|
||||
* http://www.google.de/schemas/sitemap-mobile/1.0/ and
|
||||
* https://www.google.com/schemas/sitemap-mobile/1.0/sitemap-mobile.xsd:
|
||||
* <blockquote>Mobile sitemaps just contain an empty "mobile" tag to identify a
|
||||
* URL as having mobile content.</blockquote>
|
||||
*/
|
||||
public class MobileAttributes extends ExtensionMetadata {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Mobile content avaiblabe: yes";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == null) {
|
||||
return false;
|
||||
}
|
||||
if (!(other instanceof MobileAttributes)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
|
@ -54,7 +54,7 @@ public abstract class ExtensionHandler extends DefaultHandler {
|
|||
case LINKS:
|
||||
return new LinksHandler();
|
||||
case MOBILE:
|
||||
return null;
|
||||
return new MobileHandler();
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
/**
|
||||
* Copyright 2018 Crawler-Commons
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package crawlercommons.sitemaps.sax.extension;
|
||||
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import crawlercommons.sitemaps.extension.ExtensionMetadata;
|
||||
import crawlercommons.sitemaps.extension.MobileAttributes;
|
||||
|
||||
/** Handle SAX events in the Google Mobile sitemap extension namespace. */
|
||||
public class MobileHandler extends ExtensionHandler {
|
||||
|
||||
private static MobileAttributes[] noMobileAttributes = new MobileAttributes[0];
|
||||
private static MobileAttributes[] mobileAttributes = new MobileAttributes[1];
|
||||
static {
|
||||
mobileAttributes[0] = new MobileAttributes();
|
||||
}
|
||||
|
||||
private boolean mobileElementFound = false;
|
||||
|
||||
public MobileHandler() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
|
||||
if ("mobile".equals(localName)) {
|
||||
mobileElementFound = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endElement(String uri, String localName, String qName) throws SAXException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void characters(char[] ch, int start, int length) throws SAXException {
|
||||
}
|
||||
|
||||
public ExtensionMetadata[] getAttributes() {
|
||||
if (mobileElementFound) {
|
||||
return mobileAttributes;
|
||||
}
|
||||
return noMobileAttributes;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
super.reset();
|
||||
mobileElementFound = false;
|
||||
}
|
||||
|
||||
}
|
|
@ -18,6 +18,7 @@ package crawlercommons.sitemaps;
|
|||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
|
@ -33,6 +34,7 @@ import crawlercommons.sitemaps.extension.Extension;
|
|||
import crawlercommons.sitemaps.extension.ExtensionMetadata;
|
||||
import crawlercommons.sitemaps.extension.ImageAttributes;
|
||||
import crawlercommons.sitemaps.extension.LinkAttributes;
|
||||
import crawlercommons.sitemaps.extension.MobileAttributes;
|
||||
import crawlercommons.sitemaps.extension.NewsAttributes;
|
||||
import crawlercommons.sitemaps.extension.VideoAttributes;
|
||||
|
||||
|
@ -185,6 +187,32 @@ public class SiteMapParserExtensionTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMobileSitemap() throws UnknownFormatException, IOException {
|
||||
SiteMapParser parser = new SiteMapParser();
|
||||
parser.enableExtension(Extension.MOBILE);
|
||||
|
||||
String contentType = "text/xml";
|
||||
byte[] content = SiteMapParserTest.getResourceAsBytes("src/test/resources/sitemaps/extension/sitemap-mobile.xml");
|
||||
|
||||
URL url = new URL("http://www.example.org/sitemap-mobile.xml");
|
||||
AbstractSiteMap asm = parser.parseSiteMap(contentType, content, url);
|
||||
assertEquals(false, asm.isIndex());
|
||||
assertEquals(true, asm instanceof SiteMap);
|
||||
SiteMap sm = (SiteMap) asm;
|
||||
for (SiteMapURL su : sm.getSiteMapUrls()) {
|
||||
URL u = su.getUrl();
|
||||
ExtensionMetadata[] attrs = su.getAttributesForExtension(Extension.MOBILE);
|
||||
if (u.getPath().contains("mobile-friendly")) {
|
||||
assertNotNull(attrs);
|
||||
MobileAttributes attr = (MobileAttributes) attrs[0];
|
||||
assertNotNull(attr);
|
||||
} else {
|
||||
assertTrue(attrs == null || attrs.length == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testShinpaideshuNewsSitemap() throws UnknownFormatException, IOException {
|
||||
SiteMapParser parser = new SiteMapParser();
|
||||
|
@ -216,6 +244,7 @@ public class SiteMapParserExtensionTest {
|
|||
parser.enableExtension(Extension.NEWS);
|
||||
parser.enableExtension(Extension.IMAGE);
|
||||
parser.enableExtension(Extension.VIDEO);
|
||||
parser.enableExtension(Extension.MOBILE);
|
||||
|
||||
String contentType = "text/xml";
|
||||
byte[] content = SiteMapParserTest.getResourceAsBytes("src/test/resources/sitemaps/extension/hebdenbridgetimes-articles-sitemap.xml");
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
||||
xmlns:mobile="http://www.google.com/schemas/sitemap-mobile/1.0">
|
||||
<url>
|
||||
<loc>http://www.example.com/mobile-friendly-1/</loc>
|
||||
<mobile:mobile/>
|
||||
</url>
|
||||
<url>
|
||||
<loc>http://www.example.com/mobile-friendly-2/</loc>
|
||||
<mobile:mobile></mobile:mobile>
|
||||
</url>
|
||||
<url>
|
||||
<loc>http://www.example.com/no-mobile/</loc>
|
||||
</url>
|
||||
</urlset>
|
Loading…
Reference in New Issue