205 lines
6.3 KiB
JavaScript
205 lines
6.3 KiB
JavaScript
{
|
||
"translatorID": "2e4ebd19-83ab-4a56-8fa6-bcd52b576470",
|
||
"label": "Sueddeutsche.de",
|
||
"creator": "Martin Meyerhoff",
|
||
"target": "^https?://www\\.sueddeutsche\\.de",
|
||
"minVersion": "3.0",
|
||
"maxVersion": "",
|
||
"priority": 100,
|
||
"inRepository": true,
|
||
"translatorType": 4,
|
||
"browserSupport": "gcsibv",
|
||
"lastUpdated": "2017-06-24 21:03:57"
|
||
}
|
||
|
||
/*
|
||
Sueddeutsche.de Translator
|
||
Copyright (C) 2011 Martin Meyerhoff
|
||
|
||
This program is free software: you can redistribute it and/or modify
|
||
it under the terms of the GNU General Public License as published by
|
||
the Free Software Foundation, either version 3 of the License, or
|
||
(at your option) any later version.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU General Public License for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
*/
|
||
|
||
/*
|
||
This one has the search function on a different host, so I cannot scan the search results. A multiple option, though, is given for the page itself.
|
||
Test here:
|
||
http://www.sueddeutsche.de/politik
|
||
http://www.sueddeutsche.de/thema/Krieg_in_Libyen
|
||
http://www.sueddeutsche.de/muenchen
|
||
|
||
Reference article: http://www.sueddeutsche.de/wissen/embryonale-stammzellen-wo-sind-die-naiven-1.1143034
|
||
*/
|
||
|
||
function detectWeb(doc, url) {
|
||
if (ZU.xpathText(doc, '//h2/strong')) {
|
||
return "newspaperArticle";
|
||
} else if (ZU.xpath(doc, '//div[@id="topthemen" or @class="panoramateaser" \
|
||
or contains(@class,"maincolumn") or contains(@class, "teaser")]\
|
||
//a[starts-with(@class,"entry-title") \
|
||
and starts-with(@href,"http://www.sueddeutsche.de") \
|
||
and not(contains(@href,"/app/"))]').length){
|
||
return "multiple";
|
||
}
|
||
}
|
||
|
||
function scrape(doc, url) {
|
||
//don't parse things like image galleries
|
||
//e.g. http://www.sueddeutsche.de/kultur/thomas-manns-villa-in-los-angeles-weimar-am-pazifik-1.1301388
|
||
if (!ZU.xpathText(doc, '//h2/strong')) return;
|
||
|
||
var newItem = new Zotero.Item("newspaperArticle");
|
||
newItem.url = url;
|
||
|
||
var title = ZU.xpathText(doc, '//meta[contains(@property, "og:title")]/@content');
|
||
newItem.title = Zotero.Utilities.trim(title.replace(/\s?–\s?/, ": "));
|
||
|
||
// Author. This is tricky, the SZ uses the author field for whatever they like.
|
||
// Sometimes, there is no author.
|
||
var author = ZU.xpathText(doc, '//section[contains(@class, "authors")]//span[contains(@class, "moreInfo")]/strong')
|
||
|
||
// One case i've seen: A full sentence as the "author", with no author in it.
|
||
if (author && author.trim().charAt(author.length - 1) != '.') {
|
||
author = author.replace(/^\s*Von\s|Ein Kommentar von/i, '')
|
||
// For multiple Authors, the SZ uses comma, und and u
|
||
.split(/\s+(?:und|u|,)\s+/);
|
||
|
||
for (var i in author) {
|
||
if (author[i].match(/\s/)) { // only names that contain a space!
|
||
newItem.creators.push(ZU.cleanAuthor(author[i], "author"));
|
||
}
|
||
}
|
||
}
|
||
|
||
// summary
|
||
newItem.abstractNote = ZU.xpathText(doc, '//meta[contains(@property, "og:description")]/@content');
|
||
|
||
// Date
|
||
newItem.date = ZU.xpathText(doc, "//time[@class='timeformat']");
|
||
if (newItem.date) {
|
||
newItem.date = ZU.strToISO(newItem.date);
|
||
}
|
||
|
||
// Section
|
||
var section = url.match(/sueddeutsche\.de\/([^\/]+)/);
|
||
newItem.section = ZU.capitalizeTitle(section[1]);
|
||
|
||
// Tags
|
||
var tags = ZU.xpathText(doc, '//meta[@name="keywords"]/@content');
|
||
if (tags) {
|
||
tags = tags.split(/\s*,\s+/);
|
||
for (var i=0, n=tags.length; i<n; i++) {
|
||
newItem.tags.push(ZU.trimInternal(tags[i]));
|
||
}
|
||
}
|
||
|
||
// Publication
|
||
newItem.publicationTitle = "sueddeutsche.de"
|
||
newItem.ISSN = "0174-4917";
|
||
newItem.language = "de";
|
||
|
||
// Attachment. inserting /2.220/ gives us a printable version
|
||
var printurl = url.replace(/(.*\/)(.*$)/, '$12.220/$2');
|
||
newItem.attachments.push({
|
||
url: printurl,
|
||
title: "Snapshot",
|
||
mimeType: "text/html",
|
||
snapshot: true
|
||
});
|
||
|
||
newItem.complete()
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
if (detectWeb(doc, url) == "multiple") {
|
||
var links = ZU.xpath(doc,
|
||
'//div[@id="topthemen" or @class="panoramateaser" \
|
||
or contains(@class,"maincolumn") or contains(@class, "teaser")]\
|
||
//a[starts-with(@class,"entry-title") \
|
||
and starts-with(@href,"http://www.sueddeutsche.de") \
|
||
and not(contains(@href,"/app/"))]');
|
||
|
||
var items = new Object();
|
||
var title;
|
||
for (var i=0, n=links.length; i<n; i++) {
|
||
title = ZU.xpathText(links[i], './node()[not(self::div)]', null, '');
|
||
items[links[i].href] = ZU.trimInternal(title);
|
||
}
|
||
|
||
Zotero.selectItems(items, function(items) {
|
||
if (!items) return true;
|
||
|
||
var articles = new Array();
|
||
for (var i in items) {
|
||
articles.push(i);
|
||
}
|
||
ZU.processDocuments(articles, scrape);
|
||
});
|
||
} else {
|
||
scrape(doc, url);
|
||
}
|
||
}/** BEGIN TEST CASES **/
|
||
var testCases = [
|
||
{
|
||
"type": "web",
|
||
"url": "http://www.sueddeutsche.de/politik/verdacht-gegen-hessischen-verfassungsschuetzer-spitzname-kleiner-adolf-1.1190178",
|
||
"items": [
|
||
{
|
||
"itemType": "newspaperArticle",
|
||
"title": "Spitzname \"Kleiner Adolf\"",
|
||
"creators": [
|
||
{
|
||
"firstName": "Peter",
|
||
"lastName": "Blechschmidt",
|
||
"creatorType": "author"
|
||
},
|
||
{
|
||
"firstName": "Marc",
|
||
"lastName": "Widmann",
|
||
"creatorType": "author"
|
||
}
|
||
],
|
||
"date": "2011-11-16",
|
||
"ISSN": "0174-4917",
|
||
"abstractNote": "Als die Zwickauer Zelle in einem Kasseler Internet-Café Halit Y. hinrichtet, surft ein hessischer Verfassungsschützer dort im Netz. In seiner Wohnung findet die Polizei später Hinweise auf eine rechtsradikale Gesinnung - doch die Ermittlungen gegen den Mann werden eingestellt. Dabei bleiben viele Fragen offen.",
|
||
"language": "de",
|
||
"libraryCatalog": "Sueddeutsche.de",
|
||
"publicationTitle": "sueddeutsche.de",
|
||
"section": "politik",
|
||
"url": "http://www.sueddeutsche.de/politik/verdacht-gegen-hessischen-verfassungsschuetzer-spitzname-kleiner-adolf-1.1190178",
|
||
"attachments": [
|
||
{
|
||
"title": "Snapshot",
|
||
"mimeType": "text/html",
|
||
"snapshot": true
|
||
}
|
||
],
|
||
"tags": [
|
||
"Internet",
|
||
"Politik",
|
||
"Polizei",
|
||
"SZ",
|
||
"Süddeutsche Zeitung",
|
||
"rechter Terror"
|
||
],
|
||
"notes": [],
|
||
"seeAlso": []
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"type": "web",
|
||
"url": "http://www.sueddeutsche.de/politik",
|
||
"items": "multiple"
|
||
}
|
||
]
|
||
/** END TEST CASES **/ |