zotero-db/translators/Washington Post.js

415 lines
12 KiB
JavaScript
Raw Normal View History

{
"translatorID": "d1bf1c29-4432-4ada-8893-2e29fc88fd9e",
"translatorType": 4,
"label": "Washington Post",
"creator": "Philipp Zumstein",
"target": "^https?://www\\.washingtonpost\\.com/",
"minVersion": "3.0",
"maxVersion": null,
"priority": 100,
"inRepository": true,
"browserSupport": "gcsibv",
"lastUpdated": "2024-07-14 04:05:00"
}
/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2017 Philipp Zumstein
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
/**
 * Decide which kind of item (if any) the current page represents.
 *
 * Checks run in this order and the first match wins:
 *   1. an <h1 data-qa="headline"> is present: "blogPost" when the URL
 *      contains "/blogs/", otherwise "newspaperArticle";
 *   2. an <h1> inside #default-topper-container;
 *   3. any <h1> together with a <header layout="full_bleed">;
 *   4. a URL containing "/archive/" or "/wp-dyn/content/" (older articles);
 *   5. a page on which getSearchResults() finds at least one link.
 *
 * @param {Document} doc - Page being inspected.
 * @param {string} url - Address of that page.
 * @returns {string|false} "blogPost", "newspaperArticle", "multiple",
 *     or false when nothing matched.
 */
function detectWeb(doc, url) {
	const headline = ZU.xpathText(doc, '//h1[@data-qa="headline"]');
	if (headline) {
		return url.includes('/blogs/') ? "blogPost" : "newspaperArticle";
	}

	// Remaining article layouts, then the URL patterns used by older articles.
	// Kept as one short-circuit chain so the lookups run in the listed order.
	const looksLikeArticle = text(doc, '#default-topper-container h1')
		|| (text(doc, 'h1') && text(doc, 'header[layout="full_bleed"]'))
		|| url.includes('/archive/')
		|| url.includes('/wp-dyn/content/');
	if (looksLikeArticle) {
		return "newspaperArticle";
	}

	return getSearchResults(doc, true) ? "multiple" : false;
}
/**
 * Collect the article links shown on a listing page.
 *
 * Considers every <a> under an element whose class contains
 * "pb-feed-headline", excluding links whose href contains "/video/".
 * Links lacking either an href or visible text are ignored.
 *
 * @param {Document} doc - Page to search.
 * @param {boolean} checkOnly - When truthy, stop at the first usable link
 *     and return true instead of building the full map.
 * @returns {Object<string, string>|boolean} Map of href to whitespace-
 *     normalised link text, true (checkOnly with a hit), or false when no
 *     usable link exists.
 */
function getSearchResults(doc, checkOnly) {
	const links = ZU.xpath(doc, '//div[contains(@class, "pb-feed-headline")]//a[not(contains(@href, "/video/"))]');
	const results = {};
	let hasResults = false;

	for (const link of links) {
		const target = link.href;
		const label = ZU.trimInternal(link.textContent);
		if (target && label) {
			if (checkOnly) {
				return true;
			}
			results[target] = label;
			hasResults = true;
		}
	}

	return hasResults ? results : false;
}
/**
 * Translator entry point: save the current page, or the entries the user
 * picks from a listing page.
 *
 * On a "multiple" page the links found by getSearchResults() are offered
 * through Zotero.selectItems(); each chosen URL is fetched with
 * requestDocument() and scraped one after another. If the selection comes
 * back falsy, nothing is saved.
 *
 * @param {Document} doc - Page the translator was invoked on.
 * @param {string} url - Address of that page.
 * @returns {Promise<void>}
 */
async function doWeb(doc, url) {
	if (detectWeb(doc, url) === 'multiple') {
		const items = await Zotero.selectItems(getSearchResults(doc, false));
		if (!items) return;
		for (const itemURL of Object.keys(items)) {
			// scrape() inspects the URL to pick the item type, so it must be
			// passed along with the fetched document.
			await scrape(await requestDocument(itemURL), itemURL);
		}
	}
	else {
		await scrape(doc, url);
	}
}
/**
 * Save one article page as a Zotero item.
 *
 * The Embedded Metadata translator produces the base item; its "itemDone"
 * handler then overrides or fills in the item type, creators, date, tags,
 * language, ISSN and section before completing the item.
 *
 * @param {Document} doc - Article page.
 * @param {string} [url=doc.location.href] - Address of the page. Falls back
 *     to the document's own location so that callers passing only a
 *     document do not fail on the URL checks below.
 * @returns {Promise<void>}
 */
async function scrape(doc, url = doc.location.href) {
	const type = url.includes('/blogs/') ? 'blogPost' : 'newspaperArticle';

	const translator = Zotero.loadTranslator('web');
	// Embedded Metadata
	translator.setTranslator('951c027d-74ac-47d4-a107-9c3069ab7b48');
	translator.setDocument(doc);

	translator.setHandler('itemDone', (_obj, item) => {
		item.itemType = type;

		if (url.includes('/wp-dyn/content/')) {
			// Old articles: the byline is plain text, optionally prefixed "By ".
			const byline = ZU.xpathText(doc, '//div[@id="byline"]');
			if (byline) {
				item.creators.push(ZU.cleanAuthor(byline.replace(/^By /, ''), "author"));
			}
		}
		else {
			// Replace whatever creators were supplied with the names found on
			// the page. Nodes with no text are dropped so they do not turn
			// into blank creators.
			const names = Array.from(doc.querySelectorAll('.author-name, [rel="author"]'))
				.map(node => node.textContent.trim())
				.filter(Boolean);
			item.creators = ZU.arrayUnique(names)
				.map(name => ZU.cleanAuthor(name, "author"));
		}

		item.date = attr(doc, 'meta[property="article_published_time"]', 'content')
			|| ZU.xpathText(doc, '//span[@itemprop="datePublished"]/@content')
			|| ZU.xpathText(doc, '//meta[@name="DC.date.issued"]/@content');
		if (item.date) {
			item.date = ZU.strToISO(item.date);
		}

		// the automatically added tags here are usually not really helpful
		item.tags = [];
		item.language = "en-US";
		if (type === 'newspaperArticle') {
			item.ISSN = "0190-8286";
		}
		item.section = ZU.xpathText(doc, '(//div[contains(@class, "headline-kicker")])[1]');
		item.complete();
	});

	const em = await translator.getTranslatorObject();
	await em.doWeb(doc, url);
}
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "https://www.washingtonpost.com/wp-dyn/content/article/2008/11/07/AR2008110703296.html",
"items": [
{
"itemType": "newspaperArticle",
"title": "Split Over Russia Grows in Europe",
"creators": [
{
"firstName": "Craig",
"lastName": "Whitlock",
"creatorType": "author"
}
],
"date": "2008-11-08",
"ISSN": "0190-8286",
"abstractNote": "BERLIN, Nov. 7 -- Russia sent President-elect Barack Obama a message this week when it threatened to \"neutralize\" the proposed U.S. missile defense shield in Eastern Europe. But analysts said the tough talk from Moscow had another aim as well: to exploit a festering divide within Europe.",
"language": "en-US",
"libraryCatalog": "www.washingtonpost.com",
"url": "http://www.washingtonpost.com/wp-dyn/content/article/2008/11/07/AR2008110703296.html",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.washingtonpost.com/world/national-security/aulaqi-killing-reignites-debate-on-limits-of-executive-power/2011/09/30/gIQAx1bUAL_story.html?hpid=z1",
"items": [
{
"itemType": "newspaperArticle",
"title": "Secret U.S. memo sanctioned killing of Aulaqi",
"creators": [
{
"firstName": "Peter",
"lastName": "Finn",
"creatorType": "author"
}
],
"date": "2011-09-30",
"ISSN": "0190-8286",
"abstractNote": "The Obama administration has refused to reveal the details of its legal rationale for targeting radical cleric Anwar al-Aulaqi.",
"language": "en-US",
"libraryCatalog": "www.washingtonpost.com",
"publicationTitle": "Washington Post",
"url": "https://www.washingtonpost.com/world/national-security/aulaqi-killing-reignites-debate-on-limits-of-executive-power/2011/09/30/gIQAx1bUAL_story.html",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.washingtonpost.com/blogs/ezra-klein/post/jack-abramoffs-guide-to-buying-congressmen/2011/08/25/gIQAoXKLvM_blog.html",
"items": [
{
"itemType": "blogPost",
"title": "Jack Abramoffs guide to buying congressmen",
"creators": [
{
"firstName": "Ezra",
"lastName": "Klein",
"creatorType": "author"
}
],
"date": "2011-11-07",
"abstractNote": "Its easy if you know what to do.",
"blogTitle": "Washington Post",
"language": "en-US",
"url": "https://www.washingtonpost.com/blogs/ezra-klein/post/jack-abramoffs-guide-to-buying-congressmen/2011/08/25/gIQAoXKLvM_blog.html",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.washingtonpost.com/archive/entertainment/books/1991/04/07/bombs-in-the-cause-of-brotherhood/fe590e29-8052-4086-b9a9-6fcabdbae4ba/",
"items": [
{
"itemType": "newspaperArticle",
"title": "BOMBS IN THE CAUSE OF BROTHERHOOD",
"creators": [
{
"firstName": "Claudio",
"lastName": "Segre",
"creatorType": "author"
}
],
"date": "1991-04-07",
"ISSN": "0190-8286",
"language": "en-US",
"libraryCatalog": "www.washingtonpost.com",
"publicationTitle": "Washington Post",
"url": "https://www.washingtonpost.com/archive/entertainment/books/1991/04/07/bombs-in-the-cause-of-brotherhood/fe590e29-8052-4086-b9a9-6fcabdbae4ba/",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.washingtonpost.com/world/the_americas/coronavirus-brazil-bolsonaro-tests-positive/2020/07/07/5fa71548-c049-11ea-b4f6-cb39cd8940fb_story.html",
"items": [
{
"itemType": "newspaperArticle",
"title": "Brazils Bolsonaro tests positive for coronavirus",
"creators": [
{
"firstName": "Terrence",
"lastName": "McCoy",
"creatorType": "author"
}
],
"date": "2020-07-07",
"ISSN": "0190-8286",
"abstractNote": "The populist president said hes taking hydroxychloroquine to treat the infection. The U.S. ambassador to Brazil has tested negative for covid-19.",
"language": "en-US",
"libraryCatalog": "www.washingtonpost.com",
"publicationTitle": "Washington Post",
"url": "https://www.washingtonpost.com/world/the_americas/coronavirus-brazil-bolsonaro-tests-positive/2020/07/07/5fa71548-c049-11ea-b4f6-cb39cd8940fb_story.html",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.washingtonpost.com/media/2021/06/09/new-yorker-protest/",
"items": [
{
"itemType": "newspaperArticle",
"title": "The New Yorkers labor dispute reaches Anna Wintours doorstep",
"creators": [
{
"firstName": "Jada",
"lastName": "Yuan",
"creatorType": "author"
},
{
"firstName": "Elahe",
"lastName": "Izadi",
"creatorType": "author"
}
],
"date": "2021-06-09",
"ISSN": "0190-8286",
"abstractNote": "Workers for the prestigious Condé Nast-owned magazine are threatening a strike.",
"language": "en-US",
"libraryCatalog": "www.washingtonpost.com",
"publicationTitle": "Washington Post",
"url": "https://www.washingtonpost.com/media/2021/06/09/new-yorker-protest/",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.washingtonpost.com/climate-environment/interactive/2024/louisiana-sea-wall-gas-facility-flooding/",
"items": [
{
"itemType": "newspaperArticle",
"title": "A rising fortress in sinking land",
"creators": [
{
"firstName": "Steven",
"lastName": "Mufson",
"creatorType": "author"
},
{
"firstName": "Ricky",
"lastName": "Carioti",
"creatorType": "author"
}
],
"date": "2024-07-05",
"ISSN": "0190-8286",
"abstractNote": "Rising seas and steel walls test the strength of a Louisiana coastal gas development, raising questions about flooding, climate change and community impacts.",
"language": "en-US",
"libraryCatalog": "www.washingtonpost.com",
"publicationTitle": "Washington Post",
"url": "https://www.washingtonpost.com/climate-environment/interactive/2024/louisiana-sea-wall-gas-facility-flooding/",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://www.washingtonpost.com/home/2024/07/13/tips-choosing-right-size-lamp/",
"items": [
{
"itemType": "newspaperArticle",
"title": "Is your lamp the right size? Theres an equation for that.",
"creators": [
{
"firstName": "Laura",
"lastName": "Daily",
"creatorType": "author"
}
],
"date": "2024-07-13",
"ISSN": "0190-8286",
"abstractNote": "You dont need to be a math whiz to choose a lamp. You just need to consider function and know a bit about proportion.",
"language": "en-US",
"libraryCatalog": "www.washingtonpost.com",
"publicationTitle": "Washington Post",
"shortTitle": "Is your lamp the right size?",
"url": "https://www.washingtonpost.com/home/2024/07/13/tips-choosing-right-size-lamp/",
"attachments": [
{
"title": "Snapshot",
"mimeType": "text/html"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
}
]
/** END TEST CASES **/