{ "translatorID": "31659710-d04e-45d0-84ba-8e3f5afc4a54", "translatorType": 4, "label": "Twitter", "creator": "Bo An, Dan Stillman", "target": "^https?://([^/]+\\.)?(twitter|x)\\.com/", "minVersion": "4.0", "maxVersion": null, "priority": 100, "inRepository": true, "browserSupport": "gcsibv", "lastUpdated": "2024-05-24 17:20:00" } /* ***** BEGIN LICENSE BLOCK ***** Twitter Translator Copyright © 2020-2021 Bo An, Dan Stillman This file is part of Zotero. Zotero is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Zotero is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with Zotero. If not, see . ***** END LICENSE BLOCK ***** */ let titleRe = /^(?:\(\d+\) )?(.+) .* (?:Twitter|X): .([\S\s]+). \/ (?:Twitter|X)/; let domainRe = /https?:\/\/(?:www\.)?([^/]+)+/; function detectWeb(doc, url) { if (url.includes('/status/')) { return "forumPost"; } return false; } function unshortenURLs(doc, str) { var matches = str.match(/https?:\/\/t\.co\/[a-z0-9]+/gi); if (matches) { for (let match of matches) { let url = unshortenURL(doc, match); // Replace t.co URLs (with optional query string, such as "?amp=1") // in text with real URLs str = str.replace(new RegExp(ZU.quotemeta(match) + '(\\?\\w+)?'), url); } } return str; } function unshortenURL(doc, tCoURL) { var a = doc.querySelector('a[href*="' + tCoURL + '"]'); if (!a || !domainRe.test(a.textContent)) { return tCoURL; } return a.textContent.replace(/…$/, ''); } function extractURLs(doc, str) { var urls = []; var matches = str.match(/https?:\/\/t\.co\/[a-z0-9]+/gi); if (matches) { for (let match of matches) { urls.push(unshortenURL(doc, match)); } } return urls; } // Find the link to the permalink (e.g., "8h") function findPermalinkLink(doc, canonicalURL) { let path = canonicalURL.match(/https?:\/\/[^/]+(.+)/)[1]; return doc.querySelector(`a[href="${path}" i]`); } function doWeb(doc, url) { scrape(doc, url); } function scrape(doc, url) { var item = new Zotero.Item("forumPost"); var canonicalURL = doc.querySelector('link[rel="canonical"]').href; // For unclear reasons, in some cases the URL doesn't have capitalization // but rel="canonical" does, and in other cases it's the other way around, // so if rel="canonical" doesn't have any caps, use the URL if (!/[A-Z]/.test(canonicalURL)) { canonicalURL = url.match(/^([^?#]+)/)[1]; } var originalTitle = doc.title; var unshortenedTitle = ZU.unescapeHTML(unshortenURLs(doc, originalTitle)); // Extract tweet from "[optional count] [Display Name] on Twitter: “[tweet]”" var matches = unshortenedTitle.match(titleRe); var [, author, tweet] = matches; // Title is tweet with newlines removed item.title = tweet.replace(/\s+/g, ' '); // Don't set short title when tweet contains colon item.shortTitle = false; // Identify the tweet block by looking for the client link (e.g, "Tweetbot") var articleEl; var clientLink = doc.querySelector('a[href*="source-labels"]'); if (clientLink) { articleEl = clientLink.closest('article'); } // If client link not found, use permalink // // This is the case on share URLs such as // https://twitter.com/aerospacecorp/status/1391160460150382598?s=27, // but that doesn't serve content to the test runner for some reason, so // we don't have a test for it. else { let a = findPermalinkLink(doc, canonicalURL); articleEl = a.closest('article'); } var tweetSelector = 'article[role="article"]'; // If the title is modified (e.g., because we stripped newlines), add the // full tweet in Abstract. // // Same if it's a quote tweet, since the quoted tweet isn't included in the // title. It would be better to just get the tweet URL, but that doesn't // seem to be available on the page. // // DEBUG: 'role*=blockquote' doesn't seem to be used anymore, so there // doesn't seem to be a good way to get the contents of the quoted tweet let blockquote = articleEl.querySelector(`${tweetSelector} div[role*=blockquote]`); if (tweet != item.title || blockquote) { let note = ZU.text2html('“' + tweet + '”'); if (blockquote) { note += '
' + ZU.text2html(blockquote.innerText.replace(/[\s]+/g, ' ').trim()) + "
"; } item.notes.push({ note }); } item.language = attr(articleEl, 'div[lang]', 'lang'); item.creators.push({ lastName: `${author} [@${canonicalURL.split('/')[3]}]`, fieldMode: 1, creatorType: 'author' }); // Date and time var spans = articleEl.querySelectorAll(`${tweetSelector} a span`); for (let span of spans) { // Is this used in all locales? let dotSep = ' · '; let str = span.textContent; if (!str.includes(dotSep)) { // Z.debug("Date separator not found") // Share URLs don't show the date, so use the