zotero/translators/EBSCOhost.js

593 lines
18 KiB
JavaScript
Raw Normal View History

2024-08-27 21:48:20 -05:00
{
"translatorID": "d0b1914a-11f1-4dd7-8557-b32fe8a3dd47",
"translatorType": 4,
"label": "EBSCOhost",
"creator": "Simon Kornblith, Michael Berkowitz, Josh Geller",
"target": "^https?://[^/]+/(eds|bsi|ehost)/(results|detail|folder|pdfviewer)",
"minVersion": "3.0",
"maxVersion": null,
"priority": 100,
"inRepository": true,
"browserSupport": "gcsibv",
"lastUpdated": "2024-05-09 17:00:00"
}
/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2021 Simon Kornblith, Michael Berkowitz, Josh Geller
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
// eslint-disable-next-line no-unused-vars
function detectWeb(doc, url) {
// See if this is a search results or folder results page
var multiple = getResultList(doc, {}, {}); // we don't care about actual data at this point
if (multiple) {
return "multiple";
}
if (doc.querySelector("a.permalink-link")) {
return "journalArticle";
}
else if (ZU.xpathText(doc, '//section[@class="record-header"]/h2')) {
return "journalArticle";
}
return false;
}
/*
* given the text of the delivery page, downloads an item
*/
function downloadFunction(text, url, prefs) {
if (!(/^TY\s\s?-/m.test(text))) {
text = "\nTY - JOUR\n" + text; // this is probably not going to work if there is garbage text in the begining
}
// fix DOI
text = text.replace(/^(?:L3|DI)(\s\s?-)/gm, 'DO$1');
// There are cases where the RIS type isn't good--
// there is sometimes better data in M3
// This list should be augmented over time
var m, m3Data;
var itemType = prefs.itemType;
if (!itemType && (m = text.match(/^M3\s+-\s*(.*)$/m))) {
m3Data = m[1]; // used later
switch (m3Data) {
case "Literary Criticism":
case "Case Study":
itemType = "journalArticle";
break;
}
}
// remove M3 so it does not interfere with DOI.
// hopefully EBCSOhost doesn't use this for anything useful
text = text.replace(/^M3\s\s?-.*/gm, '');
// we'll save this for later, in case we have to throw away a subtitle
// from the RIS
let subtitle;
// EBSCOhost uses nonstandard tags to represent journal titles on some items
// Sometimes T2 also just duplicates the journal title across along with JO/JF/J1
// no /g flag so we don't create duplicate tags
let journalRe = /^(JO|JF|J1)/m;
let journalTitleRe = /^(?:JO|JF|J1)\s\s?-\s?(.*)/m;
if (journalTitleRe.test(text)) {
let subtitleRe = /^T2\s\s?-\s?(.*)/m;
let subtitleMatch = text.match(subtitleRe);
if (subtitleMatch && subtitleMatch[1] != text.match(journalTitleRe)[1]) {
// if there's already something in T2, store it and erase it from the RIS
subtitle = subtitleMatch[1];
text = text.replace(subtitleRe, '');
}
text = text.replace(journalRe, 'T2');
}
// Let's try to keep season info
// Y1 - 1993///Winter93
// Y1 - 2009///Spring2009
// maybe also Y1 - 1993///93Winter
var season = text.match(
/^(Y1\s+-\s+(\d{2})(\d{2})\/\/\/)(?:\2?\3(.+)|(.+?)\2?\3)\s*$/m);
season = season && (season[4] || season[5]);
// load translator for RIS
var translator = Zotero.loadTranslator("import");
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
translator.setString(text);
// eslint-disable-next-line padded-blocks
translator.setHandler("itemDone", function (obj, item) {
/* Fix capitalization issues */
// title
if (!item.title && prefs.itemTitle) {
item.title = prefs.itemTitle;
}
if (item.title) {
// Strip final period from title if present
item.title = item.title.replace(/([^.])\.\s*$/, '$1');
if (item.title.toUpperCase() == item.title) {
item.title = ZU.capitalizeTitle(item.title, true);
}
if (subtitle) {
item.title += `: ${subtitle}`;
}
}
// authors
var fn, ln;
for (var i = 0, n = item.creators.length; i < n; i++) {
fn = item.creators[i].firstName;
if (fn && fn.toUpperCase() == fn) {
item.creators[i].firstName = ZU.capitalizeTitle(fn, true);
}
ln = item.creators[i].lastName;
if (ln && ln.toUpperCase() == ln) {
item.creators[i].lastName = ZU.capitalizeTitle(ln, true);
}
}
// Sometimes EBSCOhost gives us year and season
if (season) {
item.date = season + ' ' + item.date;
}
// The non-DOI values in M3 should never pass RIS translator,
// but, just in case, if we know it's not DOI, let's remove it
if (item.DOI && item.DOI == m3Data) {
item.DOI = undefined;
}
// Strip EBSCOhost tags from the end of abstract
if (item.abstractNote) {
item.abstractNote = item.abstractNote
.replace(/\s*\[[^\].]+\]$/, ''); // to be safe, don't strip sentences
}
// Get the accession number from URL if not in RIS
var an = url.match(/_(\d+)_AN/);
if (!item.callNumber) {
if (an) {
an = an[1];
item.callNumber = an;
}
}
else if (!an) { // we'll need this later
an = item.callNumber;
}
// A lot of extra info is jammed into notes
item.notes = [];
// the archive field is pretty useless:
item.archive = "";
if (item.url) {
// Trim the ⟨=cs suffix -- EBSCO can't find the record with it!
item.url = item.url.replace(/(AN=[0-9]+)⟨=[a-z]{2}/, "$1")
.replace(/#.*$/, '');
if (!prefs.hasFulltext) {
// For items without full text,
// move the stable link to a link attachment
item.attachments.push({
url: item.url + "&scope=cite",
title: "EBSCO Record",
mimeType: "text/html",
snapshot: false
});
item.url = undefined;
}
}
if (prefs.pdfURL) {
item.attachments.push({
url: prefs.pdfURL,
title: "EBSCO Full Text",
mimeType: "application/pdf",
proxy: false
});
item.complete();
}
else if (prefs.fetchPDF) {
var args = urlToArgs(url);
if (prefs.mobile) {
// the PDF is not embedded in the mobile view
var id = url.match(/([^/]+)\?sid/)[1];
var pdfurl = "/ehost/pdfviewer/pdfviewer/"
+ id
+ "?sid=" + args.sid
+ "&vid=" + args.vid;
item.attachments.push({
url: pdfurl,
title: "EBSCO Full Text",
mimeType: "application/pdf"
});
item.complete();
}
else {
var pdf = "/ehost/pdfviewer/pdfviewer?"
+ "sid=" + args.sid
+ "&vid=" + args.vid;
Z.debug("Fetching PDF from " + pdf);
ZU.processDocuments(pdf,
function (pdfDoc) {
if (!isCorrectViewerPage(pdfDoc)) {
Z.debug('PDF viewer page doesn\'t appear to be serving the correct PDF. Skipping PDF attachment.');
return;
}
var realpdf = findPdfUrl(pdfDoc);
if (realpdf) {
item.attachments.push({
url: realpdf,
title: "EBSCO Full Text",
mimeType: "application/pdf",
proxy: false
});
}
else {
Z.debug("Could not find a reference to PDF.");
}
},
function () {
Z.debug("PDF retrieval done.");
item.complete();
}
);
}
}
else {
Z.debug("Not attempting to retrieve PDF.");
item.complete();
}
});
translator.getTranslatorObject(function (trans) {
trans.options.itemType = itemType;
trans.doImport();
});
}
// collects item url->title (in items) and item url->database info (in itemInfo)
function getResultList(doc, items, itemInfo) {
var results = ZU.xpath(doc, '//li[@class="result-list-li"]');
var title, folderData, count = 0;
// make search results work if you can't add to folder, e.g. for EBSCO used as discovery service of library such as
// https://www.library.umass.edu/
// http://search.ebscohost.com/login.aspx?direct=true&site=eds-live&scope=site&type=0&custid=s4895734&groupid=main&profid=eds&mode=and&lang=en&authtype=ip,guest,athens
if (results.length > 0) {
let folder = ZU.xpathText(doc, '//span[@class = "item add-to-folder"]/input/@value|.//span[@class = "item add-to-folder"]/a[1]/@data-folder');
for (let i = 0; i < results.length; i++) {
title = ZU.xpath(results[i], './/a[@class = "title-link color-p4"]');
if (!title.length) continue;
if (folder) {
folderData = ZU.xpath(results[i],
'.//span[@class = "item add-to-folder"]/input/@value|.//span[@class = "item add-to-folder"]/a[1]/@data-folder');
// I'm not sure if the input/@value format still exists somewhere, but leaving this in to be safe
// skip if we're missing something
itemInfo[title[0].href] = {
folderData: folderData[0].textContent,
// let's also store item type
itemType: ZU.xpathText(results[i],
'.//div[contains(@class, "pubtype")]/span/@class'),
itemTitle: ZU.xpathText(results[i], './/span[@class="title-link-wrapper"]/a'),
// check if PDF is available
fetchPDF: ZU.xpath(results[i], './/span[@class="record-formats"]/a[contains(@class,"pdf-ft")]').length,
hasFulltext: ZU.xpath(results[i], './/span[@class="record-formats"]/a[contains(@class,"pdf-ft") or contains(@class, "html-ft")]').length
};
}
count++;
items[title[0].href] = title[0].textContent;
}
}
else {
results = ZU.xpath(doc, '//ol[@id="resultlist"]//li[@class="resultlist-record"]');
let folder = ZU.xpathText(doc, '//a[@class="add-to-folder"]');
for (let i = 0; i < results.length; i++) {
title = ZU.xpath(results[i], './/h2[@class="record-title"]/a');
if (!title.length) continue;
if (folder) {
folderData = ZU.xpath(results[i], './/a[@class="add-to-folder"]/@data-folder');
itemInfo[title[0].href] = {
folderData: folderData[0].textContent,
// let's also store item type
itemType: ZU.xpathText(results[i],
'.//div[contains(@class, "pub-type")]/@class'),
itemTitle: ZU.xpathText(results[i], './/h2[@class="record-title"]/a'),
// check if FullText is available - if it is we also try the PDF
fetchPDF: ZU.xpath(results[i], './/ul[@class="record-description"]/li/span[contains(text(),"Full Text")]').length,
hasFulltext: ZU.xpath(results[i], './/ul[@class="record-description"]/li/span[contains(text(),"Full Text")]').length
};
}
count++;
items[title[0].href] = title[0].textContent;
}
}
return count;
}
// returns Zotero item type given a class name for the item icon in search list
function ebscoToZoteroItemType(ebscoType) {
if (!ebscoType) return false;
var m = ebscoType.match(/\bpt-(\S+)/);
if (m) {
switch (m[1]) {
case "review":
case "academicJournal":
return "journalArticle";
// This isn't always right. See https://forums.zotero.org/discussion/42535/atlas-codes-journals-as-magazines/
// case "serialPeriodical":
// return "magazineArticle"; //is this right?
// break;
case "newspaperArticle":
return "newspaperArticle";
}
}
return false;
}
// extracts arguments from a url and places them into an object
var argumentsRE = /([^?=&]+)(?:=([^&]*))?/g;
function urlToArgs(url) {
// reset index
argumentsRE.lastIndex = 0;
var args = {};
var arg;
// eslint-disable-next-line padded-blocks
while ((arg = argumentsRE.exec(url))) {
args[arg[1]] = arg[2];
}
return args;
}
// given a PDF viewer document, returns whether it appears to be displaying
// the correct PDF corresponding to the requested item. EBSCO sometimes returns
// the PDF for a previously viewed item when the current item doesn't have an
// associated PDF, but it won't serve metadata on the PDF viewer page in these
// cases.
function isCorrectViewerPage(pdfDoc) {
let citationAmplitude = attr(pdfDoc, 'a[name="citation"][data-amplitude]', 'data-amplitude');
if (!citationAmplitude || !citationAmplitude.startsWith('{')) {
Z.debug('PDF viewer page structure has changed - assuming PDF is correct');
return true;
}
return !!JSON.parse(citationAmplitude).result_index;
}
// given a pdfviewer page, extracts the PDF url
function findPdfUrl(pdfDoc) {
var el;
var realpdf = (el = pdfDoc.getElementById('downloadLink')) && el.href; // link
if (!realpdf) {
// input
realpdf = (el = pdfDoc.getElementById('pdfUrl')) && el.value;
}
if (!realpdf) {
realpdf = (el = pdfDoc.getElementById('pdfIframe') // iframe
|| pdfDoc.getElementById('pdfEmbed')) // embed
&& el.src;
}
return realpdf;
}
/**
* borrowed from http://www.webtoolkit.info/javascript-base64.html
*/
// We should have this available now - leaving in the code just in case
/*
var base64KeyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
function utf8Encode(string) {
string = string.replace(/\r\n/g, "\n");
var utftext = "";
for (var n = 0; n < string.length; n++) {
var c = string.charCodeAt(n);
if (c < 128) {
utftext += String.fromCharCode(c);
}
else if ((c > 127) && (c < 2048)) {
utftext += String.fromCharCode((c >> 6) | 192);
utftext += String.fromCharCode((c & 63) | 128);
}
else {
utftext += String.fromCharCode((c >> 12) | 224);
utftext += String.fromCharCode(((c >> 6) & 63) | 128);
utftext += String.fromCharCode((c & 63) | 128);
}
}
return utftext;
}
function btoa(input) {
var output = "";
var chr1, chr2, chr3, enc1, enc2, enc3, enc4;
var i = 0;
input = utf8Encode(input);
while (i < input.length) {
chr1 = input.charCodeAt(i++);
chr2 = input.charCodeAt(i++);
chr3 = input.charCodeAt(i++);
enc1 = chr1 >> 2;
enc2 = ((chr1 & 3) << 4) | (chr2 >> 4);
enc3 = ((chr2 & 15) << 2) | (chr3 >> 6);
enc4 = chr3 & 63;
if (isNaN(chr2)) {
enc3 = enc4 = 64;
}
else if (isNaN(chr3)) {
enc4 = 64;
}
output = output
+ base64KeyStr.charAt(enc1) + base64KeyStr.charAt(enc2)
+ base64KeyStr.charAt(enc3) + base64KeyStr.charAt(enc4);
}
return output;
} */
/**
* end borrowed code
*/
/**
* EBSCOhost encodes the target url before posting the form
* Replicated from http://global.ebsco-content.com/interfacefiles/13.4.0.98/javascript/bundled/_layout2/master.js
*/
function urlSafeEncodeBase64(str) {
return btoa(str).replace(/\+/g, "-").replace(/\//g, "_")
.replace(/=*$/, function (m) {
return m.length;
});
}
// var counter;
// eslint-disable-next-line no-unused-vars
function doWeb(doc, url) {
// counter = 0;
var items = {};
var itemInfo = {};
var multiple = getResultList(doc, items, itemInfo);
if (multiple) {
Zotero.selectItems(items, function (items) {
if (!items) {
return;
}
// fetch each url assynchronously
for (let i in items) {
(function (itemInfo) {
ZU.processDocuments(
i.replace(/#.*$/, ''),
function (doc) {
doDelivery(doc, itemInfo);
}
);
})(itemInfo[i]);
}
});
}
else {
doDelivery(doc); // Individual record.
// Record key exists in attribute for add to folder link in DOM
}
}
function doDelivery(doc, itemInfo) {
var folderData;
if (!itemInfo || !itemInfo.folderData) {
// Get the db, AN, and tag from ep.clientData instead
var clientData;
var scripts = doc.getElementsByTagName("script");
for (var i = 0; i < scripts.length; i++) {
clientData = scripts[i].textContent
.match(/var ep\s*=\s*({[^;]*})(?:;|\s*$)/);
if (clientData) break;
}
if (!clientData) {
return;
}
/* We now have the script containing ep.clientData */
clientData = clientData[1].match(/"currentRecord"\s*:\s*({[^}]*})/);
if (!clientData) {
return;
}
/* If this starts throwing exceptions, we should probably start try-catching it */
folderData = JSON.parse(clientData[1]);
}
else {
folderData = JSON.parse(itemInfo.folderData); // The attributes are a little different
folderData.Db = folderData.db;
folderData.Term = folderData.uiTerm;
folderData.Tag = folderData.uiTag;
}
// some preferences for later
var prefs = {};
prefs.mobile = false;
if (ZU.xpathText(doc, '//p[@class="view-layout"]/strong[@class="mobile"]')) {
prefs.mobile = true;
}
// figure out if there's a PDF available
// if PDFs stop downloading, might want to remove this
if (!itemInfo) {
if (doc.location.href.includes('/pdfviewer/')) {
prefs.pdfURL = findPdfUrl(doc);
prefs.fetchPDF = !!prefs.pdfURL;
}
else {
prefs.fetchPDF = !(ZU.xpath(doc, '//div[@id="column1"]//ul[1]/li').length // check for left-side column
&& !ZU.xpath(doc, '//a[contains(@class,"pdf-ft")]').length); // check if there's a PDF there
}
prefs.hasFulltext = !(ZU.xpath(doc, '//div[@id="column1"]//ul[1]/li').length // check for left-side column
&& !ZU.xpath(doc, '//a[contains(@class,"pdf-ft") or contains(@class, "html-ft")]').length);
prefs.itemTitle = ZU.xpathText(doc, '//dd[contains(@class, "citation-title")]/a/span')
|| ZU.xpathText(doc, '//h2[@id="selectionTitle"]');
}
else {
prefs.fetchPDF = itemInfo.fetchPDF;
prefs.hasFulltext = itemInfo.hasFulltext;
prefs.itemType = ebscoToZoteroItemType(itemInfo.itemType);
prefs.itemTitle = itemInfo.itemTitle;
}
if (prefs.itemTitle) {
prefs.itemTitle = ZU.trimInternal(prefs.itemTitle).replace(/([^.])\.$/, '$1');
}
// Z.debug(prefs);
var postURL = ZU.xpathText(doc, '//form[@id="aspnetForm"]/@action');
if (!postURL) {
postURL = doc.location.href; // fallback for mobile site
}
var args = urlToArgs(postURL);
postURL = "/ehost/delivery/ExportPanelSave/"
+ urlSafeEncodeBase64(folderData.Db + "__" + folderData.Term + "__" + folderData.Tag)
+ "?sid=" + args.sid
+ "&vid=" + args.vid
+ "&bdata=" + args.bdata
+ "&theExportFormat=1"; // RIS file
ZU.doGet(postURL, function (text) {
downloadFunction(text, postURL, prefs);
});
}
/** BEGIN TEST CASES **/
var testCases = []
/** END TEST CASES **/