{
	"translatorID": "2d174277-7651-458f-86dd-20e168d2f1f3",
	"label": "Canadiana.ca",
	"creator": "Adam Crymble, Sebastian Karcher",
	"target": "^https?://eco\\.canadiana\\.ca",
	"minVersion": "1.0.0b4.r5",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2012-07-03 16:44:04"
}

/**
 * Classifies a page from its URL alone; the document is not inspected.
 *
 * @param {Document} doc - The page document (unused).
 * @param {String} url - The page URL.
 * @returns {String|undefined} "book" when the URL contains "/view/",
 *     "multiple" when it contains "/search?", otherwise undefined.
 */
function detectWeb(doc, url) {
	if (url.indexOf("/view/") !== -1) {
		return "book";
	}
	if (url.indexOf("/search?") !== -1) {
		return "multiple";
	}
}

// Canadiana translator: original code by Adam Crymble; updated and cleaned up by Sebastian Karcher.
// Because the site uses so many inconsistent formats for the "Published" field, parsing is not always perfect, but it works for most entries.

/**
 * Copies one scraped value onto the item, but only when that value is truthy,
 * so an existing item field is never overwritten by a missing or empty one.
 *
 * @param {Object} newItem - The item being built; receives the value.
 * @param {Object} dataTags - Scraped values keyed by field heading.
 * @param {String} field - Key to read from dataTags.
 * @param {String} zoteroField - Property name to set on newItem.
 */
function associateData(newItem, dataTags, field, zoteroField) {
	var value = dataTags[field];
	if (value) {
		newItem[zoteroField] = value;
	}
}

/**
 * Splits an imprint string (the "Published" / "Adresse bibliographique"
 * value) into date, place and publisher.
 *
 * Each part is optional: a key is only present in the result when its
 * pattern matched, so an imprint in an unexpected format yields fewer keys
 * instead of throwing.
 *
 * @param {String} imprint - Imprint text, e.g. "[Toronto? : s.n., 1857?]".
 * @returns {Object} Object with any of the keys "Date", "Place", "Publisher".
 */
function parseImprint(imprint) {
	var parts = {};

	// First run of digits, plus any following digits, dashes, "?" or spaces.
	var dateMatch = imprint.match(/\d+[-\?\s\d]*/);
	if (dateMatch) {
		parts["Date"] = dateMatch[0];
	}

	// Everything up to the first colon; brackets and the colon are dropped
	// before trimming so no whitespace is left where the colon was.
	var placeMatch = imprint.match(/.+?:/);
	if (placeMatch) {
		parts["Place"] = placeMatch[0].replace(/[\[\]:]/g, "").trim();
	}

	// Text after the first colon, up to the first comma or digit.
	var publisherMatch = imprint.match(/\:[^,\d]+/);
	if (publisherMatch) {
		parts["Publisher"] = publisherMatch[0].replace(/[\[\]:\?]/g, "").trim();
	}

	return parts;
}

/**
 * Builds a "book" item from the record table inside div#documentRecord and
 * saves it with newItem.complete().
 *
 * Header cells (th) and value cells (td) are read in lock-step; each heading,
 * with all whitespace removed, becomes the key its value is stored under.
 * Both English and French headings are handled.
 *
 * @param {Document} doc - The record page.
 * @param {String} url - The page URL (unused).
 */
function scrape(doc, url) {
	var newItem = new Zotero.Item("book");
	var dataTags = {};
	var tagsContent = [];

	var headingXPath = '//div[@id="documentRecord"]//table/tbody/tr/th';
	var contentXPath = '//div[@id="documentRecord"]//table/tbody/tr/td';
	var headings = doc.evaluate(headingXPath, doc, null, XPathResult.ANY_TYPE, null);
	var contents = doc.evaluate(contentXPath, doc, null, XPathResult.ANY_TYPE, null);

	// Walk both lists together and stop as soon as either runs out, so a
	// heading without a matching value cell cannot cause a null dereference.
	var headingNode;
	var contentNode;
	while ((headingNode = headings.iterateNext()) && (contentNode = contents.iterateNext())) {
		var fieldTitle = headingNode.textContent.replace(/\s+/g, '');
		var rawValue = contentNode.textContent;

		if (fieldTitle === "Creator" || fieldTitle === "Créateur") {
			// The author's name is passed on as-is, without cleanTags.
			dataTags["PrincipalAuthor"] = rawValue;
			newItem.creators.push(Zotero.Utilities.cleanAuthor(rawValue, "author"));
		} else if (fieldTitle === "Adressebibliographique" || fieldTitle === "Published") {
			// Split the imprint into date, place and publisher.
			var imprint = Zotero.Utilities.cleanTags(rawValue);
			dataTags["Imprint"] = imprint;
			var imprintParts = parseImprint(imprint);
			for (var part in imprintParts) {
				dataTags[part] = imprintParts[part];
			}
		} else if (fieldTitle === "Subject" || fieldTitle === "Sujet") {
			// One tag per line of the subject cell.
			tagsContent = Zotero.Utilities.cleanTags(rawValue.trim()).split(/\s*\n+\s*/);
			Z.debug(tagsContent);
		} else if (fieldTitle === "Identifier" || fieldTitle === "Identificateur") {
			// Prefix the number so it makes sense to the reader in "extra".
			dataTags["CIHMno."] = "CIHM Number: " + rawValue.trim();
		} else {
			dataTags[fieldTitle] = Zotero.Utilities.cleanTags(rawValue.replace(/^\s*|\s*$/g, ''));
		}
	}

	for (var t = 0; t < tagsContent.length; t++) {
		newItem.tags.push(tagsContent[t]);
	}

	// Map scraped headings to Zotero fields.
	// English
	associateData(newItem, dataTags, "Title", "title");
	associateData(newItem, dataTags, "Place", "place");
	associateData(newItem, dataTags, "Publisher", "publisher");
	associateData(newItem, dataTags, "Date", "date");
	associateData(newItem, dataTags, "Language", "language");
	associateData(newItem, dataTags, "Pages", "pages");
	associateData(newItem, dataTags, "CIHMno.", "extra");
	associateData(newItem, dataTags, "DocumentSource", "rights");
	associateData(newItem, dataTags, "PermanentLink", "URL");

	// French (applied second, so these win when both are present)
	associateData(newItem, dataTags, "Titre", "title");
	associateData(newItem, dataTags, "Langue", "language");
	associateData(newItem, dataTags, "Nombredepages", "pages");
	associateData(newItem, dataTags, "ICMHno", "extra");
	associateData(newItem, dataTags, "Documentoriginal", "rights");
	associateData(newItem, dataTags, "Lienpermanent", "URL");

	// Only normalize the title when the record actually had one.
	if (newItem.title) {
		newItem.title = ZU.trimInternal(newItem.title);
	}

	// Mark English-language records as en-CA so Zotero doesn't get confused
	// about title casing.
	if (newItem.language && newItem.language.match(/English|Anglais/)) {
		newItem.language = "en-CA";
	}

	// Saves everything to Zotero.
	newItem.complete();
}

/**
 * Translator entry point.
 *
 * On a page that detectWeb reports as "multiple", collects every link under
 * an h2 whose href contains "/view", lets the user choose among them via
 * Zotero.selectItems, and scrapes each chosen URL. On any other page the
 * current document is scraped directly.
 *
 * @param {Document} doc - The current page.
 * @param {String} url - The current page URL.
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) != "multiple") {
		scrape(doc, url);
		return;
	}

	// Map of result URL -> title shown in the selection dialog.
	var items = {};
	var titles = doc.evaluate('//h2/a[contains(@href, "/view")]', doc, null, XPathResult.ANY_TYPE, null);
	var titleLink;
	while ((titleLink = titles.iterateNext())) {
		items[titleLink.href] = titleLink.textContent;
	}

	Zotero.selectItems(items, function (selected) {
		// Nothing chosen (e.g. dialog cancelled): nothing to scrape.
		if (!selected) {
			return true;
		}
		var articles = [];
		for (var itemUrl in selected) {
			articles.push(itemUrl);
		}
		Zotero.Utilities.processDocuments(articles, scrape, function () {
			Zotero.done();
		});
	});
}

/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "http://eco.canadiana.ca/view/oocihm.44987/2?r=0&s=1",
		"items": [
			{
				"itemType": "book",
				"title": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital : for the year 1857.",
				"creators": [],
				"date": "1857?",
				"extra": "CIHM Number: 44987",
				"language": "eng",
				"libraryCatalog": "Canadiana.ca",
				"place": "Toronto?",
				"publisher": "s.n.",
				"shortTitle": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital",
				"attachments": [],
				"tags": [
					"Hospitals -- Ontario -- Toronto.",
					"Hôpitaux -- Ontario -- Toronto.",
					"Toronto Lying-In Hospital."
				],
				"notes": [],
				"seeAlso": []
			}
		]
	},
	{
		"type": "web",
		"url": "http://eco.canadiana.ca/search?q=Toronto&field=",
		"items": "multiple"
	}
]
/** END TEST CASES **/