zotero/translators/Canadiana.ca.js

{
	"translatorID": "2d174277-7651-458f-86dd-20e168d2f1f3",
	"label": "Canadiana.ca",
	"creator": "Adam Crymble, Sebastian Karcher",
	"target": "^https?://eco\\.canadiana\\.ca",
	"minVersion": "1.0.0b4.r5",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2012-07-03 16:44:04"
}

/**
 * Decides which kind of item the current page offers.
 *
 * @param {Document} doc - the loaded page (not inspected here).
 * @param {string} url - address of the page.
 * @returns {string|undefined} "book" for a record view page, "multiple" for
 *   a search page, and undefined for anything else.
 */
function detectWeb(doc, url) {
	// A "/view/" path segment is checked first, so it wins over "/search?".
	if (url.search(/\/view\//) !== -1) {
		return "book";
	}
	if (url.search(/\/search\?/) !== -1) {
		return "multiple";
	}
}


//Canadiana translator coded by Adam Crymble; updated and cleaned up by Sebastian Karcher.
//Because the site uses many inconsistent formats for the "Published" field, parsing is not always perfect, but it works for most entries.

/**
 * Copies one scraped value onto the item, if that value exists.
 *
 * @param {Object} newItem - item object that receives the value.
 * @param {Object} dataTags - scraped values keyed by field name.
 * @param {string} field - key to look up in dataTags.
 * @param {string} zoteroField - property of newItem to assign.
 */
function associateData(newItem, dataTags, field, zoteroField) {
	var scraped = dataTags[field];
	// Missing or empty values never overwrite what the item already holds.
	if (!scraped) {
		return;
	}
	newItem[zoteroField] = scraped;
}

/**
 * Splits a free-form imprint string such as "[Toronto? : s.n., 1857?]" into
 * its parts. Each part is only present when its pattern actually matches, so
 * an imprint without a date, a colon or a publisher no longer raises.
 *
 * @param {string} imprint - text of the "Published" / "Adresse bibliographique" cell.
 * @returns {{date: (string|undefined), place: (string|undefined), publisher: (string|undefined)}}
 */
function parseImprint(imprint) {
	var parts = {};

	// First run of digits, optionally followed by "-", "?", spaces or digits.
	var dateMatch = imprint.match(/\d+[-\?\s\d]*/);
	if (dateMatch) {
		parts.date = dateMatch[0].trim();
	}

	// Everything up to the first colon is taken as the place.
	var placeMatch = imprint.match(/.+?:/);
	if (placeMatch) {
		// Strip brackets/colon first, then trim, so no stray space is left behind.
		parts.place = placeMatch[0].replace(/[\[\]:]/g, "").trim();
	}

	// Text after the first colon, up to the next comma or digit, is the publisher.
	var publisherMatch = imprint.match(/:[^,\d]+/);
	if (publisherMatch) {
		parts.publisher = publisherMatch[0].replace(/[\[\]:\?]/g, "").trim();
	}

	return parts;
}

/**
 * Builds a "book" item from the record table of a document page and saves it.
 *
 * The table under div#documentRecord is read as heading (th) / value (td)
 * pairs; heading text has all whitespace removed before it is used as a key.
 * Creator, imprint, subject and identifier rows get special handling; every
 * other row is stored under its heading and later mapped to an item field.
 *
 * @param {Document} doc - the record page.
 * @param {string} url - address of the page (unused).
 */
function scrape(doc, url) {
	var newItem = new Zotero.Item("book");
	var dataTags = {};
	var tagsContent = [];

	var headings = doc.evaluate('//div[@id="documentRecord"]//table/tbody/tr/th', doc, null, XPathResult.ANY_TYPE, null);
	var values = doc.evaluate('//div[@id="documentRecord"]//table/tbody/tr/td', doc, null, XPathResult.ANY_TYPE, null);

	// Walk both result lists in step; stop as soon as either one runs out
	// instead of dereferencing a missing node.
	var heading, value;
	while ((heading = headings.iterateNext()) && (value = values.iterateNext())) {
		var fieldTitle = heading.textContent.replace(/\s+/g, '');
		var text = value.textContent;

		if (fieldTitle === "Creator" || fieldTitle === "Créateur") {
			// The name is passed on as-is, without cleanTags.
			newItem.creators.push(Zotero.Utilities.cleanAuthor(text, "author"));
		} else if (fieldTitle === "Adressebibliographique" || fieldTitle === "Published") {
			var imprint = parseImprint(Zotero.Utilities.cleanTags(text));
			if (imprint.date) dataTags["Date"] = imprint.date;
			if (imprint.place) dataTags["Place"] = imprint.place;
			if (imprint.publisher) dataTags["Publisher"] = imprint.publisher;
		} else if (fieldTitle === "Subject" || fieldTitle === "Sujet") {
			// One subject per line; blank entries are dropped and several
			// subject rows accumulate rather than overwrite each other.
			var subjects = Zotero.Utilities.cleanTags(text.trim()).split(/\s*\n+\s*/);
			for (var s = 0; s < subjects.length; s++) {
				if (subjects[s]) tagsContent.push(subjects[s]);
			}
		} else if (fieldTitle === "Identifier" || fieldTitle === "Identificateur") {
			// Prefix the bare number so it makes sense to the reader.
			dataTags["CIHMno."] = "CIHM Number: " + text.trim();
		} else {
			dataTags[fieldTitle] = Zotero.Utilities.cleanTags(text.trim());
		}
	}

	for (var t = 0; t < tagsContent.length; t++) {
		newItem.tags.push(tagsContent[t]);
	}

	// Heading key -> item field. English keys come first; a French key that is
	// also present is applied afterwards and therefore wins.
	var fieldMap = [
		["Title", "title"],
		["Place", "place"],
		["Publisher", "publisher"],
		["Date", "date"],
		["Language", "language"],
		["Pages", "pages"],
		["CIHMno.", "extra"],
		["DocumentSource", "rights"],
		["PermanentLink", "URL"],
		["Titre", "title"],
		["Langue", "language"],
		["Nombredepages", "pages"],
		["ICMHno", "extra"],
		["Documentoriginal", "rights"],
		["Lienpermanent", "URL"]
	];
	for (var f = 0; f < fieldMap.length; f++) {
		associateData(newItem, dataTags, fieldMap[f][0], fieldMap[f][1]);
	}

	// Only normalise whitespace when a title was actually found.
	if (newItem.title) {
		newItem.title = ZU.trimInternal(newItem.title);
	}
	// Records labelled English are tagged en-CA so that Zotero does not get
	// confused about title casing.
	if (newItem.language && newItem.language.match(/English|Anglais/)) {
		newItem.language = "en-CA";
	}

	newItem.complete();
}


/**
 * Entry point: saves the current record, or lets the user pick records from a
 * search page and saves each chosen one.
 *
 * @param {Document} doc - the loaded page.
 * @param {string} url - address of the page.
 */
function doWeb(doc, url) {
	// Anything that is not a result list is scraped directly.
	if (detectWeb(doc, url) != "multiple") {
		scrape(doc, url);
		return;
	}

	// Collect link address -> link text for every heading link pointing at "/view".
	var candidates = {};
	var links = doc.evaluate('//h2/a[contains(@href, "/view")]', doc, null, XPathResult.ANY_TYPE, null);
	for (var link = links.iterateNext(); link; link = links.iterateNext()) {
		candidates[link.href] = link.textContent;
	}

	Zotero.selectItems(candidates, function (chosen) {
		// A falsy result means nothing was selected.
		if (!chosen) {
			return true;
		}
		var urls = [];
		for (var href in chosen) {
			urls.push(href);
		}
		Zotero.Utilities.processDocuments(urls, scrape, function () {
			Zotero.done();
		});
	});
}

/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "http://eco.canadiana.ca/view/oocihm.44987/2?r=0&s=1",
		"items": [
			{
				"itemType": "book",
				"title": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital : for the year 1857.",
				"creators": [],
				"date": "1857?",
				"extra": "CIHM Number: 44987",
				"language": "eng",
				"libraryCatalog": "Canadiana.ca",
				"place": "Toronto?",
				"publisher": "s.n.",
				"shortTitle": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital",
				"attachments": [],
				"tags": [
					"Hospitals -- Ontario -- Toronto.",
					"Hôpitaux -- Ontario -- Toronto.",
					"Toronto Lying-In Hospital."
				],
				"notes": [],
				"seeAlso": []
			}
		]
	},
	{
		"type": "web",
		"url": "http://eco.canadiana.ca/search?q=Toronto&field=",
		"items": "multiple"
	}
]
/** END TEST CASES **/
the big one 2024-08-27 21:48:20 -05:00			`{`
			`"translatorID": "2d174277-7651-458f-86dd-20e168d2f1f3",`
			`"label": "Canadiana.ca",`
			`"creator": "Adam Crymble, Sebastian Karcher",`
			`"target": "^https?://eco\\.canadiana\\.ca",`
			`"minVersion": "1.0.0b4.r5",`
			`"maxVersion": "",`
			`"priority": 100,`
			`"inRepository": true,`
			`"translatorType": 4,`
			`"browserSupport": "gcsibv",`
			`"lastUpdated": "2012-07-03 16:44:04"`
			`}`

			`function detectWeb(doc, url) {`
			`if (url.match(/\/view\//)) {`
			`return "book";`
			`} else if (url.match(/\/search\?/)) {`
			`return "multiple";`
			`}`
			`}`


			`//Canadiana Translator Coding by Adam Crymble updated and cleaned by Sebastian Karcher`
			`//because the site uses so many random formats for the "Published" field, it's not always perfect. But it works for MOST entries`

			`function associateData(newItem, dataTags, field, zoteroField) {`
			`if (dataTags[field]) {`
			`newItem[zoteroField] = dataTags[field];`
			`}`
			`}`

			`function scrape(doc, url) {`
			`//declaring variables to be used later.`
			`var newItem = new Zotero.Item("book");`
			`var dataTags = new Object();`
			`var fieldTitle;`
			`var tagsContent = new Array();`

			`//these variables tell the program where to find the data we want in the HTML file we're looking at.`
			`//in this case, the data is found in a table.`
			`var xPath1 = '//div[@id="documentRecord"]//table/tbody/tr/th';`
			`var xPath2 = '//div[@id="documentRecord"]//table/tbody/tr/td';`


			`//at this point, all the data we want has been saved into the following 2 Objects: one for the headings, one for the content.`
			`// The 3rd object tells us how many items we've found.`
			`if (doc.evaluate('//div[@id="documentRecord"]//table/tbody/tr/th', doc, null, XPathResult.ANY_TYPE, null)) {`
			`var xPath1Results = doc.evaluate(xPath1, doc, null, XPathResult.ANY_TYPE, null);`
			`var xPath2Results = doc.evaluate(xPath2, doc, null, XPathResult.ANY_TYPE, null);`
			`var xPathCount = doc.evaluate('count (//div[@id="documentRecord"]//table/tbody/tr/th)', doc, null, XPathResult.ANY_TYPE, null);`
			`}`

			`//At this point we have two lists (xPath1Results and xPath2Results). this loop matches the first item in the first list`
			`//with the first item in the second list, and on until the end.`
			`//If we then ask for the "Principal Author" the program returns "J.K. Rowling" instead of "Principal Author"`
			`if (doc.evaluate('//div[@id="documentRecord"]//table/tbody/tr/th', doc, null, XPathResult.ANY_TYPE, null)) {`
			`for (i = 0; i < xPathCount.numberValue; i++) {`
			`fieldTitle = xPath1Results.iterateNext().textContent.replace(/\s+/g, '');`
			`//gets the author's name without cleaning it away using cleanTags.`
			`if (fieldTitle == "Creator" || fieldTitle == "Créateur") {`
			`fieldTitle = "PrincipalAuthor";`
			`dataTags[fieldTitle] = (xPath2Results.iterateNext().textContent);`
			`var authorName = dataTags["PrincipalAuthor"];`
			`newItem.creators.push(Zotero.Utilities.cleanAuthor(dataTags["PrincipalAuthor"], "author"));`

			`//Splits Adressebibliographique or Imprint into 3 fields and cleans away any extra whitespace or unwanted characters.`
			`} else if (fieldTitle == "Adressebibliographique" || fieldTitle == "Published") {`

			`fieldTitle = "Imprint";`
			`dataTags[fieldTitle] = Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent);`

			`var justDate = dataTags["Imprint"].match(/\d+[-\?\s\d]*/)[0];`
			`if (justDate) dataTags["Date"] = justDate;`
			`var place = dataTags["Imprint"].match(/.+?:/)[0];`
			`if (place) dataTags["Place"] = place.trim().replace(/[\[\]\:]*/g, "")`
			`var publisher = dataTags["Imprint"].match(/\:[^,\d]+/)[0];`
			`if (publisher) dataTags["Publisher"] = publisher.replace(/[\[\]:\?]/g, "").trim();`

			`// determines how many tags there will be, pushes them into an array and clears away whitespace.`
			`} else if (fieldTitle == "Subject" || fieldTitle == "Sujet") {`
			`tagsContent = Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent.trim());`
			`tagsContent = tagsContent.replace(/\s*\n+\s*/g, "||").split(/\|\|/);`
			`Z.debug(tagsContent)`

			`}`
			`//Adds a string to CIHM no: and ICMH no: so that the resulting number makes sense to the reader.`
			`else if (fieldTitle == "Identifier" || fieldTitle == "Identificateur") {`
			`fieldTitle = "CIHMno.";`
			`dataTags[fieldTitle] = xPath2Results.iterateNext().textContent;`

			`dataTags["CIHMno."] = "CIHM Number: " + dataTags["CIHMno."].trim();`
			`} else {`

			`dataTags[fieldTitle] = Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent.replace(/^\s*|\s*$/g, ''));`

			`}`
			`}`
			`}`

			`//makes tags of the items in the "tagsContent" array.`
			`for (var i = 0; i < tagsContent.length; i++) {`
			`newItem.tags[i] = tagsContent[i];`
			`}`

			`//calls the associateData function to put the data in the correct Zotero field.`
			`//English`
			`associateData(newItem, dataTags, "Title", "title");`
			`associateData(newItem, dataTags, "Place", "place");`
			`associateData(newItem, dataTags, "Publisher", "publisher");`
			`associateData(newItem, dataTags, "Date", "date");`
			`associateData(newItem, dataTags, "Language", "language");`
			`associateData(newItem, dataTags, "Pages", "pages");`
			`associateData(newItem, dataTags, "CIHMno.", "extra");`
			`associateData(newItem, dataTags, "DocumentSource", "rights");`
			`associateData(newItem, dataTags, "PermanentLink", "URL");`

			`//French`
			`associateData(newItem, dataTags, "Titre", "title");`
			`associateData(newItem, dataTags, "Langue", "language");`
			`associateData(newItem, dataTags, "Nombredepages", "pages");`
			`associateData(newItem, dataTags, "ICMHno", "extra");`
			`associateData(newItem, dataTags, "Documentoriginal", "rights");`
			`associateData(newItem, dataTags, "Lienpermanent", "URL");`
			`//make sure that English language date is marked as en-US so Zotero doesn't get confused`
			`//about title casing.`
			`newItem.title = ZU.trimInternal(newItem.title)`
			`if (newItem.language) {`
			`if (newItem.language.match(/English|Anglais/)) newItem.language = "en-CA";`
			`}`
			`//Saves everything to Zotero.`
			`newItem.complete();`

			`}`


			`function doWeb(doc, url) {`
			`var articles = new Array();`
			`if (detectWeb(doc, url) == "multiple") {`
			`var items = new Object();`
			`var titles = doc.evaluate('//h2/a[contains(@href, "/view")]', doc, null, XPathResult.ANY_TYPE, null);`
			`var next_title;`
			`while (next_title = titles.iterateNext()) {`
			`items[next_title.href] = next_title.textContent;`
			`}`
			`Zotero.selectItems(items, function (items) {`
			`if (!items) {`
			`return true;`
			`}`
			`for (var i in items) {`
			`articles.push(i);`
			`}`
			`Zotero.Utilities.processDocuments(articles, scrape, function () {`
			`Zotero.done();`
			`});`
			`});`
			`} else {`
			`scrape(doc, url);`
			`}`
			`}`

			`/** BEGIN TEST CASES **/`
			`var testCases = [`
			`{`
			`"type": "web",`
			`"url": "http://eco.canadiana.ca/view/oocihm.44987/2?r=0&s=1",`
			`"items": [`
			`{`
			`"itemType": "book",`
			`"title": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital : for the year 1857.",`
			`"creators": [],`
			`"date": "1857?",`
			`"extra": "CIHM Number: 44987",`
			`"language": "eng",`
			`"libraryCatalog": "Canadiana.ca",`
			`"place": "Toronto?",`
			`"publisher": "s.n.",`
			`"shortTitle": "Toronto Lying-In Hospital. Report of the Toronto Lying-In Hospital",`
			`"attachments": [],`
			`"tags": [`
			`"Hospitals -- Ontario -- Toronto.",`
			`"Hôpitaux -- Ontario -- Toronto.",`
			`"Toronto Lying-In Hospital."`
			`],`
			`"notes": [],`
			`"seeAlso": []`
			`}`
			`]`
			`},`
			`{`
			`"type": "web",`
			`"url": "http://eco.canadiana.ca/search?q=Toronto&field=",`
			`"items": "multiple"`
			`}`
			`]`
			`/** END TEST CASES **/`