zotero/translators/National Archives of Austra...

523 lines
15 KiB
JavaScript
Raw Normal View History

2024-08-27 21:48:20 -05:00
{
"translatorID": "50a4cf3f-92ef-4e9f-ab15-815229159b16",
"label": "National Archives of Australia",
"creator": "Tim Sherratt, Aurimas Vinckevicius",
"target": "^https?://recordsearch\\.naa\\.gov\\.au/",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2021-02-13 11:39:57"
}
/*
National Archives of Australia Translator
Copyright (C) 2011 Tim Sherratt (tim@discontents.com.au, @wragge)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// URL pattern for pages that list several records (series listings, item
// listings, PhotoSearch result pages). Case-insensitive; because it is not
// anchored after ".asp" it also matches ".aspx" URLs. Capture group 1 is the
// page name, which getSearchResults() switches on.
var multiplesRE = /\/(SeriesListing|ItemsListing|PhotoSearchSearchResults|PhotoListing)\.asp/i;
// URL pattern for pages that show a single record (series, item, photo or the
// digitised-image viewer). Capture group 1 is the page name, which scrape()
// uses to pick the matching scraper.
var singleItemRE = /\/(SeriesDetail|ItemDetail|PhotoSearchItemDetail|ViewImage|PhotoDetail)\.asp/i;
/**
 * Work out what kind of page is being viewed.
 *
 * @param {Document} doc - the loaded page
 * @param {String} url - the page URL
 * @returns {String|Boolean} "multiple" for a listing page that has at least
 *   one usable result, "manuscript" for any single-record page, otherwise false
 */
function detectWeb(doc, url) {
	// RecordSearch item/series listings or PhotoSearch results: only offer
	// "multiple" when getSearchResults() can actually find something to save
	if (multiplesRE.test(url)) {
		if (getSearchResults(doc, url, true)) {
			return "multiple";
		}
		return false;
	}
	// Every single-record page type is saved as a manuscript
	return singleItemRE.test(url) ? "manuscript" : false;
}
/**
 * Translator entry point: save the current record, or let the user pick
 * records from a listing page and save each of those.
 *
 * @param {Document} doc - the loaded page
 * @param {String} url - the page URL
 */
function doWeb(doc, url) {
	// Anything that is not a listing is scraped directly
	if (detectWeb(doc, url) != "multiple") {
		scrape(doc, url);
		return;
	}

	Zotero.selectItems(getSearchResults(doc, url), function (selected) {
		// Nothing to do when the selection comes back empty/cancelled
		if (!selected) {
			return;
		}
		// The keys of the selection are the record URLs to load and scrape
		ZU.processDocuments(Object.keys(selected), scrape);
	});
}
/**
 * Collect the records shown on a listing page.
 *
 * @param {Document} doc - the listing page
 * @param {String} url - the page URL; its page name selects the parsing strategy
 * @param {Boolean} [checkOnly] - when truthy, stop at the first usable result
 *   and return true instead of building the full map
 * @returns {Object|Boolean} map of record URL -> title, true (checkOnly and a
 *   result exists), or false when nothing usable was found
 */
function getSearchResults(doc, url, checkOnly) {
	var pageMatch = url.match(multiplesRE);
	if (!pageMatch) return false;

	var pageType = pageMatch[1].toLowerCase();
	var picked = {};
	var anyFound = false;

	if (pageType == 'serieslisting' || pageType == 'itemslisting') {
		// Both listings share one table layout; only the title column differs
		if (!doc.getElementsByClassName('SearchResults')[0]) return false;
		var titleColumn = pageType == 'serieslisting' ? 2 : 3;
		var rows = ZU.xpath(doc, '//table[@class="SearchResults"]//tr[@class!="header"]');
		for (let r = 0; r < rows.length; r++) {
			let cell = rows[r].getElementsByTagName('td')[titleColumn];
			if (!cell) continue;
			let href = getCleanLinkFromCell(cell);
			if (!href) continue;
			if (checkOnly) return true;
			anyFound = true;
			picked[href] = ZU.trimInternal(cell.textContent);
		}
	}
	else if (pageType == 'photolisting') {
		// Try the list view first, then fall back to the grid view
		var gridView = false;
		var hits = ZU.xpath(doc, '//table[contains(@id, "PhotoResults")]//table[@class="greyboxdetail"]');
		if (!hits.length) {
			hits = ZU.xpath(doc, '//table[contains(@id, "PhotoResults")]/tbody/tr/td[@title]');
			gridView = true;
		}
		for (let h = 0; h < hits.length; h++) {
			let label, href;
			if (gridView) {
				// Grid cells carry the title in an attribute and a plain anchor
				label = hits[h].getAttribute('title');
				if (!label) continue;
				let anchor = hits[h].getElementsByTagName('a')[0];
				if (!anchor) continue;
				href = anchor.href;
			}
			else {
				// List entries keep title and link in their second cell
				let cell = hits[h].getElementsByTagName('td')[1];
				if (!cell) continue;
				href = getCleanLinkFromCell(cell);
				if (!href) continue;
				label = cell.textContent;
			}
			if (checkOnly) return true;
			anyFound = true;
			picked[href] = ZU.trimInternal(label);
		}
	}
	// Any other page name matched by multiplesRE has no parser here

	return anyFound ? picked : false;
}
/**
 * Extract the scheme and host part ("http(s)://host") from an absolute URL.
 *
 * @param {String} url - an http or https URL
 * @returns {String} the leading scheme + host, without a trailing slash
 */
function getHost(url) {
	var origin = /^https?:\/\/[^/]+/.exec(url);
	// Like the rest of this file, assumes the URL is http(s); anything else throws here
	return origin[0];
}
/**
 * Scrape a single-record page by dispatching to the scraper that matches the
 * page name in the URL, then stamp the archive name and save the item.
 *
 * @param {Document} doc - the record page
 * @param {String} url - the page URL
 * @throws {Error} when singleItemRE matches a page name with no scraper
 */
function scrape(doc, url) {
	var pageMatch = url.match(singleItemRE);
	if (!pageMatch) return;

	var pageType = pageMatch[1].toLowerCase();
	var item;
	if (pageType == 'viewimage') {
		item = scrapeImage(doc, url);
	}
	else if (pageType == 'photosearchitemdetail' || pageType == 'photodetail') {
		// Both PhotoSearch detail pages are handled by the same scraper
		item = scrapePhoto(doc, url);
	}
	else if (pageType == 'seriesdetail') {
		item = scrapeSeries(doc, url);
	}
	else if (pageType == 'itemdetail') {
		item = scrapeItem(doc, url);
	}
	else {
		throw new Error("Unknown page type: " + pageMatch[1]);
	}

	// Scrapers return a falsy value when the page had nothing to save
	if (!item) return;

	var archiveName = "National Archives of Australia";
	item.libraryCatalog = archiveName;
	item.archive = archiveName;
	item.complete();
}
/**
 * Build a link into RecordSearch that is as persistent as possible, using
 * the AutoSearch entry point.
 *
 * @param {String} id - identifier placed in the "Number" parameter (callers pass it already URL-encoded)
 * @param {String} linkType - value of the "O" parameter (callers use 'I', 'S' and 'PSI')
 * @returns {String} the AutoSearch URL
 */
function createPersistentLink(id, linkType) {
	var query = '&Number=' + id;
	return 'https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=' + linkType + query;
}
/**
 * Return the cleaned contents of a series cell, removing the extra notice
 * text (starting at "Click" or "All") that follows the actual value.
 *
 * @param {String} series - raw cell text
 * @returns {String} the text before the notice, trimmed; the whole trimmed
 *   text when no notice is present; '' when the input is not a string
 */
function stripSeries(series) {
	// A missing table row reaches us as undefined; do not crash the scrape
	if (typeof series != 'string') return '';

	var noticeStart = series.search(/(Click|All)/);
	// search() yields -1 when there is no notice: keep the full value instead
	// of cutting it down to an empty string
	var cleaned = noticeStart == -1 ? series : series.substring(0, noticeStart);
	return cleaned.trim();
}
/**
 * Get a URL from a cell that has an onclick attribute.
 *
 * The target is taken from the first single quote in the handler onwards,
 * cleaned with ZU.superCleanString, and appended to the RecordSearch
 * interface base URL.
 *
 * @param {Element} cell - table cell whose onclick handler holds the target
 * @returns {String|null} absolute URL, or null when the cell has no onclick
 *   attribute, the handler contains no quoted target, or the target is empty
 */
function getCleanLinkFromCell(cell) {
	var handler = cell.getAttribute('onclick');
	// Cells without a handler are not links; callers skip falsy results
	if (!handler) return null;

	var quoteAt = handler.indexOf("'");
	// Without a quote there is no target to extract (substring(-1) would
	// otherwise return the whole handler and produce a bogus URL)
	if (quoteAt == -1) return null;

	var target = ZU.superCleanString(handler.substring(quoteAt));
	if (!target) return null;

	return 'https://recordsearch.naa.gov.au/SearchNRetrieve/Interface/' + target;
}
/**
* Series/Item scraping
*/
/**
 * Turn a two-column details table into a label -> value map.
 *
 * Only rows with exactly two cells are read. Labels are whitespace-normalised
 * and lower-cased. Values are whitespace-normalised text, except:
 *  - "series note": the text of the <pre> inside the element with id "notes"
 *    is used when that element has exactly two children (the full note
 *    instead of the truncation);
 *  - "related searches": an array with the trimmed text of every link.
 * Rows with an empty label or empty value are dropped.
 *
 * @param {Element} table - element containing the <tr> rows
 * @returns {Object|null} the parsed map, or null when no table was given
 */
function parseItemTable(table) {
	// Callers test the result for falsiness, so signal a missing table with null
	if (!table) return null;

	var meta = {};
	var rows = table.getElementsByTagName('tr');
	for (let i = 0; i < rows.length; i++) {
		var cells = rows[i].getElementsByTagName('td');
		if (cells.length != 2) continue;

		var label = ZU.trimInternal(cells[0].textContent).toLowerCase();
		var data;
		if (label == 'series note') {
			// grab the full note, instead of the truncation
			var notes = table.ownerDocument.getElementById('notes');
			var fullNote = notes
				&& notes.children.length == 2
				&& notes.getElementsByTagName('pre')[0];
			data = fullNote
				? fullNote.textContent
				: ZU.trimInternal(cells[1].textContent);
		}
		else if (label == 'related searches') {
			var links = cells[1].getElementsByTagName('a');
			data = [];
			for (let j = 0; j < links.length; j++) {
				// Index with the link counter j, not the row counter i
				data.push(links[j].textContent.trim());
			}
		}
		else {
			data = ZU.trimInternal(cells[1].textContent);
		}

		if (!label || !data) continue;
		meta[label] = data;
	}
	return meta;
}
/**
 * Scrape a RecordSearch item detail page.
 *
 * Reads the details table, fills a manuscript item (title, dates, location,
 * "series, control symbol" as archive location, access information, notes)
 * and, when the page offers a "View digital copy" link, attaches the item PDF.
 *
 * @param {Document} doc - the item detail page
 * @returns {Zotero.Item|null} the populated item (not yet completed), or null
 *   when the page has no details table
 */
function scrapeItem(doc) {
	var table = ZU.xpath(doc, '//div[@class="detailsTable"]//tbody')[0];
	// No details table: nothing to save (the caller skips falsy results)
	if (!table) return null;
	var meta = parseItemTable(table);
	if (!meta) return null;

	var item = new Zotero.Item('manuscript');
	item.title = meta.title;
	item.type = 'item';
	item.date = meta['contents date range'];
	item.place = meta.location;

	// Join whichever of series / control symbol are present, so a missing row
	// neither throws nor leaks "undefined" into the archive location
	var series = meta['series number'] ? stripSeries(meta['series number']) : '';
	var control = meta['control symbol'];
	item.archiveLocation = [series, control].filter(Boolean).join(', ');

	item['access status'] = meta['access status'];
	item['access decision'] = meta['date of decision'];

	if (meta['item notes']) {
		item.notes.push(meta['item notes']);
	}

	// Both the persistent link and the PDF link are built from the item id
	if (meta['item id']) {
		var barcode = encodeURIComponent(meta['item id']);
		item.url = createPersistentLink(barcode, 'I');

		// Add link to digital copy if available
		var digitalCopyLinks = ZU.xpath(doc, '//div[contains(@id, "_pnlDigitalCopy")]/a[contains(normalize-space(text()), "View digital copy")]');
		if (digitalCopyLinks.length) {
			item.attachments.push({
				title: 'National Archives of Australia item PDF',
				url: 'https://recordsearch.naa.gov.au/SearchNRetrieve/NAAMedia/ViewPDF.aspx?B=' + barcode + '&D=D',
				mimeType: 'application/pdf'
			});
		}
	}
	return item;
}
/**
 * Scrape a RecordSearch series detail page.
 *
 * Reads the details table, fills a manuscript item (title, dates, holdings,
 * physical format, series note, series number) and adds every agency listed
 * as recording into the series as a single-field author.
 *
 * @param {Document} doc - the series detail page
 * @returns {Zotero.Item|null} the populated item (not yet completed), or null
 *   when the page has no details table
 */
function scrapeSeries(doc) {
	var table = ZU.xpath(doc, '//div[@class="detailsTable"]//tbody')[0];
	// No details table: nothing to save (the caller skips falsy results)
	if (!table) return null;
	var meta = parseItemTable(table);
	if (!meta) return null;

	var item = new Zotero.Item('manuscript');
	item.title = meta.title;
	item.type = 'series';
	item.date = meta['contents dates'];

	// Split multiple holdings with semi-colon: a capital letter directly
	// followed by a digit marks where two holdings were run together
	if (meta['quantity and location']) {
		item.place = meta['quantity and location'].replace(/([A-Z]{1})([0-9]{1})/g, '$1; $2');
	}

	item.format = meta['predominant physical format'];
	item.abstractNote = meta['series note'];
	item.archiveLocation = meta['series number'];

	// Optional row: only clean it up when the page actually has it
	var itemCount = meta['items in this series on recordsearch'];
	if (itemCount) {
		item['number of items'] = stripSeries(itemCount);
	}

	// Without a series number there is nothing to link to
	if (meta['series number']) {
		item.url = createPersistentLink(encodeURIComponent(meta['series number']), 'S');
	}

	// Agencies recording into this series
	var agencies = ZU.xpath(doc, '//div[@id="provenanceRecording"]//div[@class="linkagesInfo"]');
	for (let i = 0; i < agencies.length; i++) {
		item.creators.push({
			lastName: ZU.trimInternal(agencies[i].textContent),
			creatorType: "author",
			fieldMode: 1
		});
	}
	return item;
}
/**
* ViewImage
*/
/**
 * Read one metadata field from the image viewer page.
 *
 * @param {Document} doc - the ViewImage page
 * @param {String} label - field name; the element looked up has id "lbl" + label
 * @returns {String} whitespace-normalised text of the element, or '' when it does not exist
 */
function getImageField(doc, label) {
	var node = doc.getElementById('lbl' + label);
	return node ? ZU.trimInternal(node.textContent) : '';
}
/**
 * Scrape the digitised-image viewer (ViewImage) page.
 *
 * In single view the item describes the page currently shown ("[n of total]"
 * is appended to the title and only that image is attached). In multiples
 * view every page image is attached, unless there are more than 10, in which
 * case none are.
 *
 * @param {Document} doc - the ViewImage page
 * @param {String} url - the page URL; its host is reused for the item URL
 * @returns {Zotero.Item} the populated item (not yet completed)
 */
function scrapeImage(doc, url) {
	var image = doc.getElementById('divImage');
	// offsetParent is used as the visibility check for the single-image container
	var singleView = image && image.offsetParent;

	var totalField = doc.getElementsByName('hTotalPages')[0];
	var pageField = doc.getElementsByName('hCurrentPage')[0];
	var page = pageField && Number.parseInt(pageField.value, 10);
	var total = totalField && Number.parseInt(totalField.value, 10);

	var item = new Zotero.Item('manuscript');
	item.title = getImageField(doc, 'Title');
	if (singleView && page && total != 1) {
		item.title += ' [' + page + (total ? ' of ' + total : '') + ']';
	}
	item.date = getImageField(doc, 'ContentsDate');
	item.archiveLocation = getImageField(doc, 'Series') + ', ' + getImageField(doc, 'ControlSymbol');

	var barcode = getImageField(doc, 'Barcode');
	item.url = getHost(url) + '/SearchNRetrieve/Interface/ViewImage.aspx?'
		+ 'B=' + encodeURIComponent(barcode)
		+ (singleView ? '&S=' + page : '');

	var imageUrlBase = '/SearchNRetrieve/NAAMedia/ShowImage.aspx?T=P&B=' + encodeURIComponent(barcode);

	// In single view, save current image. In multiples view, save all
	// (unless more than 10, then don't save at all)
	if ((singleView && page) || (!singleView && total && total < 11)) {
		var from = singleView ? page - 1 : 0;
		var to = singleView ? page : total;
		for (let i = from; i < to; i++) {
			var folio = i + 1;
			item.attachments.push({
				title: 'Folio'
					+ (total != 1
						? ' ' + folio + (total ? ' of ' + total : '')
						: '')
					+ ' [' + item.archiveLocation + ']',
				// Each attachment must request its own folio; using the current
				// page here made every attachment in multiples view identical
				url: imageUrlBase + '&S=' + folio,
				mimeType: 'image/jpeg'
			});
		}
	}
	return item;
}
/*
* PhotoSearch
*/
/**
 * Scrape a PhotoSearch detail page.
 *
 * @param {Document} doc - the photo detail page
 * @returns {Zotero.Item|null} the populated item (not yet completed), or null
 *   when the page has no photo details table
 */
function scrapePhoto(doc) {
	var detailTables = ZU.xpath(doc, '//table[contains(@id, "PhotoDetailTable")]//table[contains(@id, "GreyBoxTable")]/tbody');
	var detailTable = detailTables[0];
	if (!detailTable) return null;

	var fields = parseItemTable(detailTable);
	var photo = new Zotero.Item('manuscript');
	photo.title = fields.title;
	photo.type = 'photograph';
	// Date and location rows go by either of two labels; take whichever is present
	photo.date = fields.date ? fields.date : fields['date range'];
	photo.place = fields.location ? fields.location : fields['item location'];
	photo.archiveLocation = fields['series/control symbol'];

	var encodedId = encodeURIComponent(fields['item id']);
	photo.url = createPersistentLink(encodedId, 'PSI');

	var relatedSearches = fields['related searches'];
	if (relatedSearches) {
		photo.tags = relatedSearches;
	}

	var thumbnailSrc = ZU.xpathText(doc, '//table[contains(@id, "PhotoDetailTable")]//img/@src');
	if (!thumbnailSrc) return photo;

	// Drop any existing T parameter and request T=P instead (better quality)
	var fullSizeSrc = thumbnailSrc.replace(/([?&])T=[^&]*(?:&|$)/g, '$1') + '&T=P';
	photo.attachments.push({
		title: 'Digital image of NAA: ' + photo.archiveLocation,
		url: fullSizeSrc,
		mimeType: 'image/jpeg' // Seems like that is generally the case
	});
	return photo;
}
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "https://recordsearch.naa.gov.au/SearchNRetrieve/Interface/ViewImage.aspx?B=12048&S=4",
"defer": true,
"items": [
{
"itemType": "manuscript",
"title": "Carl Gustav Opitz - Naturalization [4 of 7]",
"creators": [],
"date": "1911 - 1912",
"archive": "National Archives of Australia",
"archiveLocation": "A1, 1911/18393",
"libraryCatalog": "National Archives of Australia",
"url": "https://recordsearch.naa.gov.au/SearchNRetrieve/Interface/ViewImage.aspx?B=12048&S=4",
"attachments": [
{
"title": "Folio 4 of 7 [A1, 1911/18393]",
"mimeType": "image/jpeg"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=PSI&Number=11404473",
"defer": true,
"items": [
{
"itemType": "manuscript",
"title": "Antarctica - A Gentoo penguin on Heard Island guards its nesting mate [photographic image]. 1 photographic negative: b&w, acetate",
"creators": [],
"date": "1949 - 1949",
"archive": "National Archives of Australia",
"archiveLocation": "A1200, L11912",
"libraryCatalog": "National Archives of Australia",
"manuscriptType": "photograph",
"place": "Canberra",
"shortTitle": "Antarctica - A Gentoo penguin on Heard Island guards its nesting mate [photographic image]. 1 photographic negative",
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=PSI&Number=11404473",
"attachments": [
{
"title": "Digital image of NAA: A1200, L11912",
"mimeType": "image/jpeg"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"defer": true,
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=I&Number=149309",
"items": [
{
"itemType": "manuscript",
"title": "The Wragge Estate. Property for sale.",
"creators": [],
"date": "1917 - 1917",
"archive": "National Archives of Australia",
"archiveLocation": "A2479, 17/1306",
"libraryCatalog": "National Archives of Australia",
"manuscriptType": "item",
"place": "Canberra",
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=I&Number=149309",
"attachments": [
{
"title": "National Archives of Australia item PDF",
"mimeType": "application/pdf"
}
],
"tags": [],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=S&Number=A2",
"defer": true,
"items": [
{
"itemType": "manuscript",
"title": "Correspondence files, annual single number series",
"creators": [
{
"lastName": "CA 588, Prime Minister's Office",
"creatorType": "author",
"fieldMode": 1
},
{
"lastName": "CA 12, Prime Minister's Department",
"creatorType": "author",
"fieldMode": 1
}
],
"date": "01 Jan 1895 - 31 Dec 1926",
"abstractNote": "Name Index Volumes, 1917-1951 (ex CP 602/1, AS12/8)",
"archive": "National Archives of Australia",
"archiveLocation": "A2",
"libraryCatalog": "National Archives of Australia",
"manuscriptType": "series",
"place": "35.74 metres held in ACT",
"url": "https://recordsearch.naa.gov.au/scripts/AutoSearch.asp?O=S&Number=A2",
"attachments": [],
"tags": [],
"notes": [],
"seeAlso": []
}
]
}
]
/** END TEST CASES **/