From 1d944e69564ef099ae6eed1d9e9f4d212e77891a Mon Sep 17 00:00:00 2001
From: Dan Stillman <dstillman@zotero.org>
Date: Thu, 26 Jul 2018 02:47:05 -0400
Subject: [PATCH] Move web translation back to /web and implement other /search
 modes

I had consolidated URL handling into /search, but it didn't really make
sense. Both web translation and text search can return a 300, but with
different responses and different required handling. So clients should
just parse out URLs, send those to /web (now as plain text), and send
everything else to /search like with v1.

Closes #6, closes #7
---
 src/searchEndpoint.js                   |  99 +++--
 src/server.js                           |   2 +
 src/textSearch.js                       | 505 ++++++++++++++++++++++++
 src/webEndpoint.js                      |  97 +++++
 src/{searchSession.js => webSession.js} |  14 +-
 test/server_search_test.js              |  68 ----
 test/web_test.js                        |  66 ++++
 translate_search                        |   9 +
 translate_url                           |   2 +-
 translate_url_multiple                  |   2 +-
 10 files changed, 735 insertions(+), 129 deletions(-)
 create mode 100644 src/textSearch.js
 create mode 100644 src/webEndpoint.js
 rename src/{searchSession.js => webSession.js} (98%)
 delete mode 100644 test/server_search_test.js
 create mode 100644 test/web_test.js
 create mode 100755 translate_search

diff --git a/src/searchEndpoint.js b/src/searchEndpoint.js
index 6fc80e0..53cf121 100644
--- a/src/searchEndpoint.js
+++ b/src/searchEndpoint.js
@@ -23,77 +23,74 @@
     ***** END LICENSE BLOCK *****
 */
 
-const SearchSession = require('./searchSession');
-
-// Timeout for select requests, in seconds
-//const SELECT_TIMEOUT = 120;
-const SELECT_TIMEOUT = 15;
-const sessionsWaitingForSelection = {};
+const config = require('config');
+const Translate = require('./translation/translate');
+const TextSearch = require('./textSearch');
 
 var SearchEndpoint = module.exports = {
-	requestsSinceGC: 0,
-	
 	handle: async function (ctx, next) {
 		ctx.assert(ctx.is('text/plain') || ctx.is('json'), 415);
 		
-		setTimeout(() => {
-			this.gc();
-		});
-		
 		var data = ctx.request.body;
 		
 		if (!data) {
 			ctx.throw(400, "POST data not provided\n");
 		}
 		
-		// If follow-up request, retrieve session and update context
-		var query;
-		var session;
-		if (typeof data == 'object') {
-			let sessionID = data.session;
-			if (!sessionID) {
-				ctx.throw(400, "'session' not provided");
-			}
-			session = sessionsWaitingForSelection[sessionID];
-			if (!session) {
-				ctx.throw(400, "Session not found");
-			}
-			delete sessionsWaitingForSelection[sessionID];
-			session.ctx = ctx;
-			session.next = next;
-			session.data = data;
-		}
-		else {
-			session = new SearchSession(ctx, next, data);
+		// Look for DOI, ISBN, etc.
+		var identifiers = Zotero.Utilities.Internal.extractIdentifiers(data);
+		
+		// Use PMID only if it's the only text in the query
+		if (identifiers.length && identifiers[0].PMID && identifiers[0].PMID !== data.trim()) {
+			identifiers = [];
 		}
 		
-		// URL
-		if (typeof data == 'object' || data.match(/^https?:/)) {
-			await session.handleURL();
-			
-			// Store session if returning multiple choices
-			if (ctx.response.status == 300) {
-				sessionsWaitingForSelection[session.id] = session;
-			}
+		// No identifier found, so fall back to text search
+		if (!identifiers.length) {
+			await TextSearch.handle(ctx, next);
 			return;
 		}
 		
-		ctx.throw(501);
+		await this.handleIdentifier(ctx, identifiers[0]);
 	},
 	
 	
-	/**
-	 * Perform garbage collection every 10 requests
-	 */
-	gc: function () {
-		if ((++this.requestsSinceGC) == 3) {
-			for (let i in sessionsWaitingForSelection) {
-				let session = sessionsWaitingForSelection[i];
-				if (session.started && Date.now() >= session.started + SELECT_TIMEOUT * 1000) {
-					delete sessionsWaitingForSelection[i];
-				}
+	handleIdentifier: async function (ctx, identifier) {
+		// Run search translation for a single identifier and set the response body
+		try {
+			var translate = new Translate.Search();
+			translate.setIdentifier(identifier);
+			let translators = await translate.getTranslators();
+			if (!translators.length) {
+				ctx.throw(501, "No translators available", { expose: true });
+			}
+			translate.setTranslator(translators);
+			
+			var items = await translate.translate({
+				libraryID: false
+			});
+		}
+		catch (e) {
+			if (translate && e == translate.ERROR_NO_RESULTS) {
+				ctx.throw(501, e, { expose: true });
 			}
-			this.requestsSinceGC = 0;
+			if (e && e.expose) throw e; // keep the 501 thrown in the try block from becoming a 500
+			Zotero.debug(e, 1);
+			ctx.throw(
+				500,
+				"An error occurred during translation. "
+					+ "Please check translation with the Zotero client.",
+				{ expose: true }
+			);
 		}
+		
+		// Translation can return multiple items (e.g., a parent item and notes pointing to it),
+		// so we have to return an array with keyed items
+		var newItems = [];
+		items.forEach(item => {
+			newItems.push(...Zotero.Utilities.itemToAPIJSON(item));
+		});
+		
+		ctx.response.body = newItems;
 	}
 };
diff --git a/src/server.js b/src/server.js
index 8ee41e4..6ede225 100644
--- a/src/server.js
+++ b/src/server.js
@@ -33,9 +33,11 @@ require('./zotero');
 const Debug = require('./debug');
 const Translators = require('./translators');
 const SearchEndpoint = require('./searchEndpoint');
+const WebEndpoint = require('./webEndpoint');
 
 const app = module.exports = new Koa();
 app.use(bodyParser({ enableTypes: ['text', 'json']}));
+app.use(_.post('/web', WebEndpoint.handle.bind(WebEndpoint)));
 app.use(_.post('/search', SearchEndpoint.handle.bind(SearchEndpoint)));
 
 Debug.init(1);
diff --git a/src/textSearch.js b/src/textSearch.js
new file mode 100644
index 0000000..90dcdb3
--- /dev/null
+++ b/src/textSearch.js
@@ -0,0 +1,505 @@
+/*
+    ***** BEGIN LICENSE BLOCK *****
+    
+    Copyright © 2018 Center for History and New Media
+                     George Mason University, Fairfax, Virginia, USA
+                     https://www.zotero.org
+    
+    This file is part of Zotero.
+    
+    Zotero is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    
+    Zotero is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+    
+    You should have received a copy of the GNU Affero General Public License
+    along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
+    
+    ***** END LICENSE BLOCK *****
+*/
+
+const config = require('config');
+const HTTP = require('./http');
+
+module.exports = {
+	/**
+	 * Handle text search
+	 *
+	 * @return {Promise<undefined>}
+	 */
+	handle: async function (ctx, next) {
+		// If identifier-search server isn't available, return 501
+		if (!config.has('identifierSearchURL') || !config.get("identifierSearchURL")) {
+			ctx.throw(501, "No identifiers found", { expose: true });
+		}
+		
+		var data = ctx.request.body;
+		
+		var result = await search(
+			data,
+			ctx.query && ctx.query.start
+		);
+		
+		// Throw selection if two or more items are found, or the selection flag is marked
+		if (result.items.length >= 2 || result.items.length >= 1 && result.select) {
+			let newItems = {};
+			
+			for (let item of result.items) {
+				let DOI = item.DOI;
+				let ISBN = item.ISBN;
+				
+				if (!DOI && item.extra) {
+					let m = item.extra.match(/DOI: (.*)/);
+					if (m) DOI = m[1];
+				}
+				
+				if (!ISBN && item.extra) {
+					let m = item.extra.match(/ISBN: (.*)/);
+					if (m) ISBN = m[1];
+				}
+				
+				let identifier;
+				// DOI has a priority over ISBN for items that have both
+				if (DOI) {
+					identifier = DOI;
+				}
+				else if (ISBN) {
+					identifier = ISBN.split(' ')[0];
+				}
+				if (!identifier) continue; // no DOI or ISBN to key the choice by
+				newItems[identifier] = {
+					itemType: item.itemType,
+					title: item.title,
+					description: formatDescription(item),
+				};
+			}
+			
+			let headers = {};
+			// If there were more results, include a link to the next result set
+			if (result.next) {
+				headers.Link = `</search?start=${result.next}>; rel="next"`;
+			}
+			ctx.response.status = 300;
+			ctx.set(headers);
+			
+			
+			//
+			// TODO: Differentiate from web request 300
+			//
+			
+			
+			ctx.response.body = newItems;
+			return;
+		}
+		
+		if (result.items.length === 1) {
+			ctx.response.body = Zotero.Utilities.itemToAPIJSON(result.items[0]);
+			return;
+		}
+		
+		ctx.response.body = [];
+	}
+};
+
+
+async function search(query, start) {
+	const numResults = 3;
+	let identifiers;
+	let moreResults = false;
+	try {
+		let xmlhttp = await HTTP.request(
+			"GET",
+			config.get("identifierSearchURL") + encodeURIComponent(query),
+			{
+				timeout: 15000
+			}
+		);
+		identifiers = JSON.parse(xmlhttp.responseText);
+		
+		// If passed a start= parameter, skip ahead
+		let startPos = 0;
+		if (start) {
+			for (let i = 0; i < identifiers.length; i++) {
+				if (identifierToToken(identifiers[i]) == start) {
+					startPos = i + 1;
+					break;
+				}
+			}
+		}
+		
+		if (identifiers.length > startPos + numResults + 1) {
+			moreResults = true;
+		}
+		
+		identifiers = identifiers.slice(startPos);
+	} catch(e) {
+		Zotero.debug(e, 1);
+		return {select: false, items: []};
+	}
+	
+	let items = [];
+	let nextLastIdentifier = null;
+	for (let identifier of identifiers) {
+		let translate = new Zotero.Translate.Search();
+		try {
+			translate.setIdentifier(identifier);
+			let translators = await translate.getTranslators();
+			if (!translators.length) {
+				continue;
+			}
+			translate.setTranslator(translators);
+			
+			let newItems = await translate.translate({
+				libraryID: false
+			});
+
+			if (newItems.length) {
+				let seq = getLongestCommonSequence(newItems[0].title, query);
+				if (seq.length >= 6 && seq.split(' ').length >= 2) {
+					items.push(newItems[0]);
+					// Keep track of last identifier if we're limiting results
+					if (moreResults) {
+						nextLastIdentifier = identifier;
+					}
+					if (items.length == numResults) {
+						break;
+					}
+				}
+			}
+		}
+		catch (e) {
+			if (e !== translate.ERROR_NO_RESULTS) {
+				Zotero.debug(e, 1);
+			}
+		}
+	}
+	
+	return {
+		// Force item selection, even for a single item
+		select: true,
+		items,
+		next: nextLastIdentifier ? identifierToToken(nextLastIdentifier) : null
+	};
+	
+	// // Query Crossref and LoC/GBV in parallel to respond faster to the client
+	// let [crossrefItems, libraryItems] = await Promise.all([queryCrossref(query), queryLibraries(query)]);
+	//
+	// // Subtract book reviews from Crossref
+	// crossrefItems = subtractCrossrefItems(crossrefItems, libraryItems);
+	//
+	// let items = crossrefItems.concat(libraryItems);
+	//
+	// // Filter out too fuzzy items, by comparing item title (and other metadata) against query
+	// return await filterResults(items, query);
+}
+
+
+function formatDescription(item) {
+	let parts = [];
+	
+	let authors = [];
+	for (let creator of item.creators) {
+		if (creator.creatorType === 'author' && creator.lastName) {
+			authors.push(creator.lastName);
+			if (authors.length === 3) break;
+		}
+	}
+	
+	if(authors.length) parts.push(authors.join(', '));
+	
+	if (item.date) {
+		let m = item.date.toString().match(/[0-9]{4}/);
+		if (m) parts.push(m[0]);
+	}
+	
+	if(item.publicationTitle) {
+		parts.push(item.publicationTitle);
+	} else if(item.publisher) {
+		parts.push(item.publisher);
+	}
+	
+	return parts.join(' \u2013 ');
+}
+
+
+function subtractCrossrefItems(crossrefItems, libraryItems) {
+	let items = [];
+	for(let crossrefItem of crossrefItems) {
+		// Keep books and book sections
+		if(['book', 'bookSection'].includes(crossrefItem.itemType)) {
+			items.push(crossrefItem);
+			continue;
+		}
+		
+		let crossrefTitle = crossrefItem.title;
+		// Remove all tags
+		crossrefTitle = crossrefTitle.replace(/<\/?\w+[^<>]*>/gi, '');
+		crossrefTitle = crossrefTitle.replace(/:/g, ' ');
+		
+		// Normalize title, split to words, filter out empty array elements
+		crossrefTitle = normalize(crossrefTitle).split(' ').filter(x => x).join(' ');
+		
+		let found = false;
+		for(let libraryItem of libraryItems) {
+			let libraryTitle = libraryItem.title;
+			// Remove all tags
+			libraryTitle = libraryTitle.replace(/<\/?\w+[^<>]*>/gi, '');
+			libraryTitle = libraryTitle.replace(/:/g, ' ');
+			
+			// Normalize title, split to words, filter out empty array elements
+			libraryTitle = normalize(libraryTitle).split(' ').filter(x => x).join(' ');
+			
+			if(crossrefTitle.includes(libraryTitle)) {
+				found = true;
+				break;
+			}
+		}
+		
+		if(!found) {
+			items.push(crossrefItem);
+		}
+	}
+	
+	return items;
+}
+
+async function queryCrossref(query) {
+	let items = [];
+	try {
+		let translate = new Zotero.Translate.Search();
+		// Crossref REST
+		translate.setTranslator("0a61e167-de9a-4f93-a68a-628b48855909");
+		translate.setSearch({query});
+		items = await translate.translate({libraryID: false});
+	}
+	catch (e) {
+		Zotero.debug(e, 2);
+	}
+	return items;
+}
+
+/**
+ * Queries LoC and if that fails, queries GBV
+ */
+async function queryLibraries(query) {
+	let items = [];
+	try {
+		let translate = new Zotero.Translate.Search();
+		// Library of Congress ISBN
+		translate.setTranslator("c070e5a2-4bfd-44bb-9b3c-4be20c50d0d9");
+		translate.setSearch({query});
+		items = await translate.translate({libraryID: false});
+	}
+	catch (e) {
+		Zotero.debug(e, 2);
+		try {
+			let translate = new Zotero.Translate.Search();
+			// Gemeinsamer Bibliotheksverbund ISBN
+			translate.setTranslator("de0eef58-cb39-4410-ada0-6b39f43383f9");
+			translate.setSearch({query});
+			items = await translate.translate({libraryID: false});
+		}
+		catch (e) {
+			Zotero.debug(e, 2);
+		}
+	}
+	return items;
+}
+
+/**
+ * Decomposes all accents and ligatures,
+ * filters out symbols that aren't space or alphanumeric,
+ * and lowercases alphabetic symbols.
+ */
+function normalize(text) {
+	let rx = XRegExp('[^\\pL 0-9]', 'g');
+	text = XRegExp.replace(text, rx, '');
+	text = text.normalize('NFKD');
+	text = XRegExp.replace(text, rx, '');
+	text = text.toLowerCase();
+	return text;
+}
+
+/**
+ * Checks if a given word equals to any of the authors' names
+ */
+function hasAuthor(authors, word) {
+	return authors.some(author => {
+		return (author.firstName && normalize(author.firstName).split(' ').includes(word))
+			|| (author.lastName && normalize(author.lastName).split(' ').includes(word));
+	});
+}
+
+/**
+ * Tries to find the longest common words sequence between
+ * item title and query text. Query text must include title (or part of it)
+ * from the beginning. If there are leftover query words, it tries to
+ * validate them against item metadata (currently only authors and year)
+ */
+async function filterResults(items, query) {
+	let filteredItems = [];
+	let select = false;
+
+	// Normalize query, split to words, filter out empty array elements
+	let queryWords = normalize(query).split(' ').filter(x => x);
+	
+	for (let item of items) {
+		let DOI = item.DOI;
+		let ISBN = item.ISBN;
+		
+		if (!DOI && item.extra) {
+			let m = item.extra.match(/DOI: (.*)/);
+			if (m) DOI = m[1];
+		}
+		
+		if (!ISBN && item.extra) {
+			let m = item.extra.match(/ISBN: (.*)/);
+			if (m) ISBN = m[1];
+		}
+		
+		if (!DOI && !ISBN) continue;
+		let title = item.title;
+		// Remove all tags
+		title = title.replace(/<\/?\w+[^<>]*>/gi, '');
+		title = title.replace(/:/g, ' ');
+		
+		// Normalize title, split to words, filter out empty array elements
+		let titleWords = normalize(title).split(' ').filter(x => x);
+		
+		let longestFrom = 0;
+		let longestLen = 0;
+		
+		// Finds the longest common words sequence between query text and item.title
+		for (let i = 0; i < queryWords.length; i++) {
+			for (let j = queryWords.length; j > 0; j--) {
+				let a = queryWords.slice(i, j);
+				for (let k = 0; k < titleWords.length - a.length + 1; k++) {
+					let b = titleWords.slice(k, a.length + k);
+					if (a.length && b.length && a.join(' ') === b.join(' ')) {
+						if (a.length > longestLen) {
+							longestFrom = i;
+							longestLen = b.length;
+						}
+					}
+				}
+			}
+		}
+		
+		// At least one common word must be found between query and title
+		if (longestLen < 1) continue;
+		
+		// Longest common sequence of words (slice takes an end index, not a length)
+		let foundPart = queryWords.slice(longestFrom, longestFrom + longestLen);
+		
+		// Remaining words
+		let rems = queryWords.slice(0, longestFrom);
+		rems = rems.concat(queryWords.slice(longestFrom + longestLen));
+		
+		// If at least one remaining word is left, it tries to compare it against item metadata.
+		// Otherwise the whole query text is found in the title, and we have a full match
+		if (rems.length) {
+			let foundAuthor = false;
+			let needYear = false;
+			let foundYear = false;
+			
+			// Still remaining words
+			let rems2 = [];
+			
+			for (let rem of rems) {
+				// Ignore words
+				if (['the', 'a', 'an'].indexOf(rem) >= 0) continue;
+				
+				// If the remaining word has at least 2 chars and exists in metadata authors
+				if (rem.length >= 2 && hasAuthor(item.creators, rem)) {
+					foundAuthor = true;
+					continue;
+				}
+				
+				// If the remaining word is a 4 digit number (year)
+				if (/^[0-9]{4}$/.test(rem)) {
+					needYear = true;
+					
+					if (item.date) {
+						// If the remaining word exists in the item date
+						let m = item.date.toString().match(/[0-9]{4}/);
+						if (m && m[0] === rem) {
+							foundYear = true;
+							continue;
+						}
+					}
+				}
+				
+				// Push the word that is still remaining
+				rems2.push(rem);
+			}
+			
+			// If a year exists in the query, but is not matched to the item date
+			if (needYear && !foundYear) continue;
+			
+			// If there are still remaining words and none of authors are found
+			if (rems2.length && !foundAuthor) continue;
+		}
+		
+		// If the query part that was found in title is shorter than 30 symbols
+		if (foundPart.join(' ').length < 30) select = true;
+		
+		filteredItems.push({
+			matchedLen: foundPart.join(' ').length,
+			titleLen: titleWords.join(' ').length,
+			item
+		});
+	}
+	
+	// Sort results by matched text length
+	// and how close the matched text length is to title length
+	filteredItems.sort(function (a, b) {
+		if (b.matchedLen < a.matchedLen) return -1;
+		if (b.matchedLen > a.matchedLen) return 1;
+		return Math.abs(a.matchedLen - a.titleLen) - Math.abs(b.matchedLen - b.titleLen);
+	});
+	
+	filteredItems = filteredItems.map(item => item.item);
+	
+	return {select, items: filteredItems};
+}
+
+function getLongestCommonSequence(title, query) {
+	title = title.replace(/<\/?\w+[^<>]*>/gi, '');
+	title = title.replace(/:/g, ' ');
+	
+	query = query.replace(/:/g, ' ');
+	
+	// Normalize, split to words and filter out empty array elements
+	let titleWords = normalize(title).split(' ').filter(x => x);
+	let queryWords = normalize(query).split(' ').filter(x => x);
+	
+	let longestFrom = 0;
+	let longestLen = 0;
+	
+	// Finds the longest common words sequence between query text and item.title
+	for (let i = 0; i < queryWords.length; i++) {
+		for (let j = queryWords.length; j > 0; j--) {
+			let a = queryWords.slice(i, j);
+			for (let k = 0; k < titleWords.length - a.length + 1; k++) {
+				let b = titleWords.slice(k, a.length + k);
+				if (a.length && b.length && a.join(' ') === b.join(' ')) {
+					if (a.length > longestLen) {
+						longestFrom = i;
+						longestLen = b.length;
+					}
+				}
+			}
+		}
+	}
+	
+	return queryWords.slice(longestFrom, longestFrom + longestLen).join(' ');
+}
+
+function identifierToToken(identifier) {
+	return Zotero.Utilities.Internal.md5(JSON.stringify(identifier));
+}
diff --git a/src/webEndpoint.js b/src/webEndpoint.js
new file mode 100644
index 0000000..d79f43e
--- /dev/null
+++ b/src/webEndpoint.js
@@ -0,0 +1,97 @@
+/*
+    ***** BEGIN LICENSE BLOCK *****
+    
+    Copyright © 2018 Corporation for Digital Scholarship
+                     Vienna, Virginia, USA
+                     https://www.zotero.org
+    
+    This file is part of Zotero.
+    
+    Zotero is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    
+    Zotero is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+    
+    You should have received a copy of the GNU Affero General Public License
+    along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
+    
+    ***** END LICENSE BLOCK *****
+*/
+
+const WebSession = require('./webSession');
+
+// Timeout for select requests, in seconds
+//const SELECT_TIMEOUT = 120;
+const SELECT_TIMEOUT = 15;
+const sessionsWaitingForSelection = {};
+
+var requestsSinceGC = 0;
+
+
+var WebEndpoint = module.exports = {
+	handle: async function (ctx, next) {
+		ctx.assert(ctx.is('text/plain') || ctx.is('json'), 415);
+		
+		setTimeout(() => {
+			gc();
+		});
+		
+		var data = ctx.request.body;
+		
+		if (!data) {
+			ctx.throw(400, "POST data not provided\n");
+		}
+		
+		// If follow-up URL request, retrieve session and update context
+		var query;
+		var session;
+		if (typeof data == 'object') {
+			let sessionID = data.session;
+			if (!sessionID) {
+				ctx.throw(400, "'session' not provided");
+			}
+			session = sessionsWaitingForSelection[sessionID];
+			if (!session) {
+				ctx.throw(400, "Session not found");
+			}
+			delete sessionsWaitingForSelection[sessionID];
+			session.ctx = ctx;
+			session.next = next;
+			session.data = data;
+		}
+		else {
+			session = new WebSession(ctx, next, data);
+		}
+		
+		if (typeof data != 'object' && !data.match(/^https?:/)) {
+			ctx.throw(400, "URL not provided");
+		}
+		
+		await session.handleURL();
+		
+		// Store session if returning multiple choices
+		if (ctx.response.status == 300) {
+			sessionsWaitingForSelection[session.id] = session;
+		}
+	}
+};
+
+/**
+ * On every third call, drop stored sessions older than SELECT_TIMEOUT seconds
+ */
+function gc() {
+	if ((++requestsSinceGC) == 3) {
+		for (let i in sessionsWaitingForSelection) {
+			let session = sessionsWaitingForSelection[i];
+			if (session.started && Date.now() >= session.started + SELECT_TIMEOUT * 1000) {
+				delete sessionsWaitingForSelection[i];
+			}
+		}
+		requestsSinceGC = 0;
+	}
+}
diff --git a/src/searchSession.js b/src/webSession.js
similarity index 98%
rename from src/searchSession.js
rename to src/webSession.js
index 775dad6..5dfe62d 100644
--- a/src/searchSession.js
+++ b/src/webSession.js
@@ -28,6 +28,7 @@ const urlLib = require('url');
 const Translate = require('./translation/translate');
 const HTTP = require('./http');
 const Translators = require('./translators');
+const SearchEndpoint = require('./searchEndpoint');
 
 const SERVER_TRANSLATION_TIMEOUT = 30;
 
@@ -66,14 +67,16 @@ SearchSession.prototype.handleURL = async function () {
 			if (!doi) {
 				this.ctx.throw(500, "An error occurred retrieving the document\n");
 			}
-			return this.handleDOI(doi);
+			await SearchEndpoint.handleIdentifier(this.ctx, { DOI: doi });
+			return;
 		}
 	}
 	
 	// If a doi.org URL, use search handler
 	if (url.match(/^https?:\/\/[^\/]*doi\.org\//)) {
 		let doi = Zotero.Utilities.cleanDOI(url);
-		return this.handleDOI(doi);
+		await SearchEndpoint.handleIdentifier(this.ctx, { DOI: doi });
+		return;
 	}
 	
 	var urlsToTry = config.get('deproxifyURLs') ? this.deproxifyURL(url) : [url];
@@ -331,12 +334,7 @@ SearchSession.prototype.selectDone = function () {
 };*/
 
 
-/**
- * @return {Promise<undefined>}
- */
-SearchSession.prototype.handleDOI = async function (doi) {
-	this.ctx.throw(501);
-};
+
 
 
 /**
diff --git a/test/server_search_test.js b/test/server_search_test.js
deleted file mode 100644
index 1f629a1..0000000
--- a/test/server_search_test.js
+++ /dev/null
@@ -1,68 +0,0 @@
-describe("/search", function () {
-	describe("URL", function () {
-		it("should translate a generic webpage", async function () {
-			var url = testURL + 'plain';
-			var response = await request()
-				.post('/search')
-				.set('Content-Type', 'text/plain')
-				.send(url);
-			assert.equal(response.statusCode, 200);
-			var json = response.body;
-			
-			assert.lengthOf(json, 1);
-			assert.equal(json[0].itemType, 'webpage');
-			assert.equal(json[0].title, 'Test');
-		});
-		
-		
-		it("should translate a webpage with embedded metadata", async function () {
-			var url = testURL + 'single';
-			var response = await request()
-				.post('/search')
-				.set('Content-Type', 'text/plain')
-				.send(url);
-			assert.equal(response.statusCode, 200);
-			var json = response.body;
-			
-			assert.lengthOf(json, 1);
-			assert.equal(json[0].itemType, 'journalArticle');
-			assert.equal(json[0].title, 'Title');
-		});
-		
-		
-		it("should return multiple results and perform follow-up translation", async function () {
-			var url = testURL + 'multiple';
-			var response = await request()
-				.post('/search')
-				.set('Content-Type', 'text/plain')
-				.send(url);
-			assert.equal(response.statusCode, 300);
-			var json = response.body;
-			assert.equal(json.url, url);
-			assert.property(json, 'session');
-			assert.deepEqual(json.items, { 0: 'A', 1: 'B', 2: 'C' });
-			
-			delete json.items[1];
-			
-			response = await request()
-				.post('/search')
-				.send(json);
-			assert.equal(response.statusCode, 200);
-			json = response.body;
-			assert.lengthOf(json, 2);
-			assert.equal(json[0].title, 'A');
-			assert.equal(json[1].title, 'C');
-		});
-		
-		
-		it("should return 400 if a page returns a 404", async function () {
-			var url = testURL + '404';
-			var response = await request()
-				.post('/search')
-				.set('Content-Type', 'text/plain')
-				.send(url);
-			assert.equal(response.statusCode, 400);
-			assert.equal(response.text, 'Remote page not found');
-		});
-	});
-});
diff --git a/test/web_test.js b/test/web_test.js
new file mode 100644
index 0000000..243a2ed
--- /dev/null
+++ b/test/web_test.js
@@ -0,0 +1,66 @@
+describe("/web", function () {
+	it("should translate a generic webpage", async function () {
+		var url = testURL + 'plain';
+		var response = await request()
+			.post('/web')
+			.set('Content-Type', 'text/plain')
+			.send(url);
+		assert.equal(response.statusCode, 200);
+		var json = response.body;
+		
+		assert.lengthOf(json, 1);
+		assert.equal(json[0].itemType, 'webpage');
+		assert.equal(json[0].title, 'Test');
+	});
+	
+	
+	it("should translate a webpage with embedded metadata", async function () {
+		var url = testURL + 'single';
+		var response = await request()
+			.post('/web')
+			.set('Content-Type', 'text/plain')
+			.send(url);
+		assert.equal(response.statusCode, 200);
+		var json = response.body;
+		
+		assert.lengthOf(json, 1);
+		assert.equal(json[0].itemType, 'journalArticle');
+		assert.equal(json[0].title, 'Title');
+	});
+	
+	
+	it("should return multiple results and perform follow-up translation", async function () {
+		var url = testURL + 'multiple';
+		var response = await request()
+			.post('/web')
+			.set('Content-Type', 'text/plain')
+			.send(url);
+		assert.equal(response.statusCode, 300);
+		var json = response.body;
+		assert.equal(json.url, url);
+		assert.property(json, 'session');
+		assert.deepEqual(json.items, { 0: 'A', 1: 'B', 2: 'C' });
+		
+		delete json.items[1];
+		
+		response = await request()
+			.post('/web')
+			.send(json);
+		assert.equal(response.statusCode, 200);
+		json = response.body;
+		assert.lengthOf(json, 2);
+		assert.equal(json[0].title, 'A');
+		assert.equal(json[1].title, 'C');
+	});
+	
+	
+	it("should return 400 if a page returns a 404", async function () {
+		var url = testURL + '404';
+		var response = await request()
+			.post('/web')
+			.set('Content-Type', 'text/plain')
+			.send(url);
+		assert.equal(response.statusCode, 400);
+		assert.equal(response.text, 'Remote page not found');
+	});
+});
diff --git a/translate_search b/translate_search
new file mode 100755
index 0000000..3853e5e
--- /dev/null
+++ b/translate_search
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+URL="$1"
+if [ -z "$1" ]; then
+	echo "Usage: $0 identifier-or-phrase"
+	exit 1
+fi
+
+curl -v -d "$URL" -H "Content-Type: text/plain" 127.0.0.1:1969/search
diff --git a/translate_url b/translate_url
index 646002c..b0f2011 100755
--- a/translate_url
+++ b/translate_url
@@ -6,4 +6,4 @@ if [ -z "$1" ]; then
 	exit 1
 fi
 
-curl -v -d "$URL" -H "Content-Type: text/plain" 127.0.0.1:1969/search
+curl -v -d "$URL" -H "Content-Type: text/plain" 127.0.0.1:1969/web
diff --git a/translate_url_multiple b/translate_url_multiple
index c1b13c6..1227580 100755
--- a/translate_url_multiple
+++ b/translate_url_multiple
@@ -6,4 +6,4 @@ if [ -z "$1" ]; then
 	exit 1
 fi
 
-curl -v -d "$URL" -H "Content-Type: text/plain" 127.0.0.1:1969/search | jq '{ url: .url, session: .session, items: .items | to_entries | [.[0]] | from_entries }' | curl -v -d @- -H "Content-Type: application/json" 127.0.0.1:1969/search
+curl -v -d "$URL" -H "Content-Type: text/plain" 127.0.0.1:1969/web | jq '{ url: .url, session: .session, items: .items | to_entries | [.[0]] | from_entries }' | curl -v -d @- -H "Content-Type: application/json" 127.0.0.1:1969/web