Skip to content
This repository was archived by the owner on Dec 11, 2019. It is now read-only.

Commit 184abe8

Browse files
committed
Improve sorting for matching subdomains
Auditors: @bsclifton. Fix #8982
1 parent 531c171 commit 184abe8

File tree

2 files changed

+68
-18
lines changed

2 files changed

+68
-18
lines changed

app/common/lib/suggestion.js

+23-15
Original file line numberDiff line numberDiff line change
@@ -234,8 +234,10 @@ const getSortByDomain = (userInputLower, userInputHost) => {
234234
// any count or frequency calculation.
235235
// Note that for parsed URLs that are not complete, the pathname contains
236236
// what the user is entering as the host and the host is null.
237-
const host1 = s1.parsedUrl.host || s1.parsedUrl.pathname || s1.location || ''
238-
const host2 = s2.parsedUrl.host || s2.parsedUrl.pathname || s2.location || ''
237+
let host1 = s1.parsedUrl.host || s1.parsedUrl.pathname || s1.location || ''
238+
let host2 = s2.parsedUrl.host || s2.parsedUrl.pathname || s2.location || ''
239+
host1 = host1.replace('www.', '')
240+
host2 = host2.replace('www.', '')
239241

240242
let pos1 = host1.indexOf(userInputHost)
241243
let pos2 = host2.indexOf(userInputHost)
@@ -255,18 +257,6 @@ const getSortByDomain = (userInputLower, userInputHost) => {
255257
return 2
256258
}
257259

258-
// Try the same to see if taking off www. helps.
259-
if (!userInputLower.startsWith('www.')) {
260-
pos1 = host1.indexOf('www.' + userInputLower)
261-
pos2 = host2.indexOf('www.' + userInputLower)
262-
if (pos1 === 0 && pos2 !== 0) {
263-
return -1
264-
}
265-
if (pos1 !== 0 && pos2 === 0) {
266-
return 1
267-
}
268-
}
269-
270260
const sortBySimpleURLResult = sortBySimpleURL(s1, s2)
271261
if (sortBySimpleURLResult !== 0) {
272262
return sortBySimpleURLResult
@@ -300,6 +290,25 @@ const sortBySimpleURL = (s1, s2) => {
300290
if (!url1IsSecure && url2IsSecure) {
301291
return 1
302292
}
293+
294+
// Prefer smaller less complicated domains
295+
const parts1 = s1.parsedUrl.hostname.split('.')
296+
const parts2 = s2.parsedUrl.hostname.split('.')
297+
let parts1Size = parts1.length
298+
let parts2Size = parts2.length
299+
if (parts1[0] === 'www') {
300+
parts1Size--
301+
}
302+
if (parts2[0] === 'www') {
303+
parts2Size--
304+
}
305+
if (parts1Size < parts2Size) {
306+
return -1
307+
}
308+
if (parts1Size > parts2Size) {
309+
return 1
310+
}
311+
return sortByAccessCountWithAgeDecay(s1, s2)
303312
}
304313
return 0
305314
}
@@ -333,7 +342,6 @@ const getSortForSuggestions = (userInputLower) => {
333342
const userInputValue = userInputParts[1] || ''
334343
const sortByDomain = getSortByDomain(userInputLower, userInputHost)
335344
const sortByPath = getSortByPath(userInputLower)
336-
const {sortByAccessCountWithAgeDecay} = require('./suggestion')
337345

338346
return (s1, s2) => {
339347
s1.parsedUrl = s1.parsedUrl || urlParse(getURL(s1) || '')

test/unit/app/common/lib/suggestionTest.js

+45-3
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@ describe('suggestion unit tests', function () {
212212
it('Prefers https sipmle URLs', function () {
213213
assert(this.sort('https://brave.com', 'http://brave.com') < 0)
214214
})
215+
it('sorts better matched domains based on more simple domains', function () {
216+
assert(this.sort('https://facebook.github.io/', 'https://facebook.com/') > 0)
217+
})
218+
it('sorts better matched domains based on more simple domains ignoring www.', function () {
219+
assert(this.sort('https://facebook.github.io/', 'https://www.facebook.com/') > 0)
220+
})
215221
})
216222
describe('getSortByDomain', function () {
217223
before(function () {
@@ -241,9 +247,6 @@ describe('suggestion unit tests', function () {
241247
it('negative if first site has a match from the start of domain', function () {
242248
assert(this.sort('https://google.com', 'https://mygoogle.com') < 0)
243249
})
244-
it('positive if second site has a match but without www.', function () {
245-
assert(this.sort('https://www.google.com', 'https://google.com') > 0)
246-
})
247250
it('negative if there is a pos 0 match not including www.', function () {
248251
assert(this.sort('https://www.google.com', 'https://mygoogle.com') < 0)
249252
})
@@ -253,6 +256,34 @@ describe('suggestion unit tests', function () {
253256
it('does not throw error for file:// URL', function () {
254257
assert(this.sort('https://google.com', 'file://') < 0)
255258
})
259+
it('sorts simple domains that match equally on subdomains as the same', function () {
260+
const url1 = 'https://facebook.github.com'
261+
const url2 = 'https://facebook.brave.com'
262+
const sort = suggestion.getSortByDomain('facebook', 'facebook')
263+
assert(sort({
264+
location: url1,
265+
parsedUrl: urlParse(url1)
266+
}, {
267+
location: url2,
268+
parsedUrl: urlParse(url2)
269+
}) === 0)
270+
})
271+
it('sorts simple domains that match equally but have different activity based on activity', function () {
272+
const url1 = 'https://facebook.github.com'
273+
const url2 = 'https://facebook.brave.com'
274+
const sort = suggestion.getSortByDomain('facebook', 'facebook')
275+
assert(sort({
276+
location: url1,
277+
parsedUrl: urlParse(url1),
278+
lastAccessedTime: 1495335766455,
279+
count: 30
280+
}, {
281+
location: url2,
282+
parsedUrl: urlParse(url2),
283+
lastAccessedTime: 1495334766432,
284+
count: 10
285+
}) < 0)
286+
})
256287
})
257288
describe('getSortForSuggestions', function () {
258289
describe('with url entered as path', function () {
@@ -299,6 +330,17 @@ describe('suggestion unit tests', function () {
299330
assert(this.sort('https://brianbondy.com', 'www.brianbondy.com') < 0)
300331
})
301332
})
333+
it('sorts better matched domains based on more simple domains ignoring www.', function () {
334+
const userInputLower = 'facebook'
335+
const internalSort = suggestion.getSortForSuggestions(userInputLower, userInputLower)
336+
const sort = (url1, url2) => {
337+
return internalSort(
338+
{ location: url1, parsedUrl: urlParse(url1) },
339+
{ location: url2, parsedUrl: urlParse(url2) }
340+
)
341+
}
342+
assert(sort('https://facebook.github.io/', 'https://www.facebook.com/') > 0)
343+
})
302344
})
303345
})
304346
})

0 commit comments

Comments
 (0)