Skip to content

Commit

Permalink
Fix to use decimal references if they are shorter
Browse files Browse the repository at this point in the history
Previously, in cases such as where a double quote (`"`) was encoded,
and the shortest reference was requested, a hexadecimal reference
was produced even though decimal references would be shorter.

Closes GH-5.
Closes GH-6.
  • Loading branch information
wooorm authored Jan 12, 2019
1 parent 5775ea7 commit a35f77b
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 3 deletions.
30 changes: 29 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
var entities = require('character-entities-html4')
var legacy = require('character-entities-legacy')
var hexadecimal = require('is-hexadecimal')
var decimal = require('is-decimal')
var alphanumerical = require('is-alphanumerical')
var dangerous = require('./dangerous.json')

Expand Down Expand Up @@ -71,14 +72,35 @@ function one(char, next, options) {
var shortest = options.useShortestReferences
var omit = options.omitOptionalSemicolons
var named
var code
var numeric
var decimal

if ((shortest || options.useNamedReferences) && own.call(characters, char)) {
named = toNamed(characters[char], next, omit, options.attribute)
}

if (shortest || !named) {
numeric = toHexReference(char.charCodeAt(0), next, omit)
code = char.charCodeAt(0)
numeric = toHexReference(code, next, omit)

// Use the shortest numeric reference when requested.
// A simple algorithm would use decimal for all code points under 100, as
// those are shorter than hexadecimal:
//
// * `&#99;` vs `&#x63;` (decimal shorter)
// * `&#100;` vs `&#x64;` (equal)
//
// However, because we take `next` into consideration when `omit` is used,
// and decimals can also be shorter for bigger values when `next` is
// hexadecimal but not decimal, we instead compare both.
if (shortest) {
decimal = toDecimalReference(code, next, omit)

if (decimal.length < numeric.length) {
numeric = decimal
}
}
}

if (named && (!shortest || named.length < numeric.length)) {
Expand Down Expand Up @@ -110,6 +132,12 @@ function toHexReference(code, next, omit) {
return omit && next && !hexadecimal(next) ? value : value + ';'
}

// Build the decimal numeric character reference (`&#…`) for `code`.
//
// The closing semicolon is left off only when `omit` is set, `next` is a
// non-empty value, and `decimal(next)` is false; presumably because a digit
// directly after the reference would otherwise be read as part of it.
// In every other case the reference is terminated with `;`.
function toDecimalReference(code, next, omit) {
  var reference = '&#' + String(code)

  if (omit && next && !decimal(next)) {
    return reference
  }

  return reference + ';'
}

// Create a global expression that matches any single character listed in
// `characters` (an array of strings, joined into one character class).
//
// Characters that carry meaning inside a character class are escaped first,
// so a list containing `]`, `\`, `^`, or `-` matches those characters
// literally instead of closing the class early, negating it, forming a
// range, or producing an invalid expression.
function toExpression(characters) {
  // `\`, `]`, `^`, and `-` are the only characters that are special inside
  // `[...]`; everything else can be used verbatim.
  var body = characters.join('').replace(/[\\\]^-]/g, '\\$&')

  return new RegExp('[' + body + ']', 'g')
}
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"character-entities-html4": "^1.0.0",
"character-entities-legacy": "^1.0.0",
"is-alphanumerical": "^1.0.0",
"is-decimal": "^1.0.2",
"is-hexadecimal": "^1.0.0"
},
"devDependencies": {
Expand Down
15 changes: 13 additions & 2 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,22 @@ test('stringifyEntities(value[, options])', function(t) {
)

t.equal(
stringify('alpha © bravo ≠ charlie 𝌆 delta', {useShortestReferences: true}),
'alpha &#xA9; bravo &ne; charlie &#x1D306; delta',
stringify('alpha © bravo ≠ charlie 𝌆 delta " echo', {
useShortestReferences: true
}),
'alpha &#xA9; bravo &ne; charlie &#x1D306; delta &#34; echo',
'Should use shortest entities if `useShortestReferences`'
)

t.equal(
stringify('" "0 "a "z µ µ0 µa µz', {
useShortestReferences: true,
omitOptionalSemicolons: true
}),
'&#34 &#34;0 &#34a &#34z &#xB5 &#xB5;0 &#181a &#xB5z',
'Should pick the shortest numeric reference based on `next` with `omitOptionalSemicolons`'
)

t.equal(
stringify('\'"<>&'),
'&#x27;&#x22;&#x3C;&#x3E;&#x26;',
Expand Down

0 comments on commit a35f77b

Please sign in to comment.