Skip to content

Commit

Permalink
url: fix surrogate handling in encodeAuth()
Browse files Browse the repository at this point in the history
Also factor out common parts in querystring and url.

Backport-of: #11161
  • Loading branch information
TimothyGu authored and italoacasas committed Feb 22, 2017
1 parent 7e37628 commit 8dbd562
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 92 deletions.
15 changes: 15 additions & 0 deletions lib/internal/querystring.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
'use strict';

const hexTable = new Array(256);
for (var i = 0; i < 256; ++i)
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();

// Instantiating this is faster than explicitly calling `Object.create(null)`
// to get a "clean" empty object (tested with v8 v4.9).
function StorageObject() {}
StorageObject.prototype = Object.create(null);

module.exports = {
hexTable,
StorageObject
};
72 changes: 1 addition & 71 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'use strict';

const util = require('util');
const { StorageObject } = require('internal/querystring');
const binding = process.binding('url');
const context = Symbol('context');
const cannotBeBase = Symbol('cannot-be-base');
Expand All @@ -22,9 +23,6 @@ const IteratorPrototype = Object.getPrototypeOf(
Object.getPrototypeOf([][Symbol.iterator]())
);

function StorageObject() {}
StorageObject.prototype = Object.create(null);

class OpaqueOrigin {
toString() {
return 'null';
Expand Down Expand Up @@ -528,73 +526,6 @@ Object.defineProperties(URL.prototype, {
}
});

const hexTable = new Array(256);

for (var i = 0; i < 256; ++i)
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
function encodeAuth(str) {
// faster encodeURIComponent alternative for encoding auth uri components
var out = '';
var lastPos = 0;
for (var i = 0; i < str.length; ++i) {
var c = str.charCodeAt(i);

// These characters do not need escaping:
// ! - . _ ~
// ' ( ) * :
// digits
// alpha (uppercase)
// alpha (lowercase)
if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E ||
(c >= 0x27 && c <= 0x2A) ||
(c >= 0x30 && c <= 0x3A) ||
(c >= 0x41 && c <= 0x5A) ||
(c >= 0x61 && c <= 0x7A)) {
continue;
}

if (i - lastPos > 0)
out += str.slice(lastPos, i);

lastPos = i + 1;

// Other ASCII characters
if (c < 0x80) {
out += hexTable[c];
continue;
}

// Multi-byte characters ...
if (c < 0x800) {
out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)];
continue;
}
if (c < 0xD800 || c >= 0xE000) {
out += hexTable[0xE0 | (c >> 12)] +
hexTable[0x80 | ((c >> 6) & 0x3F)] +
hexTable[0x80 | (c & 0x3F)];
continue;
}
// Surrogate pair
++i;
var c2;
if (i < str.length)
c2 = str.charCodeAt(i) & 0x3FF;
else
c2 = 0;
c = 0x10000 + (((c & 0x3FF) << 10) | c2);
out += hexTable[0xF0 | (c >> 18)] +
hexTable[0x80 | ((c >> 12) & 0x3F)] +
hexTable[0x80 | ((c >> 6) & 0x3F)] +
hexTable[0x80 | (c & 0x3F)];
}
if (lastPos === 0)
return str;
if (lastPos < str.length)
return out + str.slice(lastPos);
return out;
}

function update(url, params) {
if (!url)
return;
Expand Down Expand Up @@ -1212,7 +1143,6 @@ exports.URL = URL;
exports.URLSearchParams = URLSearchParams;
exports.domainToASCII = domainToASCII;
exports.domainToUnicode = domainToUnicode;
exports.encodeAuth = encodeAuth;
exports.urlToOptions = urlToOptions;
exports.formatSymbol = kFormat;
exports.searchParamsSymbol = searchParams;
15 changes: 3 additions & 12 deletions lib/querystring.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
'use strict';

const { Buffer } = require('buffer');
const { StorageObject, hexTable } = require('internal/querystring');
const QueryString = module.exports = {
unescapeBuffer,
// `unescape()` is a JS global, so we need to use a different local name
Expand All @@ -14,13 +16,6 @@ const QueryString = module.exports = {
parse,
decode: parse
};
const Buffer = require('buffer').Buffer;

// This constructor is used to store parsed query string values. Instantiating
// this is faster than explicitly calling `Object.create(null)` to get a
// "clean" empty object (tested with v8 v4.9).
function ParsedQueryString() {}
ParsedQueryString.prototype = Object.create(null);

const unhexTable = [
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0 - 15
Expand Down Expand Up @@ -116,10 +111,6 @@ function qsUnescape(s, decodeSpaces) {
}


const hexTable = [];
for (var i = 0; i < 256; ++i)
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();

// These characters do not need escaping when generating query strings:
// ! - . _ ~
// ' ( ) *
Expand Down Expand Up @@ -263,7 +254,7 @@ const defEqCodes = [61]; // =

// Parse a key/val string.
function parse(qs, sep, eq, options) {
const obj = new ParsedQueryString();
const obj = new StorageObject();

if (typeof qs !== 'string' || qs.length === 0) {
return obj;
Expand Down
84 changes: 75 additions & 9 deletions lib/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ function importPunycode() {

const { toASCII } = importPunycode();

const { StorageObject, hexTable } = require('internal/querystring');
const internalUrl = require('internal/url');
const encodeAuth = internalUrl.encodeAuth;
exports.parse = urlParse;
exports.resolve = urlResolve;
exports.resolveObject = urlResolveObject;
Expand Down Expand Up @@ -76,12 +76,6 @@ const slashedProtocol = {
};
const querystring = require('querystring');

// This constructor is used to store parsed query string values. Instantiating
// this is faster than explicitly calling `Object.create(null)` to get a
// "clean" empty object (tested with v8 v4.9).
function ParsedQueryString() {}
ParsedQueryString.prototype = Object.create(null);

function urlParse(url, parseQueryString, slashesDenoteHost) {
if (url instanceof Url) return url;

Expand Down Expand Up @@ -190,7 +184,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
}
} else if (parseQueryString) {
this.search = '';
this.query = new ParsedQueryString();
this.query = new StorageObject();
}
return this;
}
Expand Down Expand Up @@ -380,7 +374,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
} else if (parseQueryString) {
// no query string, but parseQueryString still requested
this.search = '';
this.query = new ParsedQueryString();
this.query = new StorageObject();
}

var firstIdx = (questionIdx !== -1 &&
Expand Down Expand Up @@ -959,3 +953,75 @@ function spliceOne(list, index) {
list[i] = list[k];
list.pop();
}

// These characters do not need escaping:
// ! - . _ ~
// ' ( ) * :
// digits
// alpha (uppercase)
// alpha (lowercase)
const noEscapeAuth = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 // 0x70 - 0x7F
];

function encodeAuth(str) {
// faster encodeURIComponent alternative for encoding auth uri components
var out = '';
var lastPos = 0;
for (var i = 0; i < str.length; ++i) {
var c = str.charCodeAt(i);

// ASCII
if (c < 0x80) {
if (noEscapeAuth[c] === 1)
continue;
if (lastPos < i)
out += str.slice(lastPos, i);
lastPos = i + 1;
out += hexTable[c];
continue;
}

if (lastPos < i)
out += str.slice(lastPos, i);

// Multi-byte characters ...
if (c < 0x800) {
lastPos = i + 1;
out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)];
continue;
}
if (c < 0xD800 || c >= 0xE000) {
lastPos = i + 1;
out += hexTable[0xE0 | (c >> 12)] +
hexTable[0x80 | ((c >> 6) & 0x3F)] +
hexTable[0x80 | (c & 0x3F)];
continue;
}
// Surrogate pair
++i;
var c2;
if (i < str.length)
c2 = str.charCodeAt(i) & 0x3FF;
else
c2 = 0;
lastPos = i + 1;
c = 0x10000 + (((c & 0x3FF) << 10) | c2);
out += hexTable[0xF0 | (c >> 18)] +
hexTable[0x80 | ((c >> 12) & 0x3F)] +
hexTable[0x80 | ((c >> 6) & 0x3F)] +
hexTable[0x80 | (c & 0x3F)];
}
if (lastPos === 0)
return str;
if (lastPos < str.length)
return out + str.slice(lastPos);
return out;
}
1 change: 1 addition & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
'lib/internal/process/stdio.js',
'lib/internal/process/warning.js',
'lib/internal/process.js',
'lib/internal/querystring.js',
'lib/internal/readline.js',
'lib/internal/repl.js',
'lib/internal/socket_list.js',
Expand Down
9 changes: 9 additions & 0 deletions test/parallel/test-url.js
Original file line number Diff line number Diff line change
Expand Up @@ -1231,6 +1231,15 @@ const formatTests = {
protocol: 'file',
pathname: '/home/user',
path: '/home/user'
},

// surrogate in auth
'http://%F0%9F%98%[email protected]/': {
href: 'http://%F0%9F%98%[email protected]/',
protocol: 'http:',
auth: '\uD83D\uDE00',
hostname: 'www.example.com',
pathname: '/'
}
};
for (const u in formatTests) {
Expand Down

1 comment on commit 8dbd562

@jasnell
Copy link
Member

@jasnell jasnell commented on 8dbd562 Mar 7, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@italoacasas ... just a heads up, even backport commits need to have the appropriate PR-URL and reviewed-by metadata. Typically those are pulled from the original PR unless the backport PR includes additional changes

Please sign in to comment.