Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: port encoding WPT into core #25321

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
48 changes: 43 additions & 5 deletions test/common/wpt.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* eslint-disable node-core/required-modules */
'use strict';

const assert = require('assert');
const common = require('../common');
const fixtures = require('../common/fixtures');
const fs = require('fs');
const fsPromises = fs.promises;
Expand Down Expand Up @@ -160,12 +160,49 @@ class WPTTest {
getContent() {
return fs.readFileSync(this.getAbsolutePath(), 'utf8');
}
}

// Levels of Intl (ICU) support that a build can provide. The numeric values
// increase with capability so that levels can be compared with relational
// operators (IntlRequirement.isLacking() relies on `<` for the small-icu
// check).
const kIntlRequirement = {
none: 0,
small: 1,
full: 2,
// TODO(joyeecheung): we may need to deal with --with-intl=system-icu
};

class IntlRequirement {
constructor() {
this.currentIntl = kIntlRequirement.none;
if (process.config.variables.v8_enable_i18n_support === 0) {
this.currentIntl = kIntlRequirement.none;
return;
}
// i18n enabled
if (process.config.variables.icu_small) {
this.currentIntl = kIntlRequirement.small;
} else {
this.currentIntl = kIntlRequirement.full;
}
}

requireIntl() {
return this.requires.has('intl');
/**
* @param {Set} requires
* @returns {string|false} The config that the build is lacking, or false
*/
isLacking(requires) {
const current = this.currentIntl;
if (requires.has('full-icu') && current !== kIntlRequirement.full) {
return 'full-icu';
}
if (requires.has('small-icu') && current < kIntlRequirement.small) {
return 'small-icu';
}
return false;
}
}

// Single shared instance; the build configuration is read once, when this
// module is loaded.
const intlRequirements = new IntlRequirement();


class StatusLoader {
constructor(path) {
this.path = path;
Expand Down Expand Up @@ -498,8 +535,9 @@ class WPTRunner {
continue;
}

if (!common.hasIntl && test.requireIntl()) {
this.skip(filename, [ 'missing Intl' ]);
const lackingIntl = intlRequirements.isLacking(test.requires);
if (lackingIntl) {
this.skip(filename, [ `requires ${lackingIntl}` ]);
continue;
}

Expand Down
5 changes: 3 additions & 2 deletions test/fixtures/wpt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ See [test/wpt](../../wpt/README.md) for information on how these tests are run.

Last update:

- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
- interfaces: https://github.com/web-platform-tests/wpt/tree/db7f86289e/interfaces
- console: https://github.com/web-platform-tests/wpt/tree/9786a4b131/console
- encoding: https://github.com/web-platform-tests/wpt/tree/a093a659ed/encoding
- url: https://github.com/web-platform-tests/wpt/tree/75b0f336c5/url
- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
- interfaces: https://github.com/web-platform-tests/wpt/tree/712c9f275e/interfaces

[Web Platform Tests]: https://github.com/web-platform-tests/wpt
[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/master/docs/git-node.md#git-node-wpt
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/META.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
spec: https://encoding.spec.whatwg.org/
suggested_reviewers:
- inexorabletash
- annevk
52 changes: 52 additions & 0 deletions test/fixtures/wpt/encoding/api-basics.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// META: title=Encoding API: Basics

// Both constructors, called without arguments, must report 'utf-8' as their
// encoding.
test(function() {
assert_equals((new TextEncoder).encoding, 'utf-8', 'default encoding is utf-8');
assert_equals((new TextDecoder).encoding, 'utf-8', 'default encoding is utf-8');
}, 'Default encodings');

// encode() with no argument and encode(undefined) must both yield an empty
// byte sequence.
test(function() {
assert_array_equals(new TextEncoder().encode(), [], 'input default should be empty string')
assert_array_equals(new TextEncoder().encode(undefined), [], 'input default should be empty string')
}, 'Default inputs');


// Registers a test checking that `bytes` decode to `string` under `encoding`,
// once when passed as a Uint8Array and once as that array's ArrayBuffer.
function testDecodeSample(encoding, string, bytes) {
  var testName = 'Decode sample: ' + encoding;
  test(function() {
    var view = new Uint8Array(bytes);
    assert_equals(new TextDecoder(encoding).decode(view), string);

    var buffer = new Uint8Array(bytes).buffer;
    assert_equals(new TextDecoder(encoding).decode(buffer), string);
  }, testName);
}

// z (ASCII U+007A), cent (Latin-1 U+00A2), CJK water (BMP U+6C34),
// G-Clef (non-BMP U+1D11E), PUA (BMP U+F8FF), PUA (non-BMP U+10FFFD)
// byte-swapped BOM (non-character U+FFFE)
var sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE';

// UTF-8 is exercised in both directions: `sample` must encode to `bytes`, and
// `bytes` must decode back to `sample` (from a Uint8Array and from its
// ArrayBuffer).
test(function() {
var encoding = 'utf-8';
var string = sample;
var bytes = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3, 0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF, 0xBF, 0xBE];
var encoded = new TextEncoder().encode(string);
assert_array_equals([].slice.call(encoded), bytes);
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string);
assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string);
}, 'Encode/decode round trip: utf-8');

// The UTF-16 variants below are only checked in the decoding direction.
testDecodeSample(
'utf-16le',
sample,
[0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
);

testDecodeSample(
'utf-16be',
sample,
[0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF, 0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE]
);

// The bare 'utf-16' label is expected to decode the same little-endian byte
// sequence as 'utf-16le' above.
testDecodeSample(
'utf-16',
sample,
[0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
);
24 changes: 24 additions & 0 deletions test/fixtures/wpt/encoding/api-invalid-label.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// META: title=Encoding API: invalid label
// META: timeout=long
// META: script=resources/encodings.js

// Labels that the TextDecoder constructor must reject. Seeded with a label
// that names no encoding; setup() below appends the generated variants.
var tests = ["invalid-invalidLabel"];
setup(function() {
// For every label listed in encodings_table, add three variants padded with a
// character that is not ASCII whitespace (NUL, VT, NBSP, LS, PS): prepended,
// appended, and both. The Encoding Standard strips only ASCII whitespace
// (TAB, LF, FF, CR, SPACE) from labels, so these must not match.
encodings_table.forEach(function(section) {
section.encodings.forEach(function(encoding) {
encoding.labels.forEach(function(label) {
["\u0000", "\u000b", "\u00a0", "\u2028", "\u2029"].forEach(function(ws) {
tests.push(ws + label);
tests.push(label + ws);
tests.push(ws + label + ws);
});
});
});
});
});

// One test per collected label: constructing a decoder must throw RangeError.
tests.forEach(function(input) {
test(function() {
assert_throws(new RangeError(), function() { new TextDecoder(input); });
}, 'Invalid label ' + format_value(input) + ' should be rejected by TextDecoder.');
});
15 changes: 15 additions & 0 deletions test/fixtures/wpt/encoding/api-replacement-encodings.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// META: title=Encoding API: replacement encoding
// META: script=resources/encodings.js

// Registers one test per label of the encoding named 'replacement' in
// encodings_table: constructing a TextDecoder with that label must throw a
// RangeError.
encodings_table.forEach(function(section) {
section.encodings.filter(function(encoding) {
return encoding.name === 'replacement';
}).forEach(function(encoding) {
encoding.labels.forEach(function(label) {
test(function() {
assert_throws(new RangeError(), function() { new TextDecoder(label); });
}, 'Label for "replacement" should be rejected by API: ' + label);
});
});
});

48 changes: 48 additions & 0 deletions test/fixtures/wpt/encoding/api-surrogates-utf8.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// META: title=Encoding API: Invalid UTF-16 surrogates with UTF-8 encoding

// Test vectors. For each entry, `input` is encoded with TextEncoder and must
// produce the bytes in `expected`; decoding those bytes as UTF-8 must produce
// `decoded`. Every unpaired surrogate is expected to be encoded as
// EF BF BD, the UTF-8 form of U+FFFD.
// NOTE(review): the "(low)"/"(high)" wording in `name` looks swapped relative
// to Unicode terminology (U+D800 lies in the high-surrogate range, U+DC00 in
// the low-surrogate range). The strings are part of the reported test names,
// so they are intentionally left as they are.
var badStrings = [
{
input: 'abc123',
expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
decoded: 'abc123',
name: 'Sanity check'
},
{
input: '\uD800',
expected: [0xef, 0xbf, 0xbd],
decoded: '\uFFFD',
name: 'Surrogate half (low)'
},
{
input: '\uDC00',
expected: [0xef, 0xbf, 0xbd],
decoded: '\uFFFD',
name: 'Surrogate half (high)'
},
{
input: 'abc\uD800123',
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
decoded: 'abc\uFFFD123',
name: 'Surrogate half (low), in a string'
},
{
input: 'abc\uDC00123',
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
decoded: 'abc\uFFFD123',
name: 'Surrogate half (high), in a string'
},
{
// Two surrogates in reversed order do not form a pair, so each one is
// replaced separately.
input: '\uDC00\uD800',
expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
decoded: '\uFFFD\uFFFD',
name: 'Wrong order'
}
];

// One test per vector; the encoder output is copied into a plain array before
// being compared with `expected`.
badStrings.forEach(function(t) {
test(function() {
var encoded = new TextEncoder().encode(t.input);
assert_array_equals([].slice.call(encoded), t.expected);
assert_equals(new TextDecoder('utf-8').decode(encoded), t.decoded);
}, 'Invalid surrogates encoded into UTF-8: ' + t.name);
});
33 changes: 33 additions & 0 deletions test/fixtures/wpt/encoding/big5-encoder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<!doctype html>
<meta charset=big5> <!-- test breaks if the server overrides this -->
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<div id=log></div>
<script>
// Registers a test asserting that `input`, placed in the query of an <a>
// element's URL, is serialized as `output`; `desc` completes the test name.
function encode(input, output, desc) {
  var testName = "big5 encoder: " + desc;
  test(function() {
    // <a> uses document encoding for URL's query
    var anchor = document.createElement("a");
    // X is added on both sides of the payload to test for off-by-one errors
    anchor.href = "https://example.com/?X" + input + "X";
    var query = anchor.search.substr(1); // remove leading "?"
    assert_equals(query, "X" + output + "X");
  }, testName);
}

encode("ab", "ab", "very basic")
// edge cases
encode("\u9EA6", "%26%2340614%3B", "Highest-pointer BMP character excluded from encoder");
encode("\uD858\uDE6B", "%26%23156267%3B", "Highest-pointer character excluded from encoder");
encode("\u3000", "%A1@", "Lowest-pointer character included in encoder");
encode("\u20AC", "%A3%E1", "Euro; the highest-pointer character before a range of 30 unmapped pointers");
encode("\u4E00", "%A4@", "The lowest-pointer character after the range of 30 unmapped pointers");
encode("\uD85D\uDE07", "%C8%A4", "The highest-pointer character before a range of 41 unmapped pointers");
encode("\uFFE2", "%C8%CD", "The lowest-pointer character after the range of 41 unmapped pointers");
encode("\u79D4", "%FE%FE", "The last character in the index");
// not in index
encode("\u2603", "%26%239731%3B", "The canonical BMP test character that is not in the index");
encode("\uD83D\uDCA9", "%26%23128169%3B", "The canonical astral test character that is not in the index");
// duplicate low bits
encode("\uD840\uDFB5", "%FDj", "A Plane 2 character whose low 16 bits match a BMP character that has a lower pointer");
// prefer last
encode("\u2550", "%F9%F9", "A duplicate-mapped code point that prefers the highest pointer in the encoder");
</script>
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/eof-shift_jis-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!doctype html>
<meta charset=shift_jis>
<title>Shift_JIS file ending with a truncated sequence</title>
One-byte truncated sequence:&#xFFFD;
5 changes: 5 additions & 0 deletions test/fixtures/wpt/encoding/eof-shift_jis.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!doctype html>
<meta charset=shift_jis>
<title>Shift_JIS file ending with a truncated sequence</title>
<link rel=match href=/encoding/eof-shift_jis-ref.html>
One-byte truncated sequence:�
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-one-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a one-byte truncated sequence</title>
One-byte truncated sequence:&#xFFFD;
5 changes: 5 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-one.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a one-byte truncated sequence</title>
<link rel=match href="eof-utf-8-one-ref.html">
One-byte truncated sequence:�
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-three-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a three-byte truncated sequence</title>
Three-byte truncated sequence:&#xFFFD;
5 changes: 5 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-three.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a three-byte truncated sequence</title>
<link rel=match href="eof-utf-8-three-ref.html">
Three-byte truncated sequence:�
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-two-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a two-byte truncated sequence</title>
Two-byte truncated sequence:&#xFFFD;
5 changes: 5 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-two.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a two-byte truncated sequence</title>
<link rel=match href="eof-utf-8-two-ref.html">
Two-byte truncated sequence:�
21 changes: 21 additions & 0 deletions test/fixtures/wpt/encoding/gb18030-encoder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!doctype html>
<meta charset=gb18030> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<div id=log></div>
<script>
// Registers a test asserting that `input`, placed in the query of an <a>
// element's URL, is serialized as `output`; `desc` completes the test name.
function encode(input, output, desc) {
  var testName = "gb18030 encoder: " + desc;
  test(function() {
    // <a> uses document encoding for URL's query
    var anchor = document.createElement("a");
    anchor.href = "https://example.com/?" + input;
    var query = anchor.search.substr(1); // remove leading "?"
    assert_equals(query, output);
  }, testName);
}

encode("s", "s", "very basic")
encode("\u20AC", "%A2%E3", "Euro")
encode("\u4E02", "%81@", "character")
encode("\uE4C6", "%A1@", "PUA")
encode("\uE4C5", "%FE%FE", "PUA #2")
encode("\ud83d\udca9", "%949%DA3", "poo")
</script>
21 changes: 21 additions & 0 deletions test/fixtures/wpt/encoding/gbk-encoder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!doctype html>
<meta charset=gbk> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<div id=log></div>
<script>
// Registers a test asserting that `input`, placed in the query of an <a>
// element's URL, is serialized as `output`; `desc` completes the test name.
function encode(input, output, desc) {
  var testName = "gbk encoder: " + desc;
  test(function() {
    // <a> uses document encoding for URL's query
    var anchor = document.createElement("a");
    anchor.href = "https://example.com/?" + input;
    var query = anchor.search.substr(1); // remove leading "?"
    assert_equals(query, output);
  }, testName);
}

encode("s", "s", "very basic")
encode("\u20AC", "%80", "Euro")
encode("\u4E02", "%81@", "character")
encode("\uE4C6", "%A1@", "PUA")
encode("\uE4C5", "%FE%FE", "PUA #2")
encode("\ud83d\udca9", "%26%23128169%3B", "poo")
</script>
14 changes: 14 additions & 0 deletions test/fixtures/wpt/encoding/idlharness.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// META: global=window,worker
// META: script=/resources/WebIDLParser.js
// META: script=/resources/idlharness.js

// Runs the IDL harness for the 'encoding' spec, with no dependency specs.
// The callback registers one default-constructed instance expression per
// interface so that objects, not only the interfaces, are checked.
// NOTE(review): exact idl_test() semantics come from /resources/idlharness.js,
// which is not visible here.
idl_test(
['encoding'],
[], // No deps
idl_array => {
idl_array.add_objects({
TextEncoder: ['new TextEncoder()'],
TextDecoder: ['new TextDecoder()']
});
}
);
Loading