From c5af5a4f4fd833796b630578feba07d5f06ee664 Mon Sep 17 00:00:00 2001 From: Gil Tayar Date: Fri, 19 May 2023 18:50:50 +0300 Subject: [PATCH] module: change default resolver to not throw on unknown scheme Fixes https://github.com/nodejs/loaders/issues/138 PR-URL: https://github.com/nodejs/node/pull/47824 Reviewed-By: Moshe Atlow Reviewed-By: Benjamin Gruenbaum Reviewed-By: Guy Bedford Reviewed-By: Geoffrey Booth Reviewed-By: Antoine du Hamel --- doc/api/esm.md | 143 +++++++++++------- lib/internal/modules/esm/load.js | 32 ++++ lib/internal/modules/esm/resolve.js | 36 ----- test/es-module/test-esm-dynamic-import.js | 2 +- .../test-esm-import-meta-resolve.mjs | 2 + .../test-esm-loader-default-resolver.mjs | 52 +++++++ .../es-module-loaders/byop-dummy-loader.mjs | 30 ++++ .../es-module-loaders/http-loader.mjs | 18 --- 8 files changed, 203 insertions(+), 112 deletions(-) create mode 100644 test/es-module/test-esm-loader-default-resolver.mjs create mode 100644 test/fixtures/es-module-loaders/byop-dummy-loader.mjs diff --git a/doc/api/esm.md b/doc/api/esm.md index 0e48b2d9c4e943..8f147b9c2445b2 100644 --- a/doc/api/esm.md +++ b/doc/api/esm.md @@ -139,8 +139,9 @@ There are three types of specifiers: * _Absolute specifiers_ like `'file:///opt/nodejs/config.js'`. They refer directly and explicitly to a full path. -Bare specifier resolutions are handled by the [Node.js module resolution -algorithm][]. All other specifier resolutions are always only resolved with +Bare specifier resolutions are handled by the [Node.js module +resolution and loading algorithm][]. +All other specifier resolutions are always only resolved with the standard relative [URL][] resolution semantics. Like in CommonJS, module files within packages can be accessed by appending a @@ -1007,28 +1008,6 @@ and there is no security. // https-loader.mjs import { get } from 'node:https'; -export function resolve(specifier, context, nextResolve) { - const { parentURL = null } = context; - - // Normally Node.js would error on specifiers starting with 'https://', so - // this hook intercepts them and converts them into absolute URLs to be - // passed along to the later hooks below. - if (specifier.startsWith('https://')) { - return { - shortCircuit: true, - url: specifier, - }; - } else if (parentURL && parentURL.startsWith('https://')) { - return { - shortCircuit: true, - url: new URL(specifier, parentURL).href, - }; - } - - // Let Node.js handle all other specifiers. - return nextResolve(specifier); -} - export function load(url, context, nextLoad) { // For JavaScript to be loaded over the network, we need to fetch and // return it. @@ -1069,9 +1048,7 @@ prints the current version of CoffeeScript per the module at the URL in #### Transpiler loader Sources that are in formats Node.js doesn't understand can be converted into -JavaScript using the [`load` hook][load hook]. Before that hook gets called, -however, a [`resolve` hook][resolve hook] needs to tell Node.js not to -throw an error on unknown file types. +JavaScript using the [`load` hook][load hook]. This is less performant than transpiling source files before running Node.js; a transpiler loader should only be used for development and testing @@ -1087,25 +1064,6 @@ import CoffeeScript from 'coffeescript'; const baseURL = pathToFileURL(`${cwd()}/`).href; -// CoffeeScript files end in .coffee, .litcoffee, or .coffee.md. -const extensionsRegex = /\.coffee$|\.litcoffee$|\.coffee\.md$/; - -export async function resolve(specifier, context, nextResolve) { - if (extensionsRegex.test(specifier)) { - const { parentURL = baseURL } = context; - - // Node.js normally errors on unknown file extensions, so return a URL for - // specifiers ending in the CoffeeScript file extensions. - return { - shortCircuit: true, - url: new URL(specifier, parentURL).href, - }; - } - - // Let Node.js handle all other specifiers. - return nextResolve(specifier); -} - export async function load(url, context, nextLoad) { if (extensionsRegex.test(url)) { // Now that we patched resolve to let CoffeeScript URLs through, we need to @@ -1198,27 +1156,99 @@ loaded from disk but before Node.js executes it; and so on for any `.coffee`, `.litcoffee` or `.coffee.md` files referenced via `import` statements of any loaded file. -## Resolution algorithm +#### "import map" loader + +The previous two loaders defined `load` hooks. This is an example of a loader +that does its work via the `resolve` hook. This loader reads an +`import-map.json` file that specifies which specifiers to override to another +URL (this is a very simplistic implemenation of a small subset of the +"import maps" specification). + +```js +// import-map-loader.js +import fs from 'node:fs/promises'; + +const { imports } = JSON.parse(await fs.readFile('import-map.json')); + +export async function resolve(specifier, context, nextResolve) { + if (Object.hasOwn(imports, specifier)) { + return nextResolve(imports[specifier], context); + } + + return nextResolve(specifier, context); +} +``` + +Let's assume we have these files: + +```js +// main.js +import 'a-module'; +``` + +```json +// import-map.json +{ + "imports": { + "a-module": "./some-module.js" + } +} +``` + +```js +// some-module.js +console.log('some module!'); +``` + +If you run `node --experimental-loader ./import-map-loader.js main.js` +the output will be `some module!`. + +## Resolution and loading algorithm ### Features -The resolver has the following properties: +The default resolver has the following properties: * FileURL-based resolution as is used by ES modules -* Support for builtin module loading * Relative and absolute URL resolution * No default extensions * No folder mains * Bare specifier package resolution lookup through node\_modules +* Does not fail on unknown extensions or protocols +* Can optionally provide a hint of the format to the loading phase + +The default loader has the following properties -### Resolver algorithm +* Support for builtin module loading via `node:` URLs +* Support for "inline" module loading via `data:` URLs +* Support for `file:` module loading +* Fails on any other URL protocol +* Fails on unknown extensions for `file:` loading + (supports only `.cjs`, `.js`, and `.mjs`) + +### Resolution algorithm The algorithm to load an ES module specifier is given through the **ESM\_RESOLVE** method below. It returns the resolved URL for a module specifier relative to a parentURL. +The resolution algorithm determines the full resolved URL for a module +load, along with its suggested module format. The resolution algorithm +does not determine whether the resolved URL protocol can be loaded, +or whether the file extensions are permitted, instead these validations +are applied by Node.js during the load phase +(for example, if it was asked to load a URL that has a protocol that is +not `file:`, `data:`, `node:`, or if `--experimental-network-imports` +is enabled, `https:`). + +The algorithm also tries to determine the format of the file based +on the extension (see `ESM_FILE_FORMAT` algorithm below). If it does +not recognize the file extension (eg if it is not `.mjs`, `.cjs`, or +`.json`), then a format of `undefined` is returned, +which will throw during the load phase. + The algorithm to determine the module format of a resolved URL is -provided by **ESM\_FORMAT**, which returns the unique module +provided by **ESM\_FILE\_FORMAT**, which returns the unique module format for any file. The _"module"_ format is returned for an ECMAScript Module, while the _"commonjs"_ format is used to indicate loading through the legacy CommonJS loader. Additional formats such as _"addon"_ can be extended in @@ -1245,7 +1275,7 @@ The resolver can throw the following errors: * _Unsupported Directory Import_: The resolved path corresponds to a directory, which is not a supported target for module imports. -### Resolver Algorithm Specification +### Resolution Algorithm Specification **ESM\_RESOLVE**(_specifier_, _parentURL_) @@ -1279,7 +1309,7 @@ The resolver can throw the following errors: > 8. Otherwise, > 1. Set _format_ the module format of the content type associated with the > URL _resolved_. -> 9. Load _resolved_ as module format, _format_. +> 9. Return _format_ and _resolved_ to the loading phase **PACKAGE\_RESOLVE**(_packageSpecifier_, _parentURL_) @@ -1484,9 +1514,9 @@ _isImports_, _conditions_) > 7. If _pjson?.type_ exists and is _"module"_, then > 1. If _url_ ends in _".js"_, then > 1. Return _"module"_. -> 2. Throw an _Unsupported File Extension_ error. +> 2. Return **undefined**. > 8. Otherwise, -> 1. Throw an _Unsupported File Extension_ error. +> 1. Return **undefined**. **LOOKUP\_PACKAGE\_SCOPE**(_url_) @@ -1552,7 +1582,7 @@ success! [Import Assertions proposal]: https://github.com/tc39/proposal-import-assertions [JSON modules]: #json-modules [Loaders API]: #loaders -[Node.js Module Resolution Algorithm]: #resolver-algorithm-specification +[Node.js Module Resolution And Loading Algorithm]: #resolution-algorithm-specification [Terminology]: #terminology [URL]: https://url.spec.whatwg.org/ [`"exports"`]: packages.md#exports @@ -1580,7 +1610,6 @@ success! [custom https loader]: #https-loader [load hook]: #loadurl-context-nextload [percent-encoded]: url.md#percent-encoding-in-urls -[resolve hook]: #resolvespecifier-context-nextresolve [special scheme]: https://url.spec.whatwg.org/#special-scheme [status code]: process.md#exit-codes [the official standard format]: https://tc39.github.io/ecma262/#sec-modules diff --git a/lib/internal/modules/esm/load.js b/lib/internal/modules/esm/load.js index a01852a241f9b4..b13e035386abac 100644 --- a/lib/internal/modules/esm/load.js +++ b/lib/internal/modules/esm/load.js @@ -77,6 +77,8 @@ async function defaultLoad(url, context) { source, } = context; + throwIfUnsupportedURLScheme(new URL(url), experimentalNetworkImports); + if (format == null) { format = await defaultGetFormat(url, context); } @@ -100,6 +102,36 @@ async function defaultLoad(url, context) { }; } +/** + * throws an error if the protocol is not one of the protocols + * that can be loaded in the default loader + * @param {URL} parsed + * @param {boolean} experimentalNetworkImports + */ +function throwIfUnsupportedURLScheme(parsed, experimentalNetworkImports) { + // Avoid accessing the `protocol` property due to the lazy getters. + const protocol = parsed?.protocol; + if ( + protocol && + protocol !== 'file:' && + protocol !== 'data:' && + protocol !== 'node:' && + ( + !experimentalNetworkImports || + ( + protocol !== 'https:' && + protocol !== 'http:' + ) + ) + ) { + const schemes = ['file', 'data', 'node']; + if (experimentalNetworkImports) { + ArrayPrototypePush(schemes, 'https', 'http'); + } + throw new ERR_UNSUPPORTED_ESM_URL_SCHEME(parsed, schemes); + } +} + module.exports = { defaultLoad, }; diff --git a/lib/internal/modules/esm/resolve.js b/lib/internal/modules/esm/resolve.js index e5f3089bd60dc3..2cac6f2d450fab 100644 --- a/lib/internal/modules/esm/resolve.js +++ b/lib/internal/modules/esm/resolve.js @@ -3,7 +3,6 @@ const { ArrayIsArray, ArrayPrototypeJoin, - ArrayPrototypePush, ArrayPrototypeShift, JSONParse, JSONStringify, @@ -57,7 +56,6 @@ const { ERR_PACKAGE_PATH_NOT_EXPORTED, ERR_UNSUPPORTED_DIR_IMPORT, ERR_NETWORK_IMPORT_DISALLOWED, - ERR_UNSUPPORTED_ESM_URL_SCHEME, } = require('internal/errors').codes; const { Module: CJSModule } = require('internal/modules/cjs/loader'); @@ -1036,37 +1034,6 @@ function checkIfDisallowedImport(specifier, parsed, parsedParentURL) { } } -function throwIfUnsupportedURLProtocol(url) { - // Avoid accessing the `protocol` property due to the lazy getters. - const protocol = url.protocol; - if (protocol !== 'file:' && protocol !== 'data:' && - protocol !== 'node:') { - throw new ERR_UNSUPPORTED_ESM_URL_SCHEME(url); - } -} - -function throwIfUnsupportedURLScheme(parsed, experimentalNetworkImports) { - // Avoid accessing the `protocol` property due to the lazy getters. - const protocol = parsed?.protocol; - if ( - protocol && - protocol !== 'file:' && - protocol !== 'data:' && - ( - !experimentalNetworkImports || - ( - protocol !== 'https:' && - protocol !== 'http:' - ) - ) - ) { - const schemes = ['file', 'data']; - if (experimentalNetworkImports) { - ArrayPrototypePush(schemes, 'https', 'http'); - } - throw new ERR_UNSUPPORTED_ESM_URL_SCHEME(parsed, schemes); - } -} async function defaultResolve(specifier, context = {}) { let { parentURL, conditions } = context; @@ -1142,7 +1109,6 @@ async function defaultResolve(specifier, context = {}) { // This must come after checkIfDisallowedImport if (parsed && parsed.protocol === 'node:') return { url: specifier }; - throwIfUnsupportedURLScheme(parsed, experimentalNetworkImports); const isMain = parentURL === undefined; if (isMain) { @@ -1189,8 +1155,6 @@ async function defaultResolve(specifier, context = {}) { throw error; } - throwIfUnsupportedURLProtocol(url); - return { // Do NOT cast `url` to a string: that will work even when there are real // problems, silencing them diff --git a/test/es-module/test-esm-dynamic-import.js b/test/es-module/test-esm-dynamic-import.js index 95d34244357e7c..ac6b35ebc1bc15 100644 --- a/test/es-module/test-esm-dynamic-import.js +++ b/test/es-module/test-esm-dynamic-import.js @@ -59,7 +59,7 @@ function expectFsNamespace(result) { 'ERR_UNSUPPORTED_ESM_URL_SCHEME'); if (common.isWindows) { const msg = - 'Only URLs with a scheme in: file and data are supported by the default ' + + 'Only URLs with a scheme in: file, data, and node are supported by the default ' + 'ESM loader. On Windows, absolute paths must be valid file:// URLs. ' + "Received protocol 'c:'"; expectModuleError(import('C:\\example\\foo.mjs'), diff --git a/test/es-module/test-esm-import-meta-resolve.mjs b/test/es-module/test-esm-import-meta-resolve.mjs index dc45610b460280..69ec84291d3cd6 100644 --- a/test/es-module/test-esm-import-meta-resolve.mjs +++ b/test/es-module/test-esm-import-meta-resolve.mjs @@ -31,6 +31,8 @@ const fixtures = dirname.slice(0, dirname.lastIndexOf('/', dirname.length - 2) + }) ) ); + assert.strictEqual(await import.meta.resolve('http://some-absolute/url'), 'http://some-absolute/url'); + assert.strictEqual(await import.meta.resolve('some://weird/protocol'), 'some://weird/protocol'); assert.strictEqual(await import.meta.resolve('baz/', fixtures), fixtures + 'node_modules/baz/'); })().then(mustCall()); diff --git a/test/es-module/test-esm-loader-default-resolver.mjs b/test/es-module/test-esm-loader-default-resolver.mjs new file mode 100644 index 00000000000000..27320fcfcfe862 --- /dev/null +++ b/test/es-module/test-esm-loader-default-resolver.mjs @@ -0,0 +1,52 @@ +import { spawnPromisified } from '../common/index.mjs'; +import * as fixtures from '../common/fixtures.mjs'; +import assert from 'node:assert'; +import { execPath } from 'node:process'; +import { describe, it } from 'node:test'; + +describe('default resolver', () => { + // In these tests `byop` is an acronym for "bring your own protocol", and is the + // protocol our byop-dummy-loader.mjs can load + it('should accept foreign schemas without exception (e.g. byop://something/or-other)', async () => { + const { code, stdout, stderr } = await spawnPromisified(execPath, [ + '--no-warnings', + '--experimental-loader', + fixtures.fileURL('/es-module-loaders/byop-dummy-loader.mjs'), + '--input-type=module', + '--eval', + "import 'byop://1/index.mjs'", + ]); + assert.strictEqual(code, 0); + assert.strictEqual(stdout.trim(), 'index.mjs!'); + assert.strictEqual(stderr, ''); + }); + + it('should resolve foreign schemas by doing regular url absolutization', async () => { + const { code, stdout, stderr } = await spawnPromisified(execPath, [ + '--no-warnings', + '--experimental-loader', + fixtures.fileURL('/es-module-loaders/byop-dummy-loader.mjs'), + '--input-type=module', + '--eval', + "import 'byop://1/index2.mjs'", + ]); + assert.strictEqual(code, 0); + assert.strictEqual(stdout.trim(), '42'); + assert.strictEqual(stderr, ''); + }); + + // In this test, `byoe` is an acronym for "bring your own extension" + it('should accept foreign extensions without exception (e.g. ..//something.byoe)', async () => { + const { code, stdout, stderr } = await spawnPromisified(execPath, [ + '--no-warnings', + '--experimental-loader', + fixtures.fileURL('/es-module-loaders/byop-dummy-loader.mjs'), + '--input-type=module', + '--eval', + "import 'byop://1/index.byoe'", + ]); + assert.strictEqual(code, 0); + assert.strictEqual(stdout.trim(), 'index.byoe!'); + assert.strictEqual(stderr, ''); + }); +}); diff --git a/test/fixtures/es-module-loaders/byop-dummy-loader.mjs b/test/fixtures/es-module-loaders/byop-dummy-loader.mjs new file mode 100644 index 00000000000000..17a3430b44e3f6 --- /dev/null +++ b/test/fixtures/es-module-loaders/byop-dummy-loader.mjs @@ -0,0 +1,30 @@ +export function load(url, context, nextLoad) { + switch (url) { + case 'byop://1/index.mjs': + return { + source: 'console.log("index.mjs!")', + format: 'module', + shortCircuit: true, + }; + case 'byop://1/index2.mjs': + return { + source: 'import c from "./sub.mjs"; console.log(c);', + format: 'module', + shortCircuit: true, + }; + case 'byop://1/sub.mjs': + return { + source: 'export default 42', + format: 'module', + shortCircuit: true, + }; + case 'byop://1/index.byoe': + return { + source: 'console.log("index.byoe!")', + format: 'module', + shortCircuit: true, + }; + default: + return nextLoad(url, context); + } +} diff --git a/test/fixtures/es-module-loaders/http-loader.mjs b/test/fixtures/es-module-loaders/http-loader.mjs index 8096dd9bb73a4c..4fe8fcb4797436 100644 --- a/test/fixtures/es-module-loaders/http-loader.mjs +++ b/test/fixtures/es-module-loaders/http-loader.mjs @@ -1,23 +1,5 @@ import { get } from 'http'; -export function resolve(specifier, context, nextResolve) { - const { parentURL = null } = context; - - if (specifier.startsWith('http://')) { - return { - shortCircuit: true, - url: specifier, - }; - } else if (parentURL?.startsWith('http://')) { - return { - shortCircuit: true, - url: new URL(specifier, parentURL).href, - }; - } - - return nextResolve(specifier); -} - export function load(url, context, nextLoad) { if (url.startsWith('http://')) { return new Promise((resolve, reject) => {