'use strict'; // TODO: Use the `URL` global when targeting Node.js 10 const URLParser = typeof URL === 'undefined' ? require('url').URL : URL; // https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain'; const DATA_URL_DEFAULT_CHARSET = 'us-ascii'; const testParameter = (name, filters) => { return filters.some(filter => filter instanceof RegExp ? filter.test(name) : filter === name); }; const normalizeDataURL = (urlString, {stripHash}) => { const parts = urlString.match(/^data:(.*?),(.*?)(?:#(.*))?$/); if (!parts) { throw new Error(`Invalid URL: ${urlString}`); } const mediaType = parts[1].split(';'); const body = parts[2]; const hash = stripHash ? '' : parts[3]; let base64 = false; if (mediaType[mediaType.length - 1] === 'base64') { mediaType.pop(); base64 = true; } // Lowercase MIME type const mimeType = (mediaType.shift() || '').toLowerCase(); const attributes = mediaType .map(attribute => { let [key, value = ''] = attribute.split('=').map(string => string.trim()); // Lowercase `charset` if (key === 'charset') { value = value.toLowerCase(); if (value === DATA_URL_DEFAULT_CHARSET) { return ''; } } return `${key}${value ? `=${value}` : ''}`; }) .filter(Boolean); const normalizedMediaType = [ ...attributes ]; if (base64) { normalizedMediaType.push('base64'); } if (normalizedMediaType.length !== 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) { normalizedMediaType.unshift(mimeType); } return `data:${normalizedMediaType.join(';')},${base64 ? body.trim() : body}${hash ? `#${hash}` : ''}`; }; const normalizeUrl = (urlString, options) => { options = { defaultProtocol: 'http:', normalizeProtocol: true, forceHttp: false, forceHttps: false, stripAuthentication: true, stripHash: false, stripWWW: true, removeQueryParameters: [/^utm_\w+/i], removeTrailingSlash: true, removeDirectoryIndex: false, sortQueryParameters: true, ...options }; // TODO: Remove this at some point in the future if (Reflect.has(options, 'normalizeHttps')) { throw new Error('options.normalizeHttps is renamed to options.forceHttp'); } if (Reflect.has(options, 'normalizeHttp')) { throw new Error('options.normalizeHttp is renamed to options.forceHttps'); } if (Reflect.has(options, 'stripFragment')) { throw new Error('options.stripFragment is renamed to options.stripHash'); } urlString = urlString.trim(); // Data URL if (/^data:/i.test(urlString)) { return normalizeDataURL(urlString, options); } const hasRelativeProtocol = urlString.startsWith('//'); const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString); // Prepend protocol if (!isRelativeUrl) { urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol); } const urlObj = new URLParser(urlString); if (options.forceHttp && options.forceHttps) { throw new Error('The `forceHttp` and `forceHttps` options cannot be used together'); } if (options.forceHttp && urlObj.protocol === 'https:') { urlObj.protocol = 'http:'; } if (options.forceHttps && urlObj.protocol === 'http:') { urlObj.protocol = 'https:'; } // Remove auth if (options.stripAuthentication) { urlObj.username = ''; urlObj.password = ''; } // Remove hash if (options.stripHash) { urlObj.hash = ''; } // Remove duplicate slashes if not preceded by a protocol if (urlObj.pathname) { // TODO: Use the following instead when targeting Node.js 10 // `urlObj.pathname = urlObj.pathname.replace(/(?<!https?:)\/{2,}/g, '/');` urlObj.pathname = urlObj.pathname.replace(/((?!:).|^)\/{2,}/g, (_, p1) => { if (/^(?!\/)/g.test(p1)) { return `${p1}/`; } return '/'; }); } // Decode URI octets if (urlObj.pathname) { urlObj.pathname = decodeURI(urlObj.pathname); } // Remove directory index if (options.removeDirectoryIndex === true) { options.removeDirectoryIndex = [/^index\.[a-z]+$/]; } if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) { let pathComponents = urlObj.pathname.split('/'); const lastComponent = pathComponents[pathComponents.length - 1]; if (testParameter(lastComponent, options.removeDirectoryIndex)) { pathComponents = pathComponents.slice(0, pathComponents.length - 1); urlObj.pathname = pathComponents.slice(1).join('/') + '/'; } } if (urlObj.hostname) { // Remove trailing dot urlObj.hostname = urlObj.hostname.replace(/\.$/, ''); // Remove `www.` if (options.stripWWW && /^www\.([a-z\-\d]{2,63})\.([a-z.]{2,5})$/.test(urlObj.hostname)) { // Each label should be max 63 at length (min: 2). // The extension should be max 5 at length (min: 2). // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names urlObj.hostname = urlObj.hostname.replace(/^www\./, ''); } } // Remove query unwanted parameters if (Array.isArray(options.removeQueryParameters)) { for (const key of [...urlObj.searchParams.keys()]) { if (testParameter(key, options.removeQueryParameters)) { urlObj.searchParams.delete(key); } } } // Sort query parameters if (options.sortQueryParameters) { urlObj.searchParams.sort(); } if (options.removeTrailingSlash) { urlObj.pathname = urlObj.pathname.replace(/\/$/, ''); } // Take advantage of many of the Node `url` normalizations urlString = urlObj.toString(); // Remove ending `/` if ((options.removeTrailingSlash || urlObj.pathname === '/') && urlObj.hash === '') { urlString = urlString.replace(/\/$/, ''); } // Restore relative protocol, if applicable if (hasRelativeProtocol && !options.normalizeProtocol) { urlString = urlString.replace(/^http:\/\//, '//'); } // Remove http/https if (options.stripProtocol) { urlString = urlString.replace(/^(?:https?:)?\/\//, ''); } return urlString; }; module.exports = normalizeUrl; // TODO: Remove this for the next major release module.exports.default = normalizeUrl;