Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Invalid URL Fix: Decoding manually for some reserved characters #1682

Merged
merged 14 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/core/src/discovery.js
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ export function createDiscoveryQueue(percy) {
authorization: snapshot.discovery.authorization,
userAgent: snapshot.discovery.userAgent,
captureMockedServiceWorker: snapshot.discovery.captureMockedServiceWorker,
meta: snapshot.meta,
meta: { ...snapshot.meta, snapshotURL: snapshot.url },
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • We are adding a snapshot url there, to use it in network.js


// enable network inteception
intercept: {
Expand Down
10 changes: 9 additions & 1 deletion packages/core/src/network.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { request as makeRequest } from '@percy/client/utils';
import logger from '@percy/logger';
import mime from 'mime-types';
import { DefaultMap, createResource, hostnameMatches, normalizeURL, waitFor } from './utils.js';
import { DefaultMap, createResource, hostnameMatches, normalizeURL, waitFor, decodeAndEncodeURLWithLogging } from './utils.js';

const MAX_RESOURCE_SIZE = 25 * (1024 ** 2); // 25MB
const ALLOWED_STATUSES = [200, 201, 301, 302, 304, 307, 308];
Expand Down Expand Up @@ -207,6 +207,14 @@ export class Network {
// do not handle data urls
if (request.url.startsWith('data:')) return;

// Browsers handle URL encoding leniently.
// This code checks for issues such as `%` and leading spaces and warns the user accordingly.
decodeAndEncodeURLWithLogging(request.url, this.log, {
meta: { ...this.meta, url: request.url },
shouldLogWarning: request.url !== this.meta?.snapshotURL,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Since snapshot.url also goes via network.js, so if snapshot.url is invalid the error message is already been shown before so no need to show again. Only show for assets urls

warningMessage: `An invalid URL was detected for url: ${request.url} - the snapshot may fail on Percy. Please verify that your asset URL is valid.`
});

if (this.intercept) {
this.#pending.set(requestId, event);
if (this.captureMockedServiceWorker) {
Expand Down
20 changes: 19 additions & 1 deletion packages/core/src/snapshot.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import {
request,
hostnameMatches,
yieldTo,
snapshotLogName
snapshotLogName,
decodeAndEncodeURLWithLogging
} from './utils.js';
import { JobData } from './wait-for-job.js';

Expand All @@ -24,6 +25,21 @@ function validURL(url, base) {
}
}

function validateAndFixSnapshotUrl(snapshot) {
let log = logger('core:snapshot');
// encoding snapshot url, if contians invalid URI characters/syntax
let modifiedURL = decodeAndEncodeURLWithLogging(snapshot.url, log, {
meta: { snapshot: { name: snapshot.name || snapshot.url } },
shouldLogWarning: true,
warningMessage: `Invalid URL detected for url: ${snapshot.url} - the snapshot may fail on Percy. Please confirm that your website URL is valid.`
});

if (modifiedURL !== snapshot.url) {
log.debug(`Snapshot URL modified to: ${modifiedURL}`);
snapshot.url = modifiedURL;
}
}

// used to deserialize regular expression strings
const RE_REGEXP = /^\/(.+)\/(\w+)?$/;

Expand Down Expand Up @@ -86,6 +102,8 @@ function mapSnapshotOptions(snapshots, context) {
// transform snapshot URL shorthand into an object
if (typeof snapshot === 'string') snapshot = { url: snapshot };

if (process.env.PERCY_MODIFY_SNAPSHOT_URL !== 'false') validateAndFixSnapshotUrl(snapshot);

// normalize the snapshot url and use it for the default name
let url = validURL(snapshot.url, context?.baseUrl);
snapshot.name ||= `${url.pathname}${url.search}${url.hash}`;
Expand Down
70 changes: 70 additions & 0 deletions packages/core/src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,76 @@ export function base64encode(content) {
.toString('base64');
}

const RESERVED_CHARACTERS = {
'%3A': ':',
'%23': '#',
'%24': '$',
'%26': '&',
'%2B': '+',
'%2C': ',',
'%2F': '/',
'%3B': ';',
'%3D': '=',
'%3F': '?',
'%40': '@'
};

function _replaceReservedCharactersWithPlaceholder(url) {
let result = url;
let matchedPattern = {};
let placeHolderCount = 0;
for (let key of Object.keys(RESERVED_CHARACTERS)) {
let regex = new RegExp(key, 'g');
if (regex.test(result)) {
let placeholder = `__PERCY_PLACEHOLDER_${placeHolderCount}__`;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using __PERCY_PLACEHOLDER_0__ as a placeholder, as these alphanumerical and _ doesn't get encoded as it is a part of a valid URL and not a part of reserved characters.

result = result.replace(regex, placeholder);
matchedPattern[placeholder] = key;
placeHolderCount++;
}
}
return { url: result, matchedPattern };
}

function _replacePlaceholdersWithReservedCharacters(matchedPattern, url) {
let result = url;
for (let [key, value] of Object.entries(matchedPattern)) {
let regex = new RegExp(key, 'g');
result = result.replace(regex, value);
}
return result;
}

// This function replaces invalid character that are not the
// part of valid URI syntax with there correct encoded value.
// Also, if a character is a part of valid URI syntax, those characters
// are not encoded
// Eg: [abc] -> gets encoded to %5Babc%5D
// ab c -> ab%20c
export function decodeAndEncodeURLWithLogging(url, logger, options = {}) {
// In case the url is partially encoded, then directly using encodeURI()
// will encode those characters again. Therefore decodeURI once helps is decoding
// partially encoded URL and then after encoding it again, full URL get encoded
// correctly.
const {
meta,
shouldLogWarning,
warningMessage
} = options;
try {
let { url: placeholderURL, matchedPattern } = _replaceReservedCharactersWithPlaceholder(url);
let decodedURL = decodeURI(placeholderURL);
let encodedURL = encodeURI(decodedURL);
encodedURL = _replacePlaceholdersWithReservedCharacters(matchedPattern, encodedURL);
return encodedURL;
} catch (error) {
logger.debug(error, meta);
if (error.name === 'URIError' && shouldLogWarning) {
logger.warn(warningMessage);
}
return url;
}
}

export function snapshotLogName(name, meta) {
if (meta?.snapshot?.testCase) {
return `testCase: ${meta.snapshot.testCase}, ${name}`;
Expand Down
43 changes: 43 additions & 0 deletions packages/core/test/discovery.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,49 @@ describe('Discovery', () => {
]));
});

it('debug error log only for invalid network url', async () => {
let err = new Error('Some Invalid Error');
spyOn(global, 'decodeURI').and.callFake((url) => {
if (url === 'http://localhost:8000/style.css') {
throw err;
}
return url;
});

percy.loglevel('debug');
await percy.snapshot({
name: 'test snapshot',
url: 'http://localhost:8000',
domSnapshot: testDOM
});

expect(logger.stderr).toEqual(jasmine.arrayContaining([
'[percy:core:page] Navigate to: http://localhost:8000/',
'[percy:core:discovery] Handling request: http://localhost:8000/',
'[percy:core:discovery] - Serving root resource',
`[percy:core:discovery] ${err.stack}`,
'[percy:core:discovery] Handling request: http://localhost:8000/style.css',
'[percy:core:discovery] Handling request: http://localhost:8000/img.gif'
]));
});

it('detect invalid network url', async () => {
percy.loglevel('debug');
await percy.snapshot({
name: 'test snapshot',
url: 'http://localhost:8000',
domSnapshot: testDOM.replace('style.css', 'http://localhost:404/%style.css')
});

expect(logger.stderr).toEqual(jasmine.arrayContaining([
'[percy:core:discovery] Handling request: http://localhost:8000/',
'[percy:core:discovery] - Serving root resource',
'[percy:core:discovery] An invalid URL was detected for url: http://localhost:404/%style.css - the snapshot may fail on Percy. Please verify that your asset URL is valid.',
'[percy:core:discovery] Handling request: http://localhost:404/%style.css',
'[percy:core:discovery] Handling request: http://localhost:8000/img.gif'
]));
});

it('allows setting a custom discovery user-agent', async () => {
let userAgent;

Expand Down
143 changes: 143 additions & 0 deletions packages/core/test/snapshot.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,149 @@ describe('Snapshot', () => {
]);
});

describe('snapshot urls', () => {
beforeEach(() => {
percy.loglevel('debug');
});

describe('with invalid snapshot URL', () => {
const sharedExpectModifiedSnapshotURL = (expectedURL) => {
expect(logger.stderr).toEqual(jasmine.arrayContaining([
`[percy:core:snapshot] Snapshot URL modified to: ${expectedURL}`
]));
};

it('modifies if url contains space', async () => {
await percy.snapshot([
{ url: 'http://localhost:8000/ a' }
]);

sharedExpectModifiedSnapshotURL('http://localhost:8000/%20a');
});

it('modifies if url contains [ ] to %5B and %5D', async () => {
let givenURL = 'http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dcb7fd081043/results/[%229db14794-47f0-406a-a3b4-3f89280122b7%22]/structures/by-ccdc-number/123457';
await percy.snapshot([
{ url: givenURL }
]);

sharedExpectModifiedSnapshotURL('http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dcb7fd081043/results/%5B%229db14794-47f0-406a-a3b4-3f89280122b7%22%5D/structures/by-ccdc-number/123457');
});

it('modifies if url contains {} to %7B and %7D', async () => {
let givenURL = 'http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dctwo-{ abc [dbd] }/2253';
await percy.snapshot([
{ url: givenURL }
]);

sharedExpectModifiedSnapshotURL('http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dctwo-%7B%20abc%20%5Bdbd%5D%20%7D/2253');
});

it('modifies if url is partially encoded', async () => {
// Here some character are encoded properly, this test ensure those encoed character should not gets encoded again
let partiallyEncodedURL = 'http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dctwo-%7B%20abc%20[dbd]%20%7D/2253';
await percy.snapshot([
{ url: partiallyEncodedURL }
]);

sharedExpectModifiedSnapshotURL('http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dctwo-%7B%20abc%20%5Bdbd%5D%20%7D/2253');
});

it('do not encode reserved URI character(%2F) in snapshot url', async () => {
let partiallyEncodedURL = 'http://localhost:8000/https%2F/abc[123].html';
await percy.snapshot([
{ url: partiallyEncodedURL }
]);

sharedExpectModifiedSnapshotURL('http://localhost:8000/https%2F/abc%5B123%5D.html');
});

it('do not double encode if multiple reserved URI characters in snapshot url', async () => {
let partiallyEncodedURL = 'http://localhost:8000/https%2F/abc[123]%3A%3F_abc.html';
await percy.snapshot([
{ url: partiallyEncodedURL }
]);

sharedExpectModifiedSnapshotURL('http://localhost:8000/https%2F/abc%5B123%5D%3A%3F_abc.html');
});

describe('does not modifies snapshot url', () => {
let partiallyEncodedURL = 'http://localhost:8000/https%2F/abc[123].html';
const sharedExpect = () => {
expect(logger.stderr).toEqual(jasmine.arrayContaining([
'[percy:core:snapshot] Received snapshot: /https%2F/abc[123].html',
'[percy:core:snapshot] - url: http://localhost:8000/https%2F/abc[123].html'
]));

expect(logger.stderr).not.toEqual(jasmine.arrayContaining([
'[percy:core:snapshot] Snapshot URL modified to: http://localhost:8000/https//abc%5B123%5D.html'
]));
};

describe('with PERCY_MODIFY_SNAPSHOT_URL=false', () => {
beforeEach(() => {
process.env.PERCY_MODIFY_SNAPSHOT_URL = 'false';
});

afterEach(() => {
process.env.PERCY_MODIFY_SNAPSHOT_URL = 'true';
});

it('uses original snapshot url', async () => {
await percy.snapshot([
{ url: partiallyEncodedURL }
]);

sharedExpect();
});
});
});

it('warns for invalid characters that can not get encoded properly', async () => {
let givenURL = 'http://localhost:8000/advanced-search/cf1a5848%-f658-4939-be11-dct';
await percy.snapshot([
{ url: givenURL }
]);

expect(logger.stderr).toEqual(jasmine.arrayContaining([
`[percy:core:snapshot] Invalid URL detected for url: ${givenURL} - the snapshot may fail on Percy. Please confirm that your website URL is valid.`
]));
});

it('debug logs and does not warn if error other than URIError', async () => {
spyOn(global, 'decodeURI').and.callFake(() => {
throw new Error('Some Invalid Error');
});

let givenURL = 'http://localhost:8000/';
await percy.snapshot([
{ url: givenURL }
]);

expect(logger.stderr).not.toEqual(jasmine.arrayContaining([
`[percy] Invalid URL detected for url: ${givenURL}`
]));
});
});

describe('with valid snpashot url', () => {
const sharedExpectNonModifiedSnapshotURL = (expectedURL) => {
expect(logger.stdout).not.toEqual(jasmine.arrayContaining([
`[percy] Snapshot URL modified to: ${expectedURL}`
]));
};

it('not modifies if the url does not invalid characters', async () => {
let givenURL = 'http://localhost:8000/advanced-search/cf1a5848-f658-4939-be11-dx3';
await percy.snapshot([
{ url: givenURL }
]);

sharedExpectNonModifiedSnapshotURL(givenURL);
});
});
});

it('errors if the url is invalid', async () => {
await percy.snapshot({
name: 'test snapshot',
Expand Down
48 changes: 48 additions & 0 deletions packages/core/test/utils.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { decodeAndEncodeURLWithLogging } from '../src/utils.js';
import { logger } from './helpers/index.js';
import percyLogger from '@percy/logger';

describe('utils', () => {
let log;
beforeEach(async () => {
log = percyLogger();
logger.reset(true);
await logger.mock({ level: 'debug' });
});

describe('decodeAndEncodeURLWithLogging', () => {
it('does not warn invalid url when options params is null', async () => {
decodeAndEncodeURLWithLogging('https://abc.com/test%abc', log);
expect(logger.stderr).toEqual([]);
});

it('does not warn invalid url when shouldLogWarning = false', async () => {
decodeAndEncodeURLWithLogging('https://abc.com/test%abc', log,
{
shouldLogWarning: false
});

expect(logger.stderr).toEqual([]);
});

it('returns modified url', async () => {
const url = decodeAndEncodeURLWithLogging('https://abc.com/test[ab]c', log,
{
shouldLogWarning: false
});
expect(logger.stderr).toEqual([]);
expect(url).toEqual('https://abc.com/test%5Bab%5Dc');
});

it('warns if invalid url when shouldLogWarning = true', async () => {
decodeAndEncodeURLWithLogging(
'https://abc.com/test%abc',
log,
{
shouldLogWarning: true,
warningMessage: 'some Warning Message'
});
expect(logger.stderr).toEqual(['[percy] some Warning Message']);
});
});
});
Loading