Skip to content

Commit

Permalink
feat: Capture redirected assets (#349)
Browse files Browse the repository at this point in the history
* WIP: Capture redirected assets in asset discovery

axios will follow the redirect chain for us, so we can just request the asset
and get the content back. Then save that content with the original requests URL.

* Refactor common code out into shared methods, add inline docs

* Send the correct content type, make sure all file types work

* Pull status code array out to a constant var

* Match axois max redirect length, dont capture dupe redirected assets

* Update test & comment misspelling

* Peer review feedback
  • Loading branch information
Robdel12 committed Sep 19, 2019
1 parent 4a81eaa commit aa918cc
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 24 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"cross-spawn": "^6.0.5",
"deepmerge": "^4.0.0",
"express": "^4.16.3",
"follow-redirects": "^1.9.0",
"generic-pool": "^3.7.1",
"globby": "^10.0.1",
"js-yaml": "^3.13.1",
Expand Down
113 changes: 90 additions & 23 deletions src/services/response-service.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import Axios from 'axios'
import * as crypto from 'crypto'
// @ts-ignore
import * as followRedirects from 'follow-redirects'
import * as fs from 'fs'
import * as os from 'os'
import * as path from 'path'
Expand All @@ -9,10 +12,11 @@ import Constants from './constants'
import PercyClientService from './percy-client-service'
import ResourceService from './resource-service'

const REDIRECT_STATUSES = [301, 302, 304, 307, 308]
const ALLOWED_RESPONSE_STATUSES = [200, 201, ...REDIRECT_STATUSES]

export default class ResponseService extends PercyClientService {
resourceService: ResourceService

readonly ALLOWED_RESPONSE_STATUSES = [200, 201, 304]
responsesProcessed: Map<string, string> = new Map()
allowedHostnames: string[]

Expand Down Expand Up @@ -52,6 +56,7 @@ export default class ResponseService extends PercyClientService {

const request = response.request()
const parsedRootResourceUrl = new URL(rootResourceUrl)
const isRedirect = REDIRECT_STATUSES.includes(response.status())
const rootUrl = `${parsedRootResourceUrl.protocol}//${parsedRootResourceUrl.host}`

if (request.url() === rootResourceUrl) {
Expand All @@ -60,7 +65,7 @@ export default class ResponseService extends PercyClientService {
return
}

if (!this.ALLOWED_RESPONSE_STATUSES.includes(response.status())) {
if (!ALLOWED_RESPONSE_STATUSES.includes(response.status())) {
// Only allow 2XX responses:
logger.debug(`Skipping [disallowed_response_status_${response.status()}] [${width} px]: ${response.url()}`)
return
Expand All @@ -72,45 +77,107 @@ export default class ResponseService extends PercyClientService {
return
}

if (!this.shouldCaptureResource(rootUrl, response.url())) {
// Disallow remote redirects.
logger.debug(`Skipping [is_remote_redirect] [${width} px]: ${response.url()}`)
return
if (isRedirect) {
// We don't want to follow too deep of a chain
// `followRedirects` is the npm package axios uses to follow redirected requests
// we'll use their max redirect setting as a guard here
if (request.redirectChain().length > followRedirects.maxRedirects) {
logger.debug(`Skipping [redirect_too_deep: ${request.redirectChain().length}] [${width} px]: ${response.url()}`)
return
}

const redirectedURL = `${rootUrl}${response.headers().location}` as string
return this.handleRedirectResouce(url, redirectedURL, width, logger)
}

const localCopy = await this.makeLocalCopy(response, logger)
return this.handlePuppeteerResource(url, response, width, logger)
}

/**
* Handle processing and saving a resource that has a redirect chain. This
* will download the resource from node, and save the content as the orignal
* requesting url. This works since axios follows the redirect chain
* automatically.
*/
async handleRedirectResouce(originalURL: string, redirectedURL: string, width: number, logger: any) {
logger.debug(`Making local copy of redirected response: ${originalURL}`)

const { data, headers } = await Axios(originalURL, { responseType: 'arraybuffer' }) as any
const buffer = Buffer.from(data)
const sha = crypto.createHash('sha256').update(buffer).digest('hex')
const localCopy = path.join(os.tmpdir(), sha)
const didWriteFile = this.maybeWriteFile(localCopy, buffer)
const { fileIsTooLarge, responseBodySize } = this.checkFileSize(localCopy)

const responseBodySize = fs.statSync(localCopy).size
if (responseBodySize > Constants.MAX_FILE_SIZE_BYTES) {
// Skip large resources
logger.debug(`Skipping [max_file_size_exceeded_${responseBodySize}] [${width} px]: ${response.url()}`)
if (!didWriteFile) {
logger.debug(`Skipping file copy [already_copied]: ${originalURL}`)
}

if (fileIsTooLarge) {
logger.debug(`Skipping [max_file_size_exceeded_${responseBodySize}] [${width} px]: ${originalURL}`)
return
}

const contentType = response.headers()['content-type']
const resource = this.resourceService.createResourceFromFile(url, localCopy, contentType, logger)
this.responsesProcessed.set(url, resource)
const contentType = headers['content-type'] as string
const resource = this.resourceService.createResourceFromFile(originalURL, localCopy, contentType, logger)

this.responsesProcessed.set(originalURL, resource)
this.responsesProcessed.set(redirectedURL, resource)

return resource
}

async makeLocalCopy(response: puppeteer.Response, logger: any): Promise<string> {
/**
* Handle processing and saving a resource coming from Puppeteer. This will
* take the response object from Puppeteer and save the asset locally.
*/
async handlePuppeteerResource(url: string, response: puppeteer.Response, width: number, logger: any) {
logger.debug(`Making local copy of response: ${response.url()}`)

const buffer = await response.buffer()
const sha = crypto.createHash('sha256').update(buffer).digest('hex')
const filename = path.join(this.tmpDir(), sha)
const localCopy = path.join(os.tmpdir(), sha)
const didWriteFile = this.maybeWriteFile(localCopy, buffer)

if (!fs.existsSync(filename)) {
fs.writeFileSync(filename, buffer)
} else {
if (!didWriteFile) {
logger.debug(`Skipping file copy [already_copied]: ${response.url()}`)
}

return filename
const contentType = response.headers()['content-type']
const { fileIsTooLarge, responseBodySize } = this.checkFileSize(localCopy)

if (fileIsTooLarge) {
logger.debug(`Skipping [max_file_size_exceeded_${responseBodySize}] [${width} px]: ${response.url()}`)
return
}

const resource = this.resourceService.createResourceFromFile(url, localCopy, contentType, logger)
this.responsesProcessed.set(url, resource)

return resource
}

tmpDir(): string {
return os.tmpdir()
/**
* Write a local copy of the SHA only if it doesn't exist on the file system
* already.
*/
maybeWriteFile(filePath: string, buffer: any): boolean {
if (!fs.existsSync(filePath)) {
fs.writeFileSync(filePath, buffer)
return true
} else {
return false
}
}

/**
* Ensures the saved file is not larger than what the Percy API accepts. It
* returns if the file is too large, as well as the files size.
*/
checkFileSize(filePath: string) {
const responseBodySize = fs.statSync(filePath).size
const fileIsTooLarge = responseBodySize > Constants.MAX_FILE_SIZE_BYTES

return { fileIsTooLarge, responseBodySize }
}
}
8 changes: 8 additions & 0 deletions test/integration/agent-integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ describe('Integration test', () => {
expect(domSnapshot).contains('Buildkite')
})

it('snapshots a site with redirected assets', async () => {
await page.goto('https://sdk-test.percy.dev/redirects/')
await page.waitFor('.note')
const domSnapshot = await snapshot(page, 'Redirects snapshot')

expect(domSnapshot).contains('correctly snapshotted')
})

})

describe('on local test cases', () => {
Expand Down
9 changes: 8 additions & 1 deletion yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3217,7 +3217,7 @@ debug@3.1.0, debug@=3.1.0, debug@~3.1.0:
dependencies:
ms "2.0.0"

debug@3.2.6, debug@^3.1.0, debug@^3.2.6:
debug@3.2.6, debug@^3.0.0, debug@^3.1.0, debug@^3.2.6:
version "3.2.6"
resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.6.tgz#e83d17de16d8a7efb7717edbe5fb10135eee629b"
integrity sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==
Expand Down Expand Up @@ -4339,6 +4339,13 @@ follow-redirects@^1.0.0:
dependencies:
debug "^3.2.6"

follow-redirects@^1.9.0:
version "1.9.0"
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.9.0.tgz#8d5bcdc65b7108fe1508649c79c12d732dcedb4f"
integrity sha512-CRcPzsSIbXyVDl0QI01muNDu69S8trU4jArW9LpOt2WtC6LyUJetcIrmfHsRBx7/Jb6GHJUiuqyYxPooFfNt6A==
dependencies:
debug "^3.0.0"

for-in@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/for-in/-/for-in-1.0.2.tgz#81068d295a8142ec0ac726c6e2200c30fb6d5e80"
Expand Down

0 comments on commit aa918cc

Please sign in to comment.