Report Memory Consumption of the DF Graph #832

Merged (6 commits, May 30, 2024)
Changes from all commits
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
*.heapsnapshot
doc/
*-flowr.log.*
statistics-out*/
9 changes: 9 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions package.json
@@ -228,6 +228,7 @@
"n-readlines": "^1.0.1",
"n3": "^1.17.2",
"object-hash": "^3.0.0",
"object-sizeof": "^2.6.4",
"rotating-file-stream": "^3.1.1",
"semver": "^7.5.4",
"tar": "^7.1.0",
20 changes: 16 additions & 4 deletions src/benchmark/slicer.ts
@@ -3,7 +3,6 @@
* @module
*/


import type { IStoppableStopwatch } from './stopwatch'
import { Measurements } from './stopwatch'
import fs from 'fs'
@@ -16,6 +15,7 @@ import { PipelineExecutor } from '../core/pipeline-executor'
import { guard } from '../util/assert'
import { withoutWhitespace } from '../util/strings'
import type {
BenchmarkMemoryMeasurement,
CommonSlicerMeasurements,
ElapsedTime,
PerSliceMeasurements,
@@ -33,6 +33,7 @@ import type { SlicingCriteriaFilter } from '../slicing/criterion/collect-all'
import { collectAllSlicingCriteria } from '../slicing/criterion/collect-all'
import { RType } from '../r-bridge/lang-4.x/ast/model/type'
import { visitAst } from '../r-bridge/lang-4.x/ast/model/processing/visitor'
import { getSizeOfDfGraph } from './stats/size-of'

export const benchmarkLogger = log.getSubLogger({ name: 'benchmark' })

@@ -64,6 +65,7 @@ export interface BenchmarkSingleSliceStats extends MergeableRecord {
code: ReconstructionResult
}


/**
* A slicer that can be used to slice exactly one file (multiple times).
* It holds its own {@link RShell} instance, maintains a cached dataflow and keeps measurements.
@@ -75,8 +77,9 @@ export interface BenchmarkSingleSliceStats extends MergeableRecord {
*/
export class BenchmarkSlicer {
/** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
private readonly commonMeasurements = new Measurements<CommonSlicerMeasurements>()
private readonly commonMeasurements = new Measurements<CommonSlicerMeasurements>()
private readonly perSliceMeasurements = new Map<SlicingCriteria, PerSliceStats>()
private readonly deltas = new Map<CommonSlicerMeasurements, BenchmarkMemoryMeasurement>()
private readonly shell: RShell
private stats: SlicerStats | undefined
private loadedXml: string | undefined
@@ -164,6 +167,7 @@ export class BenchmarkSlicer {
this.stats = {
commonMeasurements: new Map<CommonSlicerMeasurements, ElapsedTime>(),
perSliceMeasurements: this.perSliceMeasurements,
memory: this.deltas,
request,
input: {
numberOfLines: split.length,
@@ -181,7 +185,8 @@
numberOfNodes: [...this.dataflow.graph.vertices(true)].length,
numberOfEdges: numberOfEdges,
numberOfCalls: numberOfCalls,
numberOfFunctionDefinitions: numberOfDefinitions
numberOfFunctionDefinitions: numberOfDefinitions,
sizeOfObject: getSizeOfDfGraph(this.dataflow.graph)
}
}
}
@@ -251,10 +256,17 @@
expectedStep: Step,
keyToMeasure: CommonSlicerMeasurements
): Promise<PipelineStepOutputWithName<typeof DEFAULT_SLICING_PIPELINE, Step>> {
const memoryInit = process.memoryUsage()
const { result } = await this.commonMeasurements.measureAsync(
keyToMeasure, () => this.pipeline.nextStep(expectedStep)
)

const memoryEnd = process.memoryUsage()
this.deltas.set(keyToMeasure, {
heap: memoryEnd.heapUsed - memoryInit.heapUsed,
rss: memoryEnd.rss - memoryInit.rss,
external: memoryEnd.external - memoryInit.external,
buffs: memoryEnd.arrayBuffers - memoryInit.arrayBuffers
})
return result as PipelineStepOutputWithName<typeof DEFAULT_SLICING_PIPELINE, Step>
}

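In essence, the new bookkeeping samples process.memoryUsage() immediately before and after each pipeline step and stores the field-wise difference. A minimal standalone sketch of the same idea (the helper name and its usage are illustrative, not part of this diff):

import type { BenchmarkMemoryMeasurement } from './stats/stats'

/* run an arbitrary async step and report how Node's memory counters moved;
 * a GC cycle during the step can make any of the deltas negative */
async function withMemoryDelta<T>(step: () => Promise<T>): Promise<[T, BenchmarkMemoryMeasurement]> {
    const before = process.memoryUsage()
    const result = await step()
    const after = process.memoryUsage()
    return [result, {
        heap:     after.heapUsed     - before.heapUsed,
        rss:      after.rss          - before.rss,
        external: after.external     - before.external,
        buffs:    after.arrayBuffers - before.arrayBuffers
    }]
}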
19 changes: 17 additions & 2 deletions src/benchmark/stats/print.ts
@@ -80,6 +80,18 @@ function printCountSummarizedMeasurements(stats: SummarizedMeasurement): string
return `${range} (median: ${stats.median}, mean: ${stats.mean}, std: ${stats.std})`
}

const units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']

// based on https://stackoverflow.com/a/39906526
function convertNumberToNiceBytes(x: number){
let n = Math.abs(x)
let l = 0
while(n >= 1024 && ++l){
n = n/1024
}
return pad((x < 0 ? '-' : '') + n.toFixed(n < 10 && l > 0 ? 1 : 0) + ' ' + units[l])
}
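For intuition: the helper divides by 1024 until the value drops below one unit, keeps one decimal only for small scaled values, and re-attaches the sign afterwards. Expected outputs (ignoring the left-padding applied by pad):

convertNumberToNiceBytes(512)                 // '512 bytes'
convertNumberToNiceBytes(1536)                // '1.5 KiB'
convertNumberToNiceBytes(-10 * 1024 * 1024)   // '-10 MiB'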

/**
* Converts the given stats to a human-readable string.
* You may have to {@link summarizeSlicerStats | summarize} the stats first.
@@ -133,7 +145,8 @@
Dataflow:
Number of nodes: ${pad(stats.dataflow.numberOfNodes)}
Number of edges: ${pad(stats.dataflow.numberOfEdges)}
Number of calls: ${pad(stats.dataflow.numberOfCalls)}
Number of function defs: ${pad(stats.dataflow.numberOfFunctionDefinitions)}`
Number of function defs: ${pad(stats.dataflow.numberOfFunctionDefinitions)}
Size of graph: ${convertNumberToNiceBytes(stats.dataflow.sizeOfObject)}`
}

export function ultimateStats2String(stats: UltimateSlicerStats): string {
@@ -173,7 +186,9 @@
Dataflow:
Number of nodes: ${formatSummarizedMeasure(stats.dataflow.numberOfNodes)}
Number of edges: ${formatSummarizedMeasure(stats.dataflow.numberOfEdges)}
Number of calls: ${formatSummarizedMeasure(stats.dataflow.numberOfCalls)}
Number of function defs: ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}`
Number of function defs: ${formatSummarizedMeasure(stats.dataflow.numberOfFunctionDefinitions)}
Size of graph: ${formatSummarizedMeasure(stats.dataflow.sizeOfObject, convertNumberToNiceBytes)}
`
}

function reduction2String(title: string, reduction: Reduction<SummarizedMeasurement>) {
72 changes: 72 additions & 0 deletions src/benchmark/stats/size-of.ts
@@ -0,0 +1,72 @@
import type { IEnvironment } from '../../dataflow/environments/environment'
import { BuiltInEnvironment } from '../../dataflow/environments/environment'
import type { DataflowGraph } from '../../dataflow/graph/graph'
import type { DataflowGraphVertexInfo } from '../../dataflow/graph/vertex'
import { VertexType } from '../../dataflow/graph/vertex'
import type { Identifier, IdentifierDefinition } from '../../dataflow/environments/identifier'
import sizeof from 'object-sizeof'

/* we have to kill all processors linked in the default environment as they cannot be serialized and they are shared anyway */
function killBuiltInEnv(env: IEnvironment | undefined): IEnvironment {
if(env === undefined) {
return undefined as unknown as IEnvironment
} else if(env.id === BuiltInEnvironment.id) {
/* in this case, the reference would be shared for sure */
return {
id: env.id,
parent: killBuiltInEnv(env.parent),
memory: new Map<Identifier, IdentifierDefinition[]>()
}
}

const memory = new Map<Identifier, IdentifierDefinition[]>()
for(const [k, v] of env.memory) {
memory.set(k, v.filter(v => !v.kind.startsWith('built-in') && !('processor' in v)))
}

return {
id: env.id,
parent: killBuiltInEnv(env.parent),
memory
}
}

/** Returns the size of the given df graph in bytes, counting shared references as if they were separate objects */
export function getSizeOfDfGraph(df: DataflowGraph): number {
const verts = []
for(const [, v] of df.vertices(true)) {
let vertex: DataflowGraphVertexInfo = v
if(vertex.environment) {
vertex = {
...vertex,
environment: {
...vertex.environment,
current: killBuiltInEnv(v.environment.current)
}
} as DataflowGraphVertexInfo
}

if(vertex.tag === VertexType.FunctionDefinition) {
vertex = {
...vertex,
subflow: {
...vertex.subflow,
environment: {
...vertex.subflow.environment,
current: killBuiltInEnv(vertex.subflow.environment.current)
}
}
} as DataflowGraphVertexInfo
}

vertex = {
...vertex,
/* shared anyway by using constants */
tag: 0 as unknown
} as DataflowGraphVertexInfo

verts.push(vertex)
}

return sizeof([...verts, ...df.edges()])
}
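Note that object-sizeof does not ask the engine for real heap usage; it walks the object graph and sums fixed per-type byte estimates, which is why the shared built-in environment has to be stripped first (otherwise the entire global scope would be counted once per vertex). A simplified sketch of the kind of accounting the library performs (illustrative only, not its actual implementation):

/* primitives get fixed byte costs, containers are traversed recursively */
function approximateSize(value: unknown): number {
    if(typeof value === 'number')  { return 8 }
    if(typeof value === 'boolean') { return 4 }
    if(typeof value === 'string')  { return value.length * 2 }
    if(value === null || value === undefined) { return 0 }
    if(Array.isArray(value)) {
        return value.reduce((sum: number, v) => sum + approximateSize(v), 0)
    }
    if(value instanceof Map) {
        let sum = 0
        for(const [k, v] of value) { sum += approximateSize(k) + approximateSize(v) }
        return sum
    }
    /* plain object: keys count as strings, values recursively */
    return Object.entries(value as object)
        .reduce((sum, [k, v]) => sum + k.length * 2 + approximateSize(v), 0)
}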
20 changes: 20 additions & 0 deletions src/benchmark/stats/stats.ts
@@ -2,6 +2,7 @@ import type { SingleSlicingCriterion, SlicingCriteria } from '../../slicing/crit
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'
import type { ReconstructionResult } from '../../reconstruct/reconstruct'
import type { RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge/retriever'
import type { MergeableRecord } from '../../util/objects'

export const CommonSlicerMeasurements = ['initialize R session', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'] as const
export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number]
@@ -38,6 +39,24 @@ export interface SlicerStatsDataflow<T = number> {
numberOfEdges: T
numberOfCalls: T
numberOfFunctionDefinitions: T
/* size of the graph object in bytes, as approximated by object-sizeof */
sizeOfObject: T
}


/**
* Please note that these measurements can be negative, as there is no guarantee that memory usage
* will increase (it may shrink due to, e.g., garbage collection).
*/
export interface BenchmarkMemoryMeasurement<T = number> extends MergeableRecord {
/* used heap memory delta as reported by the node process in bytes */
heap: T
/* resident set size delta as reported by the node process in bytes */
rss: T
/* external memory delta as reported by the node process in bytes */
external: T
/* (array) buffer memory delta as reported by the node process in bytes */
buffs: T
}
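For concreteness, a single (hypothetical) measurement could look like this; the negative buffer delta simply means more buffer memory was released than allocated while the step ran:

const example: BenchmarkMemoryMeasurement = {
    heap:     12_582_912, /* the step left ~12 MiB more in use on the heap */
    rss:      20_971_520, /* ~20 MiB growth of the resident set */
    external: 0,
    buffs:    -4_096      /* a collection freed buffer memory mid-step */
}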

/**
@@ -46,6 +65,7 @@ export interface SlicerStatsDataflow<T = number> {
export interface SlicerStats {
commonMeasurements: Map<CommonSlicerMeasurements, ElapsedTime>
perSliceMeasurements: Map<SlicingCriteria, PerSliceStats>
memory: Map<CommonSlicerMeasurements, BenchmarkMemoryMeasurement>,
request: RParseRequestFromFile | RParseRequestFromText
input: SlicerStatsInput
dataflow: SlicerStatsDataflow
15 changes: 11 additions & 4 deletions src/benchmark/summarizer/first-phase/input.ts
@@ -6,7 +6,7 @@ import { guard } from '../../../util/assert'
import { escape } from '../../../util/ansi'
import { jsonReplacer } from '../../../util/json'
import { readLineByLineSync } from '../../../util/files'
import type { CommonSlicerMeasurements, PerSliceMeasurements, PerSliceStats, SlicerStats } from '../../stats/stats'
import type { BenchmarkMemoryMeasurement, CommonSlicerMeasurements, PerSliceMeasurements, PerSliceStats, SlicerStats } from '../../stats/stats'
import type { SlicingCriteria } from '../../../slicing/criterion/parse'
import { stats2string } from '../../stats/print'

@@ -27,9 +27,13 @@ export async function processRunMeasurement(line: Buffer, fileNum: number, lineN
'file-id': got['file-id'],
'run-num': got['run-num'],
stats: {
input: got.stats.input,
request: got.stats.request,
dataflow: got.stats.dataflow,
input: got.stats.input,
request: got.stats.request,
dataflow: got.stats.dataflow,
memory: new Map(
(got.stats.memory as unknown as [CommonSlicerMeasurements, BenchmarkMemoryMeasurement][])
.map(([k, v]) => [k, v])
),
commonMeasurements: new Map(
(got.stats.commonMeasurements as unknown as [CommonSlicerMeasurements, string][])
.map(([k, v]) => {
@@ -49,6 +53,9 @@
let atSliceNumber = 0
const summarized = await summarizeSlicerStats(got.stats, (criterion, stats) => {
console.log(`${escape}1F${escape}1G${escape}2K [${++atSliceNumber}/${totalSlices}] Summarizing ${JSON.stringify(criterion)} (reconstructed has ${stats.reconstructedCode.code.length} characters)`)
if(stats.reconstructedCode.code.length < 50) {
console.log(`Reconstructed code: ${stats.reconstructedCode.code}`)
}
})

console.log(` - Append raw summary to ${outputPath}`)
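Rebuilding the Maps is necessary because they are serialized (via jsonReplacer) as arrays of [key, value] pairs. A minimal round-trip sketch of that convention (hypothetical data, using a plain [...map] spread in place of jsonReplacer):

const memory = new Map([['total', { heap: 1024, rss: 2048, external: 0, buffs: 0 }]])
const line = JSON.stringify([...memory])
// '[["total",{"heap":1024,"rss":2048,"external":0,"buffs":0}]]'
const revived = new Map(JSON.parse(line) as [string, object][])
// Map(1) { 'total' => { heap: 1024, ... } }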
1 change: 1 addition & 0 deletions src/benchmark/summarizer/first-phase/process.ts
@@ -222,6 +222,7 @@
}

export function summarizeSummarizedMeasurement(data: SummarizedMeasurement[]): SummarizedMeasurement {
data = data.filter(isNotUndefined)
const min = data.map(d => d.min).filter(isNotUndefined).reduce((a, b) => Math.min(a, b), Infinity)
const max = data.map(d => d.max).filter(isNotUndefined).reduce((a, b) => Math.max(a, b), -Infinity)
// calculate median of medians (don't just average the median!)
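The fields combine with different fidelity: minima and maxima of the per-file summaries combine exactly, whereas the median of medians is only an approximation of the true overall median (hence the comment in the code above). A small worked example with hypothetical summaries:

const a = { min: 1, max:  9, median: 4 }
const b = { min: 2, max: 20, median: 10 }
// exact:       combined min = Math.min(1, 2) = 1, combined max = Math.max(9, 20) = 20
// approximate: combined median = median([4, 10]) = 7; the true median of the raw data may differ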
30 changes: 20 additions & 10 deletions src/benchmark/summarizer/second-phase/process.ts
@@ -5,6 +5,7 @@ import type { SummarizedMeasurement } from '../../../util/summarizer'
import { summarizeMeasurement } from '../../../util/summarizer'
import { guard } from '../../../util/assert'
import type {
BenchmarkMemoryMeasurement,
SlicerStatsDataflow,
SlicerStatsInput
} from '../../stats/stats'
@@ -16,6 +17,7 @@ import {
export function summarizeAllSummarizedStats(stats: SummarizedSlicerStats[]): UltimateSlicerStats {
const commonMeasurements = new DefaultMap<CommonSlicerMeasurements, number[]>(() => [])
const perSliceMeasurements = new DefaultMap<PerSliceMeasurements, SummarizedMeasurement[]>(() => [])
const memory = new DefaultMap<CommonSlicerMeasurements, BenchmarkMemoryMeasurement[]>(() => [])
const reductions: Reduction<SummarizedMeasurement>[] = []
const reductionsNoFluff: Reduction<SummarizedMeasurement>[] = []
const inputs: SlicerStatsInput[] = []
@@ -31,6 +33,9 @@
for(const [k, v] of stat.perSliceMeasurements.measurements) {
perSliceMeasurements.get(k).push(v)
}
for(const [k, v] of stat.memory) {
memory.get(k).push(v)
}
reductions.push(stat.perSliceMeasurements.reduction)
reductionsNoFluff.push(stat.perSliceMeasurements.reductionNoFluff)
inputs.push(stat.input)
@@ -69,19 +74,19 @@
numberOfNodes: summarizeMeasurement(dataflows.map(d => d.numberOfNodes)),
numberOfFunctionDefinitions: summarizeMeasurement(dataflows.map(d => d.numberOfFunctionDefinitions)),
numberOfCalls: summarizeMeasurement(dataflows.map(d => d.numberOfCalls)),
numberOfEdges: summarizeMeasurement(dataflows.map(d => d.numberOfEdges))
numberOfEdges: summarizeMeasurement(dataflows.map(d => d.numberOfEdges)),
sizeOfObject: summarizeMeasurement(dataflows.map(d => d.sizeOfObject))
}
}
}
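The collection phase above leans on DefaultMap: looking up a missing key creates and stores a fresh default, so the per-measurement arrays need no existence checks before push. A sketch of the idea (hypothetical re-implementation; the repo's util may differ in detail):

class DefaultMap<K, V> extends Map<K, V> {
    constructor(private readonly makeDefault: () => V) { super() }
    get(key: K): V {
        if(!this.has(key)) { this.set(key, this.makeDefault()) }
        return super.get(key) as V
    }
}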

export function summarizeAllUltimateStats(stats: UltimateSlicerStats[]): UltimateSlicerStats {
return {
// these should be deterministic, so we don't technically need to use max, but we do just in case something unexpected happens :)
totalRequests: Math.max(...stats.map(s => s.totalRequests)),
totalSlices: Math.max(...stats.map(s => s.totalSlices)),
failedToRepParse: Math.max(...stats.map(s => s.failedToRepParse)),
timesHitThreshold: Math.max(...stats.map(s => s.timesHitThreshold)),

totalRequests: Math.max(...stats.map(s => s.totalRequests)),
totalSlices: Math.max(...stats.map(s => s.totalSlices)),
failedToRepParse: Math.max(...stats.map(s => s.failedToRepParse)),
timesHitThreshold: Math.max(...stats.map(s => s.timesHitThreshold)),
// average out / summarize other measurements
commonMeasurements: new Map(CommonSlicerMeasurements.map(m => [m, summarizeSummarizedMeasurement(stats.map(s => s.commonMeasurements.get(m) as SummarizedMeasurement))])),
perSliceMeasurements: new Map(PerSliceMeasurements.map(m => [m, summarizeSummarizedMeasurement(stats.map(s => s.perSliceMeasurements.get(m) as SummarizedMeasurement))])),
Expand All @@ -103,7 +108,8 @@ export function summarizeAllUltimateStats(stats: UltimateSlicerStats[]): Ultimat
numberOfNodes: summarizeSummarizedMeasurement(stats.map(s => s.dataflow.numberOfNodes)),
numberOfFunctionDefinitions: summarizeSummarizedMeasurement(stats.map(s => s.dataflow.numberOfFunctionDefinitions)),
numberOfCalls: summarizeSummarizedMeasurement(stats.map(s => s.dataflow.numberOfCalls)),
numberOfEdges: summarizeSummarizedMeasurement(stats.map(s => s.dataflow.numberOfEdges))
numberOfEdges: summarizeSummarizedMeasurement(stats.map(s => s.dataflow.numberOfEdges)),
sizeOfObject: summarizeSummarizedMeasurement(stats.map(s => s.dataflow.sizeOfObject))
}
}
}
@@ -112,9 +118,13 @@ export function processNextSummary(line: Buffer, allSummarized: SummarizedSlicer
let got = JSON.parse(line.toString()) as { summarize: SummarizedSlicerStats }
got = {
summarize: {
input: got.summarize.input,
request: got.summarize.request,
dataflow: got.summarize.dataflow,
input: got.summarize.input,
request: got.summarize.request,
dataflow: got.summarize.dataflow,
memory: new Map(
(got.summarize.memory as unknown as [CommonSlicerMeasurements, BenchmarkMemoryMeasurement][])
.map(([k, v]) => [k, v])
),
commonMeasurements: new Map(
(got.summarize.commonMeasurements as unknown as [CommonSlicerMeasurements, string][])
.map(([k, v]) => {
Expand Down
Loading
Loading