Skip to content

Commit

Permalink
Magic Comments for the Slicing (#895)
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce committed Aug 20, 2024
2 parents 104b2f4 + c377e5c commit 28fe74f
Show file tree
Hide file tree
Showing 19 changed files with 315 additions and 74 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/deploy-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,7 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Upload the compressed image to the workflow
uses: actions/upload-artifact@v4
with:
path: flowr-$THE_FLOWR_VERSION.tar.gz
6 changes: 4 additions & 2 deletions src/benchmark/slicer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import { collectAllSlicingCriteria } from '../slicing/criterion/collect-all'
import { RType } from '../r-bridge/lang-4.x/ast/model/type'
import { visitAst } from '../r-bridge/lang-4.x/ast/model/processing/visitor'
import { getSizeOfDfGraph } from './stats/size-of'
import type { AutoSelectPredicate } from '../reconstruct/auto-select/auto-select-defaults'

/**
* The logger to be used for benchmarking as a global object.
Expand Down Expand Up @@ -105,13 +106,14 @@ export class BenchmarkSlicer {
* Initialize the slicer on the given request.
* Can only be called once for each instance.
*/
public async init(request: RParseRequestFromFile | RParseRequestFromText) {
public async init(request: RParseRequestFromFile | RParseRequestFromText, autoSelectIf?: AutoSelectPredicate) {
guard(this.stats === undefined, 'cannot initialize the slicer twice')

this.pipeline = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, {
shell: this.shell,
request: { ...request },
criterion: []
criterion: [],
autoSelectIf
})

this.loadedXml = await this.measureCommonStep('parse', 'retrieve AST from R code')
Expand Down
19 changes: 10 additions & 9 deletions src/cli/common/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,16 @@ export const exportQuadsOptions: OptionDefinition[] = [
]

export const slicerOptions: OptionDefinition[] = [
{ name: 'verbose', alias: 'v', type: Boolean, description: 'Run with verbose logging' },
{ name: 'help', alias: 'h', type: Boolean, description: 'Print this usage guide' },
{ name: 'input', alias: 'i', type: String, description: '(Required) Pass a single file to slice', multiple: false, defaultOption: true, typeLabel: '{underline files}' },
{ name: 'input-is-text', alias: 'r', type: Boolean, description: 'Indicate, that the input is *not* a file, but R code to directly consume' },
{ name: 'diff', alias: 'd', type: Boolean, description: 'This requires ansi-output and only works if the api option is not set. It visualizes the slice as a diff.' },
{ name: 'criterion', alias: 'c', type: String, description: '(Required) Slicing criterion either in the form {underline line:col} or {underline line@variable}, multiple can be separated by \'{bold ;}\'. If you do not want to slice but only process the file, pass an empty string.', multiple: false },
{ name: 'stats', alias: 's', type: Boolean, description: 'Print stats and write them to {italic <output>.stats} (runtimes etc.)', multiple: false },
{ name: 'output', alias: 'o', type: String, description: 'File to write all the generated quads to (defaults to the commandline)', typeLabel: '{underline file}' },
{ name: 'api', type: Boolean, description: 'Instead of human-readable output, dump a lot of json with the results of all intermediate steps.' },
{ name: 'verbose', alias: 'v', type: Boolean, description: 'Run with verbose logging' },
{ name: 'help', alias: 'h', type: Boolean, description: 'Print this usage guide' },
{ name: 'input', alias: 'i', type: String, description: '(Required) Pass a single file to slice', multiple: false, defaultOption: true, typeLabel: '{underline files}' },
{ name: 'input-is-text', alias: 'r', type: Boolean, description: 'Indicate, that the input is *not* a file, but R code to directly consume' },
{ name: 'diff', alias: 'd', type: Boolean, description: 'This requires ansi-output and only works if the api option is not set. It visualizes the slice as a diff.' },
{ name: 'criterion', alias: 'c', type: String, description: '(Required) Slicing criterion either in the form {underline line:col} or {underline line@variable}, multiple can be separated by \'{bold ;}\'. If you do not want to slice but only process the file, pass an empty string.', multiple: false },
{ name: 'stats', alias: 's', type: Boolean, description: 'Print stats and write them to {italic <output>.stats} (runtimes etc.)', multiple: false },
{ name: 'output', alias: 'o', type: String, description: 'File to write all the generated quads to (defaults to the commandline)', typeLabel: '{underline file}' },
{ name: 'no-magic-comments', alias: 'm', type: Boolean, description: 'Disable the effects of magic comments which force lines to be included.' },
{ name: 'api', type: Boolean, description: 'Instead of human-readable output, dump a lot of json with the results of all intermediate steps.' },
]

const featureNameList = [...allFeatureNames].map(s => `"${s}"`).join(', ')
Expand Down
8 changes: 7 additions & 1 deletion src/cli/repl/server/connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ import { DataflowGraph } from '../../../dataflow/graph/graph'
import * as tmp from 'tmp'
import fs from 'fs'
import type { RParseRequests } from '../../../r-bridge/retriever'
import { autoSelectLibrary } from '../../../reconstruct/auto-select/auto-select-defaults'
import { makeMagicCommentHandler } from '../../../reconstruct/auto-select/magic-comments'

/**
* Each connection handles a single client, answering to its requests.
Expand Down Expand Up @@ -216,7 +218,11 @@ export class FlowRServerConnection {
return
}

fileInformation.pipeline.updateRequest({ criterion: request.criterion })
fileInformation.pipeline.updateRequest({
criterion: request.criterion,
autoSelectIf: request.noMagicComments ? autoSelectLibrary : makeMagicCommentHandler(autoSelectLibrary)
})

void fileInformation.pipeline.allRemainingSteps(true).then(results => {
sendMessage<SliceResponseMessage>(this.socket, {
type: 'response-slice',
Expand Down
10 changes: 7 additions & 3 deletions src/cli/repl/server/messages/slice.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@ import type { DEFAULT_DATAFLOW_PIPELINE, DEFAULT_SLICING_PIPELINE } from '../../
* can slice the respective file given the respective criteria.
*/
export interface SliceRequestMessage extends IdMessageBase {
type: 'request-slice',
type: 'request-slice',
/** The {@link FileAnalysisRequestMessage#filetoken} of the file/data to slice */
filetoken: string,
filetoken: string,
/** The slicing criteria to use */
criterion: SlicingCriteria
criterion: SlicingCriteria,
/**
* Should the magic comments (force-including lines within the slice) be ignored?
*/
noMagicComments?: boolean
}

export const requestSliceMessage: MessageDefinition<SliceRequestMessage> = {
Expand Down
28 changes: 18 additions & 10 deletions src/cli/slicer-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,20 @@ import type { SingleSlicingCriterion, SlicingCriteria } from '../slicing/criteri
import type { ReconstructionResult } from '../reconstruct/reconstruct'
import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id'
import { stats2string } from '../benchmark/stats/print'
import { autoSelectLibrary } from '../reconstruct/auto-select/auto-select-defaults'
import { makeMagicCommentHandler } from '../reconstruct/auto-select/magic-comments'

export interface SlicerCliOptions {
verbose: boolean
help: boolean
input: string | undefined
criterion: string | undefined
output: string | undefined
diff: boolean
'input-is-text': boolean
stats: boolean
api: boolean
verbose: boolean
help: boolean
input: string | undefined
criterion: string | undefined
output: string | undefined
diff: boolean
'input-is-text': boolean
stats: boolean
api: boolean
'no-magic-comments': boolean
}


Expand All @@ -41,7 +44,12 @@ async function getSlice() {
guard(options.input !== undefined, 'input must be given')
guard(options.criterion !== undefined, 'a slicing criterion must be given')

await slicer.init(options['input-is-text'] ? { request: 'text', content: options.input } : { request: 'file', content: options.input })
await slicer.init(
options['input-is-text']
? { request: 'text', content: options.input }
: { request: 'file', content: options.input },
options['no-magic-comments'] ? autoSelectLibrary : makeMagicCommentHandler(autoSelectLibrary)
)

let mappedSlices: { criterion: SingleSlicingCriterion, id: NodeId }[] = []
let reconstruct: ReconstructionResult | undefined = undefined
Expand Down
2 changes: 1 addition & 1 deletion src/core/steps/all/static-slicing/10-reconstruct.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import type { IPipelineStep } from '../../pipeline-step'
import { PipelineStepStage } from '../../pipeline-step'
import type { DeepReadonly } from 'ts-essentials'
import type { SliceResult } from '../../../../slicing/static/slicer-types'
import type { AutoSelectPredicate } from '../../../../reconstruct/reconstruct'
import { reconstructToCode } from '../../../../reconstruct/reconstruct'
import type { NormalizedAst } from '../../../../r-bridge/lang-4.x/ast/model/processing/decorate'
import type { AutoSelectPredicate } from '../../../../reconstruct/auto-select/auto-select-defaults'

export interface ReconstructRequiredInput {
autoSelectIf?: AutoSelectPredicate
Expand Down
2 changes: 1 addition & 1 deletion src/core/steps/pipeline/default-pipelines.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { STATIC_SLICE } from '../all/static-slicing/00-slice'
import { NAIVE_RECONSTRUCT } from '../all/static-slicing/10-reconstruct'

export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT)
export const DEFAULT_RECONSTRUCT_PIPELINE = DEFAULT_SLICING_PIPELINE
export const DEFAULT_SLICE_AND_RECONSTRUCT_PIPELINE = DEFAULT_SLICING_PIPELINE

export const DEFAULT_DATAFLOW_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW)

Expand Down
10 changes: 5 additions & 5 deletions src/r-bridge/lang-4.x/ast/parser/json/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ export function prepareParsedData(data: string): Entry[] {
// iterate a second time to set parent-child relations (since they may be out of order in the csv)
for(const entry of ret.values()) {
if(entry.parent != RootId) {
const parent = ret.get(entry.parent)
if(parent) {
parent.children ??= []
parent.children.push(entry)
}
/** it turns out that comments may return a negative id pair to their parent */
const parent = ret.get(Math.abs(entry.parent))
guard(parent !== undefined, () => `Could not find parent ${entry.parent} for entry ${entry.id}`)
parent.children ??= []
parent.children.push(entry)
} else {
roots.push(entry)
}
Expand Down
1 change: 0 additions & 1 deletion src/r-bridge/lang-4.x/ast/parser/json/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ export const parseLog = log.getSubLogger({ name: 'ast-parser' })
export function normalize(jsonString: string, getId: IdGenerator<NoInfo> = deterministicCountingIdGenerator(0)): NormalizedAst {
const data: NormalizerData = { currentRange: undefined, currentLexeme: undefined }
const object = convertPreparedParsedData(prepareParsedData(jsonString))

return decorateAst(normalizeRootObjToAst(data, object), getId)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import type { RComment } from '../../../../model/nodes/r-comment'
export function normalizeComment(data: NormalizerData, obj: XmlBasedJson): RComment {
const { location, content } = retrieveMetaStructure(obj)
guard(content.startsWith('#'), 'comment must start with #')

return {
type: RType.Comment,
location,
Expand Down
34 changes: 34 additions & 0 deletions src/reconstruct/auto-select/auto-select-defaults.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import type { NoInfo, RNode } from '../../r-bridge/lang-4.x/ast/model/model'
import type { ParentInformation, NormalizedAst } from '../../r-bridge/lang-4.x/ast/model/processing/decorate'
import { RType } from '../../r-bridge/lang-4.x/ast/model/type'

/**
 * The structure of the predicate that should be used to determine
 * if a given normalized node should be included in the reconstructed code,
 * independent of whether it is selected by the slice or not.
 *
 * @param node    - The normalized node to decide on.
 * @param fullAst - The normalized AST handed to the predicate alongside the node
 *                  (NOTE(review): presumably the AST containing `node` - confirm against `reconstructToCode`).
 * @returns `true` if the node should be included regardless of the slice, `false` otherwise.
 *
 * @see reconstructToCode
 * @see doNotAutoSelect
 * @see autoSelectLibrary
 */
export type AutoSelectPredicate = (node: RNode<ParentInformation>, fullAst: NormalizedAst) => boolean

/**
 * The "select nothing extra" variant of the {@link AutoSelectPredicate}:
 * whatever node it is asked about, the answer is always `false`, so no
 * statement is force-included on top of what the slice already selected.
 *
 * @param _node - The node in question (deliberately ignored).
 * @returns Always `false`.
 */
export function doNotAutoSelect(_node: RNode): boolean {
	return false
}

/** Full-name match for the functions that are treated as library imports. */
const libraryFunctionCall = /^(library|require|((require|load|attach)Namespace))$/

/**
 * An auto-select predicate (see {@link AutoSelectPredicate}) that force-includes
 * library imports on a best-effort basis.
 *
 * Only *named* function calls are considered; such a call is selected when its
 * function name is exactly one of `library`, `require`, `requireNamespace`,
 * `loadNamespace`, or `attachNamespace`.
 *
 * @param node - The normalized node to inspect.
 * @returns `true` for a named call to one of the functions above, `false` for everything else.
 */
export function autoSelectLibrary<Info = NoInfo>(node: RNode<Info>): boolean {
	if(node.type === RType.FunctionCall && node.named) {
		return libraryFunctionCall.test(node.functionName.content)
	}
	return false
}
96 changes: 96 additions & 0 deletions src/reconstruct/auto-select/magic-comments.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import type { RNode } from '../../r-bridge/lang-4.x/ast/model/model'
import type { RComment } from '../../r-bridge/lang-4.x/ast/model/nodes/r-comment'
import type { NormalizedAst, ParentInformation } from '../../r-bridge/lang-4.x/ast/model/processing/decorate'
import { visitAst } from '../../r-bridge/lang-4.x/ast/model/processing/visitor'
import { RType } from '../../r-bridge/lang-4.x/ast/model/type'
import { guard } from '../../util/assert'
import type { SourceRange } from '../../util/range'
import type { AutoSelectPredicate } from './auto-select-defaults'

/**
 * Retrieve the source range of a node: its own `location` if present,
 * otherwise the `fullRange` recorded in its `info`.
 * Fails the `guard` if the node carries neither.
 */
function getLoc(node: RNode): SourceRange {
	// read the fallback up-front so a node without `info` is rejected even if it has a location
	const fallback = node.info.fullRange
	const range = node.location ?? fallback
	guard(range !== undefined, 'TODO: support location-less nodes!')
	return range
}

/**
 * Handler for one kind of magic comment.
 *
 * It receives the comment node `n` and a `stack` of numbers that is shared between
 * the handlers (used to remember where an `include_start` was seen until the matching
 * `include_end` pops it again).
 * It returns the line numbers to force-include, or `undefined` if the comment itself
 * does not select any line.
 */
type MagicCommentConsumer = (n: RComment, stack: number[]) => number[] | undefined

/**
 * Maps the name of a magic comment (the part following `flowr@`) to the handler
 * which decides what lines that comment force-includes.
 *
 * All line numbers are taken from the first component of the comment's source range.
 */
const magicCommentIdMapper: Record<string, MagicCommentConsumer> = {
	/** Selects the line directly after the comment. */
	'include_next_line': (n: RComment) => {
		return [getLoc(n)[0] + 1]
	},
	/** Selects the line of the comment itself. */
	'include_this_line': (n: RComment) => {
		return [getLoc(n)[0]]
	},
	/** Opens a block: remembers the first line *after* the comment, selects nothing yet. */
	'include_start': (n: RComment, stack: number[]) => {
		stack.push(getLoc(n)[0] + 1)
		return undefined
	},
	/**
	 * Closes the innermost open block and selects every line strictly between the
	 * start and the end comment. Fails the `guard` if no block is open.
	 */
	'include_end': (n: RComment, stack: number[]) => {
		const to = getLoc(n)[0]
		guard(stack.length >= 1, `mismatched magic start and end at ${to}`)
		const from = stack.pop() as number
		// Collect with push instead of pre-sizing the array: if start and end are on
		// adjacent lines (or the same line) the range is empty, and a pre-sized
		// `new Array(to - from - 1)` would throw a RangeError for the negative length.
		const ret: number[] = []
		for(let i = from; i < to; i++) {
			ret.push(i)
		}
		return ret
	}
}

/**
 * Matches the magic-comment trigger and captures the command name after `flowr@`.
 * Anchored, because a magic comment has to *start* with the trigger.
 */
const commentTriggerRegex = /^ flowr@(\w+)/

/**
 * This takes an optional predicate and returns an auto-select predicate for {@link reconstructToCode},
 * which will automatically include lines marked by these special comments!
 * Please make sure to create one handler per source, as the set of marked lines is computed
 * from the AST passed with the first call and cached for all subsequent calls.
 *
 * We support two formats:
 * - Line comments in the form of `# flowr@include_next_line` or `# flowr@include_this_line`.
 * - Block comments which start with `# flowr@include_start` and end with `# flowr@include_end`.
 *   This supports nesting, but they have to appear on a single line.
 *
 * Please note that these comments have to start exactly with this content to work.
 *
 * The scan fails (via `guard`) on a malformed or unknown magic comment and on unbalanced
 * `include_start`/`include_end` pairs. A failed scan is not cached, so every call made
 * with the offending source reports the problem again.
 *
 * @param and - Predicate to compose this one with; it is consulted for every node that is not on a marked line.
 *              If you do not pass a predicate, you may assume composition with {@link doNotAutoSelect}.
 * @returns A predicate which is `true` for nodes starting on a marked line and otherwise defers to `and`.
 */
export function makeMagicCommentHandler(and?: AutoSelectPredicate): AutoSelectPredicate {
	let lines: Set<number> | undefined = undefined
	return (node: RNode<ParentInformation>, normalizedAst: NormalizedAst) => {
		if(!lines) {
			const collected = new Set<number>()
			const startLineStack: number[] = []
			visitAst(normalizedAst.ast, n => {
				const comments = n.info.additionalTokens
				if(!comments) {
					return
				}
				for(const c of comments) {
					if(c.type !== RType.Comment || !c.content.startsWith(' flowr@')) {
						continue
					}
					const match = commentTriggerRegex.exec(c.content)
					guard(match !== null, `invalid magic comment: ${c.content}`)
					const name = match[1]
					// only accept handlers registered on the mapper itself, so names like
					// `toString` or `constructor` do not resolve to inherited prototype members
					const idMapper = Object.prototype.hasOwnProperty.call(magicCommentIdMapper, name) ? magicCommentIdMapper[name] : undefined
					guard(idMapper !== undefined, `unknown magic comment: ${name}`)
					for(const l of idMapper(c, startLineStack) ?? []) {
						collected.add(l)
					}
				}
			})
			guard(startLineStack.length === 0, `mismatched magic start and end at end of file (${JSON.stringify(startLineStack)})`)
			// publish the cache only after a complete, successful scan
			lines = collected
		}
		const loc = node.location ?? node.info.fullRange

		if(loc && lines.has(loc[0])) {
			return true
		}
		return and?.(node, normalizedAst) ?? false
	}
}


Loading

0 comments on commit 28fe74f

Please sign in to comment.