Skip to content

Commit

Permalink
Dealing With Cycles in the Quad-Export (#490)
Browse files Browse the repository at this point in the history
* feat-fix: rudimentary cycle detection for quad generation

* feat-fix: another cycle guard round
  • Loading branch information
EagleoutIce committed Nov 10, 2023
1 parent f738e10 commit 4d3f42c
Showing 1 changed file with 22 additions and 2 deletions.
24 changes: 22 additions & 2 deletions src/util/quads.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ export function serialize2quads(obj: RecordForQuad, config: QuadSerializationCon
guard(isObjectOrArray(obj), 'cannot serialize non-object to rdf!')
guard(!Array.isArray(obj), 'cannot serialize arrays (must wrap in object)!')

store = new Set()
const quads: Quad[] = []
serializeObject(obj, quads, useConfig)
return writer.quadsToString(quads)
Expand Down Expand Up @@ -148,7 +149,7 @@ export function graph2quads<AdditionalVertexInformation extends MergeableRecord,

function processArrayEntries(key: string, value: unknown[], obj: DataForQuad, quads: Quad[], config: Required<QuadSerializationConfiguration>) {
for(const [index, element] of value.entries()) {
if(isObjectOrArray(element)) {
if(element !== null && element !== undefined && isObjectOrArray(element)) {
const context= retrieveContext(config.context, obj)
quads.push(quad(
namedNode(domain + config.getId(obj, context)),
Expand Down Expand Up @@ -200,6 +201,9 @@ function processObjectEntry(key: string, value: unknown, obj: DataForQuad, quads
if(config.ignore(key, value)) {
return
}
if(guardCycle(value)) {
return
}
if(isObjectOrArray(value)) {
if(Array.isArray(value)) {
processArrayEntries(key, value, obj, quads, config)
Expand All @@ -211,8 +215,24 @@ function processObjectEntry(key: string, value: unknown, obj: DataForQuad, quads
}
}

/**
 * Ids of all objects that were already handed to {@link guardCycle} during the current serialization run.
 * This is module-level state: `serialize2quads` replaces it with a fresh set before it starts serializing.
 */
let store: Set<unknown> = new Set()

/**
 * Rudimentary cycle guard for the quad export, keyed on the `id` property of the visited value.
 *
 * The first time a value carrying a given `id` is passed in, the id is remembered and `false` is returned.
 * Every later value with the same `id` yields `true`, telling the caller to skip it.
 * Note that this treats *any* repeated id as a cycle, so a value that is merely referenced twice
 * (without being part of an actual cycle) is skipped on its second occurrence as well.
 *
 * @param obj - arbitrary value encountered during serialization
 * @returns `true` if a value with the same `id` was seen before; `false` otherwise, including
 *          for `null`, `undefined`, primitives, and objects without an `id` property.
 */
function guardCycle(obj: unknown): boolean {
	// The `in` operator throws a TypeError for `null` and for primitives, so rule those out explicitly
	// instead of relying solely on `isObjectOrArray` (which presumably accepts `null`, as `typeof null === 'object'`).
	if(typeof obj !== 'object' || obj === null || !isObjectOrArray(obj) || !('id' in obj)) {
		return false
	}
	const id = obj.id
	if(store.has(id)) {
		return true
	}
	store.add(id)
	return false
}

function serializeObject(obj: DataForQuad | undefined | null, quads: Quad[], config: Required<QuadSerializationConfiguration>): void {
if(obj === undefined || obj === null) {
if(obj === null || obj === undefined) {
return
}
if(guardCycle(obj)) {
return
}
if(obj instanceof Map) {
Expand Down

2 comments on commit 4d3f42c

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: 4d3f42c Previous: 266b087 Ratio
Total per-file 5452.034364727273 ms (7648.052447720122) 3179.838332181818 ms (3672.286438980555) 1.71
Retrieve AST from R code 131.11573036363637 ms (274.7388620975581) 67.57201345454546 ms (135.00742353958452) 1.94
Normalize R AST 168.6592694090909 ms (280.7599881126948) 92.62022559090909 ms (152.11002159362633) 1.82
Produce dataflow information 120.39669122727274 ms (303.48300690880615) 65.08133054545455 ms (166.00596407178642) 1.85
Total per-slice 3.331009226431365 ms (2.6218332648516167) 1.8579071164832888 ms (1.2960021301432842) 1.79
Static slicing 2.4383710870331305 ms (2.4442887947012215) 1.3913231260599883 ms (1.2140789790282374) 1.75
Reconstruct code 0.8609467545664561 ms (0.6132870903161539) 0.44898673756392055 ms (0.21332097434246836) 1.92
failed to reconstruct/re-parse 0 # 0 # NaN
times hit threshold 0 # 0 # NaN
reduction (characters) 0.7329390759026896 # 0.7329390759026896 # 1
reduction (normalized tokens) 0.720988345209971 # 0.720988345209971 # 1

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: 4d3f42c Previous: 266b087 Ratio
Total per-file 8153.48934922 ms (9797.388236416144) 6628.01911806 ms (7920.64451462989) 1.23
Retrieve AST from R code 127.83018528 ms (116.50122001837688) 99.94474708 ms (91.9298826769113) 1.28
Normalize R AST 181.39552738 ms (121.5366507193692) 145.95498125999998 ms (97.05366533791602) 1.24
Produce dataflow information 269.1350243 ms (468.0595270612567) 202.67506718 ms (350.35799969129454) 1.33
Total per-slice 13.721631894704784 ms (23.7674140618194) 11.209356158365976 ms (19.217066969533313) 1.22
Static slicing 12.868697870416845 ms (23.6332104901076) 10.588980339286781 ms (19.12716492275005) 1.22
Reconstruct code 0.8361486913306615 ms (0.5091525709406034) 0.6050062891914059 ms (0.3596537358962295) 1.38
failed to reconstruct/re-parse 9 # 9 # 1
times hit threshold 967 # 967 # 1
reduction (characters) 0.8987761232201357 # 0.8987761232201357 # 1
reduction (normalized tokens) 0.8582032343145828 # 0.8582032343145828 # 1

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.