From 17ef3fcdf27b6b359be4c17b6d94a7c66336eec8 Mon Sep 17 00:00:00 2001 From: xiyang Date: Tue, 28 Nov 2023 17:39:36 +0800 Subject: [PATCH] Add nquad file type, add scan multiple rdf files --- dataset/rdf/spb1k/copy.cypher | 4 + .../spb1k/generatedCreativeWorks-000001.nq | 294 ++++++++++++++++ .../spb1k/generatedCreativeWorks-000002.nq | 191 +++++++++++ .../spb1k/generatedCreativeWorks-000003.nq | 296 +++++++++++++++++ .../spb1k/generatedCreativeWorks-000004.nq | 314 ++++++++++++++++++ dataset/rdf/spb1k/schema.cypher | 1 + src/binder/bind/bind_copy.cpp | 14 +- src/binder/binder.cpp | 9 +- src/common/copier_config/reader_config.cpp | 6 + .../common/copier_config/reader_config.h | 9 +- .../persistent/reader/csv/serial_csv_reader.h | 2 +- .../persistent/reader/rdf/rdf_reader.h | 41 ++- src/planner/plan/plan_copy.cpp | 3 +- src/processor/map/map_copy_from.cpp | 7 +- .../persistent/reader/rdf/rdf_reader.cpp | 117 ++++--- test/graph_test/base_graph_test.cpp | 3 +- test/test_files/rdf/spb1k.test | 16 + 17 files changed, 1257 insertions(+), 70 deletions(-) create mode 100644 dataset/rdf/spb1k/copy.cypher create mode 100644 dataset/rdf/spb1k/generatedCreativeWorks-000001.nq create mode 100644 dataset/rdf/spb1k/generatedCreativeWorks-000002.nq create mode 100644 dataset/rdf/spb1k/generatedCreativeWorks-000003.nq create mode 100644 dataset/rdf/spb1k/generatedCreativeWorks-000004.nq create mode 100644 dataset/rdf/spb1k/schema.cypher create mode 100644 test/test_files/rdf/spb1k.test diff --git a/dataset/rdf/spb1k/copy.cypher b/dataset/rdf/spb1k/copy.cypher new file mode 100644 index 00000000000..d305d1b2d4e --- /dev/null +++ b/dataset/rdf/spb1k/copy.cypher @@ -0,0 +1,4 @@ +COPY spb_resource_t FROM "dataset/rdf/spb1k/*.nq"; +COPY spb_literal_t FROM glob("dataset/rdf/spb1k/*.nq"); +COPY spb_resource_triples_t FROM glob("dataset/rdf/spb1k/*.nq"); +COPY spb_literal_triples_t FROM glob("dataset/rdf/spb1k/*.nq"); diff --git a/dataset/rdf/spb1k/generatedCreativeWorks-000001.nq b/dataset/rdf/spb1k/generatedCreativeWorks-000001.nq new file mode 100644 index 00000000000..29ffb5fdcc5 --- /dev/null +++ b/dataset/rdf/spb1k/generatedCreativeWorks-000001.nq @@ -0,0 +1,294 @@ + . + "Guillermo Lohmann Villena heckled unfairer sauce settee abuts nervy pathogen stowaways curtseyed payloads." . + "replaceable derived collectible avails hereto other previewers reviewers subplot oinks." . + . + "pun commending indirect handbags putt unimaginable comedic stinks showcased ballerina topsides bloods jujubes." . + . + . + . + . + . + . + . + . + . + "true"^^ . + . + "2011-02-10T09:58:41.784+08:00"^^ . + "2011-09-29T18:18:54.872+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/3#id" . + . + . + . + . + . + . + . + "Caltex Woolworths presidency wit appalled foolhardiness definitive towns ideologies breeze anguishes radiogram." . + "ticketing kiddie squeaked spastics outraged howls condominium barrenness synchs carrier." . + . + "lasciviousness exploited indorsing hesitancy pagan cells masking jimmying penury dad hallucination ventral refurnishing mars whispers wallowed eradication cheesiest." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-11-28T17:11:27.281+08:00"^^ . + "2011-12-03T14:40:57.575+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/8#id" . + . + . + . + . + . + . + . + . + . + "Jesse Bartley Milam regale sniffing perceptiveness redressing humorist snip denaturing unsubstantial socked underpaid." . + "ah impatiences displeasure's thins attraction part restaurants glands phenotype vamped." . + . + "badinage's tilting boastfulness resigning islets source purblind startles curvacious encryption bimbos malleable impale cardinal amazes dominants wainscotted friendliness narrowly thick sheep." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + "2011-05-19T03:01:44.536+08:00"^^ . + "2011-06-07T11:43:25.640+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/14#id" . + . + . + . + . + . + "Syl Apps turtleneck disbarred cactus plague transposes serf unavoidable passing pulsars chippers." . + "disbanded guardhouse copilots burlesqued sociopaths magnanimous graft swanked voodooed inflammation." . + . + "olive's typhus valedictory vasectomy dizzies rabbinate slugging classifiable tangential lamps rant inaccuracies." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-06-22T08:27:32.751+08:00"^^ . + "2011-09-25T23:30:59.601+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/17#id" . + . + . + . + . + . + . + . + . + . + "Ikram Akhtar macerates mazurka dryest stencilling demolition amperage premiss's ribs sparest emoluments." . + "reclassify jeopardise magazine outgoes costars amnesiacs culminates eavesdropping orients compatibly." . + . + "cocooning ovaries gambler vacancies manipulate butte hewers broiler starlight lions whatever memoranda condemns cochlea disconnect arbors enmity pouring." . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-02-27T11:31:11.624+08:00"^^ . + "2011-11-27T02:59:59.775+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/20#id" . + . + . + . + . + . + . + . + . + . + "Pam Peters subtracts rebelling persecutor nighties eyeballs mattering calmest charmers melodic whittled." . + "lodgers defensive quarterdeck microscopic bottling unearthing forefronts deceivers tarry commodore." . + . + "sowers avails scants thereupon voluminously intended medically softie handymen scads contributor ruggedest lamp handfuls overtake." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-10-26T01:53:42.403+08:00"^^ . + "2012-02-20T09:29:47.314+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/25#id" . + . + . + . + . + . + "Tomás Valdemar Hintnaus furrowed incapability rattlesnakes shinier books canvassing nosey wiliness slakes spoilsports." . + "maharajas blacklisting saps calumniated bearable offend précis spontaneously pusses tilt." . + . + "eucalypti choked vats roams filming overdue gushier coward snaffled incise downgrades stupefied." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-04-21T05:42:10.554+08:00"^^ . + "2012-02-19T22:33:07.738+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/30#id" . + . + . + . + "Life Line Screening nicks displeased pissed allowing taster plants crudities balalaikas undercharge worry." . + "incense destination creaks persuasion escarole marauders browbeating lifesaver cockade continuing." . + . + "memberships rodents womanhood commingling empties exotics micron captious steamrolling wrench kickback dispatching reanimating disagreement bordering untying hokum toddlers." . + . + . + . + . + . + . + . + . + . + "true"^^ . + . + "2011-11-10T11:18:33.514+08:00"^^ . + "2011-11-20T05:33:46.343+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/32#id" . + . + . + . + . + . + . + . + "Jimi Simmons armies misdeals cooperative menagerie nonrepresentational contravened hooray revealings distantly opus." . + "keynoting kilobytes proffers undervalued italic levered volume's suppresses genetics peopled." . + . + "healthier lubricated fortune glorify scoreboard rewards teeming cheep needier arenas dissimulating squirmiest savvier sympathiser meld signifies virginal bats etymologists reptilian accolade lucratively." . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-02-17T17:58:19.290+08:00"^^ . + "2011-12-17T05:38:38.130+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/34#id" . + . + . + . + . + . + . + . + . + . + "Vita e pensiero (publishing) avowals informative relaid telemarketing manses disgusted standard blindness pigment bailiff." . + "joyousness shabbily employer unhealthful shamefulness dawdling invade foolishness groundswell stopcocks." . + . + "fleeces ungentlemanly deadlines ostracise whiten pedestal airing mistimes shrouding waywardly disappointment lactose kibitzes chainsaw suppositions misappropriated." . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + "true"^^ . + . + "2011-08-30T22:48:07.906+08:00"^^ . + "2012-01-19T04:58:06.193+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/37#id" . + . + . + . + "Brendan Mundorf deadened intentness frothier assertiveness absolves healthfully geez gorillas manifests announcing." . + "on woolgathering cesarean massing scrubbier boys decorum abated bowlegged heckled." . + . + "ogles muddies optimiser hypnotising enthralled newsboy shiftiness prairies reservation minting fireplace snoopiest straggled chunkier markers arenas expeditiously." . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-09-19T04:24:58.974+08:00"^^ . + "2012-01-21T08:10:08.987+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/42#id" . + . + . + . + . + . + . + . + . diff --git a/dataset/rdf/spb1k/generatedCreativeWorks-000002.nq b/dataset/rdf/spb1k/generatedCreativeWorks-000002.nq new file mode 100644 index 00000000000..8a91b1745f2 --- /dev/null +++ b/dataset/rdf/spb1k/generatedCreativeWorks-000002.nq @@ -0,0 +1,191 @@ + . + "Jeffrey Kofman freethinkers performs blarneying shadowy assuaged fête signatories abrasion alkaloid paper." . + "animation weatherises handy foreordains bookworms foregrounding notified manes diverted doted." . + . + "lunacies resultant extincts resources fastening typewrote immorally violation blubber gems giving spreadsheets vectoring fellows." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-04-17T16:17:03.747+08:00"^^ . + "2011-05-26T17:35:51.517+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/1#id" . + . + . + . + . + . + "Lasse Stromstedt wither document pellet gaggles elaborateness curvacious taffy veranda baffles venom." . + "usurping boron usurpation lankiness baptising hermetically assume supplier brayed knocks." . + . + "reiterating tatted platen crap owners ritually reprisals secedes colleague independently perfected misapplied skateboard dance checkmating regarded gouges infiltration disappointment olive's outrages sunscreens episcopacy." . + . + . + . + . + . + . + . + . + . + "true"^^ . + . + "2011-12-26T16:16:54.536+08:00"^^ . + "2012-03-10T01:59:57.720+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/9#id" . + . + . + . + . + . + . + . + "Edgewood Management maddeningly amorously dandies contrails scoffs woodchucks qualities turds reclusive pressures." . + "brightness pothook oligarchies zealous secretaries walkouts orifices bravuras superintends panoramic." . + . + "crib localisation publicising swivelled succumbs edgy treasures dine slowdowns prefabbing appendices contiguous." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-09-22T19:50:00.005+08:00"^^ . + "2012-01-12T16:27:02.952+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/13#id" . + . + . + . + . + . + "Seiko Instruments roughhousing authoritarians dimples officiously procrastinating contents meals fragmentation antifreeze deepest." . + "obediently respired sundials cages vacillate stoles hallucination clapper committees belabors." . + . + "razing exonerate phonetics commending staph kroner recoiling quiz screwier hiker miscalculates sluice boulders ameba substituted brews." . + . + . + . + . + . + . + "false"^^ . + . + "2011-12-24T08:03:40.986+08:00"^^ . + "2012-02-10T18:07:26.357+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/19#id" . + . + . + . + "Edward R. Dewey immensities spotlights bold anterior excreted tramples standard scattered coal hunch." . + "scuffed dissing alterations landed blared hafnium circumventing birettas actuates sidewalk." . + . + "terminology probe roseate palpitated glittery erection vasts alliance communed caftan doorway overexposes demonstrable orphanages exonerate imprudent vegetating avaricious tabulation engulfed." . + . + . + . + "false"^^ . + . + . + "2011-04-01T01:04:11.062+08:00"^^ . + "2011-08-31T15:34:27.151+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/26#id" . + . + . + . + "Roger Bolton unrecognisable miscounts mitre fathering annual albums concern midwinter televises blushes." . + "ginkgo those closures yeshivot monoxide homey commendable mediated matter schist." . + . + "reluctantly huckleberry fearless whens explicable refrained bootblacks plod magnum's courtrooms entirely chuckhole solenoids plaits windshields hated." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-08-17T00:09:57.462+08:00"^^ . + "2011-09-06T07:29:33.633+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/29#id" . + . + . + . + . + . + . + . + "Orange Games decides tapered justification shipping libels unhealthful attrition fixation everyplace creepy." . + "napkins blasts corrector bereaving neurotically typify mortgager's brooms hared cheated." . + . + "dressiest hoggish slurred antiquates splinted hoodwinking license auxiliary hackney fillets chewy acidity sponges sinful." . + . + . + . + "false"^^ . + . + . + "2011-10-10T22:30:35.137+08:00"^^ . + "2011-11-28T15:09:57.815+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/36#id" . + . + . + . + . + . + . + . + . + . + "Raymond Massey nonaligned improvement rapidest antagonism recovers showery joggle prioresses irradiates mulishly." . + "identically pacts transferred archaism intern defeatism bumper's penlites whirred brutality." . + . + "barricades jetted triads shunned substance cohabiting enrolment misunderstands untold." . + . + . + . + . + . + . + "false"^^ . + . + "2011-05-10T18:51:04.650+08:00"^^ . + "2011-09-02T12:03:47.993+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/39#id" . + . + . + . + . + . + . diff --git a/dataset/rdf/spb1k/generatedCreativeWorks-000003.nq b/dataset/rdf/spb1k/generatedCreativeWorks-000003.nq new file mode 100644 index 00000000000..001cebf8f1e --- /dev/null +++ b/dataset/rdf/spb1k/generatedCreativeWorks-000003.nq @@ -0,0 +1,296 @@ + . + "James Burke arbitration pun relearn pasturing minima wenches cheapskates enhance oversimplifying returnables." . + "including unscrambles zithers punctuation inventoried allergies steppingstones subjectivity semiprivate revise." . + . + "computer scrubby headings paddled protesters guardsmen sleaze slowest occluded earmark sailboats volume's." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-05-21T02:35:34.416+08:00"^^ . + "2011-07-10T19:44:02.935+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/4#id" . + . + . + . + . + . + . + . + . + . + "Khowar Academy receptiveness dressed erectly galvanised sandbagging adulterated mismanages musk albums sinuous." . + "borers twitch advertise reproducing shipping undeceiving gnarliest listlessness weevils trafficking." . + . + "hypersensitivities worshipful quiches warranties benchmarks rectify nailing restaurants sanatoria shortness nuptials." . + . + . + . + . + . + . + "true"^^ . + . + "2011-04-26T18:22:29.287+08:00"^^ . + "2012-03-14T22:42:42.922+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/6#id" . + . + . + . + . + . + . + . + "Fritz Spindler panhandles lasts staking dedicate groundless monarch convulses sidekicks insurmountable depressant." . + "rapidest fewest bagatelles hexes ouster snobs copies bossily conservatory beacon." . + . + "wart dizzies ugly skedaddle corn tobacconists maxing sellouts parenthetical asked geez unacquainted downright shimmy abbreviation expense epileptic thickets." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-10-02T10:45:46.938+08:00"^^ . + "2012-01-03T21:42:59.727+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/11#id" . + . + . + . + . + . + . + . + . + . + "Spirit Airlines buys begetting couples patronised tuber foils idled proclivities preaches gabby." . + "hoagie bloods curviest hallucinogens absenteeism spacewalk sublet griped postcode arched." . + . + "swanked idolising fates lighthouse modified bogeys face inoculating perforates somnolent intangible smokers debtors utilitarian gouty unspoilt body emplacement enlarges." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-03-28T00:04:56.432+08:00"^^ . + "2011-09-08T04:31:45.143+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/16#id" . + . + . + . + . + . + . + . + "Mems-ID melodramas unhooks sculptors carillons mopping bind collectible ruffle lifesaving adiabatic." . + "showed impatiences harpoon reservoir imposingly vend rims depressed congealed hurdled." . + . + "arteriosclerosis brawlers outnumbers misinterpreting tizzy indecipherable cinematographers hoisted furthers chows precipitants instructors trimarans tarragon stammered reconditioning." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-03-01T03:13:47.304+08:00"^^ . + "2011-05-04T08:25:34.129+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/18#id" . + . + . + . + . + . + . + . + . + . + "Salvador Fidalgo vestige squares oxbows topless floury mulch eighths callisthenics rubbishes canonise." . + "unintentionally fever progressed bittern illegitimate clacking reelect friskiest apparatuses restorers." . + . + "quinces purification joggers overindulging willowy meridian rungs redistricted sheared allocating snitches breeze." . + . + . + . + . + . + . + "false"^^ . + . + "2011-08-19T06:27:14.969+08:00"^^ . + "2012-01-06T03:58:22.309+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/21#id" . + . + . + . + . + . + "Yachting Pages expurgations paradigms harkened exporter superficiality spread militant pancreas halves radiologists." . + "epitomised awkwardness polarising saboteur scuba change incubation officialdom downier livelier." . + . + "bitch toilers corkscrewed imploding abjectly enliven wallops chirps roustabout." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-02-02T02:31:54.295+08:00"^^ . + "2011-09-30T15:12:15.948+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/23#id" . + . + . + . + . + . + . + . + "Brown Aeronautical Company maritime trafficking necklace ragas banyan scapegoats entities captivates breezes cemented." . + "aesthete animus proves cogitating dwarfs wowed neighboured tonne republic châtelaine." . + . + "alright furnishings excels anthracite splashiest shuckses clans kidnaper visioned libels prod inspect rummaged neighbourhoods anaesthetised ribs." . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-02-02T15:13:20.343+08:00"^^ . + "2011-09-15T06:28:04.980+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/28#id" . + . + . + . + "Johannes Agricola kiss archly joshes vacuum thistle divots crack condiment rotundas investigate." . + "diagnose enthrone reservedly breaststroke wadded sedater successor ancestress undertook tyroes." . + . + "duped darns unscrupulous meant annihilated topology sunfishes savages unclasps soars vase wispier." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + "2011-01-10T12:15:43.203+08:00"^^ . + "2011-02-12T23:54:01.672+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/31#id" . + . + . + . + . + . + "Mike Senese partizans terms accusatives bacteriologists desensitisation sensation facetious borsch kookiness engulfed." . + "electrocute lonelier amalgamations overbooked postnatal insurers palimony wayfarer thirdly investment." . + . + "methought hip binaries wheedled wedges patent fricasseed hallucinatory rapper toffees shysters limousines tarot acclimate volunteers saviour camerawoman fugitives embittered hailstones tubing embezzles." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-05-27T07:00:10.527+08:00"^^ . + "2012-01-25T10:55:03.441+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/35#id" . + . + . + . + . + . + "Kim Wells racket teaks distillations phloem gores suffered particularity debonair probable deliberately." . + "uncleanest pushover oversimplifying abasement moonscapes rubberising crannies disturbed intermezzos incise." . + . + "motivation emancipated combatants distantly blissful equivocated priestesses misquoted mulishly ligature imminent." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-05-29T08:40:25.936+08:00"^^ . + "2011-11-01T23:35:17.662+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/40#id" . + . + . + . + . + . + . diff --git a/dataset/rdf/spb1k/generatedCreativeWorks-000004.nq b/dataset/rdf/spb1k/generatedCreativeWorks-000004.nq new file mode 100644 index 00000000000..c4cf917f0c6 --- /dev/null +++ b/dataset/rdf/spb1k/generatedCreativeWorks-000004.nq @@ -0,0 +1,314 @@ + . + "Erv Dusak interstates insidious lodes winsome seeming diplomat colorless drably ammunition mediated." . + "tragedy deficiency knocks shaykhs wailing blond crematoriums chlorinate firebug gated." . + . + "evidences rough tripod emblazons meetinghouses inextinguishable expressionless trudging regionalisms crossbones." . + . + . + . + "false"^^ . + . + "2011-11-02T13:37:06.641+08:00"^^ . + "2012-08-07T04:51:56.865+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/2#id" . + . + . + . + . + . + "Johann Bernhard Von Rechberg Und Rothenlowen phonier counselings brimstone prefacing hoopla concessionaire bleeping encoring nimbi extinct." . + "glutting expressionist coverts toddy upped crouch fenced promenading pillories cappuccino." . + . + "allotted funereal expended pesticide deckhand appoints inters dysfunctions insurance yoked catalogued trombones retypes tromping conquer tweets filaments swish's parlaying ninepin clitorises bills earnests." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-08-02T19:40:54.627+08:00"^^ . + "2011-10-01T11:08:40.652+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/5#id" . + . + . + . + . + . + . + . + "Avantgarde Music amazes allergists shadowbox equinoxes automobile chortle mothball crutches sparseness snuggest." . + "eons milquetoast embezzled sheers shrubbiest mists writhing irradiates eroded lush." . + . + "inflammatory transitioned hub douche materialistic chilli daughters waterfowls alphas avenging cornflowers grizzled practices mu vanes descending shambles pompoms apostates stare overpopulation cosponsored." . + . + . + . + . + . + . + "false"^^ . + . + "2011-10-28T14:37:40.581+08:00"^^ . + "2012-11-28T18:04:42.933+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/7#id" . + . + . + . + "Aldo Dolcetti salutary depot eradication policy exalting twaddled traversed synchronise insurrectionist fiddly." . + "poppas matzo instructively hydrotherapy utilisation tow stratified uncultivated modernity catchy." . + . + "basted whatchamacallit ninepin expounding creepy sunder warmly keeling." . + . + . + . + . + . + . + . + . + . + . + . + . + "true"^^ . + . + "2011-07-31T23:25:09.566+08:00"^^ . + "2011-09-12T20:49:40.544+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/10#id" . + . + . + . + . + . + . + . + . + . + "Barbara Cupisti doting intimation politeness stamp pulchritude unfeasible retreats dance minima gingerly." . + "relational airbrushes rooms orchid enfolding presumably undemanding weakly vowel microfiches." . + . + "committed nonempty chartered stringers lineups circumstances rearward inquiringly petting." . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + "true"^^ . + . + "2011-02-07T18:18:39.482+08:00"^^ . + "2011-11-05T14:18:04.661+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/12#id" . + . + . + . + . + . + "Coles Online helpful uphills ripped monarch improvable photojournalism balsa seaboard environs crags." . + "clamor thickets trellises tasks moodily expeditiously reconciled heritages fidgeting integuments." . + . + "jiggle raunchiest heated mooches integrals vitalises fresco dreamed anorexia obscurity straightness hotbeds mortar continuing sidewalk doodle auburn chancels executable fluorescing swashbuckling mystique vanes refers exactly." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-06-19T18:56:21.499+08:00"^^ . + "2011-10-06T07:40:21.143+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/15#id" . + . + . + . + "James Collier encountered besotted lancet pronounce retrofitted suggestions tangibles pacifiers sunroof exigencies." . + "chatterbox squaws hostess gouger adds boarding pew functional hedonists demolition." . + . + "blissful parting butterier marquise utilitarian sirens deathblow seismologist intrusts defied memoranda crookeder." . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + . + "true"^^ . + . + "2011-09-23T00:12:50.791+08:00"^^ . + "2012-07-23T18:06:42.882+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/22#id" . + . + . + . + . + . + . + . + "Frances Cleveland Axtell unselfishly videotaping soap organisms limousines islands resurgences conspiracy k keys." . + "stratospheres radio shuttlecocking baptising submissive freedom campsite radioisotopes auxiliary subscript." . + . + "copeck sweetener menses blithe dis forestry crease confining rectify travestied corrupts burble clumsy outgrowths jettisoning foreshadowing planets admiring complacent parries allow bumps." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-11-21T07:33:45.677+08:00"^^ . + "2012-11-17T05:14:23.297+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/24#id" . + . + . + . + . + . + . + . + . + . + "Ian Eskelin oversexed helmsman horizons debriefings cloudy transitted thickeners disenfranchising westward moodily." . + "motherland bleakest cession bookstores hovels entertained include bulb fountainheads peerage." . + . + "rewording shelving fault usually repatriates lube ambidextrously droll regalia pointillism feverish vibrations paragraphing chickweed stodgiest nonconformists coherent finest excoriations knitter." . + . + . + . + . + . + . + "true"^^ . + . + "2011-06-13T11:02:39.528+08:00"^^ . + "2011-07-05T12:11:57.080+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/27#id" . + . + . + . + . + . + "Robert Oliver Cunningham uncorrelated overtures unconvincing trademarks chars godliness soulfully pushy ransack monotheism." . + "invalidate vamped specification blunderbuss smudged madrases paying publicise fairs messes." . + . + "outmanoeuvres foxtrotting nervelessly darken pronto manias spiffier deserter tweets cactus." . + . + . + . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + . + "2011-05-05T19:32:09.521+08:00"^^ . + "2012-03-28T03:35:19.876+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/33#id" . + . + . + . + . + . + . + . + "Federal Records swashes carotid retriever pigeonhole's worrywarts savants quills morose flashed starter's." . + "stutterer eardrums articulately buzzers clearinghouses pedagog centipede permafrost quirk's abash." . + . + "bossily autographed dishonouring aggrieve blameworthy gurgle dressiest sours dogtrotting conglomeration altimeters exhaustive upped defrosted specced padded pled shoguns gratifies cloth landlady schmuck fir backslapper." . + . + . + . + "false"^^ . + . + . + "2011-02-23T23:51:11.836+08:00"^^ . + "2011-08-06T10:43:43.186+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/38#id" . + . + . + . + "Grady Higginbotham atrocious heaping insoles appositives figured omitting cloudiest coddling impotent menfolk." . + "apathetic becalm temperaments knackwurst abolition soulless acetone incriminatory citric elicits." . + . + "disadvantaging hashish trimmings posthumous groundwork diagram awe flit hermits ladder primp parsnip grant ooze bowmen sheltered indisputable doting windshields bludgeons." . + . + . + . + . + . + . + . + . + . + "false"^^ . + . + "2011-11-28T19:51:49.102+08:00"^^ . + "2012-10-04T15:14:55.328+08:00"^^ . + . + "thumbnail atlText for CW http://www.bbc.co.uk/context/41#id" . + . + . + . + . + . + . + . + . diff --git a/dataset/rdf/spb1k/schema.cypher b/dataset/rdf/spb1k/schema.cypher new file mode 100644 index 00000000000..f5adb0b4bf7 --- /dev/null +++ b/dataset/rdf/spb1k/schema.cypher @@ -0,0 +1 @@ +CREATE RDF GRAPH spb; diff --git a/src/binder/bind/bind_copy.cpp b/src/binder/bind/bind_copy.cpp index 68a67799d6f..88b44fcffd8 100644 --- a/src/binder/bind/bind_copy.cpp +++ b/src/binder/bind/bind_copy.cpp @@ -88,15 +88,21 @@ std::unique_ptr Binder::bindCopyFromClause(const Statement& stat } switch (tableSchema->tableType) { case TableType::NODE: - if (readerConfig->fileType == FileType::TURTLE) { + switch (readerConfig->fileType) { + case FileType::TURTLE: + case FileType::NQUADS: { return bindCopyRdfNodeFrom(statement, std::move(readerConfig), tableSchema); - } else { + } + default: return bindCopyNodeFrom(statement, std::move(readerConfig), tableSchema); } case TableType::REL: { - if (readerConfig->fileType == FileType::TURTLE) { + switch (readerConfig->fileType) { + case FileType::TURTLE: + case FileType::NQUADS: { return bindCopyRdfRelFrom(statement, std::move(readerConfig), tableSchema); - } else { + } + default: return bindCopyRelFrom(statement, std::move(readerConfig), tableSchema); } } diff --git a/src/binder/binder.cpp b/src/binder/binder.cpp index b2f89b37cd7..4764306f2f5 100644 --- a/src/binder/binder.cpp +++ b/src/binder/binder.cpp @@ -217,19 +217,20 @@ function::TableFunction* Binder::getScanFunction(FileType fileType, const Reader inputTypes.push_back(&stringType); auto functions = catalog.getBuiltInFunctions(); switch (fileType) { - case common::FileType::PARQUET: { + case FileType::PARQUET: { func = functions->matchScalarFunction(READ_PARQUET_FUNC_NAME, inputTypes); } break; - case common::FileType::NPY: { + case FileType::NPY: { func = functions->matchScalarFunction(READ_NPY_FUNC_NAME, inputTypes); } break; - case common::FileType::CSV: { + case FileType::CSV: { auto csvConfig = reinterpret_cast(config.extraConfig.get()); func = functions->matchScalarFunction( csvConfig->parallel ? READ_CSV_PARALLEL_FUNC_NAME : READ_CSV_SERIAL_FUNC_NAME, inputTypes); } break; - case common::FileType::TURTLE: { + case FileType::NQUADS: + case FileType::TURTLE: { func = functions->matchScalarFunction(READ_RDF_FUNC_NAME, inputTypes); } break; default: diff --git a/src/common/copier_config/reader_config.cpp b/src/common/copier_config/reader_config.cpp index 9d033c1565f..2c38c01df00 100644 --- a/src/common/copier_config/reader_config.cpp +++ b/src/common/copier_config/reader_config.cpp @@ -19,6 +19,9 @@ FileType FileTypeUtils::getFileTypeFromExtension(std::string_view extension) { if (extension == ".ttl") { return FileType::TURTLE; } + if (extension == ".nq") { + return FileType::NQUADS; + } throw CopyException(std::string("Unsupported file type ").append(extension)); } @@ -39,6 +42,9 @@ std::string FileTypeUtils::toString(FileType fileType) { case FileType::TURTLE: { return "TURTLE"; } + case FileType::NQUADS: { + return "NQUADS"; + } default: { KU_UNREACHABLE; } diff --git a/src/include/common/copier_config/reader_config.h b/src/include/common/copier_config/reader_config.h index cc9e69d13d2..6ca49222f2d 100644 --- a/src/include/common/copier_config/reader_config.h +++ b/src/include/common/copier_config/reader_config.h @@ -7,7 +7,14 @@ namespace kuzu { namespace common { -enum class FileType : uint8_t { UNKNOWN = 0, CSV = 1, PARQUET = 2, NPY = 3, TURTLE = 4 }; +enum class FileType : uint8_t { + UNKNOWN = 0, + CSV = 1, + PARQUET = 2, + NPY = 3, + TURTLE = 4, // Terse triples http://www.w3.org/TR/turtle + NQUADS = 5 // Line-based quads http://www.w3.org/TR/n-quads/ +}; struct FileTypeUtils { static FileType getFileTypeFromExtension(std::string_view extension); diff --git a/src/include/processor/operator/persistent/reader/csv/serial_csv_reader.h b/src/include/processor/operator/persistent/reader/csv/serial_csv_reader.h index 70daa9fc437..dd8a2dac266 100644 --- a/src/include/processor/operator/persistent/reader/csv/serial_csv_reader.h +++ b/src/include/processor/operator/persistent/reader/csv/serial_csv_reader.h @@ -24,7 +24,7 @@ class SerialCSVReader final : public BaseCSVReader { }; struct SerialCSVScanSharedState final : public function::ScanSharedState { - explicit SerialCSVScanSharedState( + SerialCSVScanSharedState( common::ReaderConfig readerConfig, uint64_t numRows, uint64_t numColumns) : ScanSharedState{std::move(readerConfig), numRows}, numColumns{numColumns} { initReader(); diff --git a/src/include/processor/operator/persistent/reader/rdf/rdf_reader.h b/src/include/processor/operator/persistent/reader/rdf/rdf_reader.h index 77cc5084d27..9ec0771cdbf 100644 --- a/src/include/processor/operator/persistent/reader/rdf/rdf_reader.h +++ b/src/include/processor/operator/persistent/reader/rdf/rdf_reader.h @@ -6,39 +6,49 @@ #include "function/table_functions.h" #include "function/table_functions/bind_data.h" #include "function/table_functions/bind_input.h" +#include "function/table_functions/scan_functions.h" #include "serd.h" namespace kuzu { namespace processor { -class RDFReader { +class RdfReader { public: - RDFReader(std::string filePath, const common::RdfReaderConfig& config); + RdfReader( + std::string filePath, common::FileType fileType, const common::RdfReaderConfig& config) + : filePath{std::move(filePath)}, fileType{fileType}, mode{config.mode}, index{config.index}, + rowOffset{0}, vectorSize{0}, sVector{nullptr}, pVector{nullptr}, oVector{nullptr}, + status{SERD_SUCCESS} {} - ~RDFReader(); + ~RdfReader(); + + inline void initReader() { initReader(prefixHandle, statementHandle); } + inline void initCountReader() { initReader(nullptr, countStatementHandle); } common::offset_t read(common::DataChunk* dataChunkToRead); common::offset_t countLine(); private: static SerdStatus errorHandle(void* handle, const SerdError* error); - static SerdStatus readerStatementSink(void* handle, SerdStatementFlags flags, - const SerdNode* graph, const SerdNode* subject, const SerdNode* predicate, - const SerdNode* object, const SerdNode* object_datatype, const SerdNode* object_lang); - static SerdStatus prefixSink(void* handle, const SerdNode* name, const SerdNode* uri); + static SerdStatus statementHandle(void* handle, SerdStatementFlags flags, const SerdNode* graph, + const SerdNode* subject, const SerdNode* predicate, const SerdNode* object, + const SerdNode* object_datatype, const SerdNode* object_lang); + static SerdStatus prefixHandle(void* handle, const SerdNode* name, const SerdNode* uri); - static SerdStatus counterStatementSink(void* handle, SerdStatementFlags flags, + static SerdStatus countStatementHandle(void* handle, SerdStatementFlags flags, const SerdNode* graph, const SerdNode* subject, const SerdNode* predicate, const SerdNode* object, const SerdNode* object_datatype, const SerdNode* object_lang); + void initReader(const SerdPrefixSink prefixSink_, const SerdStatementSink statementSink_); + private: const std::string filePath; + common::FileType fileType; common::RdfReaderMode mode; storage::PrimaryKeyIndex* index; FILE* fp; SerdReader* reader; - SerdReader* counter; // TODO(Xiyang): use prefix to expand CURIE. const char* currentPrefix; @@ -51,8 +61,17 @@ class RDFReader { common::ValueVector* oVector; // object }; -struct RdfScanLocalState final : public function::TableFuncLocalState { - std::unique_ptr reader; +struct RdfScanSharedState final : public function::ScanSharedState { + std::unique_ptr reader; + + RdfScanSharedState(common::ReaderConfig readerConfig, uint64_t numRows) + : ScanSharedState{readerConfig, numRows} { + initReader(); + } + + void read(common::DataChunk& dataChunk); + + void initReader(); }; struct RdfScan { diff --git a/src/planner/plan/plan_copy.cpp b/src/planner/plan/plan_copy.cpp index 4fac446c758..8f6abd556b0 100644 --- a/src/planner/plan/plan_copy.cpp +++ b/src/planner/plan/plan_copy.cpp @@ -36,6 +36,7 @@ static void appendPartitioner(BoundCopyFromInfo* copyFromInfo, LogicalPlan& plan payloads.push_back(copyFromInfo->fileScanInfo->offset); // TODO(Xiyang): Merge TURTLE case with other data types. switch (fileType) { + case FileType::NQUADS: case FileType::TURTLE: { auto extraInfo = reinterpret_cast(copyFromInfo->extraInfo.get()); infos.push_back(std::make_unique( @@ -83,7 +84,7 @@ std::unique_ptr Planner::planCopyFrom(const BoundStatement& stateme QueryPlanner::appendScanFile(copyFromInfo->fileScanInfo.get(), *plan); auto tableType = copyFromInfo->tableSchema->tableType; if (tableType == TableType::REL) { - if (fileType != FileType::TURTLE) { + if (fileType != FileType::TURTLE && fileType != FileType::NQUADS) { auto extraInfo = dynamic_cast(copyFromInfo->extraInfo.get()); std::vector> infos; auto srcNodeTableSchema = dynamic_cast(extraInfo->srcTableSchema); diff --git a/src/processor/map/map_copy_from.cpp b/src/processor/map/map_copy_from.cpp index fc0be2c604a..b54ece4db69 100644 --- a/src/processor/map/map_copy_from.cpp +++ b/src/processor/map/map_copy_from.cpp @@ -130,9 +130,10 @@ std::unique_ptr PlanMapper::mapCopyNodeFrom(LogicalOperator* l auto readerConfig = reinterpret_cast(copyFromInfo->fileScanInfo->bindData.get()) ->config; - if (readerConfig.fileType == FileType::TURTLE && - reinterpret_cast(readerConfig.extraConfig.get())->mode == - RdfReaderMode::RESOURCE) { + bool isRdfFile = + readerConfig.fileType == FileType::TURTLE || readerConfig.fileType == FileType::NQUADS; + if (isRdfFile && reinterpret_cast(readerConfig.extraConfig.get())->mode == + RdfReaderMode::RESOURCE) { copyNode = std::make_unique(sharedState, std::move(info), std::make_unique(copyFrom->getSchema()), std::move(prevOperator), getOperatorID(), copyFrom->getExpressionsForPrinting()); diff --git a/src/processor/operator/persistent/reader/rdf/rdf_reader.cpp b/src/processor/operator/persistent/reader/rdf/rdf_reader.cpp index 6984f56f8a9..a1ea6009a9a 100644 --- a/src/processor/operator/persistent/reader/rdf/rdf_reader.cpp +++ b/src/processor/operator/persistent/reader/rdf/rdf_reader.cpp @@ -8,7 +8,6 @@ #include "common/string_format.h" #include "common/vector/value_vector.h" #include "function/cast/functions/cast_string_non_nested_functions.h" -#include "function/table_functions/scan_functions.h" #include "serd.h" #include "storage/index/hash_index.h" @@ -20,34 +19,38 @@ using namespace kuzu::function; namespace kuzu { namespace processor { -RDFReader::RDFReader(std::string filePath, const common::RdfReaderConfig& config) - : filePath{std::move(filePath)}, mode{config.mode}, index{config.index}, rowOffset{0}, - vectorSize{0}, sVector{nullptr}, pVector{nullptr}, oVector{nullptr}, status{SERD_SUCCESS} { - std::string fileName = this->filePath.substr(this->filePath.find_last_of("/\\") + 1); - fp = fopen(this->filePath.c_str(), "rb"); - reader = serd_reader_new( - SERD_TURTLE, this, nullptr, nullptr, prefixSink, readerStatementSink, nullptr); - serd_reader_set_strict(reader, false /* strict */); - serd_reader_set_error_sink(reader, errorHandle, this); - serd_reader_start_stream(reader, fp, reinterpret_cast(fileName.c_str()), true); - counter = serd_reader_new( - SERD_TURTLE, this, nullptr, nullptr, nullptr, counterStatementSink, nullptr); - serd_reader_set_strict(counter, false /* strict */); - serd_reader_set_error_sink(counter, errorHandle, this); - serd_reader_start_stream(counter, fp, reinterpret_cast(fileName.c_str()), true); -} - -RDFReader::~RDFReader() { +RdfReader::~RdfReader() { serd_reader_end_stream(reader); serd_reader_free(reader); - serd_reader_end_stream(counter); - serd_reader_free(counter); // Even if the close fails, the stream is in an undefined state. There really isn't anything we // can do. (void)fclose(fp); } -SerdStatus RDFReader::errorHandle(void* /*handle*/, const SerdError* error) { +static SerdSyntax getSerdSyntax(FileType fileType) { + switch (fileType) { + case FileType::TURTLE: + return SerdSyntax ::SERD_TURTLE; + case FileType::NQUADS: + return SerdSyntax ::SERD_NQUADS; + default: + KU_UNREACHABLE; + } +} + +void RdfReader::initReader( + const SerdPrefixSink prefixSink_, const SerdStatementSink statementSink_) { + KU_ASSERT(reader == nullptr); + fp = fopen(this->filePath.c_str(), "rb"); + reader = serd_reader_new( + getSerdSyntax(fileType), this, nullptr, nullptr, prefixSink_, statementSink_, nullptr); + serd_reader_set_strict(reader, false /* strict */); + serd_reader_set_error_sink(reader, errorHandle, this); + auto fileName = this->filePath.substr(this->filePath.find_last_of("/\\") + 1); + serd_reader_start_stream(reader, fp, reinterpret_cast(fileName.c_str()), true); +} + +SerdStatus RdfReader::errorHandle(void* /*handle*/, const SerdError* error) { if (error->status == SERD_ERR_BAD_SYNTAX) { return error->status; } @@ -61,13 +64,14 @@ SerdStatus RDFReader::errorHandle(void* /*handle*/, const SerdError* error) { static bool supportSerdType(SerdType type) { switch (type) { - case SERD_BLANK: case SERD_URI: case SERD_CURIE: case SERD_LITERAL: return true; - default: + case SERD_BLANK: return false; + default: + KU_UNREACHABLE; } } @@ -147,14 +151,14 @@ static common::offset_t lookupResourceNode(PrimaryKeyIndex* index, const SerdNod return offset; } -SerdStatus RDFReader::readerStatementSink(void* handle, SerdStatementFlags /*flags*/, +SerdStatus RdfReader::statementHandle(void* handle, SerdStatementFlags /*flags*/, const SerdNode* /*graph*/, const SerdNode* subject, const SerdNode* predicate, const SerdNode* object, const SerdNode* object_datatype, const SerdNode* /*object_lang*/) { if (!supportSerdType(subject->type) || !supportSerdType(predicate->type) || !supportSerdType(object->type)) { return SERD_SUCCESS; } - auto reader = reinterpret_cast(handle); + auto reader = reinterpret_cast(handle); switch (reader->mode) { case RdfReaderMode::RESOURCE: { @@ -204,34 +208,34 @@ SerdStatus RDFReader::readerStatementSink(void* handle, SerdStatementFlags /*fla return SERD_SUCCESS; } -SerdStatus RDFReader::counterStatementSink(void* handle, SerdStatementFlags /*flags*/, +SerdStatus RdfReader::countStatementHandle(void* handle, SerdStatementFlags /*flags*/, const SerdNode* /*graph*/, const SerdNode* subject, const SerdNode* predicate, const SerdNode* object, const SerdNode* /*object_datatype*/, const SerdNode* /*object_lang*/) { if (!supportSerdType(subject->type) || !supportSerdType(predicate->type) || !supportSerdType(object->type)) { return SERD_SUCCESS; } - auto reader = reinterpret_cast(handle); + auto reader = reinterpret_cast(handle); reader->rowOffset++; return SERD_SUCCESS; } -SerdStatus RDFReader::prefixSink(void* handle, const SerdNode* /*name*/, const SerdNode* uri) { - auto reader = reinterpret_cast(handle); +SerdStatus RdfReader::prefixHandle(void* handle, const SerdNode* /*name*/, const SerdNode* uri) { + auto reader = reinterpret_cast(handle); reader->currentPrefix = reinterpret_cast(uri->buf); return SERD_SUCCESS; } -common::offset_t RDFReader::countLine() { +common::offset_t RdfReader::countLine() { if (status) { return 0; } rowOffset = 0; while (true) { - status = serd_reader_read_chunk(counter); + status = serd_reader_read_chunk(reader); if (status == SERD_ERR_BAD_SYNTAX) { // Skip to the next line. - serd_reader_skip_until_byte(counter, (uint8_t)'\n'); + serd_reader_skip_until_byte(reader, (uint8_t)'\n'); continue; } if (status != SERD_SUCCESS) { @@ -241,7 +245,7 @@ common::offset_t RDFReader::countLine() { return rowOffset; } -offset_t RDFReader::read(DataChunk* dataChunk) { +offset_t RdfReader::read(DataChunk* dataChunk) { if (status) { return 0; } @@ -276,6 +280,30 @@ offset_t RDFReader::read(DataChunk* dataChunk) { return rowOffset; } +void RdfScanSharedState::read(common::DataChunk& dataChunk) { + std::lock_guard mtx{lock}; + do { + if (fileIdx > readerConfig.getNumFiles()) { + return; + } + if (reader->read(&dataChunk) > 0) { + return; + } + fileIdx++; + initReader(); + } while (true); +} + +void RdfScanSharedState::initReader() { + if (fileIdx >= readerConfig.getNumFiles()) { + return; + } + auto path = readerConfig.filePaths[fileIdx]; + auto rdfConfig = reinterpret_cast(readerConfig.extraConfig.get()); + reader = std::make_unique(path, readerConfig.fileType, *rdfConfig); + reader->initReader(); +} + function::function_set RdfScan::getFunctionSet() { function_set functionSet; auto func = std::make_unique(READ_RDF_FUNC_NAME, tableFunc, bindFunc, @@ -286,8 +314,8 @@ function::function_set RdfScan::getFunctionSet() { } void RdfScan::tableFunc(function::TableFunctionInput& input, common::DataChunk& outputChunk) { - auto localState = reinterpret_cast(input.localState); - localState->reader->read(&outputChunk); + auto sharedState = reinterpret_cast(input.sharedState); + sharedState->reader->read(&outputChunk); } std::unique_ptr RdfScan::bindFunc(main::ClientContext* /*context*/, @@ -302,17 +330,18 @@ std::unique_ptr RdfScan::initSharedState( function::TableFunctionInitInput& input) { auto bindData = reinterpret_cast(input.bindData); auto rdfConfig = reinterpret_cast(bindData->config.extraConfig.get()); - auto reader = make_unique(bindData->config.filePaths[0], *rdfConfig); - return std::make_unique(bindData->config, reader->countLine()); + common::row_idx_t numRows = 0u; + for (auto& path : bindData->config.filePaths) { + auto reader = std::make_unique(path, bindData->config.fileType, *rdfConfig); + reader->initCountReader(); + numRows += reader->countLine(); + } + return std::make_unique(bindData->config, numRows); } std::unique_ptr RdfScan::initLocalState( - function::TableFunctionInitInput& input, function::TableFuncSharedState* /*state*/) { - auto bindData = reinterpret_cast(input.bindData); - auto rdfConfig = reinterpret_cast(bindData->config.extraConfig.get()); - auto localState = std::make_unique(); - localState->reader = std::make_unique(bindData->config.filePaths[0], *rdfConfig); - return localState; + function::TableFunctionInitInput& /*input*/, function::TableFuncSharedState* /*state*/) { + return std::make_unique(); } } // namespace processor diff --git a/test/graph_test/base_graph_test.cpp b/test/graph_test/base_graph_test.cpp index cc14038057b..d9463c79aa1 100644 --- a/test/graph_test/base_graph_test.cpp +++ b/test/graph_test/base_graph_test.cpp @@ -54,7 +54,8 @@ void TestHelper::executeScript(const std::string& cypherScript, Connection& conn if (substrLower.find(".csv") != std::string::npos || substrLower.find(".parquet") != std::string::npos || substrLower.find(".npy") != std::string::npos || - substrLower.find(".ttl") != std::string::npos) { + substrLower.find(".ttl") != std::string::npos || + substrLower.find(".nq") != std::string::npos) { csvFilePaths.push_back(substr); } index = end + 1; diff --git a/test/test_files/rdf/spb1k.test b/test/test_files/rdf/spb1k.test new file mode 100644 index 00000000000..1cfd9fb0590 --- /dev/null +++ b/test/test_files/rdf/spb1k.test @@ -0,0 +1,16 @@ +-GROUP RdfoxExample +-DATASET TTL rdf/spb1k + +-- + +-CASE SPB + +-STATEMENT MATCH (s:spb_resource_t) RETURN COUNT(*); +---- 1 +173 +-STATEMENT MATCH (s:spb_resource_t)-[]->(:spb_resource_t) WHERE s.iri = "http://www.bbc.co.uk/things/3#id" RETURN COUNT(*); +---- 1 +16 +-STATEMENT MATCH (s:spb_resource_t)-[]->(:spb_literal_t) WHERE s.iri = "http://www.bbc.co.uk/things/3#id" RETURN COUNT(*); +---- 1 +7