diff --git a/astro.config.mjs b/astro.config.mjs
index e2db76fa..7e4ce164 100644
--- a/astro.config.mjs
+++ b/astro.config.mjs
@@ -61,6 +61,25 @@ export default defineConfig({
{ label: 'Run graph algorithms', link: '/get-started/graph-algorithms' },
]
},
+ {
+ label: 'Import data',
+ collapsed: true,
+ items: [
+ { label: 'Overview', link: '/import' },
+ { label: 'Copy from CSV', link: '/import/csv' },
+ { label: 'Copy from Parquet', link: '/import/parquet' },
+ { label: 'Copy from NumPy', link: '/import/npy', badge: { text: 'Experimental', variant: 'danger'}},
+ ]
+ },
+ {
+ label: 'Export data',
+ collapsed: true,
+ items: [
+ { label: 'Overview', link: '/export' },
+ { label: 'Copy to CSV', link: '/export/csv' },
+ { label: 'Copy to Parquet', link: '/export/parquet' },
+ ]
+ },
{
label: 'Visualize graphs',
link: '/visualization',
@@ -75,7 +94,7 @@ export default defineConfig({
{ label: 'Create your first RDF graph', link: '/rdf-graphs/example-rdfgraph' },
{ label: 'Query an RDF graph in Cypher', link: '/rdf-graphs/rdfgraphs-overview' },
{ label: 'RDF bulk data import', link: '/rdf-graphs/rdf-import' },
- { label: 'Example RDFGraphs', link: '/rdf-graphs/rdfgraphs-repo' },
+ { label: 'Preloaded RDFGraphs', link: '/rdf-graphs/rdfgraphs-repo' },
],
autogenerate: { directory: 'reference' },
},
diff --git a/src/content/docs/export/csv.md b/src/content/docs/export/csv.md
new file mode 100644
index 00000000..5300fe94
--- /dev/null
+++ b/src/content/docs/export/csv.md
@@ -0,0 +1,48 @@
+---
+title: Export CSV
+---
+
+The `COPY TO` clause can export query results to a CSV file, and is used as follows:
+
+```cypher
+COPY (MATCH (u:User) RETURN u.*) TO 'user.csv' (header=true);
+```
+
+The CSV file consists of the following fields:
+
+```csv
+u.name,u.age
+Adam,30
+Karissa,40
+Zhang,50
+Noura,25
+```
+
+Nested data types like lists and structs will be represented as strings within their respective columns.
+
+Available options are:
+
+
+
+| Option | Default Value | Description |
+|:------------------------:|:-----------------------:|---------------------------------------------------------------------------|
+| `ESCAPE` | `\` | Character used to escape special characters in CSV |
+| `DELIM` | `,` | Character that separates fields in the CSV |
+| `QUOTE` | `"` | Character used to enclose fields containing special characters or spaces |
+| `HEADER`                 | `false`                 | Indicates whether to output a header row                                   |
+
+
+
+Another example is shown below.
+
+```cypher
+COPY (MATCH (a:User)-[f:Follows]->(b:User) RETURN a.name, f.since, b.name) TO 'follows.csv' (header=false, delim='|');
+```
+
+This outputs the following results to `follows.csv`:
+```csv
+Adam|2020|Karissa
+Adam|2020|Zhang
+Karissa|2021|Zhang
+Zhang|2022|Noura
+```
diff --git a/src/content/docs/export/index.mdx b/src/content/docs/export/index.mdx
new file mode 100644
index 00000000..078cb163
--- /dev/null
+++ b/src/content/docs/export/index.mdx
@@ -0,0 +1,23 @@
+---
+title: Overview
+---
+
+import { LinkCard } from '@astrojs/starlight/components';
+
+The `COPY TO` command allows you to export query results directly to the specified file format. This
+is useful when you want to persist the results of a query to be used in other systems, or for
+archiving purposes.
+
+## `COPY TO` CSV
+
+
+
+## `COPY TO` Parquet
+
+
diff --git a/src/content/docs/export/parquet.md b/src/content/docs/export/parquet.md
new file mode 100644
index 00000000..92c2ee6e
--- /dev/null
+++ b/src/content/docs/export/parquet.md
@@ -0,0 +1,33 @@
+---
+title: Export Parquet
+---
+
+The `COPY TO` clause can export query results to a Parquet file. It can be combined with a subquery
+and used as shown below.
+
+```cypher
+COPY (MATCH (u:User) return u.*) TO 'user.parquet';
+```
+
+The `LOAD FROM` clause can be used to scan the Parquet file and to verify that the export worked:
+
+```cypher
+> LOAD FROM 'user.parquet' RETURN *;
+-------------------
+| u.name | u.age |
+-------------------
+| Adam | 30 |
+-------------------
+| Karissa | 40 |
+-------------------
+| Zhang | 50 |
+-------------------
+| Noura | 25 |
+-------------------
+```
+
+:::caution[Notes]
+- Exporting [fixed list](../../cypher/data-types/list) or [variant](../../cypher/data-types/variant) data types to Parquet is not yet supported.
+- [UNION](../../cypher/data-types/union) is exported as a [STRUCT](../../cypher/data-types/struct), which is the internal representation of the `Union` data type.
+- Currently, only Snappy compression is supported for exports.
+:::
diff --git a/src/content/docs/import/csv.md b/src/content/docs/import/csv.md
new file mode 100644
index 00000000..a104e725
--- /dev/null
+++ b/src/content/docs/import/csv.md
@@ -0,0 +1,116 @@
+---
+title: Import data from CSV files
+---
+
+You can bulk import data to node and relationship tables from CSV files
+using the `COPY FROM` command. It is **highly recommended** to use `COPY FROM` if you are creating large
+databases.
+
+The CSV import configuration can be manually set by specifying the parameters inside `( )` at the
+end of the `COPY FROM` clause. The following table shows the configuration parameters supported:
+
+| Parameter | Description | Default Value |
+|:-----|:-----|:-----|
+| `HEADER` | Whether the first line of the CSV file is the header. Can be true or false. | false |
+| `DELIM` | Character that separates different columns in a line. | `,`|
+| `QUOTE` | Character to start a string quote. | `"` |
+| `ESCAPE` | Character within string quotes to escape QUOTE and other characters, e.g., a line break. See the important note below about line breaks. | `\` |
+| `LIST_BEGIN`/`LIST_END` | For the [list data type](../cypher/data-types/list), the delimiters to specify list begin and list end characters | `[`, `]`|
+| `PARALLEL` | Read csv files in parallel or not | true |
+
+The example below specifies that the CSV delimiter is `|` and also that the header row exists.
+
+```cypher
+COPY User FROM "user.csv" (HEADER=true, DELIM="|");
+```
+
+:::caution[Guidelines]
+- **Start with empty tables:** `COPY FROM` commands can be used when your tables are completely empty. So you should use `COPY FROM` immediately after you define the schemas of your tables.
+- **Copy nodes before relationships:** In order to copy a relationship table `R` from a csv file `RFile`, the nodes that appear in `RFile` need to
+already exist in the database (either imported in bulk or inserted through Cypher data manipulation commands).
+- **Wrap strings inside quotes:** Kùzu will accept strings in string columns both with and without quotes, though it's recommended to wrap strings in quotes to avoid any ambiguity with delimiters.
+- **Avoid leading and trailing spaces**: As per the CSV standard, Kùzu does not ignore leading and trailing spaces (e.g., if you input ` 213 ` for
+  an integer value, it will be read as a malformed integer and the corresponding node/rel property will be set to NULL).
+:::
+
+## Import to node table
+
+Create a node table `User` as follows:
+
+```cypher
+CREATE NODE TABLE User(name STRING, age INT64, reg_date DATE, PRIMARY KEY (name))
+```
+
+The CSV file `user.csv` contains the following fields:
+```csv
+name,age,reg_date
+Adam,30,2020-06-22
+Karissa,40,2019-05-12
+...
+```
+
+The following statement will load `user.csv` into the `User` table.
+
+```cypher
+COPY User FROM "user.csv" (header=true);
+```
+
+## Import to relationship table
+
+When loading into a relationship table, Kùzu assumes the first two columns in the file are:
+
+- `FROM` Node Column: The primary key of the `FROM` nodes.
+- `TO` Node Column: The primary key of the `TO` nodes.
+
+The rest of the columns correspond to relationship properties.
+
+Create a relationship table `Follows` using the following Cypher query:
+
+```cypher
+CREATE REL TABLE Follows(FROM User TO User, since DATE)
+```
+
+This reads data from the below CSV file `follows.csv`:
+```csv
+Adam,Karissa,2010-01-30
+Karissa,Michelle,2014-01-30
+...
+```
+
+The following statement loads the `follows.csv` file into a `Follows` table.
+
+```cypher
+COPY Follows FROM "follows.csv";
+```
+
+Note that the header wasn't present in the CSV file, hence the `header` parameter is not set.
+
+## Import multiple files to a single table
+
+It is common practice to divide a large CSV file into several smaller files for cleaner data management.
+Kùzu can read multiple files with the same structure, consolidating their data into a single node or relationship table.
+You can specify that multiple files are loaded in the following ways:
+
+### Glob pattern
+
+This is similar to the Unix [glob](https://man7.org/linux/man-pages/man7/glob.7.html) pattern, where you specify
+file paths that match a given pattern. The following wildcard characters are supported:
+
+| Wildcard | Description |
+| :-----------: | ----------- |
+| `*` | match any number of any characters (including none) |
+| `?` | match any single character |
+| `[abc]` | match any one of the characters enclosed within the brackets |
+| `[a-z]` | match any one of the characters within the range |
+
+```cypher
+COPY User FROM "User*.csv"
+```
+
+### List of files
+
+Alternatively, you can just specify a list of files to be loaded.
+
+```cypher
+COPY User FROM ["User0.csv", "User0.csv", "User2.csv"]
+```
diff --git a/src/content/docs/import/index.mdx b/src/content/docs/import/index.mdx
new file mode 100644
index 00000000..90576366
--- /dev/null
+++ b/src/content/docs/import/index.mdx
@@ -0,0 +1,46 @@
+---
+title: Overview
+---
+
+import { LinkCard } from '@astrojs/starlight/components';
+
+There are multiple ways to import data in Kùzu. The only prerequisite for inserting data
+into a database is that you first create a graph schema, i.e., the structure of your node and relationship tables.
+
+For small graphs (a few thousand nodes), the `CREATE` and `MERGE` [Cypher clauses](../cypher/data-manipulation-clauses)
+can be used to insert nodes and
+relationships. These are similar to SQL's `INSERT` statements, but bear in mind that they are slower than the bulk import
+options shown below. The `CREATE`/`MERGE` clauses are intended to do small additions or updates on a sporadic basis.
+
+In general, the recommended approach is to use `COPY FROM` (rather than creating or
+merging nodes one by one), for larger graphs of millions of nodes and beyond. For now, the `COPY FROM`
+commands can only be used when tables are empty.
+
+## `COPY FROM` CSV
+
+The `COPY FROM` command is used to bulk import data from a CSV file into a node or relationship table.
+See the linked card below for more information and examples.
+
+
+
+## `COPY FROM` Parquet
+
+Similar to CSV, the `COPY FROM` command is used to bulk import data from a Parquet file into a node or relationship table.
+See the linked card below for more information and examples.
+
+
+
+## `COPY FROM` NumPy
+
+Importing from NumPy is a specific use case that allows you to import numeric data from a NumPy file into a node table.
+
+
\ No newline at end of file
diff --git a/src/content/docs/import/npy.md b/src/content/docs/import/npy.md
new file mode 100644
index 00000000..7c0c1338
--- /dev/null
+++ b/src/content/docs/import/npy.md
@@ -0,0 +1,44 @@
+---
+title: Import NumPy
+---
+
+The `.npy` format is the standard binary file format in [NumPy](https://numpy.org/) for persisting a
+single arbitrary NumPy array on disk.
+
+The primary use case for bulk loading NumPy files is to load
+large node features or vectors that are stored in `.npy` format. You can use the `COPY FROM` statement
+to import a set of `*.npy` files into a node table.
+
+:::caution[Notes]
+This is an experimental feature that will evolve. Currently, this feature has the following constraints:
+- **Import to node table only**: For now, Kùzu supports loading `.npy` files into **node tables** only.
+- **Start with empty tables**: `COPY FROM` commands can be used when your tables are completely empty.
+So you should use `COPY FROM` immediately after you define the schemas of your tables.
+- **NPY file mapped to column**: Each `.npy` file will be loaded as a node table column. So, in the `COPY FROM` statement, the
+number of `.npy` files must be equal to the number of columns defined in DDL.
+- **Numerical types only**: A `.npy` file can only contain numerical values.
+:::
+
+## Import to node table
+Consider a `Paper` table with an `id` column, a feature column that is an embedding (vector) with 768 dimensions,
+a `year` column and a `label` column as ground truth. We first define the schema with the following statement:
+
+```cypher
+CREATE NODE TABLE Paper(id INT64, feat FLOAT[768], year INT64, label DOUBLE, PRIMARY KEY(id));
+```
+
+The raw data is stored in `.npy` format where each column is represented as a NumPy array on disk. The files are
+specified below:
+
+```
+node_id.npy", "node_feat_f32.npy", "node_year.npy", "node_label.npy"
+```
+
+We can copy the files with the following statement:
+
+```cypher
+COPY Paper FROM ("node_id.npy", "node_feat_f32.npy", "node_year.npy", "node_label.npy") BY COLUMN;
+```
+
+As stated before, the number of `*.npy` files must equal the number of columns, and must also be
+specified in the same order as they are defined in the DDL.
diff --git a/src/content/docs/import/parquet.md b/src/content/docs/import/parquet.md
new file mode 100644
index 00000000..dd362a01
--- /dev/null
+++ b/src/content/docs/import/parquet.md
@@ -0,0 +1,93 @@
+---
+title: Import Parquet
+---
+
+[Apache Parquet](https://parquet.apache.org/docs/) is an open source, column-oriented persistent storage format
+designed for efficient data storage and retrieval. Kùzu supports bulk data import from Parquet files
+using the `COPY FROM` command.
+
+:::caution[Notes]
+Parquet files store schema information in their metadata, so you don't need to explicitly handle columns
+based on type, unlike in CSV. However, the same rules apply:
+
+- **Start with empty tables:** `COPY FROM` commands can be used when your tables are completely empty.
+So you should use `COPY FROM` immediately after you define the schemas of your tables.
+- **Copy nodes before relationships:** In order to copy a relationship table `R` from a Parquet file `RFile`,
+the nodes that appear in `RFile` need to already exist in the database (either imported in bulk or
+inserted through Cypher data manipulation commands).
+:::
+
+## Import to node table
+
+Similar to CSV import, the order of columns in a Parquet file needs to match the order of predefined
+properties for node tables in the catalog, i.e. the order used when defining the schema of a node table.
+
+The following example is for a file named `user.parquet`. The output is obtained by using `print(pyarrow.Table)`.
+```py
+pyarrow.Table
+name: string
+age: int64
+----
+name: [["Adam","Karissa","Zhang","Noura"]]
+age: [[30,40,50,25]]
+```
+
+To load this Parquet file into a `User` table, simply run:
+
+```cypher
+COPY User FROM "user.Parquet";
+```
+
+## Import to relationship table
+
+Similar to CSV import, the first two columns for a relationship file should be the `from` and the `to` columns
+that represent existing nodes' primary keys.
+
+The following example is for a file named `follows.parquet`. The output is obtained by using `print(pyarrow.Table)`.
+
+```py
+pyarrow.Table
+from: string
+to: string
+since: int64
+----
+from: [["Adam","Adam","Karissa","Zhang"]]
+to: [["Karissa","Zhang","Zhang","Noura"]]
+since: [[2020,2020,2021,2022]]
+```
+
+To load this Parquet file into a `Follows` table, simply run:
+
+```cypher
+COPY Follows FROM "follows.Parquet";
+```
+
+## Import multiple files to a single table
+
+It is common practice to divide large Parquet files into several smaller files for cleaner data management.
+Kùzu can read multiple files with the same structure, consolidating their data into a single node or relationship table.
+You can specify that multiple files are loaded in the following ways:
+
+### Glob pattern
+
+This is similar to the Unix [glob](https://man7.org/linux/man-pages/man7/glob.7.html) pattern, where you specify
+file paths that match a given pattern. The following wildcard characters are supported:
+
+| Wildcard | Description |
+| :-----------: | ----------- |
+| `*` | match any number of any characters (including none) |
+| `?` | match any single character |
+| `[abc]` | match any one of the characters enclosed within the brackets |
+| `[a-z]` | match any one of the characters within the range |
+
+```cypher
+COPY User FROM "User*.parquet"
+```
+
+### List of files
+
+Alternatively, you can just specify a list of files to be loaded.
+
+```cypher
+COPY User FROM ["User0.parquet", "User1.parquet", "User2.parquet"]
+```