Skip to content

Commit

Permalink
[regression test] add some case for json load regression test (apache…
Browse files Browse the repository at this point in the history
…#9614)

Co-authored-by: hucheng01 <hucheng01@baidu.com>
  • Loading branch information
2 people authored and minghong.zhou committed May 23, 2022
1 parent c7a0c07 commit 1656b48
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 6 deletions.
10 changes: 10 additions & 0 deletions regression-test/data/load/stream_load/simple_object_json.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{"id": 1, "city": "beijing", "code": 2345671}
{"id": 2, "city": "shanghai", "code": 2345672}
{"id": 3, "city": "guangzhou", "code": 2345673}
{"id": 4, "city": "shenzhen", "code": 2345674}
{"id": 5, "city": "hangzhou", "code": 2345675}
{"id": 6, "city": "nanjing", "code": 2345676}
{"id": 7, "city": "wuhan", "code": 2345677}
{"id": 8, "city": "chengdu", "code": 2345678}
{"id": 9, "city": "xian", "code": 2345679}
{"id": 10, "city": "hefei", "code": 23456710}
131 changes: 125 additions & 6 deletions regression-test/suites/load/stream_load/test_json_load.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ suite("test_json_load", "load") {
assertTrue(result1[0][0] == 0, "Create table should update 0 rows")

// insert 1 row to check whether the table is ok
def result2 = sql "INSERT INTO test_json_load (id, city, code) VALUES (200, 'shenzhen', 0755)"
def result2 = sql "INSERT INTO test_json_load (id, city, code) VALUES (200, 'changsha', 3456789)"
assertTrue(result2.size() == 1)
assertTrue(result2[0].size() == 1)
assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
Expand Down Expand Up @@ -96,6 +96,39 @@ suite("test_json_load", "load") {
}
}
}

// Submits a broker LOAD statement that reads `hdfsFilePath` into `testTablex`,
// keeping only the source rows that pass the pre-filter `id > 1 and id < 10`.
//
// Parameters (all interpolated verbatim into the statement):
//   testTablex   - target table name
//   label        - load label
//   hdfsFilePath - path of the file to load
//   format       - file format passed to FORMAT as
//   brokerName   - name of the broker used for the load
//   hdfsUser     - user name passed to the broker
//   hdfsPasswd   - password passed to the broker
//
// Only the statement's own result is checked here; callers wait separately
// before querying the table.
def load_from_hdfs1 = { testTablex, label, hdfsFilePath, format, brokerName, hdfsUser, hdfsPasswd ->
    def submitResult = sql """
LOAD LABEL ${label} (
DATA INFILE("${hdfsFilePath}")
INTO TABLE ${testTablex}
FORMAT as "${format}"
PRECEDING FILTER id > 1 and id < 10)
with BROKER "${brokerName}"
("username"="${hdfsUser}", "password"="${hdfsPasswd}");
"""

    // The statement is expected to return a single row with a single column
    // whose value is 0.
    assertTrue(submitResult.size() == 1)
    def onlyRow = submitResult[0]
    assertTrue(onlyRow.size() == 1)
    assertTrue(onlyRow[0] == 0, "Query OK, 0 rows affected")
}

// Submits a broker LOAD statement that reads `hdfsFilePath` into `testTablex`
// with two filters: the pre-filter `id < 10` and the where clause
// `id > 1 and id < 5`.
//
// Parameters (all interpolated verbatim into the statement):
//   testTablex   - target table name
//   label        - load label
//   hdfsFilePath - path of the file to load
//   format       - file format passed to FORMAT as
//   brokerName   - name of the broker used for the load
//   hdfsUser     - user name passed to the broker
//   hdfsPasswd   - password passed to the broker
//
// Only the statement's own result is checked here; callers wait separately
// before querying the table.
def load_from_hdfs2 = { testTablex, label, hdfsFilePath, format, brokerName, hdfsUser, hdfsPasswd ->
    def submitResult = sql """
LOAD LABEL ${label} (
DATA INFILE("${hdfsFilePath}")
INTO TABLE ${testTablex}
FORMAT as "${format}"
PRECEDING FILTER id < 10
where id > 1 and id < 5)
with BROKER "${brokerName}"
("username"="${hdfsUser}", "password"="${hdfsPasswd}");
"""

    // The statement is expected to return a single row with a single column
    // whose value is 0.
    assertTrue(submitResult.size() == 1)
    def onlyRow = submitResult[0]
    assertTrue(onlyRow.size() == 1)
    assertTrue(onlyRow[0] == 0, "Query OK, 0 rows affected")
}

// case1: import simple json
try {
Expand Down Expand Up @@ -165,8 +198,30 @@ suite("test_json_load", "load") {
} finally {
try_sql("DROP TABLE IF EXISTS ${testTable}")
}

// case4: import json and apply jsonpaths & exprs
// Only `$.id` is extracted through jsonpaths; `code` is derived by the
// expression `code = id * 10 + 200`, so id 1 -> 210 and id 10 -> 300.
try {
    sql "DROP TABLE IF EXISTS ${testTable}"

    create_test_table2.call(testTable)

    load_json_data.call(
            'true', '', 'json', 'code = id * 10 + 200', '[\"$.id\"]',
            '', '', '', 'simple_json.json')

    def loadedRows = sql "select * from test_json_load order by id"
    assertTrue(loadedRows.size() == 11)

    // first row in id order
    def firstRow = loadedRows[0]
    assertTrue(firstRow.size() == 2)
    assertTrue(firstRow[0] == 1)
    assertTrue(firstRow[1] == 210)

    // tenth row in id order
    def tenthRow = loadedRows[9]
    assertTrue(tenthRow.size() == 2)
    assertTrue(tenthRow[0] == 10)
    assertTrue(tenthRow[1] == 300)

} finally {
    try_sql("DROP TABLE IF EXISTS ${testTable}")
}

// case4: import json with line reader
// case5: import json with line reader
try {
sql "DROP TABLE IF EXISTS ${testTable}"

Expand All @@ -188,7 +243,7 @@ suite("test_json_load", "load") {
try_sql("DROP TABLE IF EXISTS ${testTable}")
}

// case5: import json use exprs and jsonpaths
// case6: import json use exprs and jsonpaths
try {
sql "DROP TABLE IF EXISTS ${testTable}"

Expand All @@ -210,7 +265,7 @@ suite("test_json_load", "load") {
try_sql("DROP TABLE IF EXISTS ${testTable}")
}

// case6: import json use where
// case7: import json use where
try {
sql "DROP TABLE IF EXISTS ${testTable}"

Expand All @@ -234,7 +289,7 @@ suite("test_json_load", "load") {
try_sql("DROP TABLE IF EXISTS ${testTable}")
}

// case7: import json use fuzzy_parse
// case8: import json use fuzzy_parse
try {
sql "DROP TABLE IF EXISTS ${testTable}"

Expand All @@ -258,7 +313,7 @@ suite("test_json_load", "load") {
try_sql("DROP TABLE IF EXISTS ${testTable}")
}

// case8: import json use json_root
// case9: import json use json_root
try {
sql "DROP TABLE IF EXISTS ${testTable}"

Expand All @@ -280,4 +335,68 @@ suite("test_json_load", "load") {
} finally {
try_sql("DROP TABLE IF EXISTS ${testTable}")
}

// If 'enableHdfs' in regression-conf.groovy has been set to true,
// the broker-load cases below are run as well.
if (enableHdfs()) {
    // Declared with `def` so they are ordinary locals of this block rather
    // than implicit variables written to the enclosing script scope.
    def brokerName = getBrokerName()
    def hdfsUser = getHdfsUser()
    def hdfsPasswd = getHdfsPasswd()
    def hdfs_file_path = uploadToHdfs "stream_load/simple_object_json.json"
    def format = "json"

    // Waits until the load job with the given label is finished, instead of
    // sleeping for a fixed time. Polls SHOW LOAD once per second for at most
    // 60 seconds, stops early when the job is FINISHED or CANCELLED, and
    // fails the test unless the last observed state is FINISHED.
    def wait_for_load_finished = { loadLabel ->
        def maxWaitMs = 60000
        def pollIntervalMs = 1000
        def lastState = null
        for (def waitedMs = 0; waitedMs <= maxWaitMs; waitedMs += pollIntervalMs) {
            def jobs = sql "SHOW LOAD WHERE LABEL = '${loadLabel}'"
            // NOTE(review): assumes the third column of SHOW LOAD is the job
            // State — confirm against the Doris version under test.
            lastState = jobs.size() > 0 ? jobs[0][2] : null
            if (lastState == "FINISHED" || lastState == "CANCELLED") {
                break
            }
            sleep(pollIntervalMs)
        }
        assertTrue(lastState == "FINISHED",
                "Load " + loadLabel + " should be FINISHED, last state: " + lastState)
    }

    // case10: import json use pre-filter exprs
    // Pre-filter keeps ids 2..9 (8 rows); together with the row inserted
    // when the table is created, 9 rows are expected.
    try {
        sql "DROP TABLE IF EXISTS ${testTable}"

        create_test_table1.call(testTable)

        def test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
        load_from_hdfs1.call(testTable, test_load_label, hdfs_file_path, format,
                brokerName, hdfsUser, hdfsPasswd)

        // wait for the load to finish
        wait_for_load_finished.call(test_load_label)

        def result3 = sql "select * from test_json_load order by id"
        assertTrue(result3.size() == 9)
        assertTrue(result3[0].size() == 3)
        assertTrue(result3[0][0] == 2)
        assertTrue(result3[0][1] == "shanghai")
        assertTrue(result3[0][2] == 2345672)
        assertTrue(result3[7].size() == 3)
        assertTrue(result3[7][0] == 9)
        assertTrue(result3[7][1] == "xian")
        assertTrue(result3[7][2] == 2345679)
    } finally {
        try_sql("DROP TABLE IF EXISTS ${testTable}")
    }

    // case11: import json use pre-filter and where exprs
    // Pre-filter `id < 10` plus where `id > 1 and id < 5` keeps ids 2..4
    // (3 rows); together with the row inserted when the table is created,
    // 4 rows are expected.
    try {
        sql "DROP TABLE IF EXISTS ${testTable}"

        create_test_table1.call(testTable)

        def test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
        load_from_hdfs2.call(testTable, test_load_label, hdfs_file_path, format,
                brokerName, hdfsUser, hdfsPasswd)

        // wait for the load to finish
        wait_for_load_finished.call(test_load_label)

        def result3 = sql "select * from test_json_load order by id"
        assertTrue(result3.size() == 4)
        assertTrue(result3[0].size() == 3)
        assertTrue(result3[0][0] == 2)
        assertTrue(result3[0][1] == "shanghai")
        assertTrue(result3[0][2] == 2345672)
        assertTrue(result3[2].size() == 3)
        assertTrue(result3[2][0] == 4)
        assertTrue(result3[2][1] == "shenzhen")
        assertTrue(result3[2][2] == 2345674)
    } finally {
        try_sql("DROP TABLE IF EXISTS ${testTable}")
    }
}
}

0 comments on commit 1656b48

Please sign in to comment.