diff --git a/BigQuery/src/LoadJobConfiguration.php b/BigQuery/src/LoadJobConfiguration.php index 5ee7f1fcda1..e83de20f3f8 100644 --- a/BigQuery/src/LoadJobConfiguration.php +++ b/BigQuery/src/LoadJobConfiguration.php @@ -679,4 +679,23 @@ public function connectionProperties(array $connectionProperties) return $this; } + + /** + * Sets the reference file URI used for the external table schema. + * It is enabled for the AVRO, PARQUET and ORC formats. + * + * Example: + * ``` + * $loadJobConfig->referenceFileSchemaUri('gs://bucket/source.parquet'); + * ``` + * + * @param string $referenceFileSchemaUri URI stored as the load job's referenceFileSchemaUri (see the example). + * @return LoadJobConfiguration This instance (for chaining), with load.referenceFileSchemaUri set. + */ + public function referenceFileSchemaUri(string $referenceFileSchemaUri) + { + $this->config['configuration']['load']['referenceFileSchemaUri'] = $referenceFileSchemaUri; + + return $this; + } } diff --git a/BigQuery/tests/Snippet/LoadJobConfigurationTest.php b/BigQuery/tests/Snippet/LoadJobConfigurationTest.php index 75b3969b5b8..97deb7a33c4 100644 --- a/BigQuery/tests/Snippet/LoadJobConfigurationTest.php +++ b/BigQuery/tests/Snippet/LoadJobConfigurationTest.php @@ -223,6 +223,10 @@ public function setterDataProvider() 'value' => 'sessionId' ] ], + [ + 'referenceFileSchemaUri', + 'gs://bucket/source.parquet' + ] ]; } } diff --git a/BigQuery/tests/System/ManageTablesTest.php b/BigQuery/tests/System/ManageTablesTest.php index 0028905263a..4acf5fe36b5 100644 --- a/BigQuery/tests/System/ManageTablesTest.php +++ b/BigQuery/tests/System/ManageTablesTest.php @@ -28,6 +28,22 @@ */ class ManageTablesTest extends BigQueryTestCase { + const SOURCE_URIS_AVRO = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro", + ]; + const SOURCE_URIS_PARQUET = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet", 
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.parquet", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.parquet", + ]; + const REFERENCE_FILE_SCHEMA_URI_AVRO = + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro"; + const REFERENCE_FILE_SCHEMA_URI_PARQUET = + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet"; + const REFERENCE_SCHEMA = ['username', 'tweet', 'timestamp', 'likes']; + public function testListTables() { $foundTables = []; @@ -347,4 +363,92 @@ private function runJob($jobConfig, $client = null) $this->assertArrayNotHasKey('errorResult', $job->info()['status']); } + + /** Creates an external table with referenceFileSchemaUri and checks its column names against REFERENCE_SCHEMA. + * @dataProvider referenceFileSchemaTestUris + */ + public function testCreateTableWithReferenceFileSchemaUri($sourceUris, $refUri, $format) + { + $id = uniqid(self::TESTING_PREFIX); + $options = [ + 'externalDataConfiguration' => [ + 'sourceUris' => $sourceUris, + 'referenceFileSchemaUri' => $refUri, + 'sourceFormat' => $format + ] + ]; + + $table = self::$dataset->createTable($id, $options); + + $this->assertTrue(self::$dataset->table($id)->exists()); + $this->assertEquals($id, $table->id()); + + $columns = $table->info()['schema']['fields']; + $i = 0; + foreach ($columns as $column) { + $this->assertEquals(self::REFERENCE_SCHEMA[$i++], $column['name']); + } + } + + /** Loads a table with referenceFileSchemaUri set and checks its column names against REFERENCE_SCHEMA. + * @dataProvider referenceFileSchemaTestUris + */ + public function testLoadTableWithReferenceFileSchemaUri($sourceUris, $refUri, $format) + { + $id = uniqid(self::TESTING_PREFIX); + $table = self::$dataset->table($id); + $loadConfig = $table->load(''); + $loadConfig->sourceUris($sourceUris); + $loadConfig->referenceFileSchemaUri($refUri); + $loadConfig->sourceFormat($format); + $loadConfig->destinationTable($table); + + $job = self::$client->runJob($loadConfig); + + $this->assertTrue(self::$dataset->table($id)->exists()); + $this->assertEquals($id, 
$table->id()); + + $columns = $table->info()['schema']['fields']; + $i = 0; + foreach ($columns as $column) { + $this->assertEquals(self::REFERENCE_SCHEMA[$i++], $column['name']); + } + } + + /** Queries a table definition with referenceFileSchemaUri; expects REFERENCE_SCHEMA row keys and at least one row. + * @dataProvider referenceFileSchemaTestUris + */ + public function testQueryTableWithReferenceFileSchemaUri($sourceUris, $refUri, $format) + { + $id = uniqid(self::TESTING_PREFIX); + $queryConfig = self::$client->query(sprintf('SELECT * FROM %s', $id)); + $queryConfig->tableDefinitions([$id =>[ + 'sourceUris' => $sourceUris, + 'referenceFileSchemaUri' => $refUri, + 'sourceFormat' => $format + ]]); + $result = self::$client->runQuery($queryConfig); + $rows = $result->rows(); + + foreach ($rows as $row) { + $this->assertEquals(self::REFERENCE_SCHEMA, array_keys($row)); + } + $this->assertGreaterThan(0, count(iterator_to_array($rows))); + } + + public function referenceFileSchemaTestUris() + { + return [ + [ + self::SOURCE_URIS_PARQUET, + self::REFERENCE_FILE_SCHEMA_URI_PARQUET, + 'PARQUET' + ], + [ + self::SOURCE_URIS_AVRO, + self::REFERENCE_FILE_SCHEMA_URI_AVRO, + 'AVRO' + ], + ]; + } } diff --git a/BigQuery/tests/Unit/LoadJobConfigurationTest.php b/BigQuery/tests/Unit/LoadJobConfigurationTest.php index 3b9aaaa09a9..e32ea9f17d9 100644 --- a/BigQuery/tests/Unit/LoadJobConfigurationTest.php +++ b/BigQuery/tests/Unit/LoadJobConfigurationTest.php @@ -108,7 +108,8 @@ public function testFluentSetters() 'connectionProperties' => [ 'key' => 'session_id', 'value' => 'sessionId' - ] + ], + 'referenceFileSchemaUri' => 'gs://bucket/source.parquet' ]; $this->expectedConfig['configuration']['load'] = $load + $this->expectedConfig['configuration']['load']; @@ -142,7 +143,8 @@ public function testFluentSetters() ->connectionProperties([ 'key' => 'session_id', 'value' =>'sessionId' - ]); + ]) + ->referenceFileSchemaUri('gs://bucket/source.parquet'); $this->assertInstanceOf(LoadJobConfiguration::class, $config); $this->assertEquals(