Skip to content

Commit

Permalink
feat(BigQuery): Enable Reference File Schema Uri (#6621)
Browse files Browse the repository at this point in the history
  • Loading branch information
ajupazhamayil committed Sep 15, 2023
1 parent 2df4e91 commit be1ad5d
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 2 deletions.
19 changes: 19 additions & 0 deletions BigQuery/src/LoadJobConfiguration.php
Original file line number Diff line number Diff line change
Expand Up @@ -679,4 +679,23 @@ public function connectionProperties(array $connectionProperties)

return $this;
}

/**
* Sets the reference for external table schema.
* It is enabled for AVRO, PARQUET and ORC format.
*
* The given URI is stored verbatim under
* `configuration.load.referenceFileSchemaUri` of this job configuration;
* no validation of the URI or of the configured source format is done here.
*
* Example:
* ```
* $loadJobConfig->referenceFileSchemaUri('gs://bucket/source.parquet');
* ```
*
* @param string $referenceFileSchemaUri URI of the file whose schema should
*        be used as the reference, e.g. `gs://bucket/source.parquet`.
* @return LoadJobConfiguration This instance, to allow fluent chaining.
*/
public function referenceFileSchemaUri(string $referenceFileSchemaUri)
{
// Written directly into the nested request payload for the load job.
$this->config['configuration']['load']['referenceFileSchemaUri'] = $referenceFileSchemaUri;

return $this;
}
}
4 changes: 4 additions & 0 deletions BigQuery/tests/Snippet/LoadJobConfigurationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,10 @@ public function setterDataProvider()
'value' => 'sessionId'
]
],
[
'referenceFileSchemaUri',
'gs://bucket/source.parquet'
]
];
}
}
104 changes: 104 additions & 0 deletions BigQuery/tests/System/ManageTablesTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,22 @@
*/
class ManageTablesTest extends BigQueryTestCase
{
// AVRO source files used by the reference-file-schema tests.
const SOURCE_URIS_AVRO = [
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro",
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro",
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro",
];
// PARQUET source files used by the reference-file-schema tests.
const SOURCE_URIS_PARQUET = [
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet",
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.parquet",
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.parquet",
];
// Reference schema file for AVRO; same URI as the first entry of SOURCE_URIS_AVRO.
const REFERENCE_FILE_SCHEMA_URI_AVRO =
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro";
// Reference schema file for PARQUET; same URI as the first entry of SOURCE_URIS_PARQUET.
const REFERENCE_FILE_SCHEMA_URI_PARQUET =
"gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet";
// Column names, in order, that the tests expect when the reference file schema is applied.
const REFERENCE_SCHEMA = ['username', 'tweet', 'timestamp', 'likes'];

public function testListTables()
{
$foundTables = [];
Expand Down Expand Up @@ -347,4 +363,92 @@ private function runJob($jobConfig, $client = null)

$this->assertArrayNotHasKey('errorResult', $job->info()['status']);
}

/**
 * Creates an external table whose schema is taken from a reference file and
 * verifies that the resulting column names match the reference schema.
 *
 * @dataProvider referenceFileSchemaTestUris
 *
 * @param string[] $sourceUris URIs of the external data files.
 * @param string $refUri URI of the file supplying the reference schema.
 * @param string $format Source format name passed as `sourceFormat`.
 */
public function testCreateTableWithReferenceFileSchemaUri($sourceUris, $refUri, $format)
{
    $id = uniqid(self::TESTING_PREFIX);
    $options = [
        'externalDataConfiguration' => [
            'sourceUris' => $sourceUris,
            'referenceFileSchemaUri' => $refUri,
            'sourceFormat' => $format
        ]
    ];

    $table = self::$dataset->createTable($id, $options);

    $this->assertTrue(self::$dataset->table($id)->exists());
    $this->assertEquals($id, $table->id());

    // Compare the complete, ordered list of column names. Looping over the
    // returned columns would pass vacuously if the schema came back empty
    // or with fewer columns than expected.
    $columns = $table->info()['schema']['fields'];
    $this->assertEquals(self::REFERENCE_SCHEMA, array_column($columns, 'name'));
}

/**
 * Loads data into a new table using a reference file schema and verifies
 * that the job succeeded and that the table's column names match the
 * reference schema.
 *
 * @dataProvider referenceFileSchemaTestUris
 *
 * @param string[] $sourceUris URIs of the files to load.
 * @param string $refUri URI of the file supplying the reference schema.
 * @param string $format Source format name passed to `sourceFormat()`.
 */
public function testLoadTableWithReferenceFileSchemaUri($sourceUris, $refUri, $format)
{
    $id = uniqid(self::TESTING_PREFIX);
    $table = self::$dataset->table($id);
    // The empty data argument is a placeholder; the actual sources are set
    // through sourceUris() below.
    $loadConfig = $table->load('');
    $loadConfig->sourceUris($sourceUris);
    $loadConfig->referenceFileSchemaUri($refUri);
    $loadConfig->sourceFormat($format);
    $loadConfig->destinationTable($table);

    $job = self::$client->runJob($loadConfig);

    // Surface a failed load directly instead of relying on the follow-up
    // table assertions to fail for an unrelated-looking reason.
    $this->assertArrayNotHasKey('errorResult', $job->info()['status']);

    $this->assertTrue(self::$dataset->table($id)->exists());
    $this->assertEquals($id, $table->id());

    // Compare the complete, ordered list of column names. Looping over the
    // returned columns would pass vacuously if the schema came back empty
    // or with fewer columns than expected.
    $columns = $table->info()['schema']['fields'];
    $this->assertEquals(self::REFERENCE_SCHEMA, array_column($columns, 'name'));
}

/**
 * Queries an inline external table definition that uses a reference file
 * schema and verifies that every returned row exposes exactly the
 * reference schema's columns.
 *
 * @dataProvider referenceFileSchemaTestUris
 *
 * @param string[] $sourceUris URIs of the external data files.
 * @param string $refUri URI of the file supplying the reference schema.
 * @param string $format Source format name passed as `sourceFormat`.
 */
public function testQueryTableWithReferenceFileSchemaUri($sourceUris, $refUri, $format)
{
    $id = uniqid(self::TESTING_PREFIX);
    $queryConfig = self::$client->query(sprintf('SELECT * FROM %s', $id));
    $queryConfig->tableDefinitions([$id => [
        'sourceUris' => $sourceUris,
        'referenceFileSchemaUri' => $refUri,
        'sourceFormat' => $format
    ]]);
    $result = self::$client->runQuery($queryConfig);

    // Materialise the rows once: walking the result iterator in a foreach
    // and then again via iterator_to_array() traverses the remote result
    // set twice. Keys are discarded so rows can never overwrite each other.
    $rows = iterator_to_array($result->rows(), false);

    // Check non-emptiness first so the per-row loop cannot pass vacuously.
    $this->assertGreaterThan(0, count($rows));
    foreach ($rows as $row) {
        $this->assertEquals(self::REFERENCE_SCHEMA, array_keys($row));
    }
}

/**
 * Data provider for the reference-file-schema tests.
 *
 * Each dataset is a positional triple: the list of source URIs, the
 * reference schema file URI, and the source format name.
 *
 * @return array[]
 */
public function referenceFileSchemaTestUris()
{
    $parquetCase = [
        self::SOURCE_URIS_PARQUET,
        self::REFERENCE_FILE_SCHEMA_URI_PARQUET,
        'PARQUET',
    ];
    $avroCase = [
        self::SOURCE_URIS_AVRO,
        self::REFERENCE_FILE_SCHEMA_URI_AVRO,
        'AVRO',
    ];

    // Order is kept (PARQUET first, then AVRO) so dataset indices stay stable.
    return [$parquetCase, $avroCase];
}
}
6 changes: 4 additions & 2 deletions BigQuery/tests/Unit/LoadJobConfigurationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ public function testFluentSetters()
'connectionProperties' => [
'key' => 'session_id',
'value' => 'sessionId'
]
],
'referenceFileSchemaUri' => 'gs://bucket/source.parquet'
];
$this->expectedConfig['configuration']['load'] = $load
+ $this->expectedConfig['configuration']['load'];
Expand Down Expand Up @@ -142,7 +143,8 @@ public function testFluentSetters()
->connectionProperties([
'key' => 'session_id',
'value' =>'sessionId'
]);
])
->referenceFileSchemaUri('gs://bucket/source.parquet');

$this->assertInstanceOf(LoadJobConfiguration::class, $config);
$this->assertEquals(
Expand Down

0 comments on commit be1ad5d

Please sign in to comment.