Skip to content

Commit

Permalink
Merge pull request #79 from planetlabs/command-io
Browse files Browse the repository at this point in the history
Optionally use stdin and stdout for validate, describe, and convert commands
  • Loading branch information
tschaub committed Sep 26, 2023
2 parents 859b789 + b88a386 commit b1b08b5
Show file tree
Hide file tree
Showing 14 changed files with 486 additions and 117 deletions.
16 changes: 16 additions & 0 deletions cmd/gpq/command/command.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package command

import "io"

// CLI is the root command structure for the program. Each field declares one
// subcommand, with its help text carried in the struct tag.
// NOTE(review): the `cmd` and `help` tags are presumably consumed by a
// struct-tag-driven argument parser that is not visible in this file — confirm.
var CLI struct {
Convert ConvertCmd `cmd:"" help:"Convert data from one format to another."`
Validate ValidateCmd `cmd:"" help:"Validate a GeoParquet file."`
Describe DescribeCmd `cmd:"" help:"Describe a GeoParquet file."`
Version VersionCmd `cmd:"" help:"Print the version of this program."`
}

// ReaderAtSeeker is the union of io.Reader, io.ReaderAt, and io.Seeker. It
// lets a command accept either an opened file or an in-memory reader as its
// input without caring which one it was given.
type ReaderAtSeeker interface {
io.Reader
io.ReaderAt
io.Seeker
}
65 changes: 65 additions & 0 deletions cmd/gpq/command/command_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package command_test

import (
"io"
"os"
"testing"

"github.com/stretchr/testify/suite"
)

// Suite is the shared test suite for the command package. It swaps os.Stdin
// and os.Stdout for temporary files around every test so commands that read
// from stdin or write to stdout can be exercised and inspected.
type Suite struct {
suite.Suite
// originalStdin holds the real os.Stdin so TearDownTest can restore it.
originalStdin *os.File
// mockStdin is the temporary file installed as os.Stdin during a test.
mockStdin *os.File
// originalStdout holds the real os.Stdout so TearDownTest can restore it.
originalStdout *os.File
// mockStdout is the temporary file installed as os.Stdout during a test.
mockStdout *os.File
}

// SetupTest runs before each test. It replaces os.Stdin and os.Stdout with
// freshly created temporary files and remembers the originals so that
// TearDownTest can put them back.
func (s *Suite) SetupTest() {
	// Stand-in for stdin.
	in, inErr := os.CreateTemp("", "stdin")
	s.Require().NoError(inErr)
	s.originalStdin, s.mockStdin = os.Stdin, in
	os.Stdin = in

	// Stand-in for stdout.
	out, outErr := os.CreateTemp("", "stdout")
	s.Require().NoError(outErr)
	s.originalStdout, s.mockStdout = os.Stdout, out
	os.Stdout = out
}

// writeStdin writes data to the mock stdin file and rewinds it to the start
// so that the command under test reads the data from the beginning.
func (s *Suite) writeStdin(data []byte) {
	_, err := s.mockStdin.Write(data)
	s.Require().NoError(err)

	// Rewind: the write above left the offset at the end of the data.
	_, err = s.mockStdin.Seek(0, io.SeekStart)
	s.Require().NoError(err)
}

// readStdout returns everything written to the mock stdout file so far.
// If the file cannot be rewound it is assumed to have been closed, and it is
// reopened by name before reading.
func (s *Suite) readStdout() []byte {
	_, seekErr := s.mockStdout.Seek(0, io.SeekStart)
	if seekErr != nil {
		// assume the file is closed
		reopened, openErr := os.Open(s.mockStdout.Name())
		s.Require().NoError(openErr)
		s.mockStdout = reopened
	}

	contents, readErr := io.ReadAll(s.mockStdout)
	s.Require().NoError(readErr)
	return contents
}

// TearDownTest runs after each test. It restores the real os.Stdout and
// os.Stdin, then closes and deletes the temporary stand-in files.
func (s *Suite) TearDownTest() {
	os.Stdout = s.originalStdout
	os.Stdin = s.originalStdin

	for _, tmp := range []*os.File{s.mockStdin, s.mockStdout} {
		// Close errors are ignored on purpose: the file may already be closed.
		_ = tmp.Close()
		s.NoError(os.Remove(tmp.Name()))
	}
}

// TestSuite is the `go test` entry point that runs every method on Suite.
func TestSuite(t *testing.T) {
	suite.Run(t, new(Suite))
}
73 changes: 58 additions & 15 deletions cmd/gpq/convert.go → cmd/gpq/command/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package main
package command

import (
"bytes"
"errors"
"fmt"
"io"
"os"
"strings"

Expand All @@ -25,9 +27,9 @@ import (
)

type ConvertCmd struct {
Input string `arg:"" name:"input" help:"Input file." type:"existingfile"`
Input string `arg:"" optional:"" name:"input" help:"Input file. If not provided, input is read from stdin." type:"path"`
From string `help:"Input file format. Possible values: ${enum}." enum:"auto, geojson, geoparquet, parquet" default:"auto"`
Output string `arg:"" name:"output" help:"Output file." type:"path"`
Output string `arg:"" optional:"" name:"output" help:"Output file. If not provided, output is written to stdout." type:"path"`
To string `help:"Output file format. Possible values: ${enum}." enum:"auto, geojson, geoparquet" default:"auto"`
Min int `help:"Minimum number of features to consider when building a schema." default:"10"`
Max int `help:"Maximum number of features to consider when building a schema." default:"100"`
Expand All @@ -53,6 +55,9 @@ var validTypes = map[FormatType]bool{
}

func parseFormatType(format string) FormatType {
if format == "" {
return AutoType
}
ft := FormatType(strings.ToLower(format))
if !validTypes[ft] {
return UnknownType
Expand All @@ -73,34 +78,72 @@ func getFormatType(filename string) FormatType {
return UnknownType
}

// hasStdin reports whether os.Stdin can be stat'ed and has a size greater
// than zero. A failed Stat is treated as "no stdin".
// NOTE(review): the size reported for a pipe is platform-dependent and may be
// zero even when data is pending — confirm this check behaves as intended for
// piped input on the supported platforms.
func hasStdin() bool {
	info, statErr := os.Stdin.Stat()
	return statErr == nil && info.Size() > 0
}

func (c *ConvertCmd) Run() error {
inputSource := c.Input
outputSource := c.Output

if outputSource == "" && hasStdin() {
outputSource = inputSource
inputSource = ""
}

outputFormat := parseFormatType(c.To)
if outputFormat == AutoType {
outputFormat = getFormatType(c.Output)
if outputSource == "" {
return fmt.Errorf("when writing to stdout, the --to option must be provided to determine the output format")
}
outputFormat = getFormatType(outputSource)
}
if outputFormat == UnknownType {
return fmt.Errorf("could not determine output format for %s", c.Output)
return fmt.Errorf("could not determine output format for %s", outputSource)
}

inputFormat := parseFormatType(c.From)
if inputFormat == AutoType {
inputFormat = getFormatType(c.Input)
if inputSource == "" {
return fmt.Errorf("when reading from stdin, the --from option must be provided to determine the input format")
}
inputFormat = getFormatType(inputSource)
}
if inputFormat == UnknownType {
return fmt.Errorf("could not determine input format for %s", c.Input)
return fmt.Errorf("could not determine input format for %s", inputSource)
}

input, readErr := os.Open(c.Input)
if readErr != nil {
return fmt.Errorf("failed to read from %q: %w", c.Input, readErr)
var input ReaderAtSeeker
if inputSource == "" {
data, err := io.ReadAll(os.Stdin)
if err != nil {
return fmt.Errorf("trouble reading from stdin: %w", err)
}
input = bytes.NewReader(data)
} else {
i, readErr := os.Open(inputSource)
if readErr != nil {
return fmt.Errorf("failed to read from %q: %w", inputSource, readErr)
}
defer i.Close()
input = i
}
defer input.Close()

output, createErr := os.Create(c.Output)
if createErr != nil {
return fmt.Errorf("failed to open %q for writing: %w", c.Output, createErr)
var output *os.File
if outputSource == "" {
output = os.Stdout
} else {
o, createErr := os.Create(outputSource)
if createErr != nil {
return fmt.Errorf("failed to open %q for writing: %w", outputSource, createErr)
}
defer o.Close()
output = o
}
defer output.Close()

if inputFormat == GeoJSONType {
if outputFormat != ParquetType && outputFormat != GeoParquetType {
Expand Down
122 changes: 122 additions & 0 deletions cmd/gpq/command/convert_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package command_test

import (
"bytes"
"encoding/json"

"github.com/apache/arrow/go/v14/parquet/file"
"github.com/planetlabs/gpq/cmd/gpq/command"
"github.com/planetlabs/gpq/internal/geo"
"github.com/planetlabs/gpq/internal/test"
)

// TestConvertGeoParquetToGeoJSONStdout converts a GeoParquet fixture with no
// output path given and checks that the captured stdout decodes as a GeoJSON
// feature collection with five features.
func (s *Suite) TestConvertGeoParquetToGeoJSONStdout() {
	convert := &command.ConvertCmd{
		Input: "../../../internal/testdata/cases/example-v1.0.0.parquet",
		From:  "auto",
		To:    "geojson",
	}
	s.Require().NoError(convert.Run())

	written := s.readStdout()
	features := new(geo.FeatureCollection)
	s.Require().NoError(json.Unmarshal(written, features))
	s.Len(features.Features, 5)
}

// TestConvertGeoJSONToGeoParquetStdout converts a GeoJSON fixture with no
// output path given and checks that the captured stdout is a readable Parquet
// file with five rows.
func (s *Suite) TestConvertGeoJSONToGeoParquetStdout() {
	convert := &command.ConvertCmd{
		Input: "../../../internal/geojson/testdata/example.geojson",
		From:  "auto",
		To:    "parquet",
	}
	s.Require().NoError(convert.Run())

	written := s.readStdout()
	reader, openErr := file.NewParquetReader(bytes.NewReader(written))
	s.Require().NoError(openErr)
	defer reader.Close()

	s.Equal(int64(5), reader.NumRows())
}

// TestConvertGeoParquetToUnknownStdout checks that converting to stdout
// without an explicit output format is rejected with a message asking for
// the --to option.
func (s *Suite) TestConvertGeoParquetToUnknownStdout() {
	convert := &command.ConvertCmd{
		Input: "../../../internal/testdata/cases/example-v1.0.0.parquet",
		From:  "auto",
	}

	runErr := convert.Run()
	s.ErrorContains(runErr, "when writing to stdout, the --to option must be provided")
}

// TestConvertGeoJSONStdinToGeoParquetStdout feeds a one-feature GeoJSON
// collection through stdin, converts it to GeoParquet on stdout, and checks
// that the result is a readable Parquet file with exactly one row.
func (s *Suite) TestConvertGeoJSONStdinToGeoParquetStdout() {
	payload := []byte(`{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {
"name": "Null Island"
},
"geometry": {
"type": "Point",
"coordinates": [0, 0]
}
}
]
}`)
	s.writeStdin(payload)

	// No Input or Output: the command reads stdin and writes stdout.
	convert := &command.ConvertCmd{
		From: "geojson",
		To:   "geoparquet",
	}
	s.Require().NoError(convert.Run())

	written := s.readStdout()
	reader, openErr := file.NewParquetReader(bytes.NewReader(written))
	s.Require().NoError(openErr)
	defer reader.Close()

	s.Equal(int64(1), reader.NumRows())
}

// TestConvertGeoParquetStdinToGeoJSONStdout builds GeoParquet bytes from a
// one-feature GeoJSON document, feeds them through stdin, converts to GeoJSON
// on stdout, and checks that one feature comes back.
func (s *Suite) TestConvertGeoParquetStdinToGeoJSONStdout() {
	payload := test.GeoParquetFromJSON(s.T(), `{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {
"name": "Null Island"
},
"geometry": {
"type": "Point",
"coordinates": [0, 0]
}
}
]
}`)
	s.writeStdin(payload)

	// No Input or Output: the command reads stdin and writes stdout.
	convert := &command.ConvertCmd{
		From: "geoparquet",
		To:   "geojson",
	}
	s.Require().NoError(convert.Run())

	written := s.readStdout()
	features := new(geo.FeatureCollection)
	s.Require().NoError(json.Unmarshal(written, features))
	s.Len(features.Features, 1)
}

// TestConvertUnknownStdinToGeoParquetStdout checks that reading from stdin
// without an explicit input format is rejected with a message asking for the
// --from option.
func (s *Suite) TestConvertUnknownStdinToGeoParquetStdout() {
	convert := &command.ConvertCmd{To: "geoparquet"}

	runErr := convert.Run()
	s.ErrorContains(runErr, "when reading from stdin, the --from option must be provided")
}
Loading

0 comments on commit b1b08b5

Please sign in to comment.