Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(turborepo): rust implementation of file hashing via git index #4967

Merged
merged 13 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions cli/internal/ffi/bindings.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ struct Buffer recursive_copy(struct Buffer buffer);

struct Buffer verify_signature(struct Buffer buffer);

/* Called from Go with a serialized GetPackageFileHashesFromGitIndexRequest; the returned buffer is decoded by the Go caller as a GetPackageFileHashesFromGitIndexResponse. */
struct Buffer get_package_file_hashes_from_git_index(struct Buffer buffer);

struct Buffer transitive_closure(struct Buffer buf);

struct Buffer subgraph(struct Buffer buf);
Expand Down
23 changes: 23 additions & 0 deletions cli/internal/ffi/ffi.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,3 +338,26 @@ func VerifySignature(teamID []byte, hash string, artifactBody []byte, expectedTa

return resp.GetVerified(), nil
}

// GetPackageFileHashesFromGitIndex hands the request over to the rust implementation, which
// uses git to hash the files in a package. Additional files are not supported: only the
// non-ignored files in the package are hashed.
//
// rootPath and packagePath are forwarded as the request's TurboRoot and PackagePath.
// A non-empty error string in the response is returned as a Go error; a response that
// cannot be decoded causes a panic.
func GetPackageFileHashesFromGitIndex(rootPath string, packagePath string) (map[string]string, error) {
	request := ffi_proto.GetPackageFileHashesFromGitIndexRequest{
		TurboRoot:   rootPath,
		PackagePath: packagePath,
	}

	// The request buffer is released as soon as the foreign call has returned.
	requestBuffer := Marshal(&request)
	responseBuffer := C.get_package_file_hashes_from_git_index(requestBuffer)
	requestBuffer.Free()

	response := ffi_proto.GetPackageFileHashesFromGitIndexResponse{}
	decodeErr := Unmarshal(responseBuffer, response.ProtoReflect().Interface())
	if decodeErr != nil {
		panic(decodeErr)
	}

	// The response carries either an error message or the hashes.
	if message := response.GetError(); message != "" {
		return nil, errors.New(message)
	}
	return response.GetHashes().GetHashes(), nil
}
350 changes: 301 additions & 49 deletions cli/internal/ffi/proto/messages.pb.go

Large diffs are not rendered by default.

100 changes: 0 additions & 100 deletions cli/internal/hashing/package_deps_hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,48 +29,6 @@ type PackageDepsOptions struct {
InputPatterns []string
}

// getPackageFileHashesFromGitIndex returns a hash for each file in the package at packagePath
// (anchored at rootPath), keyed by file path.
//
// It starts from the hashes reported by gitLsTree and then reconciles them with the working
// directory as reported by gitStatus: paths whose status is a delete are removed, and every
// other path reported by gitStatus is re-hashed with GetHashesForFiles.
//
// An error is returned if any of those three steps fails.
func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
	var result map[turbopath.AnchoredUnixPath]string
	absolutePackagePath := packagePath.RestoreAnchor(rootPath)

	// Get the state of the git index.
	gitLsTreeOutput, err := gitLsTree(absolutePackagePath)
	if err != nil {
		return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err)
	}
	result = gitLsTreeOutput

	// Update the result with the state of the working directory.
	// The paths returned from this call are anchored at the package directory.
	gitStatusOutput, err := gitStatus(absolutePackagePath)
	if err != nil {
		// Wrap with %w (and a lowercase message) so callers can still inspect the
		// underlying error with errors.Is / errors.As.
		return nil, fmt.Errorf("could not get git hashes from git status: %w", err)
	}

	// Review status output to identify the delta.
	var filesToHash []turbopath.AnchoredSystemPath
	for filePath, status := range gitStatusOutput {
		if status.isDelete() {
			delete(result, filePath)
		} else {
			filesToHash = append(filesToHash, filePath.ToSystemPath())
		}
	}

	// Get the hashes for any modified files in the working directory.
	hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash)
	if err != nil {
		return nil, err
	}

	// Overlay the freshly computed hashes on top of the ones from the index.
	for filePath, hash := range hashes {
		result[filePath] = hash
	}

	return result, nil
}

func safeCompileIgnoreFile(filepath turbopath.AbsoluteSystemPath) (*gitignore.GitIgnore, error) {
if filepath.FileExists() {
return gitignore.CompileIgnoreFile(filepath.ToString())
Expand Down Expand Up @@ -508,61 +466,3 @@ type statusCode struct {
// isDelete reports whether either half of the status code (x or y) is "D".
func (s statusCode) isDelete() bool {
	const deleted = "D"
	if s.x == deleted {
		return true
	}
	return s.y == deleted
}

// gitStatus runs `git status` inside rootPath and returns the status code of every path
// it reports, keyed by path. Callers can use the codes to decide what to do with files
// that do not currently match what is in the index.
//
// Note: `git status -z` prints paths relative to the repository's location, not to the
// directory it was invoked from. When rootPath is not the repository location (the
// traverse path is non-empty), each path is rebuilt as a full path and then made relative
// to rootPath again, which normalizes the output to the behavior of `ls-files` and `ls-tree`.
func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) {
	cmd := exec.Command(
		"git",               // Using `git` from $PATH,
		"status",            // tell me about the status of the working tree,
		"--untracked-files", // including information about untracked files,
		"--no-renames",      // do not detect renames,
		"-z",                // with each file path relative to the repository root and \000-terminated,
		"--",                // and any additional argument you see is a path, promise.
		".",                 // Operate in the current directory instead of the root of the working tree.
	)
	cmd.Dir = rootPath.ToString() // Include files only from this directory.

	statusEntries, err := runGitCommand(cmd, "status", gitoutput.NewStatusReader)
	if err != nil {
		return nil, err
	}

	packageRoot := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString())
	traversePath, err := memoizedGetTraversePath(packageRoot)
	if err != nil {
		return nil, err
	}

	// Paths only need re-anchoring when there is a traverse path to apply.
	needsReanchor := len(traversePath) > 0

	statuses := make(map[turbopath.AnchoredUnixPath]statusCode, len(statusEntries))
	for _, rawEntry := range statusEntries {
		entry := gitoutput.StatusEntry(rawEntry)

		// Anchored at repository.
		repoAnchored := turbopath.AnchoredUnixPathFromUpstream(entry.GetField(gitoutput.Path))

		// By default the path from git is used as-is.
		packageAnchored := repoAnchored
		if needsReanchor {
			repositoryPath := packageRoot.Join(traversePath.ToSystemPath())
			absoluteFilePath := repoAnchored.ToSystemPath().RestoreAnchor(repositoryPath)

			relativePath, err := absoluteFilePath.RelativeTo(packageRoot)
			if err != nil {
				return nil, err
			}
			packageAnchored = relativePath.ToUnixPath()
		}

		statuses[packageAnchored] = statusCode{
			x: entry.GetField(gitoutput.StatusX),
			y: entry.GetField(gitoutput.StatusY),
		}
	}

	return statuses, nil
}
112 changes: 112 additions & 0 deletions cli/internal/hashing/package_deps_hash_go.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
//go:build go || !rust
// +build go !rust

package hashing

import (
"fmt"
"os/exec"

"github.com/vercel/turbo/cli/internal/encoding/gitoutput"
"github.com/vercel/turbo/cli/internal/turbopath"
)

// getPackageFileHashesFromGitIndex returns a hash for each file in the package at packagePath
// (anchored at rootPath), keyed by file path. This is the Go implementation, selected by the
// build constraint at the top of this file.
//
// It starts from the hashes reported by gitLsTree and then reconciles them with the working
// directory as reported by gitStatus: paths whose status is a delete are removed, and every
// other path reported by gitStatus is re-hashed with GetHashesForFiles.
//
// An error is returned if any of those three steps fails.
func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
	var result map[turbopath.AnchoredUnixPath]string
	absolutePackagePath := packagePath.RestoreAnchor(rootPath)

	// Get the state of the git index.
	gitLsTreeOutput, err := gitLsTree(absolutePackagePath)
	if err != nil {
		return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err)
	}
	result = gitLsTreeOutput

	// Update the result with the state of the working directory.
	// The paths returned from this call are anchored at the package directory.
	gitStatusOutput, err := gitStatus(absolutePackagePath)
	if err != nil {
		// Wrap with %w (and a lowercase message) so callers can still inspect the
		// underlying error with errors.Is / errors.As.
		return nil, fmt.Errorf("could not get git hashes from git status: %w", err)
	}

	// Review status output to identify the delta.
	var filesToHash []turbopath.AnchoredSystemPath
	for filePath, status := range gitStatusOutput {
		if status.isDelete() {
			delete(result, filePath)
		} else {
			filesToHash = append(filesToHash, filePath.ToSystemPath())
		}
	}

	// Get the hashes for any modified files in the working directory.
	hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash)
	if err != nil {
		return nil, err
	}

	// Overlay the freshly computed hashes on top of the ones from the index.
	for filePath, hash := range hashes {
		result[filePath] = hash
	}

	return result, nil
}

// gitStatus runs `git status` inside rootPath and returns the status code of every path
// it reports, keyed by path. Callers can use the codes to decide what to do with files
// that do not currently match what is in the index.
//
// Note: `git status -z` prints paths relative to the repository's location, not to the
// directory it was invoked from. When rootPath is not the repository location (the
// traverse path is non-empty), each path is rebuilt as a full path and then made relative
// to rootPath again, which normalizes the output to the behavior of `ls-files` and `ls-tree`.
func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) {
	cmd := exec.Command(
		"git",               // Using `git` from $PATH,
		"status",            // tell me about the status of the working tree,
		"--untracked-files", // including information about untracked files,
		"--no-renames",      // do not detect renames,
		"-z",                // with each file path relative to the repository root and \000-terminated,
		"--",                // and any additional argument you see is a path, promise.
		".",                 // Operate in the current directory instead of the root of the working tree.
	)
	cmd.Dir = rootPath.ToString() // Include files only from this directory.

	statusEntries, err := runGitCommand(cmd, "status", gitoutput.NewStatusReader)
	if err != nil {
		return nil, err
	}

	packageRoot := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString())
	traversePath, err := memoizedGetTraversePath(packageRoot)
	if err != nil {
		return nil, err
	}

	// Paths only need re-anchoring when there is a traverse path to apply.
	needsReanchor := len(traversePath) > 0

	statuses := make(map[turbopath.AnchoredUnixPath]statusCode, len(statusEntries))
	for _, rawEntry := range statusEntries {
		entry := gitoutput.StatusEntry(rawEntry)

		// Anchored at repository.
		repoAnchored := turbopath.AnchoredUnixPathFromUpstream(entry.GetField(gitoutput.Path))

		// By default the path from git is used as-is.
		packageAnchored := repoAnchored
		if needsReanchor {
			repositoryPath := packageRoot.Join(traversePath.ToSystemPath())
			absoluteFilePath := repoAnchored.ToSystemPath().RestoreAnchor(repositoryPath)

			relativePath, err := absoluteFilePath.RelativeTo(packageRoot)
			if err != nil {
				return nil, err
			}
			packageAnchored = relativePath.ToUnixPath()
		}

		statuses[packageAnchored] = statusCode{
			x: entry.GetField(gitoutput.StatusX),
			y: entry.GetField(gitoutput.StatusY),
		}
	}

	return statuses, nil
}
22 changes: 22 additions & 0 deletions cli/internal/hashing/package_deps_hash_rust.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//go:build rust
// +build rust

package hashing

import (
"github.com/vercel/turbo/cli/internal/ffi"
"github.com/vercel/turbo/cli/internal/turbopath"
)

// getPackageFileHashesFromGitIndex is the variant built under the `rust` build tag: it
// delegates the hashing to ffi.GetPackageFileHashesFromGitIndex and converts the string
// keys of the returned map into turbopath.AnchoredUnixPath values.
//
// Any error from the ffi call is returned unchanged.
func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
	root := rootPath.ToString()
	pkg := packagePath.ToString()

	fromRust, err := ffi.GetPackageFileHashesFromGitIndex(root, pkg)
	if err != nil {
		return nil, err
	}

	converted := make(map[turbopath.AnchoredUnixPath]string, len(fromRust))
	for unixPath, digest := range fromRust {
		key := turbopath.AnchoredUnixPathFromUpstream(unixPath)
		converted[key] = digest
	}
	return converted, nil
}
16 changes: 16 additions & 0 deletions crates/turborepo-ffi/messages.proto
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,19 @@ message VerifySignatureResponse {
string error = 2;
}
}

// Request passed to get_package_file_hashes_from_git_index.
message GetPackageFileHashesFromGitIndexRequest {
// Filled by the Go caller from its rootPath argument.
string turbo_root = 1;
// Filled by the Go caller from its packagePath argument.
string package_path = 2;
}

// A set of file hashes, wrapped in a message so it can be a member of a oneof.
message FileHashes {
// The Go caller treats each key as a file path (converted to an AnchoredUnixPath) and each value as that file's hash.
map<string, string> hashes = 1;
}

// Response from get_package_file_hashes_from_git_index: either the hashes or an error.
message GetPackageFileHashesFromGitIndexResponse {
oneof response {
// Set on success.
FileHashes hashes = 1;
// A non-empty value is turned into a Go error by the caller.
string error = 2;
}
}
Loading