diff --git a/plumbing/format/commitgraph/commitgraph.go b/plumbing/format/commitgraph/commitgraph.go
new file mode 100644
index 000000000..9bf714904
--- /dev/null
+++ b/plumbing/format/commitgraph/commitgraph.go
@@ -0,0 +1,35 @@
+package commitgraph
+
+import (
+	"time"
+
+	"gopkg.in/src-d/go-git.v4/plumbing"
+)
+
+// Node is a reduced representation of Commit as presented in the commit graph
+// file. It is merely useful as an optimization for walking the commit graphs.
+type Node struct {
+	// TreeHash is the hash of the root tree of the commit.
+	TreeHash plumbing.Hash
+	// ParentIndexes are the indexes of the parent commits of the commit.
+	ParentIndexes []int
+	// ParentHashes are the hashes of the parent commits of the commit.
+	ParentHashes []plumbing.Hash
+	// Generation is the pre-computed generation number in the commit graph,
+	// or zero if not available.
+	Generation int
+	// When is the timestamp of the commit.
+	When time.Time
+}
+
+// Index is a representation of the commit graph that allows indexed access
+// to the nodes using the commit object hash.
+type Index interface {
+	// GetIndexByHash gets the index in the commit graph from commit hash, if available
+	GetIndexByHash(h plumbing.Hash) (int, error)
+	// GetNodeByIndex gets the commit node from the commit graph using index
+	// obtained from child node, if available
+	GetNodeByIndex(i int) (*Node, error)
+	// Hashes returns all the hashes that are available in the index
+	Hashes() []plumbing.Hash
+}
diff --git a/plumbing/format/commitgraph/commitgraph_test.go b/plumbing/format/commitgraph/commitgraph_test.go
new file mode 100644
index 000000000..b98414251
--- /dev/null
+++ b/plumbing/format/commitgraph/commitgraph_test.go
@@ -0,0 +1,135 @@
+package commitgraph_test
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"golang.org/x/exp/mmap"
+
+	. "gopkg.in/check.v1"
+	"gopkg.in/src-d/go-git-fixtures.v3"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph"
+)
+
+func Test(t *testing.T) { TestingT(t) }
+
+type CommitgraphSuite struct {
+	fixtures.Suite
+}
+
+var _ = Suite(&CommitgraphSuite{})
+
+func testDecodeHelper(c *C, path string) {
+	reader, err := mmap.Open(path)
+	c.Assert(err, IsNil)
+	defer reader.Close()
+	index, err := commitgraph.OpenFileIndex(reader)
+	c.Assert(err, IsNil)
+
+	// Root commit
+	nodeIndex, err := index.GetIndexByHash(plumbing.NewHash("347c91919944a68e9413581a1bc15519550a3afe"))
+	c.Assert(err, IsNil)
+	node, err := index.GetNodeByIndex(nodeIndex)
+	c.Assert(err, IsNil)
+	c.Assert(len(node.ParentIndexes), Equals, 0)
+	c.Assert(len(node.ParentHashes), Equals, 0)
+
+	// Regular commit
+	nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("e713b52d7e13807e87a002e812041f248db3f643"))
+	c.Assert(err, IsNil)
+	node, err = index.GetNodeByIndex(nodeIndex)
+	c.Assert(err, IsNil)
+	c.Assert(len(node.ParentIndexes), Equals, 1)
+	c.Assert(len(node.ParentHashes), Equals, 1)
+	c.Assert(node.ParentHashes[0].String(), Equals, "347c91919944a68e9413581a1bc15519550a3afe")
+
+	// Merge commit
+	nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("b29328491a0682c259bcce28741eac71f3499f7d"))
+	c.Assert(err, IsNil)
+	node, err = index.GetNodeByIndex(nodeIndex)
+	c.Assert(err, IsNil)
+	c.Assert(len(node.ParentIndexes), Equals, 2)
+	c.Assert(len(node.ParentHashes), Equals, 2)
+	c.Assert(node.ParentHashes[0].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+	c.Assert(node.ParentHashes[1].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+
+	// Octopus merge commit
+	nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560"))
+	c.Assert(err, IsNil)
+	node, err = index.GetNodeByIndex(nodeIndex)
+	c.Assert(err, IsNil)
+	c.Assert(len(node.ParentIndexes), Equals, 3)
+	c.Assert(len(node.ParentHashes), Equals, 3)
+	c.Assert(node.ParentHashes[0].String(), Equals, "ce275064ad67d51e99f026084e20827901a8361c")
+	c.Assert(node.ParentHashes[1].String(), Equals, "bb13916df33ed23004c3ce9ed3b8487528e655c1")
+	c.Assert(node.ParentHashes[2].String(), Equals, "a45273fe2d63300e1962a9e26a6b15c276cd7082")
+
+	// Check all hashes
+	hashes := index.Hashes()
+	c.Assert(len(hashes), Equals, 11)
+	c.Assert(hashes[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+	c.Assert(hashes[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+}
+
+func (s *CommitgraphSuite) TestDecode(c *C) {
+	fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+		dotgit := f.DotGit()
+		testDecodeHelper(c, filepath.Join(dotgit.Root(), "objects", "info", "commit-graph"))
+	})
+}
+
+func (s *CommitgraphSuite) TestReencode(c *C) {
+	fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+		dotgit := f.DotGit()
+
+		reader, err := mmap.Open(filepath.Join(dotgit.Root(), "objects", "info", "commit-graph"))
+		c.Assert(err, IsNil)
+		defer reader.Close()
+		index, err := commitgraph.OpenFileIndex(reader)
+		c.Assert(err, IsNil)
+
+		writer, err := ioutil.TempFile(dotgit.Root(), "commit-graph")
+		c.Assert(err, IsNil)
+		tmpName := writer.Name()
+		defer os.Remove(tmpName)
+		encoder := commitgraph.NewEncoder(writer)
+		err = encoder.Encode(index)
+		c.Assert(err, IsNil)
+		writer.Close()
+
+		testDecodeHelper(c, tmpName)
+	})
+}
+
+func (s *CommitgraphSuite) TestReencodeInMemory(c *C) {
+	fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+		dotgit := f.DotGit()
+
+		reader, err := mmap.Open(filepath.Join(dotgit.Root(), "objects", "info", "commit-graph"))
+		c.Assert(err, IsNil)
+		index, err := commitgraph.OpenFileIndex(reader)
+		c.Assert(err, IsNil)
+		memoryIndex := commitgraph.NewMemoryIndex()
+		for i, hash := range index.Hashes() {
+			node, err := index.GetNodeByIndex(i)
+			c.Assert(err, IsNil)
+			err = memoryIndex.Add(hash, node)
+			c.Assert(err, IsNil)
+		}
+		reader.Close()
+
+		writer, err := ioutil.TempFile(dotgit.Root(), "commit-graph")
+		c.Assert(err, IsNil)
+		tmpName := writer.Name()
+		defer os.Remove(tmpName)
+		encoder := commitgraph.NewEncoder(writer)
+		err = encoder.Encode(memoryIndex)
+		c.Assert(err, IsNil)
+		writer.Close()
+
+		testDecodeHelper(c, tmpName)
+	})
+}
diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go
new file mode 100644
index 000000000..501b09e1c
--- /dev/null
+++ b/plumbing/format/commitgraph/encoder.go
@@ -0,0 +1,231 @@
+package commitgraph
+
+import (
+	"crypto/sha1"
+	"hash"
+	"io"
+
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/utils/binary"
+)
+
+// Encoder writes Index implementations to an output stream in the commit
+// graph file format.
+type Encoder struct {
+	io.Writer
+	hash hash.Hash
+}
+
+// NewEncoder returns a new stream encoder that writes to w.
+func NewEncoder(w io.Writer) *Encoder {
+	h := sha1.New()
+	mw := io.MultiWriter(w, h)
+	return &Encoder{mw, h}
+}
+
+// Encode writes every commit of idx to the underlying stream as a commit
+// graph file. Commits are emitted in sorted hash order and the output is
+// terminated by the SHA-1 checksum of everything written before it.
+func (e *Encoder) Encode(idx Index) error {
+	// Get all the hashes in the input index
+	hashes := idx.Hashes()
+
+	// Sort the input and prepare helper structures we'll need for encoding
+	hashToIndex, fanout, largeEdgesCount, err := e.prepare(idx, hashes)
+	if err != nil {
+		return err
+	}
+
+	chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature}
+	chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36}
+	if largeEdgesCount > 0 {
+		chunkSignatures = append(chunkSignatures, largeEdgeListSignature)
+		chunkSizes = append(chunkSizes, uint64(largeEdgesCount)*4)
+	}
+
+	if err = e.encodeFileHeader(len(chunkSignatures)); err != nil {
+		return err
+	}
+	if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil {
+		return err
+	}
+	if err = e.encodeFanout(fanout); err != nil {
+		return err
+	}
+	if err = e.encodeOidLookup(hashes); err != nil {
+		return err
+	}
+	// err is deliberately not re-declared inside an if statement here: a
+	// shadowed err would make a commit data failure invisible to the caller.
+	largeEdges, err := e.encodeCommitData(hashes, hashToIndex, idx)
+	if err != nil {
+		return err
+	}
+	if err = e.encodeLargeEdges(largeEdges); err != nil {
+		return err
+	}
+	return e.encodeChecksum()
+}
+
+// nodeByHash resolves hash to its node in idx and reports any lookup failure.
+func nodeByHash(idx Index, hash plumbing.Hash) (*Node, error) {
+	i, err := idx.GetIndexByHash(hash)
+	if err != nil {
+		return nil, err
+	}
+	return idx.GetNodeByIndex(i)
+}
+
+// prepare sorts hashes in place and builds the structures needed for
+// encoding: the position of every hash in the sorted order, the cumulative
+// fanout table keyed by the first hash byte, and the number of entries the
+// large edge list will hold.
+func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (map[plumbing.Hash]uint32, []uint32, uint32, error) {
+	// Sort the hashes and build our index
+	plumbing.HashesSort(hashes)
+	hashToIndex := make(map[plumbing.Hash]uint32, len(hashes))
+	fanout := make([]uint32, 256)
+	for i, hash := range hashes {
+		hashToIndex[hash] = uint32(i)
+		fanout[hash[0]]++
+	}
+
+	// Convert the fanout to cumulative values
+	for i := 1; i <= 0xff; i++ {
+		fanout[i] += fanout[i-1]
+	}
+
+	// Every commit with more than two parents stores all parents but the
+	// first in the large edge list, so all of them have to be counted.
+	var largeEdgesCount uint32
+	for _, hash := range hashes {
+		node, err := nodeByHash(idx, hash)
+		if err != nil {
+			return nil, nil, 0, err
+		}
+		if parents := len(node.ParentHashes); parents > 2 {
+			largeEdgesCount += uint32(parents - 1)
+		}
+	}
+
+	return hashToIndex, fanout, largeEdgesCount, nil
+}
+
+// encodeFileHeader writes the file signature followed by the format version,
+// the hash version, the number of chunks and a zero byte.
+func (e *Encoder) encodeFileHeader(chunkCount int) (err error) {
+	if _, err = e.Write(commitFileSignature); err == nil {
+		_, err = e.Write([]byte{1, 1, byte(chunkCount), 0})
+	}
+	return
+}
+
+// encodeChunkHeaders writes the table of chunk signatures and file offsets,
+// closed by the terminating entry that points past the last chunk.
+func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) {
+	// 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator
+	offset := uint64(8 + len(chunkSignatures)*12 + 12)
+	for i, signature := range chunkSignatures {
+		if _, err = e.Write(signature); err == nil {
+			err = binary.WriteUint64(e, offset)
+		}
+		if err != nil {
+			return
+		}
+		offset += chunkSizes[i]
+	}
+	if _, err = e.Write(lastSignature); err == nil {
+		err = binary.WriteUint64(e, offset)
+	}
+	return
+}
+
+// encodeFanout writes the 256 cumulative fanout counters.
+func (e *Encoder) encodeFanout(fanout []uint32) (err error) {
+	for i := 0; i <= 0xff; i++ {
+		if err = binary.WriteUint32(e, fanout[i]); err != nil {
+			return
+		}
+	}
+	return
+}
+
+// encodeOidLookup writes the commit hashes in the order given.
+func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) {
+	for _, hash := range hashes {
+		if _, err = e.Write(hash[:]); err != nil {
+			return err
+		}
+	}
+	return
+}
+
+// encodeCommitData writes one 36 byte record per hash: the tree hash, the
+// two parent slots, and the generation number packed with the commit time.
+// It returns the large edge list entries collected from commits with more
+// than two parents.
+func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (largeEdges []uint32, err error) {
+	for _, hash := range hashes {
+		var commitData *Node
+		if commitData, err = nodeByHash(idx, hash); err != nil {
+			return
+		}
+		if _, err = e.Write(commitData.TreeHash[:]); err != nil {
+			return
+		}
+
+		parents := commitData.ParentHashes
+		parent1, parent2 := parentNone, parentNone
+		switch len(parents) {
+		case 0:
+			// Root commit: both slots keep parentNone
+		case 1:
+			parent1 = hashToIndex[parents[0]]
+		case 2:
+			parent1 = hashToIndex[parents[0]]
+			parent2 = hashToIndex[parents[1]]
+		default:
+			// Octopus merge: the second slot points into the large edge
+			// list, which holds every parent but the first.
+			parent1 = hashToIndex[parents[0]]
+			parent2 = uint32(len(largeEdges)) | parentOctopusUsed
+			for _, parentHash := range parents[1:] {
+				largeEdges = append(largeEdges, hashToIndex[parentHash])
+			}
+			largeEdges[len(largeEdges)-1] |= parentLast
+		}
+
+		if err = binary.WriteUint32(e, parent1); err == nil {
+			err = binary.WriteUint32(e, parent2)
+		}
+		if err != nil {
+			return
+		}
+
+		// The commit time occupies the low 34 bits; mask it so that an
+		// out-of-range timestamp cannot leak into the generation number.
+		unixTime := uint64(commitData.When.Unix()) & 0x3FFFFFFFF
+		unixTime |= uint64(commitData.Generation) << 34
+		if err = binary.WriteUint64(e, unixTime); err != nil {
+			return
+		}
+	}
+	return
+}
+
+// encodeLargeEdges writes the large edge list entries in order.
+func (e *Encoder) encodeLargeEdges(largeEdges []uint32) (err error) {
+	for _, parent := range largeEdges {
+		if err = binary.WriteUint32(e, parent); err != nil {
+			return
+		}
+	}
+	return
+}
+
+// encodeChecksum appends the SHA-1 digest of all the data written through
+// the encoder so far.
+func (e *Encoder) encodeChecksum() error {
+	_, err := e.Write(e.hash.Sum(nil)[:20])
+	return err
+}
diff --git a/plumbing/format/commitgraph/file.go b/plumbing/format/commitgraph/file.go
new file mode 100644
index 000000000..dce62436c
--- /dev/null
+++ b/plumbing/format/commitgraph/file.go
@@ -0,0 +1,274 @@
+package commitgraph
+
+import (
+	"bytes"
+	encbin "encoding/binary"
+	"errors"
+	"io"
+	"time"
+
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/utils/binary"
+)
+
+var (
+	// ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
+	// file version is not supported.
+	ErrUnsupportedVersion = errors.New("Unsuported version")
+	// ErrUnsupportedHash is returned by OpenFileIndex when the commit graph
+	// hash function is not supported. Currently only SHA-1 is defined and
+	// supported
+	ErrUnsupportedHash = errors.New("Unsuported hash algorithm")
+	// ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit
+	// graph file is corrupted.
+	ErrMalformedCommitGraphFile = errors.New("Malformed commit graph file")
+
+	commitFileSignature    = []byte{'C', 'G', 'P', 'H'}
+	oidFanoutSignature     = []byte{'O', 'I', 'D', 'F'}
+	oidLookupSignature     = []byte{'O', 'I', 'D', 'L'}
+	commitDataSignature    = []byte{'C', 'D', 'A', 'T'}
+	largeEdgeListSignature = []byte{'E', 'D', 'G', 'E'}
+	lastSignature          = []byte{0, 0, 0, 0}
+
+	parentNone        = uint32(0x70000000)
+	parentOctopusUsed = uint32(0x80000000)
+	parentOctopusMask = uint32(0x7fffffff)
+	parentLast        = uint32(0x80000000)
+)
+
+// fileIndex is an Index backed by a serialized commit graph file that is
+// read on demand through an io.ReaderAt.
+type fileIndex struct {
+	reader              io.ReaderAt
+	fanout              [256]int
+	oidFanoutOffset     int64
+	oidLookupOffset     int64
+	commitDataOffset    int64
+	largeEdgeListOffset int64
+}
+
+// OpenFileIndex opens a serialized commit graph file in the format described at
+// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
+func OpenFileIndex(reader io.ReaderAt) (Index, error) {
+	fi := &fileIndex{reader: reader}
+
+	if err := fi.verifyFileHeader(); err != nil {
+		return nil, err
+	}
+	if err := fi.readChunkHeaders(); err != nil {
+		return nil, err
+	}
+	if err := fi.readFanout(); err != nil {
+		return nil, err
+	}
+
+	return fi, nil
+}
+
+// verifyFileHeader checks the file signature and rejects files whose version
+// or hash algorithm byte is not 1.
+func (fi *fileIndex) verifyFileHeader() error {
+	// Verify file signature
+	var signature = make([]byte, 4)
+	if _, err := fi.reader.ReadAt(signature, 0); err != nil {
+		return err
+	}
+	if !bytes.Equal(signature, commitFileSignature) {
+		return ErrMalformedCommitGraphFile
+	}
+
+	// Read and verify the file header
+	var header = make([]byte, 4)
+	if _, err := fi.reader.ReadAt(header, 4); err != nil {
+		return err
+	}
+	if header[0] != 1 {
+		return ErrUnsupportedVersion
+	}
+	if header[1] != 1 {
+		return ErrUnsupportedHash
+	}
+
+	return nil
+}
+
+// readChunkHeaders walks the chunk table that follows the 8 byte file header
+// and records the offset of every known chunk until the terminating entry is
+// reached. It fails when one of the required chunks is missing.
+func (fi *fileIndex) readChunkHeaders() error {
+	var chunkID = make([]byte, 4)
+	for i := 0; ; i++ {
+		chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12)
+		if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil {
+			return err
+		}
+		chunkOffset, err := binary.ReadUint64(chunkHeader)
+		if err != nil {
+			return err
+		}
+
+		if bytes.Equal(chunkID, oidFanoutSignature) {
+			fi.oidFanoutOffset = int64(chunkOffset)
+		} else if bytes.Equal(chunkID, oidLookupSignature) {
+			fi.oidLookupOffset = int64(chunkOffset)
+		} else if bytes.Equal(chunkID, commitDataSignature) {
+			fi.commitDataOffset = int64(chunkOffset)
+		} else if bytes.Equal(chunkID, largeEdgeListSignature) {
+			fi.largeEdgeListOffset = int64(chunkOffset)
+		} else if bytes.Equal(chunkID, lastSignature) {
+			break
+		}
+	}
+
+	if fi.oidFanoutOffset <= 0 || fi.oidLookupOffset <= 0 || fi.commitDataOffset <= 0 {
+		return ErrMalformedCommitGraphFile
+	}
+
+	return nil
+}
+
+// readFanout loads the 256 fanout counters; the last one doubles as the
+// total number of commits stored in the file.
+func (fi *fileIndex) readFanout() error {
+	fanoutReader := io.NewSectionReader(fi.reader, fi.oidFanoutOffset, 256*4)
+	for i := 0; i < 256; i++ {
+		fanoutValue, err := binary.ReadUint32(fanoutReader)
+		if err != nil {
+			return err
+		}
+		if fanoutValue > 0x7fffffff {
+			return ErrMalformedCommitGraphFile
+		}
+		fi.fanout[i] = int(fanoutValue)
+	}
+	return nil
+}
+
+// GetIndexByHash gets the index in the commit graph from commit hash, if available
+func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (int, error) {
+	var oid plumbing.Hash
+
+	// Find the hash in the oid lookup table
+	var low int
+	if h[0] == 0 {
+		low = 0
+	} else {
+		low = fi.fanout[h[0]-1]
+	}
+	high := fi.fanout[h[0]]
+	for low < high {
+		mid := (low + high) >> 1
+		offset := fi.oidLookupOffset + int64(mid)*20
+		if _, err := fi.reader.ReadAt(oid[:], offset); err != nil {
+			return 0, err
+		}
+		cmp := bytes.Compare(h[:], oid[:])
+		if cmp < 0 {
+			high = mid
+		} else if cmp == 0 {
+			return mid, nil
+		} else {
+			low = mid + 1
+		}
+	}
+
+	return 0, plumbing.ErrObjectNotFound
+}
+
+// GetNodeByIndex gets the commit node from the commit graph using index
+// obtained from child node, if available
+func (fi *fileIndex) GetNodeByIndex(idx int) (*Node, error) {
+	if idx < 0 || idx >= fi.fanout[0xff] {
+		return nil, plumbing.ErrObjectNotFound
+	}
+
+	offset := fi.commitDataOffset + int64(idx)*36
+	commitDataReader := io.NewSectionReader(fi.reader, offset, 36)
+
+	treeHash, err := binary.ReadHash(commitDataReader)
+	if err != nil {
+		return nil, err
+	}
+	parent1, err := binary.ReadUint32(commitDataReader)
+	if err != nil {
+		return nil, err
+	}
+	parent2, err := binary.ReadUint32(commitDataReader)
+	if err != nil {
+		return nil, err
+	}
+	genAndTime, err := binary.ReadUint64(commitDataReader)
+	if err != nil {
+		return nil, err
+	}
+
+	var parentIndexes []int
+	if parent2&parentOctopusUsed == parentOctopusUsed {
+		// Octopus merge
+		if fi.largeEdgeListOffset <= 0 {
+			return nil, ErrMalformedCommitGraphFile
+		}
+		parentIndexes = []int{int(parent1 & parentOctopusMask)}
+		offset := fi.largeEdgeListOffset + 4*int64(parent2&parentOctopusMask)
+		buf := make([]byte, 4)
+		for {
+			_, err := fi.reader.ReadAt(buf, offset)
+			if err != nil {
+				return nil, err
+			}
+
+			parent := encbin.BigEndian.Uint32(buf)
+			offset += 4
+			parentIndexes = append(parentIndexes, int(parent&parentOctopusMask))
+			if parent&parentLast == parentLast {
+				break
+			}
+		}
+	} else if parent2 != parentNone {
+		parentIndexes = []int{int(parent1 & parentOctopusMask), int(parent2 & parentOctopusMask)}
+	} else if parent1 != parentNone {
+		parentIndexes = []int{int(parent1 & parentOctopusMask)}
+	}
+
+	parentHashes, err := fi.getHashesFromIndexes(parentIndexes)
+	if err != nil {
+		return nil, err
+	}
+
+	return &Node{
+		TreeHash:      treeHash,
+		ParentIndexes: parentIndexes,
+		ParentHashes:  parentHashes,
+		Generation:    int(genAndTime >> 34),
+		When:          time.Unix(int64(genAndTime&0x3FFFFFFFF), 0),
+	}, nil
+}
+
+// getHashesFromIndexes reads the hash stored at each of the given indexes.
+func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error) {
+	hashes := make([]plumbing.Hash, len(indexes))
+	for i, idx := range indexes {
+		if idx < 0 || idx >= fi.fanout[0xff] {
+			return nil, ErrMalformedCommitGraphFile
+		}
+
+		offset := fi.oidLookupOffset + int64(idx)*20
+		if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil {
+			return nil, err
+		}
+	}
+
+	return hashes, nil
+}
+
+// Hashes returns all the hashes that are available in the index
+func (fi *fileIndex) Hashes() []plumbing.Hash {
+	hashes := make([]plumbing.Hash, fi.fanout[0xff])
+	for i := 0; i < fi.fanout[0xff]; i++ {
+		offset := fi.oidLookupOffset + int64(i)*20
+		if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 {
+			return nil
+		}
+	}
+	return hashes
+}
diff --git a/plumbing/format/commitgraph/memory.go b/plumbing/format/commitgraph/memory.go
new file mode 100644
index 000000000..316bc6d57
--- /dev/null
+++ b/plumbing/format/commitgraph/memory.go
@@ -0,0 +1,80 @@
+package commitgraph
+
+import (
+	"gopkg.in/src-d/go-git.v4/plumbing"
+)
+
+// MemoryIndex is an Index that keeps all of its commit nodes in memory and
+// can be filled incrementally with Add.
+type MemoryIndex struct {
+	commitData []*Node
+	indexMap   map[plumbing.Hash]int
+}
+
+// NewMemoryIndex creates in-memory commit graph representation
+func NewMemoryIndex() *MemoryIndex {
+	return &MemoryIndex{
+		indexMap: make(map[plumbing.Hash]int),
+	}
+}
+
+// GetIndexByHash gets the index in the commit graph from commit hash, if available
+func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (int, error) {
+	i, ok := mi.indexMap[h]
+	if ok {
+		return i, nil
+	}
+
+	return 0, plumbing.ErrObjectNotFound
+}
+
+// GetNodeByIndex gets the commit node from the commit graph using index
+// obtained from child node, if available. The parent indexes of the node
+// are resolved on first access and cached on the node.
+func (mi *MemoryIndex) GetNodeByIndex(i int) (*Node, error) {
+	if i < 0 || i >= len(mi.commitData) {
+		return nil, plumbing.ErrObjectNotFound
+	}
+
+	node := mi.commitData[i]
+
+	// Map parent hashes to parent indexes
+	if node.ParentIndexes == nil {
+		parentIndexes := make([]int, len(node.ParentHashes))
+		for i, parentHash := range node.ParentHashes {
+			var err error
+			if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil {
+				return nil, err
+			}
+		}
+		node.ParentIndexes = parentIndexes
+	}
+
+	return node, nil
+}
+
+// Hashes returns all the hashes that are available in the index
+func (mi *MemoryIndex) Hashes() []plumbing.Hash {
+	hashes := make([]plumbing.Hash, 0, len(mi.indexMap))
+	for k := range mi.indexMap {
+		hashes = append(hashes, k)
+	}
+	return hashes
+}
+
+// Add adds new node to the memory index. Adding a hash that is already
+// present replaces its node in place, so every hash keeps a single index
+// and the node list stays in step with Hashes.
+func (mi *MemoryIndex) Add(hash plumbing.Hash, node *Node) error {
+	// The parent indexes are calculated lazily in GetNodeByIndex
+	// which allows adding nodes out of order as long as all parents
+	// are eventually resolved
+	node.ParentIndexes = nil
+	if i, ok := mi.indexMap[hash]; ok {
+		mi.commitData[i] = node
+		return nil
+	}
+	mi.indexMap[hash] = len(mi.commitData)
+	mi.commitData = append(mi.commitData, node)
+	return nil
+}