Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proof of concept: Experimental support for git commit graph files #6701

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions models/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -2240,6 +2240,14 @@ func GitFsck() {
func(idx int, bean interface{}) error {
repo := bean.(*Repository)
repoPath := repo.RepoPath()
// TODO: Move this elsewhere
if gitRepo, err := git.OpenRepository(repoPath); err == nil {
log.Trace("Building commit graph index")
if err := gitRepo.BuildCommitGraph(false); err != nil {
desc := fmt.Sprintf("Failed to build commit graph (%s): %v", repoPath, err)
log.Warn(desc)
}
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is inserted here only to allow people to play with the feature.

Running a health check for all the repositories will also rebuild the commit graph files (http://gitea/admin?op=9). It is entirely possible to generate the commit graph file using the command line git commit-graph write tool instead. The bloom filter experiment is enabled by changing BuildCommitGraph(false) to BuildCommitGraph(true) in the above code. It will significantly increase the size of the commit graph files and the time to build them, but in many cases it will also significantly speed up history queries on large repositories (unless I broke it :D).

log.Trace("Running health check on repository %s", repoPath)
if err := git.Fsck(repoPath, setting.Cron.RepoHealthCheck.Timeout, setting.Cron.RepoHealthCheck.Args...); err != nil {
desc := fmt.Sprintf("Failed to health check repository (%s): %v", repoPath, err)
Expand Down
92 changes: 92 additions & 0 deletions modules/commitgraph/plumbing/format/commitgraph/bloom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package commitgraph

import (
"encoding/binary"
"hash"
"hash/fnv"

"github.com/dchest/siphash"
)

// filter derives k bucket positions in [0, m) for a byte string by double
// hashing over a single 64-bit FNV digest.
type filter struct {
	m uint32 // number of buckets
	k uint32 // probe positions per key
	h hash.Hash64
}

// bits returns the k bucket positions for data. The 64-bit digest is split
// into two 32-bit halves (lo, hi) and position i is (lo + hi*i) mod m —
// the classic two-hash construction for bloom filters.
func (f *filter) bits(data []byte) []uint32 {
	f.h.Reset()
	f.h.Write(data)
	digest := f.h.Sum(nil)
	lo := binary.BigEndian.Uint32(digest[4:8])
	hi := binary.BigEndian.Uint32(digest[0:4])
	positions := make([]uint32, 0, f.k)
	for i := uint32(0); i < f.k; i++ {
		positions = append(positions, (lo+hi*i)%f.m)
	}
	return positions
}

// newFilter returns a filter with m buckets and k probes backed by FNV-64.
func newFilter(m, k uint32) *filter {
	return &filter{m: m, k: k, h: fnv.New64()}
}

// Geometry of the per-commit path bloom filter: sized for roughly n=512
// paths at m=10 bits per path (5120 bits = 640 bytes), probed at k=7
// positions per path. Keeping these in one place prevents Test and Add
// from drifting apart.
const (
	bloomFilterBits   = 5120                // total bits in the filter (n*m)
	bloomFilterBytes  = bloomFilterBits / 8 // backing array length (640)
	bloomFilterHashes = 7                   // probe positions (k) per path
)

// BloomPathFilter is a probabilistic data structure that helps determining
// whether a path was changed.
//
// The implementation uses a standard bloom filter with n=512, m=10, k=7
// parameters using the 64-bit SipHash hash function with zero key.
type BloomPathFilter struct {
	b []byte // bit array, bloomFilterBytes long
}

// Test checks whether a path was previously added to the filter. Returns
// false if the path is definitely not present in the filter. Returns true
// if the path could be present in the filter (false positives possible).
func (f *BloomPathFilter) Test(path string) bool {
	// Split one 64-bit SipHash digest into two 32-bit halves and derive
	// the probe positions by double hashing: (a + b*i) mod bits.
	d := siphash.Hash(0, 0, []byte(path))
	a := uint32(d)
	b := uint32(d >> 32)
	var i uint32
	for i = 0; i < bloomFilterHashes; i++ {
		bit := (a + b*i) % bloomFilterBits
		if f.b[bit>>3]&(1<<(bit&7)) == 0 {
			return false
		}
	}
	return true
}

// Add records path in the filter so that later Test(path) calls return true.
func (f *BloomPathFilter) Add(path string) {
	// Same probe-position derivation as Test.
	d := siphash.Hash(0, 0, []byte(path))
	a := uint32(d)
	b := uint32(d >> 32)
	var i uint32
	for i = 0; i < bloomFilterHashes; i++ {
		bit := (a + b*i) % bloomFilterBits
		f.b[bit>>3] |= 1 << (bit & 7)
	}
}

// Data returns the raw bit-array bytes. The slice is shared with the
// filter, not copied.
func (f *BloomPathFilter) Data() []byte {
	return f.b
}

// NewBloomPathFilter creates a new empty bloom filter.
func NewBloomPathFilter() *BloomPathFilter {
	return &BloomPathFilter{make([]byte, bloomFilterBytes)}
}

// LoadBloomPathFilter creates a bloom filter from a byte array previously
// returned by Data. The slice is used as-is, not copied.
// NOTE(review): data shorter than bloomFilterBytes will make Test/Add
// panic on an out-of-range bit — confirm callers always pass 640 bytes.
func LoadBloomPathFilter(data []byte) *BloomPathFilter {
	return &BloomPathFilter{data}
}
38 changes: 38 additions & 0 deletions modules/commitgraph/plumbing/format/commitgraph/commitgraph.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package commitgraph

import (
"time"

"gopkg.in/src-d/go-git.v4/plumbing"
)

// Node is a reduced representation of Commit as presented in the commit graph
// file. It is merely useful as an optimization for walking the commit graphs.
type Node struct {
	// TreeHash is the hash of the root tree of the commit.
	TreeHash plumbing.Hash
	// ParentIndexes are the indexes of the parent commits of the commit.
	ParentIndexes []int
	// ParentHashes are the hashes of the parent commits of the commit.
	ParentHashes []plumbing.Hash
	// Generation is the pre-computed generation number stored in the commit
	// graph, or zero if not available.
	Generation int
	// When is the timestamp of the commit.
	When time.Time
}

// Index represents a commit graph and allows indexed access to its nodes
// using the commit object hash.
type Index interface {
	// GetIndexByHash gets the index in the commit graph from the commit
	// hash, if available.
	GetIndexByHash(h plumbing.Hash) (int, error)
	// GetNodeByIndex gets the commit node from the commit graph using an
	// index obtained from a child node, if available.
	GetNodeByIndex(i int) (*Node, error)
	// Hashes returns all the hashes that are available in the index.
	Hashes() []plumbing.Hash

	// GetBloomFilterByIndex gets the bloom filter for files changed in the
	// commit, if available (experimental extension).
	GetBloomFilterByIndex(i int) (*BloomPathFilter, error)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package commitgraph_test

import (
"testing"

"code.gitea.io/gitea/modules/commitgraph/plumbing/format/commitgraph"
"golang.org/x/exp/mmap"

. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
"gopkg.in/src-d/go-git.v4/plumbing"
)

// Test wires gocheck's suite runner into the standard "go test" harness.
func Test(t *testing.T) { TestingT(t) }

// CommitgraphSuite is the gocheck test suite; embedding fixtures.Suite
// provides the shared go-git fixture setup and teardown.
type CommitgraphSuite struct {
	fixtures.Suite
}

// Register the suite with gocheck.
var _ = Suite(&CommitgraphSuite{})

// TestDecode mmaps a pre-built commit-graph fixture and verifies that a
// known root commit can be looked up and reports no parents.
func (s *CommitgraphSuite) TestDecode(c *C) {
	// Use forward slashes: Go resolves them on every platform, whereas
	// the original backslash-escaped path only worked on Windows.
	reader, err := mmap.Open("../../tests/testgit/objects/info/commit-graph")
	c.Assert(err, IsNil)
	// Close even if a later assertion aborts the test.
	defer reader.Close()

	index, err := commitgraph.OpenFileIndex(reader)
	c.Assert(err, IsNil)

	nodeIndex, err := index.GetIndexByHash(plumbing.NewHash("5aa811d3c2f6d5d6e928a4acacd15248928c26d0"))
	c.Assert(err, IsNil)
	node, err := index.GetNodeByIndex(nodeIndex)
	c.Assert(err, IsNil)
	c.Assert(len(node.ParentIndexes), Equals, 0)
}
197 changes: 197 additions & 0 deletions modules/commitgraph/plumbing/format/commitgraph/encoder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
package commitgraph

import (
"bytes"
"crypto/sha1"
"hash"
"io"
"math"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/binary"
)

// Encoder writes MemoryIndex structs to an output stream.
type Encoder struct {
io.Writer
hash hash.Hash
}

// NewEncoder returns a new stream encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
h := sha1.New()
mw := io.MultiWriter(w, h)
return &Encoder{mw, h}
}

// Encode writes idx to the underlying stream in the commit-graph file
// layout: header, chunk table, OID fanout, OID lookup, commit data,
// optional experimental bloom chunks, optional large-edge list, and a
// trailing SHA-1 checksum of everything written.
func (e *Encoder) Encode(idx Index) error {
	// Get all the hashes in the memory index
	hashes := idx.Hashes()

	// Sort the hashes and build our index
	plumbing.HashesSort(hashes)
	hashToIndex := make(map[plumbing.Hash]uint32)
	hashFirstToCount := make(map[byte]uint32)
	for i, hash := range hashes {
		hashToIndex[hash] = uint32(i)
		hashFirstToCount[hash[0]]++
	}

	// Find out if we will need large edge table
	// (commits with more than two parents spill the extras there).
	chunkCount := 3
	hasLargeEdges := false
	for i := 0; i < len(hashes); i++ {
		v, _ := idx.GetNodeByIndex(i)
		if len(v.ParentHashes) > 2 {
			hasLargeEdges = true
			chunkCount++
			break
		}
	}

	// Find out if the bloom filters are present
	hasBloomFilters := false
	sparseBloomFilters := false
	bloomFiltersCount := 0
	for i := 0; i < len(hashes); i++ {
		_, err := idx.GetBloomFilterByIndex(i)
		if err == nil {
			bloomFiltersCount++
		}
	}
	if bloomFiltersCount > 0 {
		hasBloomFilters = true
		chunkCount++
		// NOTE(review): bloomFiltersCount can never exceed len(hashes),
		// so this threshold (4/3 of len) is satisfied for any len >= 3 and
		// sparse mode becomes effectively unconditional. The intent was
		// presumably len(hashes)*3/4 ("sparse when fewer than 75% of
		// commits have a filter") — confirm before relying on this.
		if bloomFiltersCount < (len(hashes) * 4 / 3) {
			sparseBloomFilters = true
			chunkCount++
		}
	}

	// Precomputed chunk offsets: 20-byte header region plus 12 bytes per
	// chunk-table entry, then each chunk's fixed-size records.
	var fanoutOffset = uint64(20 + (chunkCount * 12))
	var oidLookupOffset = fanoutOffset + 4*256
	var commitDataOffset = oidLookupOffset + uint64(len(hashes))*20
	var bloomOffset = commitDataOffset + uint64(len(hashes))*36
	var sparseBloomOffset = bloomOffset + uint64(bloomFiltersCount)*640
	var largeEdgeListOffset uint64
	var largeEdges []uint32

	// Write header
	// TODO: Error handling
	// (most writes below ignore errors; the MultiWriter hash still sees
	// whatever was attempted, so a failed write corrupts the checksum too)
	e.Write(commitFileSignature)
	// version 1, hash version 1, chunk count, zero padding byte.
	e.Write([]byte{1, 1, byte(chunkCount), 0})

	// Write chunk headers
	e.Write(oidFanoutSignature)
	binary.WriteUint64(e, fanoutOffset)
	e.Write(oidLookupSignature)
	binary.WriteUint64(e, oidLookupOffset)
	e.Write(commitDataSignature)
	binary.WriteUint64(e, commitDataOffset)
	if hasBloomFilters {
		e.Write(experimentalBloomSignature)
		binary.WriteUint64(e, bloomOffset)
		if sparseBloomFilters {
			e.Write(experimentalSparseBloomSignature)
			binary.WriteUint64(e, sparseBloomOffset)
			// Sparse bitmap is one bit per commit, rounded up to bytes.
			largeEdgeListOffset = sparseBloomOffset + uint64(len(hashes)+7)/8
		} else {
			// Dense mode stores one 640-byte filter per commit.
			largeEdgeListOffset = bloomOffset + 640*uint64(len(hashes))
		}
	}
	if hasLargeEdges {
		e.Write(largeEdgeListSignature)
		binary.WriteUint64(e, largeEdgeListOffset)
	}
	// Chunk-table terminator: zero signature and zero offset.
	e.Write([]byte{0, 0, 0, 0})
	binary.WriteUint64(e, uint64(0))

	// Write fanout: entry i holds the cumulative number of OIDs whose
	// first byte is <= i.
	var cumulative uint32
	for i := 0; i <= 0xff; i++ {
		if err := binary.WriteUint32(e, hashFirstToCount[byte(i)]+cumulative); err != nil {
			return err
		}
		cumulative += hashFirstToCount[byte(i)]
	}

	// Write OID lookup (the sorted 20-byte hashes).
	for _, hash := range hashes {
		if _, err := e.Write(hash[:]); err != nil {
			return err
		}
	}

	// Write commit data: per commit, the root tree hash, two parent slots,
	// and a packed generation/timestamp word (36 bytes total).
	for _, hash := range hashes {
		origIndex, _ := idx.GetIndexByHash(hash)
		commitData, _ := idx.GetNodeByIndex(origIndex)
		if _, err := e.Write(commitData.TreeHash[:]); err != nil {
			return err
		}

		if len(commitData.ParentHashes) == 0 {
			binary.WriteUint32(e, parentNone)
			binary.WriteUint32(e, parentNone)
		} else if len(commitData.ParentHashes) == 1 {
			binary.WriteUint32(e, hashToIndex[commitData.ParentHashes[0]])
			binary.WriteUint32(e, parentNone)
		} else if len(commitData.ParentHashes) == 2 {
			binary.WriteUint32(e, hashToIndex[commitData.ParentHashes[0]])
			binary.WriteUint32(e, hashToIndex[commitData.ParentHashes[1]])
		} else if len(commitData.ParentHashes) > 2 {
			// Octopus merge: second slot is a masked index into the large
			// edge list; the run there is terminated with parentLast.
			binary.WriteUint32(e, hashToIndex[commitData.ParentHashes[0]])
			binary.WriteUint32(e, uint32(len(largeEdges))|parentOctopusMask)
			for _, parentHash := range commitData.ParentHashes[1:] {
				largeEdges = append(largeEdges, hashToIndex[parentHash])
			}
			largeEdges[len(largeEdges)-1] |= parentLast
		}

		// Pack commit time into the low 34 bits and the generation number
		// above bit 34, mirroring git's commit-data chunk layout.
		unixTime := uint64(commitData.When.Unix())
		unixTime |= uint64(commitData.Generation) << 34
		binary.WriteUint64(e, unixTime)
	}

	// Write bloom filters (experimental)
	if hasBloomFilters {
		var sparseBloomBitset []byte

		if sparseBloomFilters {
			// Start with all bits set; clear the bit of every commit that
			// has no filter.
			sparseBloomBitset = bytes.Repeat([]byte{0xff}, (len(hashes)+7)/8)
		}

		for i, hash := range hashes {
			origIndex, _ := idx.GetIndexByHash(hash)
			if bloomFilter, err := idx.GetBloomFilterByIndex(origIndex); err != nil {
				// Missing filter: dense mode writes an all-ones 640-byte
				// placeholder (80 x 8 bytes, matches everything); sparse
				// mode just clears the commit's presence bit.
				if !sparseBloomFilters {
					for i := 0; i < 80; i++ {
						binary.WriteUint64(e, math.MaxUint64)
					}
				} else {
					sparseBloomBitset[i/8] &= ^(1 << uint(i%8))
				}
			} else {
				e.Write(bloomFilter.Data())
			}
		}

		if sparseBloomFilters {
			e.Write(sparseBloomBitset)
		}
	}

	// Write large edges if necessary
	if hasLargeEdges {
		for _, parent := range largeEdges {
			binary.WriteUint32(e, parent)
		}
	}

	// Write checksum: SHA-1 of everything written so far (the MultiWriter
	// in NewEncoder has been hashing every byte as it went out).
	if _, err := e.Write(e.hash.Sum(nil)[:20]); err != nil {
		return err
	}

	return nil
}
Loading