From 84e51a52364ed184e33969f6885fe15241014956 Mon Sep 17 00:00:00 2001 From: Ivan Martinez Date: Wed, 29 Apr 2020 09:42:12 -0400 Subject: [PATCH] Add prometheus metrics test into E2E (#5673) * Progress on metrics tests * Progress on metrics test * Get metrics E2E working * Merge branch 'master' of https://github.com/prysmaticlabs/prysm into e2e-metrics * Complete most of metrics tests * Change E2E polling to the middle of a slot, instead of at the start of the middle * Add metrics to all E2E * Remove extra types * Update endtoend/evaluators/metrics.go Co-Authored-By: Preston Van Loon * Merge branch 'master' into e2e-metrics * Add more comments, address feedback * Merge branch 'e2e-metrics' of https://github.com/prysmaticlabs/prysm into e2e-metrics * Fix build * Remove unneeded comment * Set E2E_EPOCHS back * Improve sync testing reliability * Remove metrics check from slashing * Improve time allotted to sync * Remove possibly flaky sync test change --- endtoend/endtoend_test.go | 40 ++--- endtoend/evaluators/BUILD.bazel | 2 + endtoend/evaluators/metrics.go | 206 +++++++++++++++++++++++ endtoend/evaluators/node.go | 24 +-- endtoend/long_minimal_e2e_test.go | 1 + endtoend/minimal_antiflake_e2e_1_test.go | 1 + endtoend/minimal_antiflake_e2e_2_test.go | 1 + endtoend/minimal_e2e_test.go | 1 + 8 files changed, 242 insertions(+), 34 deletions(-) create mode 100644 endtoend/evaluators/metrics.go diff --git a/endtoend/endtoend_test.go b/endtoend/endtoend_test.go index 5d4095461869..c32b0d443dca 100644 --- a/endtoend/endtoend_test.go +++ b/endtoend/endtoend_test.go @@ -51,6 +51,17 @@ func runEndToEndTest(t *testing.T, config *types.E2EConfig) { return } + if config.TestSlasher { + slasherPIDs := components.StartSlashers(t) + defer helpers.KillProcesses(t, slasherPIDs) + } + if config.TestDeposits { + valCount := int(params.BeaconConfig().MinGenesisActiveValidatorCount) / e2e.TestParams.BeaconNodeCount + valPid := components.StartNewValidatorClient(t, config, valCount, e2e.TestParams.BeaconNodeCount) + defer helpers.KillProcesses(t, []int{valPid}) + components.SendAndMineDeposits(t, keystorePath, valCount, int(params.BeaconConfig().MinGenesisActiveValidatorCount)) + } + conns := make([]*grpc.ClientConn, e2e.TestParams.BeaconNodeCount) for i := 0; i < len(conns); i++ { conn, err := grpc.Dial(fmt.Sprintf("127.0.0.1:%d", e2e.TestParams.BeaconNodeRPCPort+i), grpc.WithInsecure()) @@ -69,22 +80,14 @@ func runEndToEndTest(t *testing.T, config *types.E2EConfig) { if err != nil { t.Fatal(err) } - // Small offset so evaluators perform in the middle of an epoch. - epochSeconds := params.BeaconConfig().SecondsPerSlot * params.BeaconConfig().SlotsPerEpoch - genesisTime := time.Unix(genesis.GenesisTime.Seconds+int64(epochSeconds/2), 0) - if config.TestSlasher { - slasherPIDs := components.StartSlashers(t) - defer helpers.KillProcesses(t, slasherPIDs) - } - if config.TestDeposits { - valCount := int(params.BeaconConfig().MinGenesisActiveValidatorCount) / e2e.TestParams.BeaconNodeCount - valPid := components.StartNewValidatorClient(t, config, valCount, e2e.TestParams.BeaconNodeCount) - defer helpers.KillProcesses(t, []int{valPid}) - components.SendAndMineDeposits(t, keystorePath, valCount, int(params.BeaconConfig().MinGenesisActiveValidatorCount)) - } + epochSeconds := params.BeaconConfig().SecondsPerSlot * params.BeaconConfig().SlotsPerEpoch + // Adding a half slot here to ensure the requests are in the middle of an epoch. + middleOfEpoch := int64(epochSeconds/2 + (params.BeaconConfig().SecondsPerSlot / 2)) + // Offsetting the ticker from genesis so it ticks in the middle of an epoch, in order to keep results consistent. + tickingStartTime := time.Unix(genesis.GenesisTime.Seconds+middleOfEpoch, 0) - ticker := helpers.GetEpochTicker(genesisTime, epochSeconds) + ticker := helpers.GetEpochTicker(tickingStartTime, epochSeconds) for currentEpoch := range ticker.C() { for _, evaluator := range config.Evaluators { // Only run if the policy says so. @@ -119,11 +122,10 @@ func runEndToEndTest(t *testing.T, config *types.E2EConfig) { } conns = append(conns, syncConn) - // Sleep for a few epochs to give time for the newly started node to sync. - extraTimeToSync := (config.EpochsToRun+config.EpochsToRun/2)*epochSeconds + 60 - genesisTime.Add(time.Duration(extraTimeToSync) * time.Second) - // Wait until middle of epoch to request to prevent conflicts. - time.Sleep(time.Until(genesisTime)) + // Sleep a second for every 4 blocks that need to be synced for the newly started node. + extraSecondsToSync := (config.EpochsToRun)*epochSeconds + (params.BeaconConfig().SlotsPerEpoch / 4 * config.EpochsToRun) + waitForSync := tickingStartTime.Add(time.Duration(extraSecondsToSync) * time.Second) + time.Sleep(time.Until(waitForSync)) syncLogFile, err := os.Open(path.Join(e2e.TestParams.LogPath, fmt.Sprintf(e2e.BeaconNodeLogFileName, index))) if err != nil { diff --git a/endtoend/evaluators/BUILD.bazel b/endtoend/evaluators/BUILD.bazel index 62f0da95a50d..c5fa5ccafdbc 100644 --- a/endtoend/evaluators/BUILD.bazel +++ b/endtoend/evaluators/BUILD.bazel @@ -5,6 +5,7 @@ go_library( testonly = True, srcs = [ "finality.go", + "metrics.go", "node.go", "slashing.go", "validator.go", @@ -16,6 +17,7 @@ go_library( "//endtoend/params:go_default_library", "//endtoend/types:go_default_library", "//shared/bytesutil:go_default_library", + "//shared/p2putils:go_default_library", "//shared/params:go_default_library", "//shared/sliceutil:go_default_library", "//shared/testutil:go_default_library", diff --git a/endtoend/evaluators/metrics.go b/endtoend/evaluators/metrics.go new file mode 100644 index 000000000000..07b1148d71ab --- /dev/null +++ b/endtoend/evaluators/metrics.go @@ -0,0 +1,206 @@ +package evaluators + +import ( + "context" + "fmt" + "io/ioutil" + "net/http" + "strconv" + "strings" + "time" + + ptypes "github.com/gogo/protobuf/types" + "github.com/pkg/errors" + eth "github.com/prysmaticlabs/ethereumapis/eth/v1alpha1" + e2e "github.com/prysmaticlabs/prysm/endtoend/params" + "github.com/prysmaticlabs/prysm/endtoend/types" + "github.com/prysmaticlabs/prysm/shared/p2putils" + "google.golang.org/grpc" +) + +// MetricsCheck performs a check on metrics to make sure caches are functioning, and +// overall health is good. Not checking the first epoch so the sample size isn't too small. +var MetricsCheck = types.Evaluator{ + Name: "metrics_check_epoch_%d", + Policy: afterNthEpoch(0), + Evaluation: metricsTest, +} + +type equalityTest struct { + name string + topic string + value int +} + +type comparisonTest struct { + name string + topic1 string + topic2 string + expectedComparison float64 +} + +var metricLessThanTests = []equalityTest{ + { + name: "memory usage", + topic: "go_memstats_alloc_bytes", + value: 100000000, // 100 Mb + }, +} + +var metricComparisonTests = []comparisonTest{ + { + name: "beacon aggregate and proof", + topic1: "p2p_message_failed_validation_total{topic=\"/eth2/%x/beacon_aggregate_and_proof/ssz_snappy\"}", + topic2: "p2p_message_received_total{topic=\"/eth2/%x/beacon_aggregate_and_proof/ssz_snappy\"}", + expectedComparison: 0.8, + }, + { + name: "committee index 0 beacon attestation", + topic1: "p2p_message_failed_validation_total{topic=\"/eth2/%x/committee_index0_beacon_attestation/ssz_snappy\"}", + topic2: "p2p_message_received_total{topic=\"/eth2/%x/committee_index0_beacon_attestation/ssz_snappy\"}", + expectedComparison: 0.1, + }, + { + name: "committee index 1 beacon attestation", + topic1: "p2p_message_failed_validation_total{topic=\"/eth2/%x/committee_index1_beacon_attestation/ssz_snappy\"}", + topic2: "p2p_message_received_total{topic=\"/eth2/%x/committee_index1_beacon_attestation/ssz_snappy\"}", + expectedComparison: 0.1, + }, + { + name: "committee cache", + topic1: "committee_cache_miss", + topic2: "committee_cache_hit", + expectedComparison: 0.01, + }, + { + name: "hot state cache", + topic1: "hot_state_cache_miss", + topic2: "hot_state_cache_hit", + expectedComparison: 0.01, + }, +} + +func metricsTest(conns ...*grpc.ClientConn) error { + for i := 0; i < len(conns); i++ { + response, err := http.Get(fmt.Sprintf("http://localhost:%d/metrics", e2e.TestParams.BeaconNodeMetricsPort+i)) + if err != nil { + return errors.Wrap(err, "failed to reach prometheus metrics page") + } + dataInBytes, err := ioutil.ReadAll(response.Body) + if err != nil { + return err + } + pageContent := string(dataInBytes) + if err := response.Body.Close(); err != nil { + return err + } + + genesis, err := eth.NewNodeClient(conns[i]).GetGenesis(context.Background(), &ptypes.Empty{}) + if err != nil { + return err + } + forkDigest, err := p2putils.CreateForkDigest(time.Unix(genesis.GenesisTime.Seconds, 0), genesis.GenesisValidatorsRoot) + if err != nil { + return err + } + + chainHead, err := eth.NewBeaconChainClient(conns[i]).GetChainHead(context.Background(), &ptypes.Empty{}) + if err != nil { + return err + } + timeSlot, err := getValueOfTopic(pageContent, "beacon_clock_time_slot") + if err != nil { + return err + } + if chainHead.HeadSlot != uint64(timeSlot) { + return fmt.Errorf("expected metrics slot to equal chain head slot, expected %d, received %d", chainHead.HeadSlot, timeSlot) + } + + for _, test := range metricLessThanTests { + topic := test.topic + if strings.Contains(topic, "%x") { + topic = fmt.Sprintf(topic, forkDigest) + } + if err := metricCheckLessThan(pageContent, topic, test.value); err != nil { + return errors.Wrapf(err, "failed %s check", test.name) + } + } + for _, test := range metricComparisonTests { + topic1 := test.topic1 + if strings.Contains(topic1, "%x") { + topic1 = fmt.Sprintf(topic1, forkDigest) + } + topic2 := test.topic2 + if strings.Contains(topic2, "%x") { + topic2 = fmt.Sprintf(topic2, forkDigest) + } + if err := metricCheckComparison(pageContent, topic1, topic2, test.expectedComparison); err != nil { + return err + } + } + } + return nil +} + +func metricCheckLessThan(pageContent string, topic string, value int) error { + topicValue, err := getValueOfTopic(pageContent, topic) + if err != nil { + return err + } + if topicValue >= value { + return fmt.Errorf( + "unexpected result for metric %s, expected less than %d, received %d", + topic, + value, + topicValue, + ) + } + return nil +} + +func metricCheckComparison(pageContent string, topic1 string, topic2 string, comparison float64) error { + topic2Value, err := getValueOfTopic(pageContent, topic2) + if err != nil { + return err + } + topic1Value, err := getValueOfTopic(pageContent, topic1) + // If we can't find the first topic (error metrics), then assume the test passes. + if topic1Value == -1 && topic2Value != -1 { + return nil + } + if err != nil { + return err + } + topicComparison := float64(topic1Value) / float64(topic2Value) + if topicComparison >= comparison { + return fmt.Errorf( + "unexpected result for comparison between metric %s and metric %s, expected comparison to be %.2f, received %.2f", + topic1, + topic2, + comparison, + topicComparison, + ) + } + return nil +} + +func getValueOfTopic(pageContent string, topic string) (int, error) { + // Adding a space to search exactly. + startIdx := strings.LastIndex(pageContent, topic+" ") + if startIdx == -1 { + return -1, fmt.Errorf("did not find requested text %s in %s", topic, pageContent) + } + endOfTopic := startIdx + len(topic) + // Adding 1 to skip the space after the topic name. + startOfValue := endOfTopic + 1 + endOfValue := strings.Index(pageContent[startOfValue:], "\n") + if endOfValue == -1 { + return -1, fmt.Errorf("could not find next space in %s", pageContent[startOfValue:]) + } + metricValue := pageContent[startOfValue : startOfValue+endOfValue] + floatResult, err := strconv.ParseFloat(metricValue, 64) + if err != nil { + return -1, errors.Wrapf(err, "could not parse %s for int", metricValue) + } + return int(floatResult), nil +} diff --git a/endtoend/evaluators/node.go b/endtoend/evaluators/node.go index f273333b2c44..f5cceeff35c9 100644 --- a/endtoend/evaluators/node.go +++ b/endtoend/evaluators/node.go @@ -138,43 +138,37 @@ func allNodesHaveSameHead(conns ...*grpc.ClientConn) error { } } - for i, epoch := range headEpochs { - if headEpochs[0] != epoch { + for i := 0; i < len(conns); i++ { + if headEpochs[0] != headEpochs[i] { return fmt.Errorf( "received conflicting head epochs on node %d, expected %d, received %d", i, headEpochs[0], - epoch, + headEpochs[i], ) } - } - for i, root := range justifiedRoots { - if !bytes.Equal(justifiedRoots[0], root) { + if !bytes.Equal(justifiedRoots[0], justifiedRoots[i]) { return fmt.Errorf( "received conflicting justified block roots on node %d, expected %#x, received %#x", i, justifiedRoots[0], - root, + justifiedRoots[i], ) } - } - for i, root := range prevJustifiedRoots { - if !bytes.Equal(prevJustifiedRoots[0], root) { + if !bytes.Equal(prevJustifiedRoots[0], prevJustifiedRoots[i]) { return fmt.Errorf( "received conflicting previous justified block roots on node %d, expected %#x, received %#x", i, prevJustifiedRoots[0], - root, + prevJustifiedRoots[i], ) } - } - for i, root := range finalizedRoots { - if !bytes.Equal(finalizedRoots[0], root) { + if !bytes.Equal(finalizedRoots[0], finalizedRoots[i]) { return fmt.Errorf( "received conflicting finalized epoch roots on node %d, expected %#x, received %#x", i, finalizedRoots[0], - root, + finalizedRoots[i], ) } } diff --git a/endtoend/long_minimal_e2e_test.go b/endtoend/long_minimal_e2e_test.go index 42a9e190e4d7..f940fb829d75 100644 --- a/endtoend/long_minimal_e2e_test.go +++ b/endtoend/long_minimal_e2e_test.go @@ -41,6 +41,7 @@ func TestEndToEnd_Long_MinimalConfig(t *testing.T) { ev.ValidatorsAreActive, ev.ValidatorsParticipating, ev.FinalizationOccurs, + ev.MetricsCheck, ev.ProcessesDepositedValidators, ev.DepositedValidatorsAreActive, }, diff --git a/endtoend/minimal_antiflake_e2e_1_test.go b/endtoend/minimal_antiflake_e2e_1_test.go index e5bad3b823a9..daf0e894de66 100644 --- a/endtoend/minimal_antiflake_e2e_1_test.go +++ b/endtoend/minimal_antiflake_e2e_1_test.go @@ -25,6 +25,7 @@ func TestEndToEnd_AntiFlake_MinimalConfig_1(t *testing.T) { ev.HealthzCheck, ev.ValidatorsAreActive, ev.ValidatorsParticipating, + ev.MetricsCheck, }, } if err := e2eParams.Init(4); err != nil { diff --git a/endtoend/minimal_antiflake_e2e_2_test.go b/endtoend/minimal_antiflake_e2e_2_test.go index c23ee2e9a992..cf29808ab262 100644 --- a/endtoend/minimal_antiflake_e2e_2_test.go +++ b/endtoend/minimal_antiflake_e2e_2_test.go @@ -25,6 +25,7 @@ func TestEndToEnd_AntiFlake_MinimalConfig_2(t *testing.T) { ev.HealthzCheck, ev.ValidatorsAreActive, ev.ValidatorsParticipating, + ev.MetricsCheck, }, } if err := e2eParams.Init(4); err != nil { diff --git a/endtoend/minimal_e2e_test.go b/endtoend/minimal_e2e_test.go index 663a9559ed18..d85216e240b7 100644 --- a/endtoend/minimal_e2e_test.go +++ b/endtoend/minimal_e2e_test.go @@ -26,6 +26,7 @@ func TestEndToEnd_MinimalConfig(t *testing.T) { ev.ValidatorsAreActive, ev.ValidatorsParticipating, ev.FinalizationOccurs, + ev.MetricsCheck, }, } if err := e2eParams.Init(4); err != nil {