Skip to content

Commit

Permalink
Download a flare or logs when the Agent failed to start (#26736)
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorentClarret committed Jun 17, 2024
1 parent 7de29b3 commit 4e877e6
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 3 deletions.
102 changes: 99 additions & 3 deletions test/new-e2e/pkg/utils/e2e/client/agent_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package client

import (
"fmt"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/runner"
"regexp"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -40,7 +42,7 @@ func NewHostAgentClient(context e2e.Context, hostOutput remote.HostOutput, waitF
commandRunner := newAgentCommandRunner(context.T(), ae)

if params.ShouldWaitForReady {
if err := commandRunner.waitForReadyTimeout(agentReadyTimeout); err != nil {
if err := waitForReadyTimeout(context.T(), host, commandRunner, agentReadyTimeout); err != nil {
return nil, err
}
}
Expand All @@ -61,7 +63,7 @@ func NewHostAgentClientWithParams(context e2e.Context, hostOutput remote.HostOut
commandRunner := newAgentCommandRunner(context.T(), ae)

if params.ShouldWaitForReady {
if err := commandRunner.waitForReadyTimeout(agentReadyTimeout); err != nil {
if err := waitForReadyTimeout(context.T(), host, commandRunner, agentReadyTimeout); err != nil {
return nil, err
}
}
Expand Down Expand Up @@ -127,7 +129,7 @@ func ensureAuthToken(params *agentclientparams.Params, _ osComp.Family, host *Ho
if err != nil {
return fmt.Errorf("could not read auth token file: %v", err)
}
params.AuthToken = strings.TrimSpace(string(authToken))
params.AuthToken = strings.TrimSpace(authToken)

return nil
}
Expand Down Expand Up @@ -169,3 +171,97 @@ func curlCommand(endpoint string, authToken string) string {
endpoint,
)
}

// waitForReadyTimeout waits for the Agent to become ready, giving up after timeout.
//
// When the Agent is not ready in time, it makes a best-effort attempt to generate
// and download a flare from host so the failure can be investigated. A failure of
// that attempt is only reported on t; the value returned to the caller is always
// the readiness error (nil when the Agent became ready).
func waitForReadyTimeout(t *testing.T, host *Host, commandRunner *agentCommandRunner, timeout time.Duration) error {
	readyErr := commandRunner.waitForReadyTimeout(timeout)
	if readyErr == nil {
		return nil
	}

	// Diagnostics collection must not mask the readiness error, so its own
	// failure is recorded on the test instead of being returned.
	if flareErr := generateAndDownloadFlare(t, commandRunner, host); flareErr != nil {
		t.Errorf("Could not generate and get a flare: %v", flareErr)
	}

	return readyErr
}

// generateAndDownloadFlare collects diagnostics from a host and stores them in the
// output directory of the current runner profile.
//
// It first asks the Agent to create a flare with `--send --local`. A failure of that
// command is reported on t but does not stop the collection, because the archive may
// still have been written locally even though it could not be uploaded. It then scans
// the host temporary folder for files matching `datadog-agent-.*\.zip` and downloads
// each of them. On non-Windows hosts the file mode is changed to 744 through sudo
// before the download.
//
// When no flare archive is found, every entry of the Agent logs folder is downloaded
// instead.
//
// The returned error wraps the first failure that prevented files from being located
// or downloaded; nil is returned otherwise.
func generateAndDownloadFlare(t *testing.T, commandRunner *agentCommandRunner, host *Host) error {
	outputDir, err := runner.GetProfile().GetOutputDir()
	if err != nil {
		return fmt.Errorf("could not get output directory: %w", err)
	}

	_, err = commandRunner.FlareWithError(agentclient.WithArgs([]string{"--email", "e2e@test.com", "--send", "--local"}))
	if err != nil {
		t.Errorf("Error while generating the flare: %v.", err)
		// Do not return now, the flare may be generated locally but was not uploaded because there's no fake intake
	}

	// The pattern is a constant, so compilation cannot fail at runtime.
	flareRegex := regexp.MustCompile(`datadog-agent-.*\.zip`)

	tmpFolder, err := host.GetTmpFolder()
	if err != nil {
		return fmt.Errorf("could not get tmp folder: %w", err)
	}
	// On Windows the folder comes from the output of an `echo` command, which ends
	// with a line terminator; strip it so the paths built below are valid.
	tmpFolder = strings.TrimSpace(tmpFolder)

	entries, err := host.ReadDir(tmpFolder)
	if err != nil {
		return fmt.Errorf("could not read directory: %w", err)
	}

	flareFound := false
	for _, entry := range entries {
		if !flareRegex.MatchString(entry.Name()) {
			continue
		}
		t.Logf("Found flare file: %s", entry.Name())

		remotePath := fmt.Sprintf("%s/%s", tmpFolder, entry.Name())

		if host.osFamily != osComp.WindowsFamily {
			// Make the archive readable by the connecting user before fetching it.
			if _, err = host.Execute(fmt.Sprintf("sudo chmod 744 %s", remotePath)); err != nil {
				return fmt.Errorf("could not update permission of flare file %s : %w", remotePath, err)
			}
		}

		t.Logf("Downloading flare file in: %s", outputDir)
		if err = host.GetFile(remotePath, fmt.Sprintf("%s/%s", outputDir, entry.Name())); err != nil {
			return fmt.Errorf("could not download flare file from %s : %w", remotePath, err)
		}

		flareFound = true
	}

	if flareFound {
		return nil
	}

	t.Errorf("Could not find a flare. Retrieving logs directly instead...")

	logsFolder, err := host.GetLogsFolder()
	if err != nil {
		return fmt.Errorf("could not get logs folder: %w", err)
	}

	entries, err = host.ReadDir(logsFolder)
	if err != nil {
		return fmt.Errorf("could not read directory: %w", err)
	}

	for _, entry := range entries {
		t.Logf("Found log file: %s. Downloading file in: %s", entry.Name(), outputDir)

		remotePath := fmt.Sprintf("%s/%s", logsFolder, entry.Name())
		if err = host.GetFile(remotePath, fmt.Sprintf("%s/%s", outputDir, entry.Name())); err != nil {
			return fmt.Errorf("could not download log file from %s : %w", remotePath, err)
		}
	}

	return nil
}
9 changes: 9 additions & 0 deletions test/new-e2e/pkg/utils/e2e/client/agent_commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ func (agent *agentCommandRunner) Flare(commandArgs ...agentclient.AgentArgsOptio
return agent.executeCommand("flare", commandArgs...)
}

// FlareWithError runs the flare command with the given arguments and returns the
// command output together with any error reported by the executor. Invalid options
// fail the test through require.NoError before the command is run. The flare archive
// itself is not returned: you should use the FakeIntake client to fetch it.
func (agent *agentCommandRunner) FlareWithError(commandArgs ...agentclient.AgentArgsOption) (string, error) {
	params, err := optional.MakeParams(commandArgs...)
	require.NoError(agent.t, err)

	// Sub-command name first, followed by the caller-supplied arguments.
	command := make([]string, 0, len(params.Args)+1)
	command = append(command, "flare")
	command = append(command, params.Args...)

	return agent.executor.execute(command)
}

// Health runs health command and returns the runtime agent health
func (agent *agentCommandRunner) Health() (string, error) {
arguments := []string{"health"}
Expand Down
28 changes: 28 additions & 0 deletions test/new-e2e/pkg/utils/e2e/client/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ type Host struct {
privateKeyPassphrase []byte
buildCommand buildCommandFn
convertPathSeparator convertPathSeparatorFn
osFamily oscomp.Family
}

// NewHost creates a new ssh client to connect to a remote host with
Expand Down Expand Up @@ -82,6 +83,7 @@ func NewHost(context e2e.Context, hostOutput remote.HostOutput) (*Host, error) {
privateKeyPassphrase: []byte(privateKeyPassword),
buildCommand: buildCommandFactory(hostOutput.OSFamily),
convertPathSeparator: convertPathSeparatorFactory(hostOutput.OSFamily),
osFamily: hostOutput.OSFamily,
}
err = host.Reconnect()
return host, err
Expand Down Expand Up @@ -316,6 +318,32 @@ func (h *Host) DialPort(port uint16) (net.Conn, error) {
return connection, err
}

// GetTmpFolder returns the temporary folder path for the host.
//
// Linux hosts always report "/tmp". Windows hosts are asked by executing
// `echo %TEMP%` and the command output is returned unchanged, along with any
// execution error. Every other OS family yields errors.ErrUnsupported.
//
// NOTE(review): the Windows value is raw command output and presumably ends with a
// line terminator; confirm whether callers need to trim it.
func (h *Host) GetTmpFolder() (string, error) {
	if h.osFamily == oscomp.LinuxFamily {
		return "/tmp", nil
	}

	if h.osFamily == oscomp.WindowsFamily {
		return h.Execute("echo %TEMP%")
	}

	return "", errors.ErrUnsupported
}

// GetLogsFolder returns the logs folder path for the host.
//
// The path is a fixed value chosen from the host OS family (Windows, Linux or
// macOS); no command is run on the host. Any other OS family yields
// errors.ErrUnsupported.
func (h *Host) GetLogsFolder() (string, error) {
	var folder string

	switch h.osFamily {
	case oscomp.LinuxFamily:
		folder = "/var/log/datadog/"
	case oscomp.MacOSFamily:
		folder = "/opt/datadog-agent/logs"
	case oscomp.WindowsFamily:
		folder = `C:\ProgramData\Datadog\logs`
	default:
		return "", errors.ErrUnsupported
	}

	return folder, nil
}

// appendWithSudo appends content to the file using sudo tee for Linux environment
func (h *Host) appendWithSudo(path string, content []byte) (int64, error) {
cmd := fmt.Sprintf("echo '%s' | sudo tee -a %s", string(content), path)
Expand Down

0 comments on commit 4e877e6

Please sign in to comment.