diff --git a/pkg/analyzer/pod.go b/pkg/analyzer/pod.go
index 3c5663ce6f..990273bd01 100644
--- a/pkg/analyzer/pod.go
+++ b/pkg/analyzer/pod.go
@@ -18,6 +18,7 @@ import (
 
 	"github.com/k8sgpt-ai/k8sgpt/pkg/common"
 	"github.com/k8sgpt-ai/k8sgpt/pkg/util"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -41,12 +42,12 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
 	for _, pod := range list.Items {
 		var failures []common.Failure
 
+		// Check for pending pods
 		if pod.Status.Phase == "Pending" {
-			// Check through container status to check for crashes
 			for _, containerStatus := range pod.Status.Conditions {
-				if containerStatus.Type == "PodScheduled" && containerStatus.Reason == "Unschedulable" {
+				if containerStatus.Type == v1.PodScheduled && containerStatus.Reason == "Unschedulable" {
 					if containerStatus.Message != "" {
 						failures = append(failures, common.Failure{
 							Text:      containerStatus.Message,
@@ -57,60 +58,12 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
 			}
 		}
 
-		// Check through container status to check for crashes or unready
-		for _, containerStatus := range pod.Status.ContainerStatuses {
-
-			if containerStatus.State.Waiting != nil {
-
-				if isErrorReason(containerStatus.State.Waiting.Reason) && containerStatus.State.Waiting.Message != "" {
-					failures = append(failures, common.Failure{
-						Text:      containerStatus.State.Waiting.Message,
-						Sensitive: []common.Sensitive{},
-					})
-				}
-
-				// This represents a container that is still being created or blocked due to conditions such as OOMKilled
-				if containerStatus.State.Waiting.Reason == "ContainerCreating" && pod.Status.Phase == "Pending" {
-
-					// parse the event log and append details
-					evt, err := FetchLatestEvent(a.Context, a.Client, pod.Namespace, pod.Name)
-					if err != nil || evt == nil {
-						continue
-					}
-					if isEvtErrorReason(evt.Reason) && evt.Message != "" {
-						failures = append(failures, common.Failure{
-							Text:      evt.Message,
-							Sensitive: []common.Sensitive{},
-						})
-					}
-				}
+		// Check for errors in the init containers.
+		failures = append(failures, analyzeContainerStatusFailures(a, pod.Status.InitContainerStatuses, pod.Name, pod.Namespace, string(pod.Status.Phase))...)
 
-				// This represents container that is in CrashLoopBackOff state due to conditions such as OOMKilled
-				if containerStatus.State.Waiting.Reason == "CrashLoopBackOff" {
-					failures = append(failures, common.Failure{
-						Text:      fmt.Sprintf("the last termination reason is %s container=%s pod=%s", containerStatus.LastTerminationState.Terminated.Reason, containerStatus.Name, pod.Name),
-						Sensitive: []common.Sensitive{},
-					})
-				}
-			} else {
-				// when pod is Running but its ReadinessProbe fails
-				if !containerStatus.Ready && pod.Status.Phase == "Running" {
-					// parse the event log and append details
-					evt, err := FetchLatestEvent(a.Context, a.Client, pod.Namespace, pod.Name)
-					if err != nil || evt == nil {
-						continue
-					}
-					if evt.Reason == "Unhealthy" && evt.Message != "" {
-						failures = append(failures, common.Failure{
-							Text:      evt.Message,
-							Sensitive: []common.Sensitive{},
+		// Check for errors in containers.
+		failures = append(failures, analyzeContainerStatusFailures(a, pod.Status.ContainerStatuses, pod.Name, pod.Namespace, string(pod.Status.Phase))...)
-						})
-					}
-
-				}
-			}
-		}
 		if len(failures) > 0 {
 			preAnalysis[fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)] = common.PreAnalysis{
 				Pod:            pod,
@@ -135,6 +88,58 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
 	return a.Results, nil
 }
 
+func analyzeContainerStatusFailures(a common.Analyzer, statuses []v1.ContainerStatus, name string, namespace string, statusPhase string) []common.Failure {
+	var failures []common.Failure
+
+	// Check through the container statuses for crashes or unready containers.
+	for _, containerStatus := range statuses {
+		if containerStatus.State.Waiting != nil {
+			if containerStatus.State.Waiting.Reason == "ContainerCreating" && statusPhase == "Pending" {
+				// This represents a container that is still being created or blocked due to conditions such as OOMKilled
+				// Parse the event log and append details
+				evt, err := FetchLatestEvent(a.Context, a.Client, namespace, name)
+				if err != nil || evt == nil {
+					continue
+				}
+				if isEvtErrorReason(evt.Reason) && evt.Message != "" {
+					failures = append(failures, common.Failure{
+						Text:      evt.Message,
+						Sensitive: []common.Sensitive{},
+					})
+				}
+			} else if containerStatus.State.Waiting.Reason == "CrashLoopBackOff" && containerStatus.LastTerminationState.Terminated != nil {
+				// This represents a container in a CrashLoopBackOff state due to conditions such as OOMKilled
+				failures = append(failures, common.Failure{
+					Text:      fmt.Sprintf("the last termination reason is %s container=%s pod=%s", containerStatus.LastTerminationState.Terminated.Reason, containerStatus.Name, name),
+					Sensitive: []common.Sensitive{},
+				})
+			} else if isErrorReason(containerStatus.State.Waiting.Reason) && containerStatus.State.Waiting.Message != "" {
+				failures = append(failures, common.Failure{
+					Text:      containerStatus.State.Waiting.Message,
+					Sensitive: []common.Sensitive{},
+				})
+			}
+		} else {
+			// When the pod is Running but its ReadinessProbe fails
+			if !containerStatus.Ready && statusPhase == "Running" {
+				// Parse the event log and append details
+				evt, err := FetchLatestEvent(a.Context, a.Client, namespace, name)
+				if err != nil || evt == nil {
+					continue
+				}
+				if evt.Reason == "Unhealthy" && evt.Message != "" {
+					failures = append(failures, common.Failure{
+						Text:      evt.Message,
+						Sensitive: []common.Sensitive{},
+					})
+				}
+			}
+		}
+	}
+
+	return failures
+}
+
 func isErrorReason(reason string) bool {
 	failureReasons := []string{
 		"CrashLoopBackOff", "ImagePullBackOff", "CreateContainerConfigError", "PreCreateHookError", "CreateContainerError",
diff --git a/pkg/analyzer/pod_test.go b/pkg/analyzer/pod_test.go
index d088d838c5..b9973a3730 100644
--- a/pkg/analyzer/pod_test.go
+++ b/pkg/analyzer/pod_test.go
@@ -15,144 +15,357 @@ package analyzer
 
 import (
 	"context"
+	"sort"
 	"testing"
 
 	"github.com/k8sgpt-ai/k8sgpt/pkg/common"
 	"github.com/k8sgpt-ai/k8sgpt/pkg/kubernetes"
-	"github.com/magiconair/properties/assert"
+	"github.com/stretchr/testify/require"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes/fake"
 )
 
 func TestPodAnalyzer(t *testing.T) {
-
-	clientset := fake.NewSimpleClientset(
-		&v1.Pod{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:        "example",
-				Namespace:   "default",
-				Annotations: map[string]string{},
-			},
-			Status: v1.PodStatus{
-				Phase: v1.PodPending,
-				Conditions: []v1.PodCondition{
-					{
-						Type:    v1.PodScheduled,
-						Reason:  "Unschedulable",
-						Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
-					},
+	tests := []struct {
+		name         string
+		config       common.Analyzer
+		expectations []struct {
+			name          string
+			failuresCount int
+		}
+	}{
+		{
+			name: "Pending pods, namespace filtering and readiness probe failure",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								Phase: v1.PodPending,
+								Conditions: []v1.PodCondition{
+									{
+										// This condition will contribute to failures.
+										Type:    v1.PodScheduled,
+										Reason:  "Unschedulable",
+										Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
+									},
+									{
+										// This condition won't contribute to failures.
+										Type:   v1.PodScheduled,
+										Reason: "Unexpected failure",
+									},
+								},
+							},
+						},
+						&v1.Pod{
+							// This pod won't be selected because of namespace filtering.
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod2",
+								Namespace: "test",
+							},
+						},
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod3",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								// When the pod is Running but its ReadinessProbe fails
+								Phase: v1.PodRunning,
+								ContainerStatuses: []v1.ContainerStatus{
+									{
+										Ready: false,
+									},
+								},
+							},
+						},
+						&v1.Event{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Event1",
+								Namespace: "default",
+							},
+							InvolvedObject: v1.ObjectReference{
+								Kind:      "Pod",
+								Name:      "Pod3",
+								Namespace: "default",
+							},
+							Reason:  "Unhealthy",
+							Message: "readiness probe failed: the detail reason here ...",
+							Source:  v1.EventSource{Component: "eventTest"},
+							Count:   1,
+							Type:    v1.EventTypeWarning,
+						},
+					),
 				},
-			},
-		},
-		&v1.Pod{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:      "example2",
+				Context:   context.Background(),
 				Namespace: "default",
 			},
-			Status: v1.PodStatus{
-				Phase: v1.PodRunning,
-				ContainerStatuses: []v1.ContainerStatus{
-					{
-						Name:  "example2",
-						Ready: false,
-					},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				{
+					name:          "default/Pod1",
+					failuresCount: 1,
 				},
-			Conditions: []v1.PodCondition{
-				{
-					Type:    v1.ContainersReady,
-					Reason:  "ContainersNotReady",
-					Message: "containers with unready status: [example2]",
-				},
+				{
+					name:          "default/Pod3",
+					failuresCount: 1,
 				},
 			},
 		},
-		// simulate event: 30s   Warning   Unhealthy  pod/my-nginx-7fb4dbcf47-4ch4w  Readiness probe failed: bash: xxxx: command not found
-		&v1.Event{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:      "foo",
+		{
+			name: "readiness probe failure without any event",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								// When the pod is Running but its ReadinessProbe fails,
+								// it won't contribute to any failures because
+								// there's no event present.
+								Phase: v1.PodRunning,
+								ContainerStatuses: []v1.ContainerStatus{
+									{
+										Ready: false,
+									},
+								},
+							},
+						},
+					),
+				},
+				Context:   context.Background(),
 				Namespace: "default",
 			},
-			InvolvedObject: v1.ObjectReference{
-				Kind:       "Pod",
-				Name:       "example2",
-				Namespace:  "default",
-				UID:        "differentUid",
-				APIVersion: "v1",
-			},
-			Reason:  "Unhealthy",
-			Message: "readiness probe failed: the detail reason here ...",
-			Source:  v1.EventSource{Component: "eventTest"},
-			Count:   1,
-			Type:    v1.EventTypeWarning,
-		})
-
-	config := common.Analyzer{
-		Client: &kubernetes.Client{
-			Client: clientset,
 		},
-		Context:   context.Background(),
-		Namespace: "default",
-	}
-	podAnalyzer := PodAnalyzer{}
-	var analysisResults []common.Result
-	analysisResults, err := podAnalyzer.Analyze(config)
-	if err != nil {
-		t.Error(err)
-	}
-	assert.Equal(t, len(analysisResults), 2)
-}
-
-func TestPodAnalyzerNamespaceFiltering(t *testing.T) {
-
-	clientset := fake.NewSimpleClientset(
-		&v1.Pod{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:        "example",
-				Namespace:   "default",
-				Annotations: map[string]string{},
+		{
+			name: "Init container status state waiting",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								Phase: v1.PodPending,
+								InitContainerStatuses: []v1.ContainerStatus{
+									{
+										Ready: true,
+										State: v1.ContainerState{
+											Running: &v1.ContainerStateRunning{
+												StartedAt: metav1.Now(),
+											},
+										},
+									},
+									{
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This represents a container that is still being created or blocked due to conditions such as OOMKilled
+												Reason: "ContainerCreating",
+											},
+										},
+									},
+								},
+							},
+						},
+						&v1.Event{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Event1",
+								Namespace: "default",
+							},
+							InvolvedObject: v1.ObjectReference{
+								Kind:      "Pod",
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Reason:  "FailedCreatePodSandBox",
+							Message: "failed to create the pod sandbox ...",
+							Type:    v1.EventTypeWarning,
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
 			},
-			Status: v1.PodStatus{
-				Phase: v1.PodPending,
-				Conditions: []v1.PodCondition{
-					{
-						Type:    v1.PodScheduled,
-						Reason:  "Unschedulable",
-						Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
-					},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				{
+					name:          "default/Pod1",
+					failuresCount: 1,
 				},
 			},
 		},
-		&v1.Pod{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:        "example",
-				Namespace:   "other-namespace",
-				Annotations: map[string]string{},
+		{
+			name: "Container status state waiting but no event reported",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								Phase: v1.PodPending,
+								ContainerStatuses: []v1.ContainerStatus{
+									{
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This represents a container that is still being created or blocked due to conditions such as OOMKilled
+												Reason: "ContainerCreating",
+											},
+										},
+									},
+								},
+							},
+						},
+					),
+				},
+				Context:   context.Background(),
 				Namespace: "default",
 			},
-			Status: v1.PodStatus{
-				Phase: v1.PodPending,
-				Conditions: []v1.PodCondition{
-					{
-						Type:    v1.PodScheduled,
-						Reason:  "Unschedulable",
-						Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
-					},
+		},
+		{
+			name: "Container status state waiting",
+			config: common.Analyzer{
+				Client: &kubernetes.Client{
+					Client: fake.NewSimpleClientset(
+						&v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							Status: v1.PodStatus{
+								Phase: v1.PodPending,
+								ContainerStatuses: []v1.ContainerStatus{
+									{
+										Name:  "Container1",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This represents a container that is still being created or blocked due to conditions such as OOMKilled
+												Reason: "ContainerCreating",
+											},
+										},
+									},
+									{
+										Name:  "Container2",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This represents a container in a CrashLoopBackOff state due to conditions such as OOMKilled
+												Reason: "CrashLoopBackOff",
+											},
+										},
+										LastTerminationState: v1.ContainerState{
+											Terminated: &v1.ContainerStateTerminated{
+												Reason: "test reason",
+											},
+										},
+									},
+									{
+										Name:  "Container3",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// This won't contribute to failures.
+												Reason:  "RandomReason",
+												Message: "This container won't be present in the failures",
+											},
+										},
+									},
+									{
+										Name:  "Container4",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// Valid error reason.
+												Reason:  "PreStartHookError",
+												Message: "Container4 encountered PreStartHookError",
+											},
+										},
+									},
+									{
+										Name:  "Container5",
+										Ready: false,
+										State: v1.ContainerState{
+											Waiting: &v1.ContainerStateWaiting{
+												// Valid error reason.
+												Reason:  "CrashLoopBackOff",
+												Message: "Container5 encountered CrashLoopBackOff",
+											},
+										},
+									},
+								},
+							},
+						},
+						&v1.Event{
+							ObjectMeta: metav1.ObjectMeta{
+								Name:      "Event1",
+								Namespace: "default",
+							},
+							InvolvedObject: v1.ObjectReference{
+								Kind:      "Pod",
+								Name:      "Pod1",
+								Namespace: "default",
+							},
+							// This reason won't contribute to failures.
+							Reason: "RandomEvent",
+							Type:   v1.EventTypeWarning,
+						},
+					),
+				},
+				Context:   context.Background(),
+				Namespace: "default",
+			},
+			expectations: []struct {
+				name          string
+				failuresCount int
+			}{
+				{
+					name:          "default/Pod1",
+					failuresCount: 3,
 				},
 			},
-		})
-
-	config := common.Analyzer{
-		Client: &kubernetes.Client{
-			Client: clientset,
 		},
-		Context:   context.Background(),
-		Namespace: "default",
 	}
+
 	podAnalyzer := PodAnalyzer{}
-	var analysisResults []common.Result
-	analysisResults, err := podAnalyzer.Analyze(config)
-	if err != nil {
-		t.Error(err)
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			results, err := podAnalyzer.Analyze(tt.config)
+			require.NoError(t, err)
+
+			if tt.expectations == nil {
+				require.Equal(t, 0, len(results))
+			} else {
+				sort.Slice(results, func(i, j int) bool {
+					return results[i].Name < results[j].Name
+				})
+
+				require.Equal(t, len(tt.expectations), len(results))
+
+				for i, result := range results {
+					require.Equal(t, tt.expectations[i].name, result.Name)
+					require.Equal(t, tt.expectations[i].failuresCount, len(result.Error))
+				}
+			}
+		})
 	}
-	assert.Equal(t, len(analysisResults), 1)
 }
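One behavioral change in this diff worth calling out: analyzeContainerStatusFailures only formats the CrashLoopBackOff failure when containerStatus.LastTerminationState.Terminated != nil, a guard the old inline loop lacked before dereferencing Terminated.Reason. Below is a minimal regression-test sketch for that path. It is not part of the diff; the test name and the fixture values ("app", "Pod1") are hypothetical, while analyzeContainerStatusFailures, common.Analyzer, kubernetes.Client, and the fake clientset are taken from the code above.

package analyzer

import (
	"context"
	"testing"

	"github.com/k8sgpt-ai/k8sgpt/pkg/common"
	"github.com/k8sgpt-ai/k8sgpt/pkg/kubernetes"
	"github.com/stretchr/testify/require"
	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes/fake"
)

// Hypothetical regression test: a CrashLoopBackOff waiting state with no
// recorded termination would have dereferenced a nil
// LastTerminationState.Terminated in the old inline loop. With the new guard
// the status is skipped and, since the waiting state carries no message
// either, no failure is reported.
func TestCrashLoopBackOffWithoutTerminationState(t *testing.T) {
	a := common.Analyzer{
		Client:  &kubernetes.Client{Client: fake.NewSimpleClientset()},
		Context: context.Background(),
	}
	statuses := []v1.ContainerStatus{
		{
			Name: "app",
			// LastTerminationState is left zero-valued, so Terminated is nil.
			State: v1.ContainerState{
				Waiting: &v1.ContainerStateWaiting{Reason: "CrashLoopBackOff"},
			},
		},
	}
	failures := analyzeContainerStatusFailures(a, statuses, "Pod1", "default", "Running")
	require.Empty(t, failures)
}

The pod phase is "Running" here, so the ContainerCreating branch is never taken and no event lookup happens; the test exercises only the guard ordering of the three waiting-state branches.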