From fbeaa69e3d6202f74bfc7df7a2b6f4ebc9c0095d Mon Sep 17 00:00:00 2001 From: rabi Date: Tue, 5 May 2026 19:12:55 +0530 Subject: [PATCH] Store ansible execution summaries in status Capture the AEE pod termination summary when jobs reach a terminal state and persist it on deployment/nodeset status. This keeps host failure counts and failure percentages available after the ansible job pods are deleted. Depends-On: https://github.com/openstack-k8s-operators/edpm-ansible/pull/1168 Change-Id: I2dd2aa5af4587a6ed514628c462e957d63d8a1a7 Signed-off-by: rabi --- ...ack.org_openstackdataplanedeployments.yaml | 37 ++++++ ...nstack.org_openstackdataplanenodesets.yaml | 40 +++++++ .../openstackdataplanedeployment_types.go | 23 ++++ .../openstackdataplanenodeset_types.go | 13 ++- .../v1beta1/zz_generated.deepcopy.go | 78 +++++++++++++ bindata/crds/crds.yaml | 77 ++++++++++++ ...ack.org_openstackdataplanedeployments.yaml | 37 ++++++ ...nstack.org_openstackdataplanenodesets.yaml | 40 +++++++ .../openstackdataplanenodeset_controller.go | 7 ++ internal/dataplane/deployment.go | 20 ++++ internal/dataplane/util/ansible_execution.go | 66 +++++++++++ .../dataplane/util/ansible_execution_test.go | 110 ++++++++++++++++++ ...enstackdataplanenodeset_controller_test.go | 79 +++++++++++++ 13 files changed, 623 insertions(+), 4 deletions(-) create mode 100644 internal/dataplane/util/ansible_execution_test.go diff --git a/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml b/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml index 69936ebcce..b5f3c3a4e1 100644 --- a/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml +++ b/api/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml @@ -132,6 +132,43 @@ spec: type: string description: AnsibleEEHashes type: object + ansibleExecutionSummaries: + additionalProperties: + description: |- + AnsibleExecutionSummary captures the final ansible-runner execution result + reported by the AEE pod. 
+ properties: + failedHostList: + description: FailedHostList contains the hosts that failed. + items: + type: string + type: array + failedHosts: + description: FailedHosts is the number of hosts with Ansible + task failures. + type: integer + failurePercent: + description: FailurePercent is the percent of total hosts that + failed or were unreachable. + type: integer + totalHosts: + description: TotalHosts is the number of hosts included in the + Ansible execution summary. + type: integer + unreachableHostList: + description: UnreachableHostList contains the hosts that were + unreachable. + items: + type: string + type: array + unreachableHosts: + description: UnreachableHosts is the number of hosts that were + unreachable. + type: integer + type: object + description: AnsibleExecutionSummaries stores the most recent AEE + execution summary per Job name. + type: object bmhRefHashes: additionalProperties: type: string diff --git a/api/bases/dataplane.openstack.org_openstackdataplanenodesets.yaml b/api/bases/dataplane.openstack.org_openstackdataplanenodesets.yaml index 53be12829a..95af7afd97 100644 --- a/api/bases/dataplane.openstack.org_openstackdataplanenodesets.yaml +++ b/api/bases/dataplane.openstack.org_openstackdataplanenodesets.yaml @@ -1925,6 +1925,46 @@ spec: deployedVersion: description: DeployedVersion type: string + deploymentExecutionSummaries: + additionalProperties: + additionalProperties: + description: |- + AnsibleExecutionSummary captures the final ansible-runner execution result + reported by the AEE pod. + properties: + failedHostList: + description: FailedHostList contains the hosts that failed. + items: + type: string + type: array + failedHosts: + description: FailedHosts is the number of hosts with Ansible + task failures. + type: integer + failurePercent: + description: FailurePercent is the percent of total hosts + that failed or were unreachable. 
+ type: integer + totalHosts: + description: TotalHosts is the number of hosts included in + the Ansible execution summary. + type: integer + unreachableHostList: + description: UnreachableHostList contains the hosts that were + unreachable. + items: + type: string + type: array + unreachableHosts: + description: UnreachableHosts is the number of hosts that + were unreachable. + type: integer + type: object + type: object + description: |- + DeploymentExecutionSummaries stores the most recent AEE execution summary + per deployment and Job name. + type: object deploymentStatuses: additionalProperties: description: Conditions provide observations of the operational diff --git a/api/dataplane/v1beta1/openstackdataplanedeployment_types.go b/api/dataplane/v1beta1/openstackdataplanedeployment_types.go index 3814cca78b..9c87d7e2fa 100644 --- a/api/dataplane/v1beta1/openstackdataplanedeployment_types.go +++ b/api/dataplane/v1beta1/openstackdataplanedeployment_types.go @@ -80,6 +80,23 @@ type OpenStackDataPlaneDeploymentSpec struct { AnsibleEEEnvConfigMapName string `json:"ansibleEEEnvConfigMapName,omitempty"` } +// AnsibleExecutionSummary captures the final ansible-runner execution result +// reported by the AEE pod. +type AnsibleExecutionSummary struct { + // TotalHosts is the number of hosts included in the Ansible execution summary. + TotalHosts *int `json:"totalHosts,omitempty" optional:"true"` + // FailedHosts is the number of hosts with Ansible task failures. + FailedHosts *int `json:"failedHosts,omitempty" optional:"true"` + // UnreachableHosts is the number of hosts that were unreachable. + UnreachableHosts *int `json:"unreachableHosts,omitempty" optional:"true"` + // FailurePercent is the percent of total hosts that failed or were unreachable. + FailurePercent *int `json:"failurePercent,omitempty" optional:"true"` + // FailedHostList contains the hosts that failed. 
+ FailedHostList *[]string `json:"failedHostList,omitempty" optional:"true"` + // UnreachableHostList contains the hosts that were unreachable. + UnreachableHostList *[]string `json:"unreachableHostList,omitempty" optional:"true"` +} + // OpenStackDataPlaneDeploymentStatus defines the observed state of OpenStackDataPlaneDeployment type OpenStackDataPlaneDeploymentStatus struct { // NodeSetConditions @@ -88,6 +105,9 @@ type OpenStackDataPlaneDeploymentStatus struct { // AnsibleEEHashes AnsibleEEHashes map[string]string `json:"ansibleEEHashes,omitempty" optional:"true"` + // AnsibleExecutionSummaries stores the most recent AEE execution summary per Job name. + AnsibleExecutionSummaries map[string]AnsibleExecutionSummary `json:"ansibleExecutionSummaries,omitempty" optional:"true"` + // ConfigMapHashes ConfigMapHashes map[string]string `json:"configMapHashes,omitempty" optional:"true"` @@ -192,6 +212,9 @@ func (instance *OpenStackDataPlaneDeployment) InitHashesAndImages() { if instance.Status.AnsibleEEHashes == nil { instance.Status.AnsibleEEHashes = make(map[string]string) } + if instance.Status.AnsibleExecutionSummaries == nil { + instance.Status.AnsibleExecutionSummaries = make(map[string]AnsibleExecutionSummary) + } if instance.Status.ContainerImages == nil { instance.Status.ContainerImages = make(map[string]string) } diff --git a/api/dataplane/v1beta1/openstackdataplanenodeset_types.go b/api/dataplane/v1beta1/openstackdataplanenodeset_types.go index 521dc5dda1..1490e2a50e 100644 --- a/api/dataplane/v1beta1/openstackdataplanenodeset_types.go +++ b/api/dataplane/v1beta1/openstackdataplanenodeset_types.go @@ -117,6 +117,10 @@ type OpenStackDataPlaneNodeSetStatus struct { // DeploymentStatuses DeploymentStatuses map[string]condition.Conditions `json:"deploymentStatuses,omitempty" optional:"true"` + // DeploymentExecutionSummaries stores the most recent AEE execution summary + // per deployment and Job name. 
+ DeploymentExecutionSummaries map[string]map[string]AnsibleExecutionSummary `json:"deploymentExecutionSummaries,omitempty" optional:"true"` + // AllHostnames AllHostnames map[string]map[infranetworkv1.NetNameStr]string `json:"allHostnames,omitempty" optional:"true"` @@ -187,6 +191,7 @@ func (instance OpenStackDataPlaneNodeSet) IsReady() bool { func (instance *OpenStackDataPlaneNodeSet) InitConditions() { instance.Status.Conditions = condition.Conditions{} instance.Status.DeploymentStatuses = make(map[string]condition.Conditions) + instance.Status.DeploymentExecutionSummaries = make(map[string]map[string]AnsibleExecutionSummary) cl := condition.CreateList( condition.UnknownCondition(condition.DeploymentReadyCondition, condition.InitReason, condition.DeploymentReadyInitMessage), @@ -208,10 +213,10 @@ func (instance *OpenStackDataPlaneNodeSet) InitConditions() { // GetAnsibleEESpec - get the fields that will be passed to AEE Job func (instance OpenStackDataPlaneNodeSet) GetAnsibleEESpec() AnsibleEESpec { return AnsibleEESpec{ - NetworkAttachments: instance.Spec.NetworkAttachments, - ExtraMounts: instance.Spec.NodeTemplate.ExtraMounts, - Env: instance.Spec.Env, - ServiceAccountName: instance.Name, + NetworkAttachments: instance.Spec.NetworkAttachments, + ExtraMounts: instance.Spec.NodeTemplate.ExtraMounts, + Env: instance.Spec.Env, + ServiceAccountName: instance.Name, } } diff --git a/api/dataplane/v1beta1/zz_generated.deepcopy.go b/api/dataplane/v1beta1/zz_generated.deepcopy.go index 7db3b5abd8..476cd1ce1f 100644 --- a/api/dataplane/v1beta1/zz_generated.deepcopy.go +++ b/api/dataplane/v1beta1/zz_generated.deepcopy.go @@ -86,6 +86,59 @@ func (in *AnsibleEESpec) DeepCopy() *AnsibleEESpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *AnsibleExecutionSummary) DeepCopyInto(out *AnsibleExecutionSummary) { + *out = *in + if in.TotalHosts != nil { + in, out := &in.TotalHosts, &out.TotalHosts + *out = new(int) + **out = **in + } + if in.FailedHosts != nil { + in, out := &in.FailedHosts, &out.FailedHosts + *out = new(int) + **out = **in + } + if in.UnreachableHosts != nil { + in, out := &in.UnreachableHosts, &out.UnreachableHosts + *out = new(int) + **out = **in + } + if in.FailurePercent != nil { + in, out := &in.FailurePercent, &out.FailurePercent + *out = new(int) + **out = **in + } + if in.FailedHostList != nil { + in, out := &in.FailedHostList, &out.FailedHostList + *out = new([]string) + if **in != nil { + in, out := *in, *out + *out = make([]string, len(*in)) + copy(*out, *in) + } + } + if in.UnreachableHostList != nil { + in, out := &in.UnreachableHostList, &out.UnreachableHostList + *out = new([]string) + if **in != nil { + in, out := *in, *out + *out = make([]string, len(*in)) + copy(*out, *in) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AnsibleExecutionSummary. +func (in *AnsibleExecutionSummary) DeepCopy() *AnsibleExecutionSummary { + if in == nil { + return nil + } + out := new(AnsibleExecutionSummary) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *AnsibleOpts) DeepCopyInto(out *AnsibleOpts) { *out = *in @@ -405,6 +458,13 @@ func (in *OpenStackDataPlaneDeploymentStatus) DeepCopyInto(out *OpenStackDataPla (*out)[key] = val } } + if in.AnsibleExecutionSummaries != nil { + in, out := &in.AnsibleExecutionSummaries, &out.AnsibleExecutionSummaries + *out = make(map[string]AnsibleExecutionSummary, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } if in.ConfigMapHashes != nil { in, out := &in.ConfigMapHashes, &out.ConfigMapHashes *out = make(map[string]string, len(*in)) @@ -596,6 +656,24 @@ func (in *OpenStackDataPlaneNodeSetStatus) DeepCopyInto(out *OpenStackDataPlaneN (*out)[key] = outVal } } + if in.DeploymentExecutionSummaries != nil { + in, out := &in.DeploymentExecutionSummaries, &out.DeploymentExecutionSummaries + *out = make(map[string]map[string]AnsibleExecutionSummary, len(*in)) + for key, val := range *in { + var outVal map[string]AnsibleExecutionSummary + if val == nil { + (*out)[key] = nil + } else { + inVal := (*in)[key] + in, out := &inVal, &outVal + *out = make(map[string]AnsibleExecutionSummary, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } + (*out)[key] = outVal + } + } if in.AllHostnames != nil { in, out := &in.AllHostnames, &out.AllHostnames *out = make(map[string]map[networkv1beta1.NetNameStr]string, len(*in)) diff --git a/bindata/crds/crds.yaml b/bindata/crds/crds.yaml index 38aed21500..b532c4a914 100644 --- a/bindata/crds/crds.yaml +++ b/bindata/crds/crds.yaml @@ -19071,6 +19071,43 @@ spec: type: string description: AnsibleEEHashes type: object + ansibleExecutionSummaries: + additionalProperties: + description: |- + AnsibleExecutionSummary captures the final ansible-runner execution result + reported by the AEE pod. + properties: + failedHostList: + description: FailedHostList contains the hosts that failed. 
+ items: + type: string + type: array + failedHosts: + description: FailedHosts is the number of hosts with Ansible + task failures. + type: integer + failurePercent: + description: FailurePercent is the percent of total hosts that + failed or were unreachable. + type: integer + totalHosts: + description: TotalHosts is the number of hosts included in the + Ansible execution summary. + type: integer + unreachableHostList: + description: UnreachableHostList contains the hosts that were + unreachable. + items: + type: string + type: array + unreachableHosts: + description: UnreachableHosts is the number of hosts that were + unreachable. + type: integer + type: object + description: AnsibleExecutionSummaries stores the most recent AEE + execution summary per Job name. + type: object bmhRefHashes: additionalProperties: type: string @@ -21131,6 +21168,46 @@ spec: deployedVersion: description: DeployedVersion type: string + deploymentExecutionSummaries: + additionalProperties: + additionalProperties: + description: |- + AnsibleExecutionSummary captures the final ansible-runner execution result + reported by the AEE pod. + properties: + failedHostList: + description: FailedHostList contains the hosts that failed. + items: + type: string + type: array + failedHosts: + description: FailedHosts is the number of hosts with Ansible + task failures. + type: integer + failurePercent: + description: FailurePercent is the percent of total hosts + that failed or were unreachable. + type: integer + totalHosts: + description: TotalHosts is the number of hosts included in + the Ansible execution summary. + type: integer + unreachableHostList: + description: UnreachableHostList contains the hosts that were + unreachable. + items: + type: string + type: array + unreachableHosts: + description: UnreachableHosts is the number of hosts that + were unreachable. 
+ type: integer + type: object + type: object + description: |- + DeploymentExecutionSummaries stores the most recent AEE execution summary + per deployment and Job name. + type: object deploymentStatuses: additionalProperties: description: Conditions provide observations of the operational diff --git a/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml b/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml index 69936ebcce..b5f3c3a4e1 100644 --- a/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml +++ b/config/crd/bases/dataplane.openstack.org_openstackdataplanedeployments.yaml @@ -132,6 +132,43 @@ spec: type: string description: AnsibleEEHashes type: object + ansibleExecutionSummaries: + additionalProperties: + description: |- + AnsibleExecutionSummary captures the final ansible-runner execution result + reported by the AEE pod. + properties: + failedHostList: + description: FailedHostList contains the hosts that failed. + items: + type: string + type: array + failedHosts: + description: FailedHosts is the number of hosts with Ansible + task failures. + type: integer + failurePercent: + description: FailurePercent is the percent of total hosts that + failed or were unreachable. + type: integer + totalHosts: + description: TotalHosts is the number of hosts included in the + Ansible execution summary. + type: integer + unreachableHostList: + description: UnreachableHostList contains the hosts that were + unreachable. + items: + type: string + type: array + unreachableHosts: + description: UnreachableHosts is the number of hosts that were + unreachable. + type: integer + type: object + description: AnsibleExecutionSummaries stores the most recent AEE + execution summary per Job name. 
+ type: object bmhRefHashes: additionalProperties: type: string diff --git a/config/crd/bases/dataplane.openstack.org_openstackdataplanenodesets.yaml b/config/crd/bases/dataplane.openstack.org_openstackdataplanenodesets.yaml index 53be12829a..95af7afd97 100644 --- a/config/crd/bases/dataplane.openstack.org_openstackdataplanenodesets.yaml +++ b/config/crd/bases/dataplane.openstack.org_openstackdataplanenodesets.yaml @@ -1925,6 +1925,46 @@ spec: deployedVersion: description: DeployedVersion type: string + deploymentExecutionSummaries: + additionalProperties: + additionalProperties: + description: |- + AnsibleExecutionSummary captures the final ansible-runner execution result + reported by the AEE pod. + properties: + failedHostList: + description: FailedHostList contains the hosts that failed. + items: + type: string + type: array + failedHosts: + description: FailedHosts is the number of hosts with Ansible + task failures. + type: integer + failurePercent: + description: FailurePercent is the percent of total hosts + that failed or were unreachable. + type: integer + totalHosts: + description: TotalHosts is the number of hosts included in + the Ansible execution summary. + type: integer + unreachableHostList: + description: UnreachableHostList contains the hosts that were + unreachable. + items: + type: string + type: array + unreachableHosts: + description: UnreachableHosts is the number of hosts that + were unreachable. + type: integer + type: object + type: object + description: |- + DeploymentExecutionSummaries stores the most recent AEE execution summary + per deployment and Job name. 
+ type: object deploymentStatuses: additionalProperties: description: Conditions provide observations of the operational diff --git a/internal/controller/dataplane/openstackdataplanenodeset_controller.go b/internal/controller/dataplane/openstackdataplanenodeset_controller.go index c625a93359..92ec41c9f1 100644 --- a/internal/controller/dataplane/openstackdataplanenodeset_controller.go +++ b/internal/controller/dataplane/openstackdataplanenodeset_controller.go @@ -539,6 +539,13 @@ func checkDeployment(ctx context.Context, helper *helper.Helper, instance.Status.DeploymentStatuses = make(map[string]condition.Conditions) } instance.Status.DeploymentStatuses[deployment.Name] = deploymentConditions + if len(deployment.Status.AnsibleExecutionSummaries) > 0 { + if instance.Status.DeploymentExecutionSummaries == nil { + instance.Status.DeploymentExecutionSummaries = make(map[string]map[string]dataplanev1.AnsibleExecutionSummary) + } + deploymentStatus := deployment.Status.DeepCopy() + instance.Status.DeploymentExecutionSummaries[deployment.Name] = deploymentStatus.AnsibleExecutionSummaries + } // Apply filtering for overall nodeset deployment state logic isLatestDeployment := latestRelevantDeployment != nil && deployment.Name == latestRelevantDeployment.Name diff --git a/internal/dataplane/deployment.go b/internal/dataplane/deployment.go index d3a228d10c..6b2f476cf5 100644 --- a/internal/dataplane/deployment.go +++ b/internal/dataplane/deployment.go @@ -231,6 +231,7 @@ func (d *Deployer) ConditionalDeploy( err.Error())) } if ansibleJob.Status.Succeeded > 0 { + d.storeExecutionSummary(ansibleJob) log.Info(fmt.Sprintf("Condition %s ready", readyCondition)) nsConditions.Set(condition.TrueCondition( readyCondition, @@ -245,6 +246,7 @@ func (d *Deployer) ConditionalDeploy( if ansibleCondition.Reason == condition.JobReasonBackoffLimitExceeded { errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.condition.message: %s", 
ansibleJob.Name, ansibleJob.Namespace, ansibleCondition.Message) } + d.storeExecutionSummary(ansibleJob) log.Info(fmt.Sprintf("Condition %s error", readyCondition)) err = fmt.Errorf("%s", errorMsg) nsConditions.Set(condition.FalseCondition( @@ -267,6 +269,24 @@ func (d *Deployer) ConditionalDeploy( return err } +// storeExecutionSummary fetches and stores the ansible execution summary for a +// completed or failed Job into the deployment status. +func (d *Deployer) storeExecutionSummary(ansibleJob *batchv1.Job) { + log := d.Helper.GetLogger() + summary, err := dataplaneutil.GetAnsibleExecutionSummary(d.Ctx, d.Helper, ansibleJob) + if err != nil { + log.Error(err, "Unable to get ansible execution summary", "execution", ansibleJob.Name) + return + } + if summary == nil { + return + } + if d.Status.AnsibleExecutionSummaries == nil { + d.Status.AnsibleExecutionSummaries = make(map[string]dataplanev1.AnsibleExecutionSummary) + } + d.Status.AnsibleExecutionSummaries[ansibleJob.Name] = *summary +} + // addCertMounts adds the cert mounts to the aeeSpec for the install-certs service func (d *Deployer) addCertMounts( services []string, diff --git a/internal/dataplane/util/ansible_execution.go b/internal/dataplane/util/ansible_execution.go index bbef23cd03..2fa60e8983 100644 --- a/internal/dataplane/util/ansible_execution.go +++ b/internal/dataplane/util/ansible_execution.go @@ -177,6 +177,72 @@ func GetAnsibleExecution(ctx context.Context, return ansibleEE, nil } +// GetAnsibleExecutionSummary retrieves the summary reported by a completed AEE pod. 
+func GetAnsibleExecutionSummary( + ctx context.Context, + helper *helper.Helper, + job *batchv1.Job, +) (*dataplanev1.AnsibleExecutionSummary, error) { + podList := &corev1.PodList{} + if err := helper.GetClient().List( + ctx, + podList, + client.InNamespace(job.Namespace), + client.MatchingLabels{"batch.kubernetes.io/job-name": job.Name}, + ); err != nil { + return nil, err + } + + sort.Slice(podList.Items, func(i, j int) bool { + return podList.Items[j].CreationTimestamp.Before(&podList.Items[i].CreationTimestamp) + }) + + for _, pod := range podList.Items { + summary, err := ParseAnsibleExecutionSummaryFromPod(&pod) + if err != nil { + return nil, err + } + if summary != nil { + return summary, nil + } + } + + return nil, nil +} + +// ParseAnsibleExecutionSummaryFromPod parses the AEE pod termination message JSON. +func ParseAnsibleExecutionSummaryFromPod( + pod *corev1.Pod, +) (*dataplanev1.AnsibleExecutionSummary, error) { + for _, containerStatus := range pod.Status.ContainerStatuses { + terminated := containerStatus.State.Terminated + if terminated == nil { + terminated = containerStatus.LastTerminationState.Terminated + } + if terminated == nil || terminated.Message == "" { + continue + } + + summary := &dataplanev1.AnsibleExecutionSummary{} + if err := json.Unmarshal([]byte(terminated.Message), summary); err != nil { + return nil, fmt.Errorf( + "failed to parse ansible execution summary for pod %s container %s: %w", + pod.Name, + containerStatus.Name, + err, + ) + } + + if summary.TotalHosts == nil || *summary.TotalHosts == 0 { + continue + } + + return summary, nil + } + + return nil, nil +} + // GetAnsibleExecutionNameAndLabels Name and Labels of AnsibleEE func GetAnsibleExecutionNameAndLabels(service *dataplanev1.OpenStackDataPlaneService, deploymentName string, diff --git a/internal/dataplane/util/ansible_execution_test.go b/internal/dataplane/util/ansible_execution_test.go new file mode 100644 index 0000000000..0ad8a91315 --- /dev/null +++ 
b/internal/dataplane/util/ansible_execution_test.go @@ -0,0 +1,110 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util //nolint:revive // util is an acceptable package name in this context + +import ( + "context" + "testing" + + . "github.com/onsi/gomega" //revive:disable:dot-imports + + dataplanev1 "github.com/openstack-k8s-operators/openstack-operator/api/dataplane/v1beta1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" +) + +func TestParseAnsibleExecutionSummaryFromPod(t *testing.T) { + g := NewWithT(t) + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + }, + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "runner", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + Message: `{"totalHosts":3,"failedHosts":1,"unreachableHosts":1,"failurePercent":67,"failedHostList":["host-b"],"unreachableHostList":["host-c"]}`, + }, + }, + }, + }, + }, + } + + summary, err := ParseAnsibleExecutionSummaryFromPod(pod) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(summary).To(Equal(&dataplanev1.AnsibleExecutionSummary{ + TotalHosts: ptr.To(3), + FailedHosts: ptr.To(1), + UnreachableHosts: ptr.To(1), + FailurePercent: ptr.To(67), + FailedHostList: &[]string{"host-b"}, + UnreachableHostList: &[]string{"host-c"}, + })) +} + +func 
TestGetAnsibleExecutionSummary(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-job", + Namespace: "test-namespace", + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + Labels: map[string]string{ + "batch.kubernetes.io/job-name": "test-job", + }, + }, + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "runner", + LastTerminationState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + Message: `{"totalHosts":2,"failedHosts":1,"unreachableHosts":0,"failurePercent":50,"failedHostList":["host-b"],"unreachableHostList":[]}`, + }, + }, + }, + }, + }, + } + + h := setupTestHelper(false, job, pod) + + summary, err := GetAnsibleExecutionSummary(ctx, h, job) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(summary).To(Equal(&dataplanev1.AnsibleExecutionSummary{ + TotalHosts: ptr.To(2), + FailedHosts: ptr.To(1), + UnreachableHosts: ptr.To(0), + FailurePercent: ptr.To(50), + FailedHostList: &[]string{"host-b"}, + UnreachableHostList: &[]string{}, + })) +} diff --git a/test/functional/dataplane/openstackdataplanenodeset_controller_test.go b/test/functional/dataplane/openstackdataplanenodeset_controller_test.go index 2b42df6d23..27eac43f6c 100644 --- a/test/functional/dataplane/openstackdataplanenodeset_controller_test.go +++ b/test/functional/dataplane/openstackdataplanenodeset_controller_test.go @@ -32,6 +32,7 @@ import ( "gopkg.in/yaml.v3" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" ) // Ansible Inventory Structs for testing specific values @@ -1563,6 +1564,84 @@ var _ = Describe("Dataplane NodeSet Test", func() { g.Expect(instance.Status.DeploymentStatuses).Should(HaveKey(secondDeploymentName.Name)) }, th.Timeout, th.Interval).Should(Succeed()) }) + + It("Should copy deployment execution stats into the nodeset status", func() { + 
Eventually(func(g Gomega) { + deployment := GetDataplaneDeployment(dataplaneDeploymentName) + deployment.Status.AnsibleExecutionSummaries = map[string]dataplanev1.AnsibleExecutionSummary{ + "bootstrap-" + dataplaneDeploymentName.Name + "-" + dataplaneNodeSetName.Name: { + TotalHosts: ptr.To(3), + FailedHosts: ptr.To(1), + UnreachableHosts: ptr.To(1), + FailurePercent: ptr.To(67), + FailedHostList: &[]string{"host-b"}, + UnreachableHostList: &[]string{"host-c"}, + }, + } + g.Expect(th.K8sClient.Status().Update(th.Ctx, deployment)).To(Succeed()) + }, th.Timeout, th.Interval).Should(Succeed()) + + Eventually(func(g Gomega) { + deployment := GetDataplaneDeployment(secondDeploymentName) + deployment.Status.AnsibleExecutionSummaries = map[string]dataplanev1.AnsibleExecutionSummary{ + "bootstrap-" + secondDeploymentName.Name + "-" + dataplaneNodeSetName.Name: { + TotalHosts: ptr.To(2), + FailedHosts: ptr.To(0), + UnreachableHosts: ptr.To(0), + FailurePercent: ptr.To(0), + FailedHostList: &[]string{}, + UnreachableHostList: &[]string{}, + }, + "configure-network-" + secondDeploymentName.Name + "-" + dataplaneNodeSetName.Name: { + TotalHosts: ptr.To(2), + FailedHosts: ptr.To(0), + UnreachableHosts: ptr.To(1), + FailurePercent: ptr.To(50), + FailedHostList: &[]string{}, + UnreachableHostList: &[]string{"host-a"}, + }, + } + g.Expect(th.K8sClient.Status().Update(th.Ctx, deployment)).To(Succeed()) + }, th.Timeout, th.Interval).Should(Succeed()) + + Eventually(func(g Gomega) { + instance := GetDataplaneNodeSet(dataplaneNodeSetName) + g.Expect(instance.Status.DeploymentExecutionSummaries).To(HaveKey(dataplaneDeploymentName.Name)) + g.Expect(instance.Status.DeploymentExecutionSummaries).To(HaveKey(secondDeploymentName.Name)) + g.Expect(instance.Status.DeploymentExecutionSummaries[dataplaneDeploymentName.Name]).To(Equal( + map[string]dataplanev1.AnsibleExecutionSummary{ + "bootstrap-" + dataplaneDeploymentName.Name + "-" + dataplaneNodeSetName.Name: { + TotalHosts: ptr.To(3), + 
FailedHosts: ptr.To(1), + UnreachableHosts: ptr.To(1), + FailurePercent: ptr.To(67), + FailedHostList: &[]string{"host-b"}, + UnreachableHostList: &[]string{"host-c"}, + }, + }, + )) + g.Expect(instance.Status.DeploymentExecutionSummaries[secondDeploymentName.Name]).To(Equal( + map[string]dataplanev1.AnsibleExecutionSummary{ + "bootstrap-" + secondDeploymentName.Name + "-" + dataplaneNodeSetName.Name: { + TotalHosts: ptr.To(2), + FailedHosts: ptr.To(0), + UnreachableHosts: ptr.To(0), + FailurePercent: ptr.To(0), + FailedHostList: &[]string{}, + UnreachableHostList: &[]string{}, + }, + "configure-network-" + secondDeploymentName.Name + "-" + dataplaneNodeSetName.Name: { + TotalHosts: ptr.To(2), + FailedHosts: ptr.To(0), + UnreachableHosts: ptr.To(1), + FailurePercent: ptr.To(50), + FailedHostList: &[]string{}, + UnreachableHostList: &[]string{"host-a"}, + }, + }, + )) + }, th.Timeout, th.Interval).Should(Succeed()) + }) }) When("Latest deployment has ServicesOverride", func() {