feat: wait for run in resource spacelift_run #535

Merged · 14 commits · Apr 18, 2024
19 changes: 19 additions & 0 deletions docs/resources/run.md
@@ -40,7 +40,26 @@ resource "spacelift_run" "this" {
- `commit_sha` (String) The commit SHA for which to trigger a run.
- `keepers` (Map of String) Arbitrary map of values that, when changed, will trigger recreation of the resource.
- `proposed` (Boolean) Whether the run is a proposed run. Defaults to `false`.
- `timeouts` (Block, Optional) (see [below for nested schema](#nestedblock--timeouts))
- `wait` (Block List, Max: 1) Wait for the run to finish (see [below for nested schema](#nestedblock--wait))

### Read-Only

- `id` (String) The ID of the triggered run.

<a id="nestedblock--timeouts"></a>
### Nested Schema for `timeouts`

Optional:

- `create` (String)


<a id="nestedblock--wait"></a>
### Nested Schema for `wait`

Optional:

- `continue_on_state` (Set of String) Continue on the specified states of a finished run. If not specified, the default is `[ 'finished' ]`. You can use the following states: `applying`, `canceled`, `confirmed`, `destroying`, `discarded`, `failed`, `finished`, `initializing`, `pending_review`, `performing`, `planning`, `preparing_apply`, `preparing_replan`, `preparing`, `queued`, `ready`, `replan_requested`, `skipped`, `stopped`, `unconfirmed`.
- `continue_on_timeout` (Boolean) Continue if the run timed out, i.e. it did not reach any defined end state in time. Default: `false`
- `disabled` (Boolean) Whether waiting for a job is disabled or not. Default: `false`
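
For illustration, a minimal configuration exercising the `wait` and `timeouts` blocks might look like the sketch below; the stack ID and keeper value are placeholders, not values taken from this PR.

```terraform
resource "spacelift_run" "this" {
  # Hypothetical stack ID, used only for illustration.
  stack_id = "my-stack-id"

  # Changing any keeper value re-triggers the run.
  keepers = {
    release = "v1.2.3"
  }

  # Overall budget for the create operation (defaults to 30 minutes).
  timeouts {
    create = "45m"
  }

  wait {
    # Also accept a run that stops at manual confirmation.
    continue_on_state   = ["finished", "unconfirmed"]
    continue_on_timeout = false
  }
}
```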
2 changes: 1 addition & 1 deletion go.mod
@@ -6,6 +6,7 @@ require (
github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1
github.com/hashicorp/go-cty v1.4.1-0.20200414143053-d3edf31b6320
github.com/hashicorp/go-retryablehttp v0.7.4
github.com/hashicorp/terraform-plugin-log v0.9.0
github.com/hashicorp/terraform-plugin-sdk/v2 v2.29.0
github.com/kelseyhightower/envconfig v1.4.0
github.com/pkg/errors v0.9.1
@@ -36,7 +37,6 @@ require (
github.com/hashicorp/terraform-exec v0.19.0 // indirect
github.com/hashicorp/terraform-json v0.17.1 // indirect
github.com/hashicorp/terraform-plugin-go v0.19.0 // indirect
github.com/hashicorp/terraform-plugin-log v0.9.0 // indirect
github.com/hashicorp/terraform-registry-address v0.2.2 // indirect
github.com/hashicorp/terraform-svchost v0.1.1 // indirect
github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d // indirect
14 changes: 14 additions & 0 deletions spacelift/internal/error.go
@@ -46,3 +46,17 @@ func parseExtensions(ext map[string]interface{}) string {

return strings.Join(errorParts, ", ")
}

// AsError is an inline form of errors.As.
func AsError[TError error](err error) (TError, bool) {
var as TError
ok := errors.As(err, &as)
return as, ok
}

// IsErrorType reports whether the type of any error in err's chain matches
// the TError type.
func IsErrorType[TError error](err error) bool {
_, ok := AsError[TError](err)
return ok
}
207 changes: 204 additions & 3 deletions spacelift/resource_run.go
@@ -2,9 +2,16 @@ package spacelift

import (
"context"
"fmt"
"slices"
"strings"
"time"

"github.com/hashicorp/terraform-plugin-log/tflog"
"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/pkg/errors"
"github.com/shurcooL/graphql"

"github.com/spacelift-io/terraform-provider-spacelift/spacelift/internal"
@@ -20,6 +27,11 @@ func resourceRun() *schema.Resource {
CreateContext: resourceRunCreate,
ReadContext: schema.NoopContext,
Delete: schema.RemoveFromState,
UpdateContext: schema.NoopContext,

Timeouts: &schema.ResourceTimeout{
Create: schema.DefaultTimeout(30 * time.Minute),
},

Schema: map[string]*schema.Schema{
"stack_id": {
@@ -55,16 +67,144 @@ func resourceRun() *schema.Resource {
Type: schema.TypeString,
Computed: true,
},
"wait": {
Type: schema.TypeList,
Optional: true,
Description: "Wait for the run to finish",
MaxItems: 1,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"disabled": {
Type: schema.TypeBool,
Description: "Whether waiting for a job is disabled or not. Default: `false`",
Optional: true,
Default: false,
},
"continue_on_state": {
Type: schema.TypeSet,
Elem: &schema.Schema{
Type: schema.TypeString,
},
Description: "Continue on the specified states of a finished run. If not specified, the default is `[ 'finished' ]`. You can use following states: `applying`, `canceled`, `confirmed`, `destroying`, `discarded`, `failed`, `finished`, `initializing`, `pending_review`, `performing`, `planning`, `preparing_apply`, `preparing_replan`, `preparing`, `queued`, `ready`, `replan_requested`, `skipped`, `stopped`, `unconfirmed`.",
Optional: true,
},
"continue_on_timeout": {
Type: schema.TypeBool,
Description: "Continue if run timed out, i.e. did not reach any defined end state in time. Default: `false`",
Optional: true,
Default: false,
},
},
},
},
},
}
}

type waitConfiguration struct {
disabled bool
continueOnState []string
continueOnTimeout bool
}

func expandWaitConfiguration(input []interface{}) *waitConfiguration {
if len(input) == 0 {
return nil
}
v := input[0].(map[string]interface{})
cfg := &waitConfiguration{
disabled: v["disabled"].(bool),
continueOnState: []string{},
continueOnTimeout: v["continue_on_timeout"].(bool),
}

if v, ok := v["continue_on_state"]; ok {
for _, item := range v.(*schema.Set).List() {
str, ok := item.(string)
if !ok {
panic(fmt.Sprintf("continue_on_state contains a non-string element %+v", str))
}
cfg.continueOnState = append(cfg.continueOnState, str)
}
}
if len(cfg.continueOnState) == 0 {
cfg.continueOnState = append(cfg.continueOnState, "finished")
}
return cfg
}

func (wait *waitConfiguration) Wait(ctx context.Context, d *schema.ResourceData, client *internal.Client, stackID, mutationID string) diag.Diagnostics {
if wait.disabled {
return nil
}

stateConf := &retry.StateChangeConf{
ContinuousTargetOccurence: 1,
Delay: 10 * time.Second,
MinTimeout: 10 * time.Second,
Pending: []string{
"running",
},
Target: []string{
"finished",
"unconfirmed", // Let's treat unconfirmed as the target state.
// It's not finished, but we don't want to wait for it because it requires confirmation from someone.
},
Refresh: checkStackStatusFunc(ctx, client, stackID, mutationID),
Timeout: d.Timeout(schema.TimeoutCreate),
}

finalState, err := stateConf.WaitForStateContext(ctx)
if err != nil {
if timeoutErr, ok := internal.AsError[*retry.TimeoutError](err); ok {
tflog.Debug(ctx, "received retry.TimeoutError from WaitForStateContext", map[string]any{
"stackID": stackID,
"runID": mutationID,
"lastState": timeoutErr.LastState,
"expectedState": timeoutErr.ExpectedState,
})
finalState = "__timeout__"
} else if err == context.DeadlineExceeded {
tflog.Debug(ctx, "received context.DeadlineExceeded from WaitForStateContext", map[string]any{
"stackID": stackID,
"runID": mutationID,
})
finalState = "__timeout__"
} else {
return diag.Errorf("failed waiting for run %s on stack %s to finish. error(%T): %+v ", mutationID, stackID, err, err)
}
}

switch finalState.(string) {
case "__timeout__":
if !wait.continueOnTimeout {
return diag.Errorf("run %s on stack %s has timed out", mutationID, stackID)
}
tflog.Info(ctx, "run timed out but continue_on_timeout=true",
map[string]any{
"stackID": stackID,
"runID": mutationID,
})
default:
if !slices.Contains[[]string](wait.continueOnState, finalState.(string)) {
return diag.Errorf("run %s on stack %s has ended with status %s. expected %v", mutationID, stackID, finalState, wait.continueOnState)
}
tflog.Debug(ctx, "run finished", map[string]any{
"stackID": stackID,
"runID": mutationID,
"finalState": finalState,
})
}

return nil
}

func resourceRunCreate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
var mutation struct {
ID string `graphql:"runResourceCreate(stack: $stack, commitSha: $sha, proposed: $proposed)"`
}

stackID := d.Get("stack_id")
stackID := d.Get("stack_id").(string)

variables := map[string]interface{}{
"stack": toID(stackID),
@@ -80,11 +220,72 @@ func resourceRunCreate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
variables["proposed"] = graphql.NewBoolean(graphql.Boolean(proposed.(bool)))
}

if err := meta.(*internal.Client).Mutate(ctx, "ResourceRunCreate", &mutation, variables); err != nil {
client := meta.(*internal.Client)
if err := client.Mutate(ctx, "ResourceRunCreate", &mutation, variables); err != nil {
return diag.Errorf("could not trigger run for stack %s: %v", stackID, internal.FromSpaceliftError(err))
}

d.SetId(mutation.ID)
if waitRaw, ok := d.GetOk("wait"); ok {
wait := expandWaitConfiguration(waitRaw.([]interface{}))
if diag := wait.Wait(ctx, d, client, stackID, mutation.ID); len(diag) > 0 {
return diag
}
}

d.SetId(mutation.ID)
return nil
}

func checkStackStatusFunc(ctx context.Context, client *internal.Client, stackID string, runID string) retry.StateRefreshFunc {
return func() (result any, state string, err error) {
// Instead of a resource handle, we return the current state as the result.
// This makes it easier to detect which end state has been reached;
// otherwise we would need another GraphQL query.
result, finished, err := getStackRunStateByID(ctx, client, stackID, runID)
if err != nil {
return
}
state = "running"
if finished {
state = "finished"
}
// Let's treat unconfirmed as the target state.
// It's not finished, but we don't want to wait for it because it requires confirmation from someone.
if result == "unconfirmed" {
state = "unconfirmed"
}
return
}
}

func getStackRunStateByID(ctx context.Context, client *internal.Client, stackID string, runID string) (string, bool, error) {
var query struct {
Stack struct {
RunResourceState struct {
ID graphql.String
State graphql.String
Finished graphql.Boolean
} `graphql:"runResourceState(id: $runId)"`
} `graphql:"stack(id: $stackId)"`
}

variables := map[string]interface{}{
"stackId": graphql.ID(stackID),
"runId": graphql.ID(runID),
}

if err := client.Query(ctx, "StackRunRead", &query, variables); err != nil {
return "", false, errors.Wrap(err, fmt.Sprintf("could not query for run %s of stack %s", runID, stackID))
}

rrs := query.Stack.RunResourceState

currentState := strings.ToLower(string(rrs.State))
tflog.Debug(ctx, "current state of run", map[string]interface{}{
"stackID": stackID,
"runID": runID,
"currentState": currentState,
"finished": rrs.Finished,
})
return currentState, bool(rrs.Finished), nil
}