diff --git a/ecs_deploy/cli.py b/ecs_deploy/cli.py index 679fe93..94cae82 100644 --- a/ecs_deploy/cli.py +++ b/ecs_deploy/cli.py @@ -155,13 +155,15 @@ def deploy(cluster, service, tag, image, command, health_check, cpu, memory, mem ignore_warnings=ignore_warnings, sleep_time=sleep_time ) - + if deployment.rollback: + slack.notify_failure(cluster, "Rollback", service=service) + exit(100) except TaskPlacementError as e: slack.notify_failure(cluster, str(e), service=service) if rollback: click.secho('%s\n' % str(e), fg='red', err=True) rollback_task_definition(deployment, td, new_td, sleep_time=sleep_time) - exit(1) + exit(100) else: raise @@ -532,21 +534,25 @@ def wait_for_finish(action, timeout, title, success_message, failure_message, waiting = True while waiting and datetime.now() < waiting_timeout: - click.secho('.', nl=False) service = action.get_service() - inspected_until = inspect_errors( + inspected_until = inspect_events( service=service, failure_message=failure_message, ignore_warnings=ignore_warnings, since=inspected_until, timeout=False ) + waiting = not action.is_deployed(service) + if action.primary_deployment_updated(service): + event_log(datetime.now().isoformat(), + action.primary_deployment.status_message, + 'WARNING' if 'rolling back' in action.primary_deployment.status_message else 'INFO') if waiting: sleep(sleep_time) - inspect_errors( + inspect_events( service=service, failure_message=failure_message, ignore_warnings=ignore_warnings, @@ -554,9 +560,12 @@ def wait_for_finish(action, timeout, title, success_message, failure_message, timeout=waiting ) - click.secho('\n%s' % success_message, fg='green') click.secho('Duration: %s sec\n' % (datetime.now() - start_timestamp).seconds) + if action.rollback: + click.secho('Deployment failed, but service has been rolled back to previous task definition', fg='red') + else: + click.secho('\n%s' % success_message, fg='green') def deploy_task_definition(deployment, task_definition, title, success_message, failure_message, timeout, deregister, @@ -669,38 +678,29 @@ def print_diff(task_definition, title='Updating task definition'): click.secho('') -def inspect_errors(service, failure_message, ignore_warnings, since, timeout): +def inspect_events(service, failure_message, ignore_warnings, since, timeout): error = False - last_error_timestamp = since - warnings = service.get_warnings(since) - for timestamp in warnings: - message = warnings[timestamp] - click.secho('') - if ignore_warnings: - last_error_timestamp = timestamp - click.secho( - '%s\nWARNING: %s' % (timestamp, message), - fg='yellow', - err=False - ) + last_event_timestamp = since + events = service.get_events(since) + for timestamp in events: + message = events[timestamp] + last_event_timestamp = timestamp + message_lower = message.lower() + + if 'unable' in message_lower: + error = False if ignore_warnings else True + level = 'ERROR' if error else 'WARNING' click.secho('Continuing.', nl=False) + event_log(timestamp, message, level) + elif 'rolling back' in message_lower: + event_log(timestamp, message, 'WARNING') else: - click.secho( - '%s\nERROR: %s\n' % (timestamp, message), - fg='red', - err=True - ) - error = True + event_log(timestamp, message, 'INFO') if service.older_errors: - click.secho('') - click.secho('Older errors', fg='yellow', err=True) + event_log(timestamp, 'Older errors', 'WARNING') for timestamp in service.older_errors: - click.secho( - text='%s\n%s\n' % (timestamp, service.older_errors[timestamp]), - fg='yellow', - err=True - ) + event_log(timestamp, service.older_errors[timestamp], 'WARNING') if timeout: error = True @@ -711,8 +711,24 @@ def inspect_errors(service, failure_message, ignore_warnings, since, timeout): if error: raise TaskPlacementError(failure_message) - return last_error_timestamp + return last_event_timestamp + +def event_log(timestamp, message, level): + """ + Helper function to display a message with consistent formatting and color coding. + """ + color_map = { + 'INFO': 'green', + 'WARNING': 'yellow', + 'ERROR': 'red' + } + color = color_map.get(level, 'white') # Default to white if the level is unknown + click.secho( + f'{timestamp} {level}: {message}', + fg=color, + err=(level == 'ERROR') + ) ecs.add_command(deploy) ecs.add_command(scale) diff --git a/ecs_deploy/ecs.py b/ecs_deploy/ecs.py index 2fb4824..e36b2f6 100644 --- a/ecs_deploy/ecs.py +++ b/ecs_deploy/ecs.py @@ -205,6 +205,18 @@ class EcsDeployment(dict): ROLLOUT_STATE_FAILED = u'FAILED' ROLLOUT_STATE_COMPLETED = u'COMPLETED' + def __eq__(self, other): + return self.get(u'status') == other.get(u'status') and \ + other.get('runningCount') == self.get('runningCount') and \ + other.get('pendingCount') == self.get('pendingCount') and \ + other.get('failedTasks') == self.get('failedTasks') and \ + other.get('rolloutState') == self.get('rolloutState') and \ + other.get('rolloutStateReason') == self.get('rolloutStateReason') + + @property + def status_message(self): + return f"{self.get('rolloutState')} : {self.rollout_state_reason} Running: {self.get('runningCount')} / Pending: {self.get('pendingCount')} / Failed: {self.get('failedTasks')}" + @property def is_primary(self): return self.get(u'status') == self.STATUS_PRIMARY @@ -217,6 +229,10 @@ def is_active(self): def has_failed(self): return self.get(u'rolloutState') == self.ROLLOUT_STATE_FAILED + @property + def is_rollback(self): + return "rolling back" in self.get(u'rolloutStateReason') + @property def has_completed(self): return self.get(u'rolloutState') == self.ROLLOUT_STATE_COMPLETED @@ -298,15 +314,17 @@ def older_errors(self): ) def get_warnings(self, since=None, until=None): + events = self.get_events(since, until) + return {k: v for k, v in events.items() if 'unable' in v} + + def get_events(self, since=None, until=None): since = since or self.deployment_created_at until = until or datetime.now(tz=tzlocal()) - errors = {} + events = {} for event in self.get(u'events'): - if u'unable' not in event[u'message']: - continue if since < event[u'createdAt'] < until: - errors[event[u'createdAt']] = event[u'message'] - return errors + events[event[u'createdAt']] = event[u'message'] + return events class EcsTaskDefinition(object): @@ -1308,6 +1326,9 @@ def __init__(self, client, cluster_name, service_name): try: if service_name: self._service = self.get_service() + self.primary_deployment = self._service.primary_deployment + self.active_deployment = self._service.active_deployment + self.rollback = False except IndexError: raise EcsConnectionError( u'An error occurred when calling the DescribeServices ' @@ -1372,8 +1393,23 @@ def update_service(self, service, desired_count=None): task_definition=service.task_definition ) return EcsService(self._cluster_name, response[u'service']) + + def primary_deployment_updated(self, service): + if service.primary_deployment != self.primary_deployment: + self.primary_deployment = service.primary_deployment + return True + + def active_deployment_updated(self, service): + if service.active_deployment != self.active_deployment: + self.active_deployment = service.active_deployment + return True + def deployment_status_updated(self, service): + return self.primary_deployment_updated(service) or self.active_deployment_updated(service) + def is_deployed(self, service): + if service.primary_deployment.is_rollback: + self.rollback = True if service.primary_deployment.has_failed: raise EcsDeploymentError(u'Deployment Failed! ' + service.primary_deployment.rollout_state_reason) if service.primary_deployment.failed_tasks > 0 and service.primary_deployment.failed_tasks != self.FAILED_TASKS: @@ -1392,7 +1428,7 @@ def is_deployed(self, service): service=service, task_arns=running_tasks[u'taskArns'] ) - return service.desired_count == running_count + return service.desired_count == running_count and service.primary_deployment.has_completed def get_running_tasks_count(self, service, task_arns): running_count = 0 diff --git a/tests/test_cli.py b/tests/test_cli.py index 96bff7b..3d04021 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -81,7 +81,7 @@ def test_deploy_with_rollback(get_client, runner): get_client.return_value = EcsTestClient('acces_key', 'secret_key', wait=2) result = runner.invoke(cli.deploy, (CLUSTER_NAME, SERVICE_NAME, '--timeout=1', '--rollback')) - assert result.exit_code == 1 + assert result.exit_code == 100 assert result.exception assert u"Deploying based on task definition: test-task:1" in result.output @@ -781,7 +781,6 @@ def test_deploy_with_wait_within_timeout(get_client, runner): result = runner.invoke(cli.deploy, (CLUSTER_NAME, SERVICE_NAME, '--timeout', '10')) assert result.exit_code == 0 assert u'Deploying new task definition' in result.output - assert u'...' in result.output @patch('ecs_deploy.cli.get_client')