diff --git a/README.md b/README.md index 549ac34..aa4591c 100644 --- a/README.md +++ b/README.md @@ -44,11 +44,11 @@ with open('foo.tf', 'r') as file: ### Parse Tree to HCL2 reconstruction -With version 5.0.0 the possibility of HCL2 reconstruction from Lark Parse Tree was introduced. +With version 5.x the possibility of HCL2 reconstruction from the Lark Parse Tree and Python dictionaries directly was introduced. -Example of manipulating Lark Parse Tree and reconstructing it back into valid HCL2 can be found in [tree-to-hcl2-reconstruction.md](https://github.com/amplify-education/python-hcl2/blob/main/tree-to-hcl2-reconstruction.md) file. +Documentation and an example of manipulating Lark Parse Tree and reconstructing it back into valid HCL2 can be found in [tree-to-hcl2-reconstruction.md](https://github.com/amplify-education/python-hcl2/blob/main/tree-to-hcl2-reconstruction.md) file. -More details about reconstruction implementation can be found in this [PR](https://github.com/amplify-education/python-hcl2/pull/169). +More details about reconstruction implementation can be found in PRs #169 and #177. ## Building From Source diff --git a/hcl2/__init__.py b/hcl2/__init__.py index f56787b..69404cf 100644 --- a/hcl2/__init__.py +++ b/hcl2/__init__.py @@ -5,4 +5,14 @@ except ImportError: __version__ = "unknown" -from .api import load, loads, parse, parses, transform, writes, AST +from .api import ( + load, + loads, + parse, + parses, + transform, + reverse_transform, + writes, + AST, + Builder, +) diff --git a/hcl2/api.py b/hcl2/api.py index 4a3ec10..ddb40e9 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -4,6 +4,7 @@ from lark.tree import Tree as AST from hcl2.parser import hcl2 from hcl2.transformer import DictTransformer +from hcl2.builder import Builder def load(file: TextIO, with_meta=False) -> dict: @@ -56,6 +57,19 @@ def transform(ast: AST, with_meta=False) -> dict: return DictTransformer(with_meta=with_meta).transform(ast) +def reverse_transform(hcl2_dict: dict) -> AST: + """Convert a dictionary to an HCL2 AST. + :param dict: a dictionary produced by `load` or `transform` + """ + # defer this import until this method is called, due to the performance hit + # of rebuilding the grammar without cache + from hcl2.reconstructor import ( # pylint: disable=import-outside-toplevel + hcl2_reverse_transformer, + ) + + return hcl2_reverse_transformer.transform(hcl2_dict) + + def writes(ast: AST) -> str: """Convert an HCL2 syntax tree to a string. :param ast: HCL2 syntax tree, output from `parse` or `parses` diff --git a/hcl2/builder.py b/hcl2/builder.py new file mode 100644 index 0000000..5ef0c48 --- /dev/null +++ b/hcl2/builder.py @@ -0,0 +1,53 @@ +"""A utility class for constructing HCL documents from Python code.""" + +from typing import List +from typing_extensions import Self + + +class Builder: + def __init__(self, attributes: dict = {}): + self.blocks = {} + self.attributes = attributes + + def block( + self, block_type: str, labels: List[str] = [], **attributes: dict + ) -> Self: + """Create a block within this HCL document.""" + block = Builder(attributes) + + # initialize a holder for blocks of that type + if block_type not in self.blocks: + self.blocks[block_type] = [] + + # store the block in the document + self.blocks[block_type].append((labels.copy(), block)) + + return block + + def build(self): + """Return the Python dictionary for this HCL document.""" + body = { + "__start_line__": -1, + "__end_line__": -1, + **self.attributes, + } + + for block_type, blocks in self.blocks.items(): + + # initialize a holder for blocks of that type + if block_type not in body: + body[block_type] = [] + + for labels, block_builder in blocks: + # build the sub-block + block = block_builder.build() + + # apply any labels + labels.reverse() + for label in labels: + block = {label: block} + + # store it in the body + body[block_type].append(block) + + return body diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index d26acc1..0349471 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -1,7 +1,7 @@ start : body body : (new_line_or_comment? (attribute | block))* new_line_or_comment? attribute : identifier EQ expression -block : identifier (identifier | STRING_LIT)* new_line_or_comment? "{" body "}" +block : identifier (identifier | STRING_LIT | string_with_interpolation)* new_line_or_comment? "{" body "}" new_line_and_or_comma: new_line_or_comment | "," | "," new_line_or_comment new_line_or_comment: ( NL_OR_COMMENT )+ NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ @@ -22,12 +22,26 @@ conditional : expression "?" new_line_or_comment? expression new_line_or_comment binary_op : expression binary_term new_line_or_comment? !binary_operator : BINARY_OP binary_term : binary_operator new_line_or_comment? expression -BINARY_OP : "==" | "!=" | "<" | ">" | "<=" | ">=" | "-" | "*" | "/" | "%" | "&&" | "||" | "+" +BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS +DOUBLE_EQ : "==" +NEQ : "!=" +LT : "<" +GT : ">" +LEQ : "<=" +GEQ : ">=" +MINUS : "-" +ASTERISK : "*" +SLASH : "/" +PERCENT : "%" +DOUBLE_AMP : "&&" +DOUBLE_PIPE : "||" +PLUS : "+" expr_term : "(" new_line_or_comment? expression new_line_or_comment? ")" | float_lit | int_lit | STRING_LIT + | string_with_interpolation | tuple | object | function_call @@ -42,11 +56,10 @@ expr_term : "(" new_line_or_comment? expression new_line_or_comment? ")" | for_tuple_expr | for_object_expr - -STRING_LIT : "\"" (STRING_CHARS | INTERPOLATION)* "\"" -STRING_CHARS : /(?:(?!\${)([^"\\]|\\.))+/+ // any character except '"" unless inside a interpolation string -NESTED_INTERPOLATION : "${" /[^}]+/ "}" -INTERPOLATION : "${" (/(?:(?!\${)([^}]))+/ | NESTED_INTERPOLATION)+ "}" +STRING_LIT : "\"" (STRING_CHARS)* "\"" +STRING_CHARS : /(?:(?!\${)([^"\\]|\\.))+/+ // any character except '"' unless inside a interpolation string +string_with_interpolation: "\"" (STRING_CHARS)* interpolation_maybe_nested (STRING_CHARS | interpolation_maybe_nested)* "\"" +interpolation_maybe_nested: "${" expression "}" int_lit : DECIMAL+ !float_lit: DECIMAL+ "." DECIMAL+ (EXP_MARK DECIMAL+)? @@ -77,8 +90,9 @@ get_attr : "." identifier attr_splat : ".*" get_attr* full_splat : "[*]" (get_attr | index)* +FOR_OBJECT_ARROW : "=>" !for_tuple_expr : "[" new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? "]" -!for_object_expr : "{" new_line_or_comment? for_intro new_line_or_comment? expression "=>" new_line_or_comment? expression "..."? new_line_or_comment? for_cond? new_line_or_comment? "}" +!for_object_expr : "{" new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression "..."? new_line_or_comment? for_cond? new_line_or_comment? "}" !for_intro : "for" new_line_or_comment? identifier ("," identifier new_line_or_comment?)? new_line_or_comment? "in" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment? !for_cond : "if" new_line_or_comment? expression diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index f6bbb9b..b2b57b8 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -1,8 +1,15 @@ """A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality""" -from lark import Lark -from lark.reconstruct import Reconstructor -from lark.utils import is_id_continue +import re +import json +from typing import List, Dict, Callable, Optional + +from lark import Lark, Tree +from lark.grammar import Terminal, NonTerminal, Symbol +from lark.lexer import Token, PatternStr, TerminalDef +from lark.reconstruct import Reconstructor, is_iter_empty +from lark.tree_matcher import is_discarded_terminal +from lark.visitors import Transformer_InPlace # this is duplicated from `parser` because we need different options here for # the reconstructor. please make sure changes are kept in sync between the two @@ -20,143 +27,632 @@ maybe_placeholders=False, # Needed for reconstruction ) -CHAR_SPACE_AFTER = set(',~@<>="|?)]:') -CHAR_SPACE_BEFORE = (CHAR_SPACE_AFTER - set(",=")) | set("'") -KEYWORDS_SPACE_AFTER = [ - "if", - "in", - "for", - "for_each", - "==", - "!=", - "<", - ">", - "<=", - ">=", - "-", - "*", - "/", - "%", - "&&", - "||", - "+", -] -KEYWORDS_SPACE_BEFORE = KEYWORDS_SPACE_AFTER -DIGITS = set("0123456789") -NEVER_SPACE_AFTER = set("[(") -NEVER_SPACE_BEFORE = set("]),.") -NEVER_COMMA_BEFORE = set("])}") -# characters that are OK to come right after an identifier with no space between -IDENT_NO_SPACE = set("()[]") - - -def _add_extra_space(prev_item, item): - # pylint: disable=too-many-boolean-expressions, too-many-return-statements - - ##### the scenarios where explicitly disallow spaces: ##### - - # if we already have a space, don't add another - if prev_item[-1].isspace() or item[0].isspace(): - return False - # none of the following should be separated by spaces: - # - groups of digits - # - namespaced::function::calls - # - characters within an identifier like array[0]() - if ( - (prev_item[-1] in DIGITS and item[0] in DIGITS) - or item == "::" - or prev_item == "::" - or (prev_item[-1] in IDENT_NO_SPACE and item[0] in IDENT_NO_SPACE) +# function to remove the backslashes within interpolated portions +def reverse_quotes_within_interpolation(interp_s: str) -> str: + """ + A common operation is to `json.dumps(s)` where s is a string to output in + HCL. This is useful for automatically escaping any quotes within the + string, but this escapes quotes within interpolation incorrectly. This + method removes any erroneous escapes within interpolated segments of a + string. + """ + return re.sub(r"\$\{(.*)\}", lambda m: m.group(0).replace('\\"', '"'), interp_s) + + +class WriteTokensAndMetaTransformer(Transformer_InPlace): + """ + Inserts discarded tokens into their correct place, according to the rules + of grammar, and annotates with metadata during reassembly. The metadata + tracked here include the terminal which generated a particular string + output, and the rule that that terminal was matched on. + + This is a modification of lark.reconstruct.WriteTokensTransformer + """ + + tokens: Dict[str, TerminalDef] + term_subs: Dict[str, Callable[[Symbol], str]] + + def __init__( + self, + tokens: Dict[str, TerminalDef], + term_subs: Dict[str, Callable[[Symbol], str]], + ) -> None: + self.tokens = tokens + self.term_subs = term_subs + + def __default__(self, data, children, meta): + """ + This method is called for every token the transformer visits. + """ + + if not getattr(meta, "match_tree", False): + return Tree(data, children) + iter_args = iter( + [child[2] if isinstance(child, tuple) else child for child in children] + ) + to_write = [] + for sym in meta.orig_expansion: + if is_discarded_terminal(sym): + try: + value = self.term_subs[sym.name](sym) + except KeyError as exc: + token = self.tokens[sym.name] + if not isinstance(token.pattern, PatternStr): + raise NotImplementedError( + f"Reconstructing regexps not supported yet: {token}" + ) from exc + + value = token.pattern.value + + # annotate the leaf with the specific rule (data) and terminal + # (sym) it was generated from + to_write.append((data, sym, value)) + else: + item = next(iter_args) + if isinstance(item, list): + to_write += item + else: + if isinstance(item, Token): + assert Terminal(item.type) == sym, item + # annotate the leaf with the specific rule (data) and + # terminal (sym) it was generated from + to_write.append((data, sym, item)) + else: + assert NonTerminal(item.data) == sym, (sym, item) + to_write.append(item) + + assert is_iter_empty(iter_args) + return to_write + + +class HCLReconstructor(Reconstructor): + """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" + + # these variables track state during reconstuction to enable us to make + # informed decisions about formatting our output. They are primarily used + # by the _should_add_space(...) method. + last_char_space = True + last_terminal = None + last_rule = None + deferred_item = None + + def __init__( + self, + parser: Lark, + term_subs: Optional[Dict[str, Callable[[Symbol], str]]] = None, ): - return False + Reconstructor.__init__(self, parser, term_subs) + + self.write_tokens = WriteTokensAndMetaTransformer( + {token.name: token for token in self.tokens}, term_subs or {} + ) - # specific characters are also blocklisted from having spaces - if prev_item[-1] in NEVER_SPACE_AFTER or item[0] in NEVER_SPACE_BEFORE: + # space around these terminals if they're within for or if statements + FOR_IF_KEYWORDS = [ + Terminal("IF"), + Terminal("IN"), + Terminal("FOR"), + Terminal("FOR_EACH"), + Terminal("FOR_OBJECT_ARROW"), + Terminal("COLON"), + ] + + # space on both sides, in ternaries and binary operators + BINARY_OPS = [ + Terminal("QMARK"), + Terminal("COLON"), + Terminal("BINARY_OP"), + ] + + # pylint: disable=too-many-branches, too-many-return-statements + def _should_add_space(self, rule, current_terminal): + """ + This method documents the situations in which we add space around + certain tokens while reconstructing the generated HCL. + + Additional rules can be added here if the generated HCL has + improper whitespace (affecting parse OR affecting ability to perfectly + reconstruct a file down to the whitespace level.) + + It has the following information available to make its decision: + + - the last token (terminal) we output + - the last rule that token belonged to + - the current token (terminal) we're about to output + - the rule the current token belongs to + + This should be sufficient to make a spacing decision. + """ + # we don't need to add multiple spaces + if self.last_char_space: + return False + + # we don't add a space at the start of the file + if not self.last_terminal or not self.last_rule: + return False + + # always add a space after the equals sign in an attribute + if ( + isinstance(self.last_rule, Token) + and self.last_rule.value == "attribute" + and self.last_terminal == Terminal("EQ") + and current_terminal != Terminal("NL_OR_COMMENT") + ): + return True + + # always add a space after the equals sign in an object + if ( + isinstance(self.last_rule, Token) + and self.last_rule.value == "object_elem" + and self.last_terminal == Terminal("EQ") + and current_terminal != Terminal("NL_OR_COMMENT") + ): + return True + + # if we're in a ternary or binary operator, add space around the operator + if ( + isinstance(rule, Token) + and rule.value + in [ + "conditional", + "binary_operator", + ] + and current_terminal in self.BINARY_OPS + ): + return True + + # if we just left a ternary or binary operator, add space around the + # operator unless there's a newline already + if ( + isinstance(self.last_rule, Token) + and self.last_rule.value + in [ + "conditional", + "binary_operator", + ] + and self.last_terminal in self.BINARY_OPS + and current_terminal != Terminal("NL_OR_COMMENT") + ): + return True + + # if we're in a for or if statement and find a keyword, add a space + if ( + isinstance(rule, Token) + and rule.value + in [ + "for_object_expr", + "for_cond", + "for_intro", + ] + and current_terminal in self.FOR_IF_KEYWORDS + ): + return True + + # if we've just left a for or if statement and find a keyword, add a + # space, unless we have a newline + if ( + isinstance(self.last_rule, Token) + and self.last_rule.value + in [ + "for_object_expr", + "for_cond", + "for_intro", + ] + and self.last_terminal in self.FOR_IF_KEYWORDS + and current_terminal != Terminal("NL_OR_COMMENT") + ): + return True + + # if we're in a block + if (isinstance(rule, Token) and rule.value == "block") or ( + isinstance(rule, str) and re.match(r"^__block_(star|plus)_.*", rule) + ): + # always add space before the starting brace + if current_terminal == Terminal("LBRACE"): + return True + + # always add space before the closing brace + if current_terminal == Terminal( + "RBRACE" + ) and self.last_terminal != Terminal("LBRACE"): + return True + + # always add space between string literals + if current_terminal == Terminal("STRING_LIT"): + return True + + # if we just opened a block, add a space, unless the block is empty + # or has a newline + if ( + isinstance(self.last_rule, Token) + and self.last_rule.value == "block" + and self.last_terminal == Terminal("LBRACE") + and current_terminal not in [Terminal("RBRACE"), Terminal("NL_OR_COMMENT")] + ): + return True + + # if we're in a tuple or function arguments (this rule matches commas between items) + if isinstance(self.last_rule, str) and re.match( + r"^__(tuple|arguments)_(star|plus)_.*", self.last_rule + ): + + # string literals, decimals, and identifiers should always be + # preceeded by a space if they're following a comma in a tuple or + # function arg + if current_terminal in [ + Terminal("STRING_LIT"), + Terminal("DECIMAL"), + Terminal("NAME"), + ]: + return True + + # the catch-all case, we're not sure, so don't add a space return False - ##### the scenarios where we add spaces: ##### + def _reconstruct(self, tree): + unreduced_tree = self.match_tree(tree, tree.data) + res = self.write_tokens.transform(unreduced_tree) + for item in res: + # any time we encounter a child tree, we recurse + if isinstance(item, Tree): + yield from self._reconstruct(item) - # scenario 1, the prev token ended with an identifier character - # and the next character is not an "IDENT_NO_SPACE" character - if is_id_continue(prev_item[-1]) and not item[0] in IDENT_NO_SPACE: - return True + # every leaf should be a tuple, which contains information about + # which terminal the leaf represents + elif isinstance(item, tuple): + rule, terminal, value = item + + # first, handle any deferred items + if self.deferred_item is not None: + deferred_rule, deferred_terminal, deferred_value = ( + self.deferred_item + ) + + # if we deferred a comma and the next character ends a + # parenthesis or block, we can throw it out + if deferred_terminal == Terminal("COMMA") and terminal in [ + Terminal("RPAR"), + Terminal("RBRACE"), + ]: + pass + # in any other case, we print the deferred item + else: + yield deferred_value + + # and do our bookkeeping + self.last_terminal = deferred_terminal + self.last_rule = deferred_rule + if deferred_value and not deferred_value[-1].isspace(): + self.last_char_space = False + + # clear the deferred item + self.deferred_item = None + + # potentially add a space before the next token + if self._should_add_space(rule, terminal): + yield " " + self.last_char_space = True + + # potentially defer the item if needs to be + if terminal in [Terminal("COMMA")]: + self.deferred_item = item + else: + # otherwise print the next token + yield value + + # and do our bookkeeping so we can make an informed + # decision about formatting next time + self.last_terminal = terminal + self.last_rule = rule + if value: + self.last_char_space = value[-1].isspace() + + else: + raise RuntimeError(f"Unknown bare token type: {item}") + + def reconstruct(self, tree, postproc=None, insert_spaces=False): + """Convert a Lark.Tree AST back into a string representation of HCL.""" + return Reconstructor.reconstruct( + self, + tree, + postproc, + insert_spaces, + ) + + +class HCLReverseTransformer: + """ + The reverse of hcl2.transformer.DictTransformer. This method attempts to + convert a dict back into a working AST, which can be written back out. + """ + def __init__(self): + pass + + def transform(self, hcl_dict: dict) -> Tree: + """Given a dict, return a Lark.Tree representing the HCL AST.""" + level = 0 + body = self._transform_dict_to_body(hcl_dict, level) + start = Tree(Token("RULE", "start"), [body]) + return start + + def _newline(self, level: int, comma: bool = False, count: int = 1) -> Tree: + # some rules expect the `new_line_and_or_comma` token + if comma: + return Tree( + Token("RULE", "new_line_and_or_comma"), + [self._newline(level=level, comma=False, count=count)], + ) + + # otherwise, return the `new_line_or_comment` token + return Tree( + Token("RULE", "new_line_or_comment"), + [Token("NL_OR_COMMENT", f"\n{' ' * level}") for _ in range(count)], + ) + + # rules: the value of a block is always an array of dicts, + # the key is the block type + def _list_is_a_block(self, value: list) -> bool: + for obj in value: + if not self._dict_is_a_block(obj): + return False - # scenario 2, the prev token or the next token should be followed by a space - if ( - prev_item[-1] in CHAR_SPACE_AFTER - or prev_item in KEYWORDS_SPACE_AFTER - or item[0] in CHAR_SPACE_BEFORE - or item in KEYWORDS_SPACE_BEFORE - ): return True - # scenario 3, the previous token was a block opening brace and - # the next token is not a closing brace (so the block is on one - # line and not empty) - if prev_item[-1] == "{" and item[0] != "}": + def _dict_is_a_block(self, sub_obj: any) -> bool: + # if the list doesn't contain dictionaries, it's not a block + if not isinstance(sub_obj, dict): + return False + + # if the sub object has "start_line" and "end_line" metadata, + # the block itself is unlabeled, but it is a block + if "__start_line__" in sub_obj.keys() or "__end_line__" in sub_obj.keys(): + return True + + # if the objects in the array have no metadata and more than 2 keys and + # no metadata, it's just an array of objects, not a block + if len(list(sub_obj)) != 1: + return False + + # if the sub object has a single string key whose value is an object, + # it _could_ be a labeled block... but we'd have to check if the sub + # object is a block (recurse) + label = list(sub_obj)[0] + sub_sub_obj = sub_obj[label] + if self._dict_is_a_block(sub_sub_obj): + return True + + # if the objects in the array have a single key whose child is not a + # block, the array is just an array of objects, not a block + return False + + def _block_has_label(self, block: dict) -> bool: + return len(block.keys()) == 1 + + def _calculate_block_labels(self, block: dict) -> List[str]: + # if b doesn't have a label + if len(block.keys()) != 1: + return ([], block) + + # otherwise, find the label + curr_label = list(block)[0] + potential_body = block[curr_label] + + # __start_line__ and __end_line__ metadata are not labels + if ( + "__start_line__" in potential_body.keys() + or "__end_line__" in potential_body.keys() + ): + return ([curr_label], potential_body) + + # recurse and append the label + next_label, block_body = self._calculate_block_labels(potential_body) + return ([curr_label] + next_label, block_body) + + def _is_string_wrapped_tf(self, interp_s: str) -> bool: + """ + Determines whether a string is a complex HCL datastructure + wrapped in ${ interpolation } characters. + """ + if not interp_s.startswith("${") or not interp_s.endswith("}"): + return False + + nested_tokens = [] + for match in re.finditer(r"\$?\{|\}", interp_s): + if match.group(0) in ["${", "{"]: + nested_tokens.append(match.group(0)) + elif match.group(0) == "}": + nested_tokens.pop() + + # if we exit ${ interpolation } before the end of the string, + # this interpolated string has string parts and can't represent + # a valid HCL expression on its own (without quotes) + if len(nested_tokens) == 0 and match.end() != len(interp_s): + return False + return True - ##### otherwise, we don't add a space ##### - return False + def _name_to_identifier(self, name: str) -> Tree: + """Converts a string to a NAME token within an identifier rule.""" + return Tree(Token("RULE", "identifier"), [Token("NAME", name)]) + def _escape_interpolated_str(self, interp_s: str) -> str: + # begin by doing basic JSON string escaping, to add backslashes + interp_s = json.dumps(interp_s) -def _postprocess_reconstruct(items): - """ - Postprocess the stream of tokens derived from the AST during reconstruction. + # find each interpolation within the string and remove the backslashes + interp_s = reverse_quotes_within_interpolation(interp_s) + return interp_s - For HCL2, this is used exclusively for adding whitespace in the right locations. - """ - prev_item = "" - for item in items: - # first, handle any deferred tokens - if isinstance(prev_item, tuple) and prev_item[0] == "_deferred": - prev_item = prev_item[1] - - # if the deferred token was a comma, see if we're ending a block - if prev_item == ",": - if item[0] not in NEVER_COMMA_BEFORE: - yield prev_item + def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> List[Tree]: + # we add a newline at the top of a body within a block, not the root body + # >2 here is to ignore the __start_line__ and __end_line__ metadata + if level > 0 and len(hcl_dict) > 2: + children = [self._newline(level)] + else: + children = [] + + # iterate thru each attribute or sub-block of this block + for key, value in hcl_dict.items(): + if key in ["__start_line__", "__end_line__"]: + continue + + # construct the identifier, whether that be a block type name or an attribute key + identifier_name = self._name_to_identifier(key) + + # first, check whether the value is a "block" + if isinstance(value, list) and self._list_is_a_block(value): + for block_v in value: + block_labels, block_body_dict = self._calculate_block_labels( + block_v + ) + block_label_tokens = [ + Token("STRING_LIT", f'"{block_label}"') + for block_label in block_labels + ] + block_body = self._transform_dict_to_body( + block_body_dict, level + 1 + ) + + # create our actual block to add to our own body + block = Tree( + Token("RULE", "block"), + [identifier_name] + block_label_tokens + [block_body], + ) + children.append(block) + children.append(self._newline(level, count=2)) + + # if the value isn't a block, it's an attribute else: - yield prev_item + expr_term = self._transform_value_to_expr_term(value, level) + attribute = Tree( + Token("RULE", "attribute"), + [identifier_name, Token("EQ", " ="), expr_term], + ) + children.append(attribute) + children.append(self._newline(level)) - # if we're between two tokens, determine if we need to add an extra space - # we need the previous item and the current item to exist to evaluate these rules - if prev_item and item and _add_extra_space(prev_item, item): - yield " " + # since we're leaving a block body here, reduce the indentation of the + # final newline if it exists + if ( + len(children) > 0 + and isinstance(children[-1], Tree) + and children[-1].data.type == "RULE" + and children[-1].data.value == "new_line_or_comment" + ): + children[-1] = self._newline(level - 1) - # in some cases, we may want to defer printing the next token - defer_item = False + return Tree(Token("RULE", "body"), children) - # prevent the inclusion of extra commas if they are not intended - if item[0] == ",": - item = ("_deferred", item) - defer_item = True + # pylint: disable=too-many-branches, too-many-return-statements + def _transform_value_to_expr_term(self, value, level) -> Token: + """Transforms a value from a dictionary into an "expr_term" (a value in HCL2) - # print the actual token - if not defer_item: - yield item + Anything passed to this function is treated "naively". Any lists passed + are assumed to be tuples, and any dicts passed are assumed to be objects. + No more checks will be performed for either to see if they are "blocks" + as ehis check happens in `_transform_dict_to_body`. + """ - # store the previous item for the next token - prev_item = item + # for lists, recursively turn the child elements into expr_terms and + # store within a tuple + if isinstance(value, list): + tuple_tree = Tree( + Token("RULE", "tuple"), + [ + self._transform_value_to_expr_term(tuple_v, level) + for tuple_v in value + ], + ) + return Tree(Token("RULE", "expr_term"), [tuple_tree]) - # if the last token was deferred, print it before continuing - if isinstance(prev_item, tuple) and prev_item[0] == "_deferred": - yield prev_item[1] + # for dicts, recursively turn the child k/v pairs into object elements + # and store within an object + if isinstance(value, dict): + elems = [] + # if the object has elements, put it on a newline + if len(value) > 0: + elems.append(self._newline(level + 1)) -class HCLReconstructor: - """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" - def __init__(self, parser): - self._recons = Reconstructor(parser) + # iterate thru the items and add them to the object + for i, (k, dict_v) in enumerate(value.items()): + if k in ["__start_line__", "__end_line__"]: + continue + identifier = self._name_to_identifier(k) + value_expr_term = self._transform_value_to_expr_term(dict_v, level + 1) + elems.append( + Tree( + Token("RULE", "object_elem"), + [identifier, Token("EQ", " ="), value_expr_term], + ) + ) - def reconstruct(self, tree): - """Convert a Lark.Tree AST back into a string representation of HCL.""" - return self._recons.reconstruct( - tree, - _postprocess_reconstruct, - insert_spaces=False, - ) + # add indentation appropriately + if i < len(value) - 1: + elems.append(self._newline(level + 1, comma=True)) + else: + elems.append(self._newline(level, comma=True)) + return Tree( + Token("RULE", "expr_term"), [Tree(Token("RULE", "object"), elems)] + ) + + # treat booleans appropriately + if isinstance(value, bool): + return Tree( + Token("RULE", "expr_term"), + [ + Tree( + Token("RULE", "identifier"), + [Token("NAME", "true" if value else "false")], + ) + ], + ) + + # store integers as literals, digit by digit + if isinstance(value, int): + return Tree( + Token("RULE", "expr_term"), + [ + Tree( + Token("RULE", "int_lit"), + [Token("DECIMAL", digit) for digit in str(value)], + ) + ], + ) + + # store strings as single literals + if isinstance(value, str): + # potentially unpack a complex syntax structure + if self._is_string_wrapped_tf(value): + # we have to unpack it by parsing it + wrapped_value = re.match(r"\$\{(.*)\}", value).group(1) + ast = hcl2.parse(f"value = {wrapped_value}") + + assert ast.data == Token("RULE", "start") + body = ast.children[0] + assert body.data == Token("RULE", "body") + attribute = body.children[0] + assert attribute.data == Token("RULE", "attribute") + assert attribute.children[1] == Token("EQ", " =") + parsed_value = attribute.children[2] + assert isinstance(parsed_value, Tree) + + if parsed_value.data == Token("RULE", "expr_term"): + return parsed_value + + # wrap other types of syntax as an expression (in parenthesis) + return Tree(Token("RULE", "expr_term"), [parsed_value]) + + # otherwise it's just a string. + return Tree( + Token("RULE", "expr_term"), + [Token("STRING_LIT", self._escape_interpolated_str(value))], + ) + + # otherwise, we don't know the type + raise RuntimeError(f"Unknown type to transform {type(value)}") hcl2_reconstructor = HCLReconstructor(hcl2) +hcl2_reverse_transformer = HCLReverseTransformer() diff --git a/hcl2/transformer.py b/hcl2/transformer.py index 866cef2..120564d 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -1,4 +1,5 @@ """A Lark Transformer for transforming a Lark parse tree into a Python dict""" +import json import re import sys from collections import namedtuple @@ -7,6 +8,8 @@ from lark.tree import Meta from lark.visitors import Transformer, Discard, _DiscardType, v_args +from .reconstructor import reverse_quotes_within_interpolation + HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) @@ -36,10 +39,10 @@ def __init__(self, with_meta: bool = False): super().__init__() def float_lit(self, args: List) -> float: - return float("".join([str(arg) for arg in args])) + return float("".join([self.to_tf_inline(arg) for arg in args])) def int_lit(self, args: List) -> int: - return int("".join([str(arg) for arg in args])) + return int("".join([self.to_tf_inline(arg) for arg in args])) def expr_term(self, args: List) -> Any: args = self.strip_new_line_tokens(args) @@ -76,14 +79,14 @@ def attr_splat_expr_term(self, args: List) -> str: return f"{args[0]}{args[1]}" def attr_splat(self, args: List) -> str: - args_str = "".join(str(arg) for arg in args) + args_str = "".join(self.to_tf_inline(arg) for arg in args) return f".*{args_str}" def full_splat_expr_term(self, args: List) -> str: return f"{args[0]}{args[1]}" def full_splat(self, args: List) -> str: - args_str = "".join(str(arg) for arg in args) + args_str = "".join(self.to_tf_inline(arg) for arg in args) return f"[*]{args_str}" def tuple(self, args: List) -> List: @@ -111,14 +114,18 @@ def function_call(self, args: List) -> str: args = self.strip_new_line_tokens(args) args_str = "" if len(args) > 1: - args_str = ", ".join([str(arg) for arg in args[1] if arg is not Discard]) + args_str = ", ".join( + [self.to_tf_inline(arg) for arg in args[1] if arg is not Discard] + ) return f"{args[0]}({args_str})" def provider_function_call(self, args: List) -> str: args = self.strip_new_line_tokens(args) args_str = "" if len(args) > 5: - args_str = ", ".join([str(arg) for arg in args[5] if arg is not Discard]) + args_str = ", ".join( + [self.to_tf_inline(arg) for arg in args[5] if arg is not Discard] + ) provider_func = "::".join([args[0], args[2], args[4]]) return f"{provider_func}({args_str})" @@ -159,14 +166,14 @@ def conditional(self, args: List) -> str: return f"{args[0]} ? {args[1]} : {args[2]}" def binary_op(self, args: List) -> str: - return " ".join([str(arg) for arg in args]) + return " ".join([self.to_tf_inline(arg) for arg in args]) def unary_op(self, args: List) -> str: - return "".join([str(arg) for arg in args]) + return "".join([self.to_tf_inline(arg) for arg in args]) def binary_term(self, args: List) -> str: args = self.strip_new_line_tokens(args) - return " ".join([str(arg) for arg in args]) + return " ".join([self.to_tf_inline(arg) for arg in args]) def body(self, args: List) -> Dict[str, List]: # See https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#bodies @@ -251,25 +258,32 @@ def new_line_or_comment(self, args: List) -> _DiscardType: def for_tuple_expr(self, args: List) -> str: args = self.strip_new_line_tokens(args) - for_expr = " ".join([str(arg) for arg in args[1:-1]]) + for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) return f"[{for_expr}]" def for_intro(self, args: List) -> str: args = self.strip_new_line_tokens(args) - return " ".join([str(arg) for arg in args]) + return " ".join([self.to_tf_inline(arg) for arg in args]) def for_cond(self, args: List) -> str: args = self.strip_new_line_tokens(args) - return " ".join([str(arg) for arg in args]) + return " ".join([self.to_tf_inline(arg) for arg in args]) def for_object_expr(self, args: List) -> str: args = self.strip_new_line_tokens(args) - for_expr = " ".join([str(arg) for arg in args[1:-1]]) + for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) # doubled curly braces stands for inlining the braces # and the third pair of braces is for the interpolation # e.g. f"{2 + 2} {{2 + 2}}" == "4 {2 + 2}" return f"{{{for_expr}}}" + def string_with_interpolation(self, args: List) -> str: + return '"' + ("".join(args)) + '"' + + def interpolation_maybe_nested(self, args: List) -> str: + # return "".join(args) + return "${" + ("".join(args)) + "}" + def strip_new_line_tokens(self, args: List) -> List: """ Remove new line and Discard tokens. @@ -280,8 +294,13 @@ def strip_new_line_tokens(self, args: List) -> List: def to_string_dollar(self, value: Any) -> Any: """Wrap a string in ${ and }""" if isinstance(value, str): + # if it's already wrapped, pass it unmodified + if value.startswith("${") and value.endswith("}"): + return value + if value.startswith('"') and value.endswith('"'): - return str(value)[1:-1] + value = str(value)[1:-1] + return self.process_escape_sequences(value) return f"${{{value}}}" return value @@ -289,9 +308,44 @@ def strip_quotes(self, value: Any) -> Any: """Remove quote characters from the start and end of a string""" if isinstance(value, str): if value.startswith('"') and value.endswith('"'): - return str(value)[1:-1] + value = str(value)[1:-1] + return self.process_escape_sequences(value) + return value + + def process_escape_sequences(self, value: str) -> str: + """Process HCL escape sequences within quoted template expressions.""" + if isinstance(value, str): + # normal escape sequences + value = value.replace("\\n", "\n") + value = value.replace("\\r", "\r") + value = value.replace("\\t", "\t") + value = value.replace('\\"', '"') + value = value.replace("\\\\", "\\") + + # we will leave Unicode escapes (\uNNNN and \UNNNNNNNN) untouched + # for now, but this method can be extended in the future return value + def to_tf_inline(self, value: any) -> str: + """ + Converts complex objects (e.g.) dicts to an "inline" HCL syntax + for use in function calls and ${interpolation} strings + """ + if isinstance(value, dict): + dict_v = json.dumps(value) + return reverse_quotes_within_interpolation(dict_v) + if isinstance(value, list): + value = [self.to_tf_inline(item) for item in value] + return f"[{', '.join(value)}]" + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, str): + return value + if isinstance(value, int): + return str(value) + + raise RuntimeError(f"Invalid type to convert to inline HCL: {type(value)}") + def identifier(self, value: Any) -> Any: # Making identifier a token by capitalizing it to IDENTIFIER # seems to return a token object instead of the str diff --git a/test/helpers/terraform-config-json/a.json b/test/helpers/terraform-config-json/a.json new file mode 100644 index 0000000..f248684 --- /dev/null +++ b/test/helpers/terraform-config-json/a.json @@ -0,0 +1,12 @@ +{ + "block": [ + { + "a": 1 + }, + { + "label": { + "b": 2 + } + } + ] +} diff --git a/test/helpers/terraform-config-json/escapes.json b/test/helpers/terraform-config-json/escapes.json new file mode 100644 index 0000000..41c7d54 --- /dev/null +++ b/test/helpers/terraform-config-json/escapes.json @@ -0,0 +1,9 @@ +{ + "block": [ + { + "block_with_newlines": { + "a": "line1\nline2" + } + } + ] +} diff --git a/test/helpers/terraform-config-json/multi_level_interpolation.json b/test/helpers/terraform-config-json/multi_level_interpolation.json new file mode 100644 index 0000000..ab2af72 --- /dev/null +++ b/test/helpers/terraform-config-json/multi_level_interpolation.json @@ -0,0 +1,10 @@ +{ + "block": [ + { + "a": "${\"${\"${\"a\"}\"}\"}" + }, + { + "b": "${var.b}" + } + ] +} diff --git a/test/helpers/terraform-config-json/multiline_expressions.json b/test/helpers/terraform-config-json/multiline_expressions.json index 5011086..55f3cb1 100644 --- a/test/helpers/terraform-config-json/multiline_expressions.json +++ b/test/helpers/terraform-config-json/multiline_expressions.json @@ -49,7 +49,7 @@ { "some_var2": { "description": "description", - "default": "${concat([{'1': '1'}], [{'2': '2'}])}" + "default": "${concat([{\"1\": \"1\"}], [{\"2\": \"2\"}])}" } } ] diff --git a/test/helpers/terraform-config-json/variables.json b/test/helpers/terraform-config-json/variables.json index 0024f62..1a089b3 100644 --- a/test/helpers/terraform-config-json/variables.json +++ b/test/helpers/terraform-config-json/variables.json @@ -28,14 +28,14 @@ }, { "var_with_validation": { - "type": "${list(object({'id': '${string}', 'nested': \"${list(object({'id': '${string}', 'type': '${string}'}))}\"}))}", + "type": "${list(object({\"id\": \"${string}\", \"nested\": \"${list(object({\"id\": \"${string}\", \"type\": \"${string}\"}))}\"}))}", "validation": [ { - "condition": "${!contains([for v in flatten(var.var_with_validation[*].id) : can(regex(\"^(A|B)$\", v))], False)}", + "condition": "${!contains([for v in flatten(var.var_with_validation[*].id) : can(regex(\"^(A|B)$\", v))], false)}", "error_message": "The property `id` must be one of value [A, B]." }, { - "condition": "${!contains([for v in flatten(var.var_with_validation[*].nested[*].type) : can(regex(\"^(A|B)$\", v))], False)}", + "condition": "${!contains([for v in flatten(var.var_with_validation[*].nested[*].type) : can(regex(\"^(A|B)$\", v))], false)}", "error_message": "The property `nested.type` must be one of value [A, B]." } ] @@ -50,7 +50,7 @@ } }, { - "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {'aws_account_ids': '${[for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}'}}}", + "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {\"aws_account_ids\": \"${[for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}\"}}}", "has_valid_forwarding_rules_template_inputs": "${length(keys(var.forwarding_rules_template.copy_resolver_rules)) > 0 && length(var.forwarding_rules_template.replace_with_target_ips) > 0 && length(var.forwarding_rules_template.exclude_cidrs) > 0}", "for_whitespace": "${{for i in [1, 2, 3] : i => i}}" }, @@ -106,7 +106,7 @@ "ids_level_1": "${distinct(local.nested_data[*].id)}", "ids_level_2": "${flatten(local.nested_data[*].nested[*].id)}", "ids_level_3": "${flatten(local.nested_data[*].nested[*].again[*][0].foo.bar[0])}", - "bindings_by_role": "${distinct(flatten([for name in local.real_entities : [for role , members in var.bindings : {'name': '${name}', 'role': '${role}', 'members': '${members}'}]]))}" + "bindings_by_role": "${distinct(flatten([for name in local.real_entities : [for role , members in var.bindings : {\"name\": \"${name}\", \"role\": \"${role}\", \"members\": \"${members}\"}]]))}" } ] } diff --git a/test/helpers/terraform-config/a.tf b/test/helpers/terraform-config/a.tf new file mode 100644 index 0000000..36cf750 --- /dev/null +++ b/test/helpers/terraform-config/a.tf @@ -0,0 +1,7 @@ +block { + a = 1 +} + +block "label" { + b = 2 +} diff --git a/test/helpers/terraform-config/escapes.tf b/test/helpers/terraform-config/escapes.tf new file mode 100644 index 0000000..0e82ab8 --- /dev/null +++ b/test/helpers/terraform-config/escapes.tf @@ -0,0 +1,3 @@ +block "block_with_newlines" { + a = "line1\nline2" +} diff --git a/test/helpers/terraform-config/locals_embedded_condition.tf b/test/helpers/terraform-config/locals_embedded_condition.tf index 55b57b3..25de5a2 100644 --- a/test/helpers/terraform-config/locals_embedded_condition.tf +++ b/test/helpers/terraform-config/locals_embedded_condition.tf @@ -1,6 +1,6 @@ locals { terraform = { - channels = local.running_in_ci ? local.ci_channels : local.local_channels + channels = (local.running_in_ci ? local.ci_channels : local.local_channels) authentication = [] } } diff --git a/test/helpers/terraform-config/multi_level_interpolation.tf b/test/helpers/terraform-config/multi_level_interpolation.tf new file mode 100644 index 0000000..22ef7f5 --- /dev/null +++ b/test/helpers/terraform-config/multi_level_interpolation.tf @@ -0,0 +1,7 @@ +block { + a = "${"${"${"a"}"}"}" +} + +block { + b = "${var.b}" +} \ No newline at end of file diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py new file mode 100644 index 0000000..0ab3792 --- /dev/null +++ b/test/unit/test_builder.py @@ -0,0 +1,94 @@ +"""Test building an HCL file from scratch""" + +import json +from pathlib import Path +from unittest import TestCase + +import hcl2 +import hcl2.builder + + +HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" +HCL2_DIR = HELPERS_DIR / "terraform-config" +JSON_DIR = HELPERS_DIR / "terraform-config-json" +HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] + + +class TestBuilder(TestCase): + """Test building a variety of hcl files""" + + # print any differences fully to the console + maxDiff = None + + def test_build_a_tf(self): + builder = hcl2.Builder() + + builder.block("block", a=1) + builder.block("block", ["label"], b=2) + + self.compare_filenames(builder, "a.tf") + + def test_build_escapes_tf(self): + builder = hcl2.Builder() + + builder.block("block", ["block_with_newlines"], a="line1\nline2") + + self.compare_filenames(builder, "escapes.tf") + + def test_locals_embdedded_condition_tf(self): + builder = hcl2.Builder() + + builder.block( + "locals", + terraform={ + "channels": "${local.running_in_ci ? local.ci_channels : local.local_channels}", + "authentication": [], + }, + ) + + self.compare_filenames(builder, "locals_embedded_condition.tf") + + def test_locals_embedded_function_tf(self): + builder = hcl2.Builder() + + builder.block( + "locals", + function_test='${var.basename}-${var.forwarder_function_name}_${md5("${var.vpc_id}${data.aws_region.current.name}")}', + ) + + self.compare_filenames(builder, "locals_embedded_function.tf") + + def test_locals_embedded_interpolation_tf(self): + builder = hcl2.Builder() + + builder.block( + "locals", + embedded_interpolation='${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo', + ) + + self.compare_filenames(builder, "locals_embedded_interpolation.tf") + + def test_provider_function_tf(self): + builder = hcl2.Builder() + + builder.block( + "locals", + name2='${provider::test2::test("a")}', + name3='${test("a")}', + ) + + self.compare_filenames(builder, "provider_function.tf") + + def compare_filenames(self, builder: hcl2.Builder, filename: str): + hcl_dict = builder.build() + hcl_ast = hcl2.reverse_transform(hcl_dict) + hcl_content_built = hcl2.writes(hcl_ast) + + hcl_path = (HCL2_DIR / filename).absolute() + with hcl_path.open("r") as hcl_file: + hcl_file_content = hcl_file.read() + self.assertMultiLineEqual( + hcl_content_built, + hcl_file_content, + f"file {filename} does not match its programmatically built version.", + ) diff --git a/test/unit/test_hcl2_syntax.py b/test/unit/test_hcl2_syntax.py index 3e6f19d..50b4499 100644 --- a/test/unit/test_hcl2_syntax.py +++ b/test/unit/test_hcl2_syntax.py @@ -73,7 +73,7 @@ def test_block_multiple_labels(self): def test_unary_operation(self): operations = [ ("identifier = -10", {"identifier": "${-10}"}), - ("identifier = !true", {"identifier": "${!True}"}), + ("identifier = !true", {"identifier": "${!true}"}), ] for hcl, dict_ in operations: result = self.load_to_dict(hcl) @@ -94,7 +94,7 @@ def test_tuple(self): "${identifier}", "string", 100, - "${True == False}", + "${true == false}", "${5 + 5}", "${function()}", ] @@ -115,7 +115,7 @@ def test_object(self): "key1": "${identifier}", "key2": "string", "key3": 100, - "key4": "${True == False}", + "key4": "${true == false}", "key5": "${5 + 5}", "key6": "${function()}", } diff --git a/test/unit/test_load.py b/test/unit/test_load.py index d21ca02..d6a56b6 100644 --- a/test/unit/test_load.py +++ b/test/unit/test_load.py @@ -17,6 +17,9 @@ class TestLoad(TestCase): """Test parsing a variety of hcl files""" + # print any differences fully to the console + maxDiff = None + def test_load_terraform(self): """Test parsing a set of hcl2 files and force recreating the parser file""" # delete the parser file to force it to be recreated diff --git a/test/unit/test_reconstruct.py b/test/unit/test_reconstruct_ast.py similarity index 100% rename from test/unit/test_reconstruct.py rename to test/unit/test_reconstruct_ast.py diff --git a/test/unit/test_reconstruct_dict.py b/test/unit/test_reconstruct_dict.py new file mode 100644 index 0000000..4f3591c --- /dev/null +++ b/test/unit/test_reconstruct_dict.py @@ -0,0 +1,84 @@ +""" Test reconstructing hcl files""" + +import json +import traceback +from pathlib import Path +from unittest import TestCase + +import hcl2 + + +HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" +HCL2_DIR = HELPERS_DIR / "terraform-config" +HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] +JSON_DIR = HELPERS_DIR / "terraform-config-json" + + +class TestReconstruct(TestCase): + """Test reconstructing a variety of hcl files""" + + # print any differences fully to the console + maxDiff = None + + def test_write_terraform(self): + """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure""" + + # the reconstruction process is not precise, so some files do not + # reconstruct any embedded HCL expressions exactly the same. this + # list captures those, and should be manually inspected regularly to + # ensure that files remain syntactically equivalent + inexact_files = [ + # one level of interpolation is stripped from this file during + # reconstruction, since we don't have a way to distinguish it from + # a complex HCL expression. the output parses to the same value + # though + "multi_level_interpolation.tf", + ] + + for hcl_path in HCL2_FILES: + if hcl_path not in inexact_files: + yield self.check_terraform, hcl_path + + def check_terraform(self, hcl_path_str: str): + """ + Loads a single hcl2 file, parses it, reconstructs it, + parses the reconstructed file, and compares with the expected json + """ + hcl_path = (HCL2_DIR / hcl_path_str).absolute() + json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") + with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: + try: + hcl2_dict_correct = hcl2.load(hcl_file) + except Exception as exc: + assert ( + False + ), f"failed to tokenize 'correct' terraform in `{hcl_path_str}`: {traceback.format_exc()}" + + json_dict = json.load(json_file) + + try: + hcl_ast = hcl2.reverse_transform(json_dict) + except Exception as exc: + assert ( + False + ), f"failed to reverse transform HCL from `{json_path.name}`: {traceback.format_exc()}" + + try: + hcl_reconstructed = hcl2.writes(hcl_ast) + except Exception as exc: + assert ( + False + ), f"failed to reconstruct terraform from AST from `{json_path.name}`: {traceback.format_exc()}" + + try: + hcl2_dict_reconstructed = hcl2.loads(hcl_reconstructed) + except Exception as exc: + assert ( + False + ), f"failed to tokenize 'reconstructed' terraform from AST from `{json_path.name}`: {exc},\n{hcl_reconstructed}" + + self.assertDictEqual( + hcl2_dict_reconstructed, + hcl2_dict_correct, + f"failed comparing {hcl_path_str} with reconstructed version from {json_path.name}", + ) diff --git a/tree-to-hcl2-reconstruction.md b/tree-to-hcl2-reconstruction.md index c403be5..5089164 100644 --- a/tree-to-hcl2-reconstruction.md +++ b/tree-to-hcl2-reconstruction.md @@ -1,13 +1,111 @@ -Given `example.tf` file with following content +# Writing HCL2 from Python + +Version 5 of this library supports reconstructing HCL files directly from +Python. This guide details how the reconstruction process takes place. See +also: [Limitations](#limitations) + +There are three major phases: + +- [Building a Python Dictionary](#building-a-python-dictionary) +- [Building an AST](#building-an-ast) +- [Reconstructing the file from the AST](#reconstructing-the-file-from-the-ast) + +## Example + +To create the `example.tf` file with the following content: ```terraform resource "aws_s3_bucket" "bucket" { bucket = "bucket_id" - force_destroy = true + force_destroy = true +} +``` + +You can use the `hcl2.Builder` class like so: + +```python +import hcl2 + +example = hcl2.Builder() + +example.block( + "resource", + ["aws_s3_bucket", "bucket"], + bucket="bucket_id", + force_destroy=True, +) + +example_dict = example.build() +example_ast = hcl2.reverse_transform(example_dict) +example_file = hcl2.writes(example_ast) + +print(example_file) +# resource "aws_s3_bucket" "bucket" { +# bucket = "bucket_id" +# force_destroy = true +# } +# +``` + +This demonstrates a couple of different phases of the process worth mentioning. + +### Building a Python dictionary + +The `hcl2.Builder` class produces a dictionary that should be identical to the +output of `hcl2.load(example_file, with_meta=True)`. The `with_meta` keyword +argument is important here. HCL "blocks" in the Python dictionary are +identified by the presence of `__start_line__` and `__end_line__` metadata +within them. The `Builder` class handles adding that metadata. If that metadata +is missing, the `hcl2.reconstructor.HCLReverseTransformer` class fails to +identify what is a block and what is just an attribute with an object value. +Without that metadata, this dictionary: + +```python +{ + "resource": [ + { + "aws_s3_bucket": { + "bucket": { + "bucket": "bucket_id", + "force_destroy": True, + # "__start_line__": -1, + # "__end_line__": -1, + } + } + } + ] } ``` -below code will add a `tags` object to the S3 bucket definition. The code can also be used to print out readable representation of **any** Parse Tree (any valid HCL2 file), which can be useful when working on your own logic for arbitrary Parse Tree manipulation. +Would produce this HCL output: + +```terraform +resource = [{ + aws_s3_bucket = { + bucket = { + bucket = "bucket_id" + force_destroy = true + } + } +}] +``` + +(This output parses to the same datastructure, but isn't formatted in blocks +as desired by the user. Therefore, using the `Builder` class is recommended.) + +### Building an AST + +The `hcl2.reconstructor.HCLReconstructor` class operates on an "abstract +syntax tree" (`hcl2.AST` or `Lark.Tree`, they're the same.) To produce this AST +from scratch in Python, use `hcl2.reverse_transform(hcl_dict)`, and to produce +this AST from an existing HCL file, use `hcl2.parse(hcl_file)`. + +You can also build these ASTs manually, if you want more control over the +generated HCL output. If you do this, though, make sure the AST you generate is +valid within the `hcl2.lark` grammar. + +Here's an example, which would add a "tags" element to that `example.tf` file +mentioned above. ```python from copy import deepcopy @@ -128,3 +226,23 @@ if __name__ == "__main__": main() ``` + +### Reconstructing the file from the AST + +Once the AST has been generated, you can convert it back to valid HCL using +`hcl2.writes(ast)`. In the above example, that conversion is done in the +`main()` function. + +## Limitations + +- Some formatting choices are impossible to specify via `hcl2.Builder()` and + require manual intervention of the AST produced after the `reverse_transform` + step. + + - Most notably, this means it's not possible to generate files containing + comments (both inline and block comments) + +- Even when parsing a file directly and writing it back out, some formatting + information may be lost due to Terminals discarded during the parsing process. + The reconstructed output should still parse to the same dictionary at the end + of the day though.