mirror of
				https://github.com/enpaul/vault2vault.git
				synced 2025-11-04 09:26:46 +00:00 
			
		
		
		
	Add documentation and fix runtime bug
Fix issue where a newline was added after every character of newly vaulted yaml content Add docs, fix linting errors, confess to my sins
This commit is contained in:
		
							
								
								
									
										146
									
								
								vault2vault.py
									
									
									
									
									
								
							
							
						
						
									
										146
									
								
								vault2vault.py
									
									
									
									
									
								
							@@ -7,8 +7,8 @@ import shutil
 | 
			
		||||
import sys
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from typing import Any
 | 
			
		||||
from typing import Iterable
 | 
			
		||||
from typing import List
 | 
			
		||||
from typing import Sequence
 | 
			
		||||
from typing import Tuple
 | 
			
		||||
from typing import Union
 | 
			
		||||
 | 
			
		||||
@@ -41,6 +41,16 @@ def rekey(
 | 
			
		||||
    new: ansible.parsing.vault.VaultLib,
 | 
			
		||||
    content: bytes,
 | 
			
		||||
) -> bytes:
 | 
			
		||||
    """Rekey vaulted content to use a new vault password
 | 
			
		||||
 | 
			
		||||
    :param old: ``VaultLib`` object populated with the vault password the content is
 | 
			
		||||
                currently encrypted with
 | 
			
		||||
    :param new: ``VaultLib`` object populated with the vault password the content will
 | 
			
		||||
                be re-encrypted with
 | 
			
		||||
    :param content: Content to decrypt using ``old`` and re-encrypt using ``new``
 | 
			
		||||
    :returns: The value of ``content`` decrypted using the existing vault password and
 | 
			
		||||
              re-encrypted using the new vault password
 | 
			
		||||
    """
 | 
			
		||||
    return new.encrypt(old.decrypt(content))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -114,7 +124,9 @@ def _confirm(prompt: str, default: bool = True) -> bool:
 | 
			
		||||
        print("Please input one of the specified options", file=sys.stderr)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _process_file(
 | 
			
		||||
# This whole function needs to be rebuilt from the ground up so I don't
 | 
			
		||||
# feel bad about disabling this warning
 | 
			
		||||
def _process_file(  # pylint: disable=too-many-statements
 | 
			
		||||
    path: Path,
 | 
			
		||||
    old: ansible.parsing.vault.VaultLib,
 | 
			
		||||
    new: ansible.parsing.vault.VaultLib,
 | 
			
		||||
@@ -133,57 +145,90 @@ def _process_file(
 | 
			
		||||
        elif isinstance(data, list):
 | 
			
		||||
            for index, item in enumerate(data):
 | 
			
		||||
                content = _process_yaml_data(content, item, f"{name}.{index}")
 | 
			
		||||
        elif isinstance(data, ruamel.yaml.comments.TaggedScalar):
 | 
			
		||||
            if old.is_encrypted(data.value):
 | 
			
		||||
                logger.debug(f"Identified vaulted content in {path} at '{name}'")
 | 
			
		||||
                confirm = (
 | 
			
		||||
                    _confirm(f"Rekey vault encrypted variable {name} in file {path}?")
 | 
			
		||||
                    if interactive
 | 
			
		||||
                    else True
 | 
			
		||||
                )
 | 
			
		||||
        elif isinstance(data, ruamel.yaml.comments.TaggedScalar) and old.is_encrypted(
 | 
			
		||||
            data.value
 | 
			
		||||
        ):
 | 
			
		||||
            logger.debug(f"Identified vaulted content in {path} at '{name}'")
 | 
			
		||||
            confirm = (
 | 
			
		||||
                _confirm(f"Rekey vault encrypted variable {name} in file {path}?")
 | 
			
		||||
                if interactive
 | 
			
		||||
                else True
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
                if not confirm:
 | 
			
		||||
            if not confirm:
 | 
			
		||||
                logger.debug(
 | 
			
		||||
                    f"User skipped vault encrypted content in {path} at '{name}' via interactive mode"
 | 
			
		||||
                )
 | 
			
		||||
                return content
 | 
			
		||||
 | 
			
		||||
            new_data = rekey(old, new, data.value.encode())
 | 
			
		||||
            content_decoded = content.decode("utf-8")
 | 
			
		||||
 | 
			
		||||
            # Ok so this next section is probably the worst possible way to do this, but I did
 | 
			
		||||
            # it this way to solve a very specific problem that would absolutely prevent people
 | 
			
		||||
            # from using this tool: round trip YAML format preservation. Namely, that it's impossible.
 | 
			
		||||
            # Ruamel gets the closest to achieving this: it can do round trip format preservation
 | 
			
		||||
            # when the starting state is in _some_ known state (this is better than competitors which
 | 
			
		||||
            # require the starting state to be in a _specific_ known state). But given how many
 | 
			
		||||
            # ways there are to write YAML- and by extension, how many opinions there are on the
 | 
			
		||||
            # "correct" way to write YAML- it is not possible to configure ruamel to account for all of
 | 
			
		||||
            # them, even if everyones YAML style was compatible with ruamel's roundtrip formatting (note:
 | 
			
		||||
            # they aren't). So there's the problem: to be useful, this tool would need to reformat every
 | 
			
		||||
            # YAML file it touched, which means nobody would use it.
 | 
			
		||||
            #
 | 
			
		||||
            # To avoid the YAML formatting problem, we need a way to replace the target content
 | 
			
		||||
            # in the raw text of the file without dumping the parsed YAML. We want to preserve
 | 
			
		||||
            # indendation, remove any extra newlines that would be left over, add any necessary
 | 
			
		||||
            # newlines without clobbering the following lines, and ideally avoid reimplementing
 | 
			
		||||
            # a YAML formatter. The answer to this problem- as the answer to so many stupid problems
 | 
			
		||||
            # seems to be- is a regex. If this is too janky for you (I know it is for me) go support
 | 
			
		||||
            # the estraven project I'm trying to get off the ground: https://github.com/enpaul/estraven
 | 
			
		||||
            #
 | 
			
		||||
            # Ok, thanks for sticking with me as I was poetic about this. The solution below...
 | 
			
		||||
            # is awful, I can admit that. But it does work, so I'll leave it up to
 | 
			
		||||
            # your judgement as to whether it's worthwhile or not. Here's how it works:
 | 
			
		||||
            #
 | 
			
		||||
            # 1. First we take the first line of the original (unmodified) vaulted content. This line
 | 
			
		||||
            #    of text has several important qualities: 1) it exists in the raw text of the file, 2)
 | 
			
		||||
            #    it is pseudo-guaranteed to be unique, and 3) it is guaranteed to exist (vaulted content
 | 
			
		||||
            #    will be at least one line long, but possibly no more)
 | 
			
		||||
            search_data = data.value.split("\n")[1]
 | 
			
		||||
            try:
 | 
			
		||||
                # 2. Next we use a regex to grab the full line of text from the file that includes the above
 | 
			
		||||
                #    string. This is important because the full line of text will include the leading
 | 
			
		||||
                #    whitespace, which ruamel helpfully strips out from the parsed data.
 | 
			
		||||
                # 3. Next we grab the number of leading spaces on the line using the capture group from the
 | 
			
		||||
                #    regex
 | 
			
		||||
                padding = len(
 | 
			
		||||
                    re.search(rf"\n(\s*){search_data}\n", content_decoded).groups()[0]
 | 
			
		||||
                )
 | 
			
		||||
            except (TypeError, AttributeError):
 | 
			
		||||
                # This is to handle an edgecase where
 | 
			
		||||
                if data.anchor.value:
 | 
			
		||||
                    logger.debug(
 | 
			
		||||
                        f"User skipped vault encrypted content in {path} at '{name}' via interactive mode"
 | 
			
		||||
                        f"Content replacement for encrypted content in {path} at {name} was not found, so replacement will be skipped because target is a YAML anchor"
 | 
			
		||||
                    )
 | 
			
		||||
                    return content
 | 
			
		||||
                raise
 | 
			
		||||
 | 
			
		||||
                new_data = rekey(old, new, data.value.encode())
 | 
			
		||||
                content_decoded = content.decode("utf-8")
 | 
			
		||||
            # 4. Now with the leading whitespace padding, we add this same number of spaces to each line
 | 
			
		||||
            #    of *both* the old vaulted data and the new vaulted data. It's important to do both because
 | 
			
		||||
            #    we'll need to do a replacement in a moment so we need to know both what we're replacing
 | 
			
		||||
            #    and what we're replacing it with.
 | 
			
		||||
            padded_old_data = "\n".join(
 | 
			
		||||
                [f"{' ' * padding}{item}" for item in data.value.split("\n") if item]
 | 
			
		||||
            )
 | 
			
		||||
            padded_new_data = "\n".join(
 | 
			
		||||
                [
 | 
			
		||||
                    f"{' ' * padding}{item}"
 | 
			
		||||
                    for item in new_data.decode("utf-8").split("\n")
 | 
			
		||||
                    if item
 | 
			
		||||
                ]
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
                search_data = data.value.split("\n")[1]
 | 
			
		||||
                try:
 | 
			
		||||
                    padding = len(
 | 
			
		||||
                        re.search(rf"\n(\s*){search_data}\n", content_decoded).groups()[
 | 
			
		||||
                            0
 | 
			
		||||
                        ]
 | 
			
		||||
                    )
 | 
			
		||||
                except (TypeError, AttributeError):
 | 
			
		||||
                    if data.anchor.value:
 | 
			
		||||
                        logger.debug(
 | 
			
		||||
                            f"Content replacement for encrypted content in {path} at {name} was not found, so replacement will be skipped because target is a YAML anchor"
 | 
			
		||||
                        )
 | 
			
		||||
                        return content
 | 
			
		||||
                    raise
 | 
			
		||||
 | 
			
		||||
                padded_old_data = "\n".join(
 | 
			
		||||
                    [
 | 
			
		||||
                        f"{' ' * padding}{item}"
 | 
			
		||||
                        for item in data.value.split("\n")
 | 
			
		||||
                        if item
 | 
			
		||||
                    ]
 | 
			
		||||
                )
 | 
			
		||||
                padded_new_data = "\n".join(
 | 
			
		||||
                    [
 | 
			
		||||
                        f"{' ' * padding}{item}"
 | 
			
		||||
                        for item in new_data.decode("utf-8")
 | 
			
		||||
                        if item
 | 
			
		||||
                    ]
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
                content = content_decoded.replace(
 | 
			
		||||
                    padded_old_data, padded_new_data
 | 
			
		||||
                ).encode()
 | 
			
		||||
            # 5. Finally, we actually replace the content. We also need to re-encode it back to bytes
 | 
			
		||||
            #    because all file operations with vault are done in bytes mode
 | 
			
		||||
            content = content_decoded.replace(padded_old_data, padded_new_data).encode()
 | 
			
		||||
        return content
 | 
			
		||||
 | 
			
		||||
    with path.open("rb") as infile:
 | 
			
		||||
@@ -215,7 +260,7 @@ def _process_file(
 | 
			
		||||
            if ignore:
 | 
			
		||||
                logger.warning(msg)
 | 
			
		||||
                return
 | 
			
		||||
            raise RuntimeError(msg)
 | 
			
		||||
            raise RuntimeError(msg) from None
 | 
			
		||||
    elif path.suffix.lower() in YAML_FILE_EXTENSIONS:
 | 
			
		||||
        logger.debug(f"Identified YAML file: {path}")
 | 
			
		||||
 | 
			
		||||
@@ -247,7 +292,7 @@ def _process_file(
 | 
			
		||||
        outfile.write(updated)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _expand_paths(paths: Sequence[Union[Path, str]]) -> List[Path]:
 | 
			
		||||
def _expand_paths(paths: Iterable[Path]) -> List[Path]:
 | 
			
		||||
    logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
    results = []
 | 
			
		||||
@@ -267,7 +312,7 @@ def _expand_paths(paths: Sequence[Union[Path, str]]) -> List[Path]:
 | 
			
		||||
def _read_vault_pass_file(path: Union[Path, str]) -> str:
 | 
			
		||||
    logger = logging.getLogger(__name__)
 | 
			
		||||
    try:
 | 
			
		||||
        with Path(path).resolve().open() as infile:
 | 
			
		||||
        with Path(path).resolve().open(encoding="utf-8") as infile:
 | 
			
		||||
            return infile.read()
 | 
			
		||||
    except (FileNotFoundError, PermissionError):
 | 
			
		||||
        logger.error(
 | 
			
		||||
@@ -315,6 +360,7 @@ def _load_passwords(
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main():
 | 
			
		||||
    """Main program entrypoint and CLI interface"""
 | 
			
		||||
    args = _get_args()
 | 
			
		||||
 | 
			
		||||
    logger = logging.getLogger(__name__)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user