5 Commits
0.1.0 ... 0.1.1

Author SHA1 Message Date
ba6b71687e Fix typos in readme 2022-04-22 17:10:48 -04:00
d61d2cb1a1 Update documentation for 0.1.1
Update changelog with 0.1.1
2022-04-21 15:29:49 -04:00
c7c2a87ebb Bump patch version 2022-04-21 15:29:49 -04:00
8e9df58f43 Fix replacement bug when the same vaulted block appears twice in a file 2022-04-21 15:29:48 -04:00
9943dd112c Update internal logic
Reduce duplication in password loading logic
Reorder internal functions to improve logical grouping
Improve logging clarity
Add better handling of decryption errors
2022-04-21 15:17:29 -04:00
4 changed files with 272 additions and 233 deletions

View File

@@ -2,6 +2,17 @@
See also: [Github Release Page](https://github.com/enpaul/vault2vault/releases).
## Version 0.1.1
View this release on: [Github](https://github.com/enpaul/vault2vault/releases/tag/0.1.1),
[PyPI](https://pypi.org/project/vault2vault/0.1.1/)
- Fix bug causing stack trace when the same vaulted block appears in a YAML file more than
once
- Fix bug where the `--ignore-undecryptable` option was not respected for vaulted variables
in YAML files
- Update logging messages and levels to improve verbose output
## Version 0.1.0
View this release on: [Github](https://github.com/enpaul/vault2vault/releases/tag/0.1.0),

View File

@@ -20,17 +20,20 @@ but works recursively on encrypted files and in-line variables
## What is this?
If you use [Ansible Vault](https://docs.ansible.com/ansible/latest/user_guide/vault.html)
then you may have encountered the problem of needing to role your vault password. Maybe
then you may have encountered the problem of needing to roll your vault password. Maybe
you found it written down on a sticky note, maybe a coworker who knows it left the
company, maybe you accidentally typed it into Slack when you thought the focus was on your
terminal. Whatever, these things happen.
The builtin tool Ansible provides,
The built-in tool Ansible provides,
[`ansible-vault rekey`](https://docs.ansible.com/ansible/latest/cli/ansible-vault.html#rekey),
works suffers from two main drawbacks: first, it only works on vault encrypted files and
not on vault encrypted YAML data; and second, it only works on a single vault encrypted
file at a time. To rekey everything in a large project you'd need to write a script that
goes through every file and rekeys everything in every format it can find.
suffers from two main drawbacks:
1. It only works on vault encrypted files and not on vault encrypted YAML data.
2. It only works on a single vault encrypted file at a time.
To rekey everything in a large project you'd need to write a script that goes through
every file and rekeys everything in every format it can find.
This is that script.
@@ -64,8 +67,8 @@ and redevelopment. Here are the command line options:
```
> vault2vault --help
usage: vault2vault [-h] [--version] [--interactive] [-v] [-b] [-i VAULT_ID] [--ignore-undecryptable] [--old-pass-file OLD_PASS_FILE]
[--new-pass-file NEW_PASS_FILE]
usage: vault2vault [-h] [--version] [--interactive] [-v] [-b] [-i VAULT_ID] [--ignore-undecryptable]
[--old-pass-file OLD_PASS_FILE] [--new-pass-file NEW_PASS_FILE]
[paths ...]
Recursively rekey ansible-vault encrypted files and in-line variables
@@ -76,13 +79,15 @@ positional arguments:
options:
-h, --help show this help message and exit
--version Show program version and exit
--interactive Step through files and variables interactively, prompting for confirmation before making each change
--interactive Step through files and variables interactively, prompting for confirmation before making
each change
-v, --verbose Increase verbosity; can be repeated
-b, --backup Write a backup of every file to be modified, suffixed with '.bak'
-i VAULT_ID, --vault-id VAULT_ID
Limit rekeying to encrypted secrets with the specified Vault ID
--ignore-undecryptable
Ignore any file or variable that is not decryptable with the provided vault secret instead of raising an error
Ignore any file or variable that is not decryptable with the provided vault secret instead
of raising an error
--old-pass-file OLD_PASS_FILE
Path to a file with the old vault password to decrypt secrets with
--new-pass-file NEW_PASS_FILE

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "vault2vault"
version = "0.1.0"
version = "0.1.1"
license = "MIT"
authors = ["Ethan Paul <24588726+enpaul@users.noreply.github.com>"]
description = "Recursively rekey ansible-vault encrypted files and in-line variables"

View File

@@ -9,24 +9,26 @@ from pathlib import Path
from typing import Any
from typing import Iterable
from typing import List
from typing import Tuple
from typing import Union
from typing import Optional
import ruamel.yaml
try:
import ansible.constants
import ansible.parsing.vault
from ansible.parsing.vault import VaultSecret
from ansible.parsing.vault import VaultLib
from ansible.parsing.vault import AnsibleVaultError
except ImportError:
print(
"FATAL: No supported version of Ansible could be imported under the current python interpreter"
"FATAL: No supported version of Ansible could be imported under the current python interpreter",
file=sys.stderr,
)
sys.exit(1)
__title__ = "vault2vault"
__summary__ = "Recursively rekey ansible-vault encrypted files and in-line variables"
__version__ = "0.1.0"
__version__ = "0.1.1"
__url__ = "https://github.com/enpaul/vault2vault/"
__license__ = "MIT"
__authors__ = ["Ethan Paul <24588726+enpaul@users.noreply.github.com>"]
@@ -44,8 +46,8 @@ ruamel.yaml.add_constructor(
def rekey(
old: ansible.parsing.vault.VaultLib,
new: ansible.parsing.vault.VaultLib,
old: VaultLib,
new: VaultLib,
content: bytes,
) -> bytes:
"""Rekey vaulted content to use a new vault password
@@ -61,6 +63,195 @@ def rekey(
return new.encrypt(old.decrypt(content))
# This whole function needs to be rebuilt from the ground up so I don't
# feel bad about disabling this warning
def _process_file(  # pylint: disable=too-many-statements
    path: Path,
    old: VaultLib,
    new: VaultLib,
    interactive: bool,
    backup: bool,
    ignore: bool,
) -> None:
    """Rekey a single file in place

    A fully vault encrypted file is rekeyed as a whole. A file whose suffix is in
    ``YAML_FILE_EXTENSIONS`` is parsed and every vault encrypted scalar found in it is
    rekeyed inside the raw text of the file. Any other file is skipped.

    :param path: Path to the file to process
    :param old: Vault used to decrypt the existing content
    :param new: Vault used to encrypt the rekeyed content
    :param interactive: Whether to prompt for confirmation before each change
    :param backup: Whether to write a copy of the original file, suffixed with ``.bak``,
                   before the file is modified
    :param ignore: Whether to log a warning and move on, rather than raising, when content
                   cannot be decrypted with the ``old`` vault
    :raises RuntimeError: When content cannot be decrypted and ``ignore`` is not set
    """

    logger = logging.getLogger(__name__)

    logger.debug(f"Processing file {path}")

    def _process_yaml_data(  # pylint: disable=too-many-locals
        content: bytes, data: Any, ignore: bool, name: str = ""
    ):
        """Recursively rekey vaulted scalars found in parsed YAML data

        :param content: Raw bytes of the file; replacements are applied to this text
        :param data: Parsed YAML node (mapping, sequence, or scalar) to inspect
        :param ignore: Whether undecryptable values are skipped instead of raising
        :param name: Dotted path of ``data`` within the document, used for messages
        :returns: The file content with every rekeyed value substituted in
        """

        if isinstance(data, dict):
            for key, value in data.items():
                content = _process_yaml_data(
                    content, value, ignore, name=f"{name}.{key}"
                )

        elif isinstance(data, list):
            for index, item in enumerate(data):
                content = _process_yaml_data(
                    content, item, ignore, name=f"{name}.{index}"
                )

        elif isinstance(data, ruamel.yaml.comments.TaggedScalar) and old.is_encrypted(
            data.value
        ):
            logger.info(f"Identified vaulted content in {path} at {name}")

            if interactive and not _confirm(
                f"Rekey vault encrypted variable {name} in file {path}?"
            ):
                logger.debug(
                    f"User skipped vault encrypted content in {path} at {name} via interactive mode"
                )
                return content

            try:
                new_data = rekey(old, new, data.value.encode())
            except AnsibleVaultError as err:
                msg = f"Failed to decrypt vault encrypted data in {path} at {name} with provided vault secret"
                if ignore:
                    logger.warning(msg)
                    return content
                raise RuntimeError(msg) from err

            content_decoded = content.decode("utf-8")

            # Ok so this next section is probably the worst possible way to do this, but I did
            # it this way to solve a very specific problem that would absolutely prevent people
            # from using this tool: round trip YAML format preservation. Namely, that it's impossible.
            # Ruamel gets the closest to achieving this: it can do round trip format preservation
            # when the starting state is in _some_ known state (this is better than competitors which
            # require the starting state to be in a _specific_ known state). But given how many
            # ways there are to write YAML- and by extension, how many opinions there are on the
            # "correct" way to write YAML- it is not possible to configure ruamel to account for all of
            # them, even if everyone's YAML style was compatible with ruamel's roundtrip formatting (note:
            # they aren't). So there's the problem: to be useful, this tool would need to reformat every
            # YAML file it touched, which means nobody would use it.
            #
            # To avoid the YAML formatting problem, we need a way to replace the target content
            # in the raw text of the file without dumping the parsed YAML. We want to preserve
            # indentation, remove any extra newlines that would be left over, add any necessary
            # newlines without clobbering the following lines, and ideally avoid reimplementing
            # a YAML formatter. The answer to this problem- as the answer to so many stupid problems
            # seems to be- is a regex. If this is too janky for you (I know it is for me) go support
            # the estraven project I'm trying to get off the ground: https://github.com/enpaul/estraven
            #
            # Ok, thanks for sticking with me as I waxed poetic about this. The solution below...
            # is awful, I can admit that. But it does work, so I'll leave it up to
            # your judgement as to whether it's worthwhile or not. Here's how it works:
            #
            # 1. First we take the second line of the original (unmodified) vaulted content, i.e. the
            #    line at index 1. Presumably index 0 is the vault header, which is not unique to this
            #    secret. The chosen line has several important qualities: 1) it exists in the raw text
            #    of the file, 2) it is pseudo-guaranteed to be unique, and 3) it is expected to exist
            #    for any content that was just successfully decrypted above.
            search_data = data.value.split("\n")[1]

            try:
                # 2. Next we use a regex to grab the full line of text from the file that includes the above
                #    string. This is important because the full line of text will include the leading
                #    whitespace, which ruamel helpfully strips out from the parsed data. The search text
                #    is escaped so that it can only ever be matched literally.
                # 3. Next we grab the number of leading spaces on the line using the capture group from the
                #    regex
                padding = len(
                    re.search(
                        rf"\n(\s*){re.escape(search_data)}\n", content_decoded
                    ).groups()[0]
                )
            except (TypeError, AttributeError):
                # This is to handle an edge case where the vaulted content is actually a yaml anchor. For
                # example, if a single vaulted secret needs to be stored under multiple variable names.
                # In that case, the vaulted content itself will only appear once in the file, but the data
                # parsed by ruamel will include it twice. If we fail to get a match on the search line, then
                # we check whether the data is a yaml anchor and, if it is, we skip it.
                if data.anchor.value:
                    logger.debug(
                        f"Content replacement for encrypted content in {path} at {name} was not found, so replacement will be skipped because target is a YAML anchor"
                    )
                    return content
                raise

            # 4. Now with the leading whitespace padding, we add this same number of spaces to each line
            #    of *both* the old vaulted data and the new vaulted data. It's important to do both because
            #    we'll need to do a replacement in a moment so we need to know both what we're replacing
            #    and what we're replacing it with.
            indent = " " * padding
            padded_old_data = "\n".join(
                f"{indent}{item}" for item in data.value.split("\n") if item
            )
            padded_new_data = "\n".join(
                f"{indent}{item}"
                for item in new_data.decode("utf-8").split("\n")
                if item
            )

            # 5. Finally, we actually replace the content. This needs to have a count=1 so that if the same
            #    encrypted block appears twice in the same file we only replace the first occurrence of it,
            #    otherwise the later replacement attempts will fail. We also need to re-encode it back to
            #    bytes because all file operations with vault are done in bytes mode
            content = content_decoded.replace(
                padded_old_data, padded_new_data, 1
            ).encode()

        return content

    with path.open("rb") as infile:
        raw = infile.read()

    # The 'is_encrypted' check doesn't rely on the vault secret in the VaultLib matching the
    # secret the data was encrypted with, it just checks that the data is encrypted with some
    # vault secret. We could use either `old` or `new` for this check, it doesn't actually matter.
    if old.is_encrypted(raw):
        logger.info(f"Identified vault encrypted file: {path}")

        if interactive and not _confirm(f"Rekey vault encrypted file {path}?"):
            logger.debug(
                f"User skipped vault encrypted file {path} via interactive mode"
            )
            return

        # Rekeying happens entirely in memory, so nothing on disk is touched until we know
        # the decryption actually worked
        try:
            updated = rekey(old, new, raw)
        except AnsibleVaultError as err:
            msg = f"Failed to decrypt vault encrypted file {path} with provided vault secret"
            if ignore:
                logger.warning(msg)
                return
            raise RuntimeError(msg) from err

    elif path.suffix.lower() in YAML_FILE_EXTENSIONS:
        logger.debug(f"Identified YAML file: {path}")

        if interactive and not _confirm(
            f"Search YAML file {path} for vault encrypted variables?"
        ):
            logger.debug(
                f"User skipped processing YAML file {path} via interactive mode"
            )
            return

        # Only parse the file once we know the user actually wants it searched. The `yaml`
        # parser object is configured at module level elsewhere in this file.
        data = yaml.load(raw)

        updated = _process_yaml_data(raw, data, ignore=ignore)

    else:
        logger.debug(f"Skipping non-vault file {path}")
        return

    # A YAML file with no vaulted content (or where every change was skipped) comes back
    # byte-for-byte identical; there's no reason to back it up or rewrite it
    if updated == raw:
        logger.debug(f"No changes made to {path}, leaving file untouched")
        return

    # The backup is a copy, not a rename, and is only taken right before the write. Renaming
    # the file up front would leave nothing at the original path if the rekey failed, and the
    # freshly created replacement file would not inherit the original file's permissions.
    if backup:
        shutil.copy(path, f"{path}.bak")

    logger.debug(f"Writing updated file contents to {path}")
    with path.open("wb") as outfile:
        outfile.write(updated)
def _get_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
prog=__title__,
@@ -131,174 +322,6 @@ def _confirm(prompt: str, default: bool = True) -> bool:
print("Please input one of the specified options", file=sys.stderr)
# This whole function needs to be rebuilt from the ground up so I don't
# feel bad about disabling this warning
def _process_file( # pylint: disable=too-many-statements
path: Path,
old: ansible.parsing.vault.VaultLib,
new: ansible.parsing.vault.VaultLib,
interactive: bool,
backup: bool,
ignore: bool,
) -> None:
logger = logging.getLogger(__name__)
logger.debug(f"Processing file {path}")
def _process_yaml_data(content: bytes, data: Any, name: str = ""):
if isinstance(data, dict):
for key, value in data.items():
content = _process_yaml_data(content, value, f"{name}.{key}")
elif isinstance(data, list):
for index, item in enumerate(data):
content = _process_yaml_data(content, item, f"{name}.{index}")
elif isinstance(data, ruamel.yaml.comments.TaggedScalar) and old.is_encrypted(
data.value
):
logger.debug(f"Identified vaulted content in {path} at '{name}'")
confirm = (
_confirm(f"Rekey vault encrypted variable {name} in file {path}?")
if interactive
else True
)
if not confirm:
logger.debug(
f"User skipped vault encrypted content in {path} at '{name}' via interactive mode"
)
return content
new_data = rekey(old, new, data.value.encode())
content_decoded = content.decode("utf-8")
# Ok so this next section is probably the worst possible way to do this, but I did
# it this way to solve a very specific problem that would absolutely prevent people
# from using this tool: round trip YAML format preservation. Namely, that it's impossible.
# Ruamel gets the closest to achieving this: it can do round trip format preservation
# when the starting state is in _some_ known state (this is better than competitors which
# require the starting state to be in a _specific_ known state). But given how many
# ways there are to write YAML- and by extension, how many opinions there are on the
# "correct" way to write YAML- it is not possible to configure ruamel to account for all of
# them, even if everyones YAML style was compatible with ruamel's roundtrip formatting (note:
# they aren't). So there's the problem: to be useful, this tool would need to reformat every
# YAML file it touched, which means nobody would use it.
#
# To avoid the YAML formatting problem, we need a way to replace the target content
# in the raw text of the file without dumping the parsed YAML. We want to preserve
# indendation, remove any extra newlines that would be left over, add any necessary
# newlines without clobbering the following lines, and ideally avoid reimplementing
# a YAML formatter. The answer to this problem- as the answer to so many stupid problems
# seems to be- is a regex. If this is too janky for you (I know it is for me) go support
# the estraven project I'm trying to get off the ground: https://github.com/enpaul/estraven
#
# Ok, thanks for sticking with me as I was poetic about this. The solution below...
# is awful, I can admit that. But it does work, so I'll leave it up to
# your judgement as to whether it's worthwhile or not. Here's how it works:
#
# 1. First we take the first line of the original (unmodified) vaulted content. This line
# of text has several important qualities: 1) it exists in the raw text of the file, 2)
# it is pseudo-guaranteed to be unique, and 3) it is guaranteed to exist (vaulted content
# will be at least one line long, but possibly no more)
search_data = data.value.split("\n")[1]
try:
# 2. Next we use a regex to grab the full line of text from the file that includes the above
# string. This is important because the full line of text will include the leading
# whitespace, which ruamel helpfully strips out from the parsed data.
# 3. Next we grab the number of leading spaces on the line using the capture group from the
# regex
padding = len(
re.search(rf"\n(\s*){search_data}\n", content_decoded).groups()[0]
)
except (TypeError, AttributeError):
# This is to handle an edgecase where
if data.anchor.value:
logger.debug(
f"Content replacement for encrypted content in {path} at {name} was not found, so replacement will be skipped because target is a YAML anchor"
)
return content
raise
# 4. Now with the leading whitespace padding, we add this same number of spaces to each line
# of *both* the old vaulted data and the new vaulted data. It's important to do both because
# we'll need to do a replacement in a moment so we need to know both what we're replacing
# and what we're replacing it with.
padded_old_data = "\n".join(
[f"{' ' * padding}{item}" for item in data.value.split("\n") if item]
)
padded_new_data = "\n".join(
[
f"{' ' * padding}{item}"
for item in new_data.decode("utf-8").split("\n")
if item
]
)
# 5. Finally, we actually replace the content. We also need to re-encode it back to bytes
# because all file operations with vault are done in bytes mode
content = content_decoded.replace(padded_old_data, padded_new_data).encode()
return content
with path.open("rb") as infile:
raw = infile.read()
# The 'is_encrypted' check doesn't rely on the vault secret in the VaultLib matching the
# secret the data was encrypted with, it just checks that the data is encrypted with some
# vault secret. We could use either `old` or `new` for this check, it doesn't actually matter.
if old.is_encrypted(raw):
logger.debug(f"Identified vault encrypted file: {path}")
confirm = (
_confirm(f"Rekey vault encrypted file {path}?") if interactive else True
)
if not confirm:
logger.debug(
f"User skipped vault encrypted file {path} via interactive mode"
)
return
if backup:
path.rename(f"{path}.bak")
try:
updated = rekey(old, new, raw)
except ansible.parsing.vault.AnsibleVaultError:
msg = f"Failed to decrypt vault encrypted file {path} with provided vault secret"
if ignore:
logger.warning(msg)
return
raise RuntimeError(msg) from None
elif path.suffix.lower() in YAML_FILE_EXTENSIONS:
logger.debug(f"Identified YAML file: {path}")
confirm = (
_confirm(f"Search YAML file {path} for vault encrypted variables?")
if interactive
else True
)
data = yaml.load(raw)
if not confirm:
logger.debug(
f"User skipped processing YAML file {path} via interactive mode"
)
return
if backup:
shutil.copy(path, f"{path}.bak")
updated = _process_yaml_data(raw, data)
else:
logger.debug(f"Skipping non-vault file {path}")
return
logger.debug(f"Writing updated file contents to {path}")
with path.open("wb") as outfile:
outfile.write(updated)
def _expand_paths(paths: Iterable[Path]) -> List[Path]:
logger = logging.getLogger(__name__)
@@ -309,61 +332,52 @@ def _expand_paths(paths: Iterable[Path]) -> List[Path]:
logger.debug(f"Including file {path}")
results.append(path)
elif path.is_dir():
logger.debug(f"Descending into subdirectory {path}")
logger.debug(f"Identifying files under {path}")
results += _expand_paths(path.iterdir())
else:
logger.debug(f"Discarding path {path}")
return results
def _read_vault_pass_file(path: Union[Path, str]) -> str:
logger = logging.getLogger(__name__)
try:
with Path(path).resolve().open(encoding="utf-8") as infile:
return infile.read()
except (FileNotFoundError, PermissionError):
logger.error(
f"Specified vault password file '{path}' does not exist or is unreadable"
)
sys.exit(1)
def _load_password(
    fpath: Optional[str], desc: str = "", confirm: bool = True
) -> VaultSecret:
    """Load a password from a file or interactively

    :param fpath: Optional path to the file containing the vault password. If not provided then
                  the password will be prompted for interactively.
    :param desc: Description text to inject into the interactive password prompt. Useful when using
                 this function multiple times to identify different passwords to the user.
    :param confirm: Whether to prompt for the password a second time and require that both
                    entries match. Only applies when the password is entered interactively.
    :returns: Populated vault secret object with the loaded password
    :raises RuntimeError: When the password file cannot be read, or when the interactively
                          entered passwords do not match
    """

    logger = logging.getLogger(__name__)

    if fpath:
        try:
            # The file is read as raw bytes because that is what the vault secret is built from.
            # Binary mode does not accept an ``encoding`` argument, so none is passed.
            with Path(fpath).resolve().open("rb") as infile:
                # Surrounding whitespace (most commonly a trailing newline) is stripped.
                # NOTE(review): this is meant to match how ansible-vault reads a password file,
                # so that both tools derive the same key - confirm against Ansible's behavior.
                return VaultSecret(infile.read().strip())
        except OSError as err:
            # OSError covers a missing file and a permissions problem, as well as related
            # failures such as the path pointing at a directory
            raise RuntimeError(
                f"Specified vault password file '{fpath}' does not exist or is unreadable"
            ) from err

    logger.debug("No vault password file provided, prompting for interactive input")

    password_1 = getpass.getpass(
        prompt=f"Enter {desc} Ansible Vault password: ", stream=sys.stderr
    )

    if confirm:
        password_2 = getpass.getpass(
            prompt=f"Confirm (re-enter) {desc} Ansible Vault password: ",
            stream=sys.stderr,
        )

        # The comparison lives inside this branch because the second entry only exists
        # when confirmation was requested
        if password_1 != password_2:
            raise RuntimeError(f"Provided {desc} passwords do not match")

    return VaultSecret(password_1.encode("utf-8"))


def _load_passwords(
    old_file: str, new_file: str
) -> Tuple[ansible.parsing.vault.VaultSecret, ansible.parsing.vault.VaultSecret]:
    """Load the old and new vault passwords from files or interactively

    Predecessor of ``_load_password`` from the 0.1.0 side of this comparison; its logic is
    reproduced unchanged.

    :param old_file: Path to the file with the old vault password; when empty the password
                     is prompted for
    :param new_file: Path to the file with the new vault password; when empty the password
                     is prompted for, with a confirmation prompt
    :returns: Tuple of the old vault secret and the new vault secret
    """

    logger = logging.getLogger(__name__)

    if old_file:
        old_vault_pass = _read_vault_pass_file(old_file)
        logger.info(f"Loaded old vault password from {Path(old_file).resolve()}")
    else:
        logger.debug(
            "No old vault password file provided, prompting for old vault password input"
        )
        old_vault_pass = getpass.getpass(
            prompt="Old Ansible Vault password: ", stream=sys.stderr
        )

    if new_file:
        new_vault_pass = _read_vault_pass_file(new_file)
        logger.info(f"Loaded new vault password from {Path(new_file).resolve()}")
    else:
        logger.debug(
            "No new vault password file provided, prompting for new vault password input"
        )
        new_vault_pass = getpass.getpass(
            prompt="New Ansible Vault password: ", stream=sys.stderr
        )
        confirm = getpass.getpass(
            prompt="Confirm new Ansible Vault password: ", stream=sys.stderr
        )
        if new_vault_pass != confirm:
            logger.error("New vault passwords do not match")
            sys.exit(1)

    return ansible.parsing.vault.VaultSecret(
        old_vault_pass.encode("utf-8")
    ), ansible.parsing.vault.VaultSecret(new_vault_pass.encode("utf-8"))
def main():
@@ -383,17 +397,26 @@ def main():
sys.exit(0)
if not args.paths:
logger.warning("No path provided, nothing to do!")
logger.warning("No paths provided, nothing to do!")
sys.exit(0)
old_pass, new_pass = _load_passwords(args.old_pass_file, args.new_pass_file)
in_vault = ansible.parsing.vault.VaultLib([(args.vault_id, old_pass)])
out_vault = ansible.parsing.vault.VaultLib([(args.vault_id, new_pass)])
try:
old_pass = _load_password(args.old_pass_file, desc="existing", confirm=False)
new_pass = _load_password(args.new_pass_file, desc="new", confirm=True)
logger.debug(
in_vault = VaultLib([(args.vault_id, old_pass)])
out_vault = VaultLib([(args.vault_id, new_pass)])
except RuntimeError as err:
logger.error(str(err))
sys.exit(1)
except KeyboardInterrupt:
sys.exit(130)
logger.info(
f"Identifying all files under {len(args.paths)} input paths: {', '.join(args.paths)}"
)
files = _expand_paths(args.paths)
logger.info(f"Identified {len(files)} files for processing")
for filepath in files:
_process_file(