Compare commits


29 Commits
0.1.0 ... devel

Author SHA1 Message Date
0bb654c2e2
Merge pull request #7 from enpaul/enp/ci
Update CI to always use python3.10 for metaenv
2024-03-21 18:33:05 -04:00
8e621138e9
Update CI to always use python3.10 for metaenv 2024-03-21 18:28:27 -04:00
de4ff0031f
Update changelog for version 0.1.3 2023-04-03 12:01:05 -04:00
50a5481108
Bump version to 0.1.3 2023-04-03 11:58:58 -04:00
3586a4e277
Merge pull request #3 from brycelowe/fix/password-binary-encoding
fix: remove encoding from password file reads
2023-04-03 11:57:12 -04:00
Bryce Lowe
45ab9addb3
fix: remove encoding from password file reads
The password files are opened in binary mode so an encoding argument isn't necessary and causes the script to crash.

Fixes #2
2023-04-01 21:50:40 -07:00
90e4a32753
Merge pull request #4 from enpaul/enp/ci
Fix CI
2023-03-29 20:17:40 -04:00
3dc062c849
Update markdown to new GFM formatting spec 2023-03-29 20:11:23 -04:00
fdad46a945
Remove deprecated option from pylintrc 2023-03-29 20:06:38 -04:00
96bd80db6e
Fix bug with poetry installation on python3.7
Remove python3.6 compatibility
2023-03-29 20:06:36 -04:00
fcaac8ca43
Add workaround for export plugin issue 2023-03-29 19:03:47 -04:00
9c6486ce55
Update poetry CI version to 1.4.1
Update CI env script to move poetry version to main CI workflow config
2023-03-29 19:03:01 -04:00
c3fe7bdef9
Remove python 3.6 CI tests 2023-03-29 18:46:29 -04:00
98d1bf3e8e
Update repo automation to use new command syntax
Update CI to use Poetry 1.3
Update workflows to match patterns from tox-poetry-installer
Update workflows to use newer job versions
Update makefile and CI to use Poetry 1.3 command syntax
2023-03-02 17:03:20 -05:00
d11af1658d
Update to use poetry dev groups 2023-03-02 16:59:44 -05:00
29243223fe
Update dev environment to use Python-3.10 for all dependencies 2023-03-02 16:19:11 -05:00
a98dd16358
Add parallel flag to tox call in makefile 2022-05-07 18:51:34 -04:00
226c717684
Update classifiers for beta status 2022-05-07 18:46:25 -04:00
b55af77051
Update changelog with version 0.1.2 2022-05-07 18:45:37 -04:00
4550a73404
Merge pull request #1 from enpaul/enp/docs
Add documentation
2022-05-07 18:43:33 -04:00
bdb62993a2
Bump patch version 2022-05-07 18:37:21 -04:00
3f6f5cf7e0
Add documentation for the tools usage 2022-05-07 18:36:54 -04:00
2f75180623
Fix styling of introduction docs 2022-05-07 12:27:29 -04:00
c729414b03
Document main logic function for processing filepaths
Fix docstring on confirm function
2022-05-07 12:26:56 -04:00
ba6b71687e
Fix typos in readme 2022-04-22 17:10:48 -04:00
d61d2cb1a1
Update documentation for 0.1.1
Update changelog with 0.1.1
2022-04-21 15:29:49 -04:00
c7c2a87ebb
Bump patch version 2022-04-21 15:29:49 -04:00
8e9df58f43
Fix replacement bug when the same vaulted block appears twice in a file 2022-04-21 15:29:48 -04:00
9943dd112c
Update internal logic
Reduce duplication in password loading logic
Reorder internal functions to improve logical grouping
Improve logging clarity
Add better handling of decryption errors
2022-04-21 15:17:29 -04:00
11 changed files with 2547 additions and 1701 deletions

View File

@ -4,31 +4,34 @@
# to create a repeatable local environment for tests to be run in. The python env
# this script creates can be accessed at the location defined by the CI_VENV variable
# below.
#
# POETRY_VERSION can be set to install a specific version of Poetry
set -e;
CI_CACHE=$HOME/.cache;
POETRY_VERSION=1.1.12;
INSTALL_POETRY_VERSION="${POETRY_VERSION:-1.3.2}";
mkdir --parents "$CI_CACHE";
command -v python;
python --version;
python3.10 --version;
curl --location https://install.python-poetry.org \
--output "$CI_CACHE/install-poetry.py" \
--silent \
--show-error;
python "$CI_CACHE/install-poetry.py" \
--version "$POETRY_VERSION" \
--version "$INSTALL_POETRY_VERSION" \
--yes;
poetry --version --no-ansi;
poetry run pip --version;
poetry install \
--quiet \
--remove-untracked \
--no-ansi;
--sync \
--no-ansi \
--no-root \
--only ci;
poetry env info;
poetry run tox --version;
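The updated script reads `POETRY_VERSION` from the environment and falls back to a pinned default, so the Poetry version can be changed without editing the script itself. A minimal sketch of running it locally, assuming it is invoked from the repository root the same way the CI workflow below invokes it:

```bash
# Pin the Poetry version for this run; the script falls back to its
# built-in default when the variable is unset
export POETRY_VERSION=1.4.1

# Bootstrap the CI-style environment (installs Poetry and the 'ci' dependency group)
.github/scripts/setup-env.sh
```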

View File

@ -5,14 +5,16 @@ on:
types: ["opened", "synchronize"]
push:
branches: ["devel"]
env:
POETRY_VERSION: 1.4.1
jobs:
Test:
name: Python ${{ matrix.python.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: true
matrix:
python:
- version: "3.6"
toxenv: py36
- version: "3.7"
toxenv: py37
- version: "3.8"
@ -21,15 +23,24 @@ jobs:
toxenv: py39
- version: "3.10"
toxenv: py310
- version: "3.11"
toxenv: py311
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Install Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install Python ${{ matrix.python.version }}
uses: actions/setup-python@v1
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python.version }}
- name: Configure Job Cache
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: |
~/.cache/pip
@ -39,38 +50,49 @@ jobs:
# will be invalidated, and thus all packages will be redownloaded, if the
# lockfile is updated
key: ${{ runner.os }}-${{ matrix.python.toxenv }}-${{ hashFiles('**/poetry.lock') }}
- name: Configure Path
run: echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Configure Environment
run: .github/scripts/setup-env.sh
- name: Run Toxenv ${{ matrix.python.toxenv }}
run: poetry run tox -e ${{ matrix.python.toxenv }}
Check:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Install Python 3.8
uses: actions/setup-python@v1
- name: Install Python 3.10
uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: "3.10"
- name: Configure Job Cache
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: |
~/.cache/pip
~/.cache/pypoetry/cache
~/.poetry
# Hardcoded 'py38' slug here lets this cache piggyback on the 'py38' cache
# Hardcoded 'py310' slug here lets this cache piggyback on the 'py310' cache
# that is generated for the tests above
key: ${{ runner.os }}-py38-${{ hashFiles('**/poetry.lock') }}
key: ${{ runner.os }}-py310-${{ hashFiles('**/poetry.lock') }}
- name: Configure Path
run: echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Configure Environment
run: .github/scripts/setup-env.sh
- name: Run Static Analysis Checks
run: poetry run tox -e static
- name: Run Static Analysis Checks (Tests)
run: poetry run tox -e static-tests
- name: Run Security Checks
run: poetry run tox -e security
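Since both jobs simply call tox through Poetry, the same checks can be reproduced locally once the environment has been bootstrapped (for example with the `setup-env.sh` script above or `make dev`). A rough sketch:

```bash
# Run the unit tests for one interpreter from the test matrix
poetry run tox -e py310

# Run the same checks as the 'Check' job
poetry run tox -e static
poetry run tox -e static-tests
poetry run tox -e security
```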

View File

@ -11,7 +11,6 @@
# --disable=W"
disable=logging-fstring-interpolation
,logging-format-interpolation
,bad-continuation
,line-too-long
,ungrouped-imports
,typecheck

View File

@ -2,6 +2,34 @@
See also: [Github Release Page](https://github.com/enpaul/vault2vault/releases).
## Version 0.1.3
View this release on: [Github](https://github.com/enpaul/vault2vault/releases/tag/0.1.3),
[PyPI](https://pypi.org/project/vault2vault/0.1.3/)
- Fix incorrect encoding specification when opening password files. Contributed by
[brycelowe](https://github.com/brycelowe) (#2)
## Version 0.1.2
View this release on: [Github](https://github.com/enpaul/vault2vault/releases/tag/0.1.2),
[PyPI](https://pypi.org/project/vault2vault/0.1.2/)
- Add user documentation
- Add project road map
- Fix incorrect and missing docstrings for internal functions
## Version 0.1.1
View this release on: [Github](https://github.com/enpaul/vault2vault/releases/tag/0.1.1),
[PyPI](https://pypi.org/project/vault2vault/0.1.1/)
- Fix bug causing stack trace when the same vaulted block appears in a YAML file more than
once
- Fix bug where the `--ignore-undecryptable` option was not respected for vaulted
variables in YAML files
- Update logging messages and levels to improve verbose output
## Version 0.1.0
View this release on: [Github](https://github.com/enpaul/vault2vault/releases/tag/0.1.0),

View File

@ -27,9 +27,10 @@ Examples of unacceptable behavior include:
- The use of sexualized language or imagery, and sexual attention or advances of any kind
- Trolling, insulting or derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as a physical or email address, without their
explicit permission
- Other conduct which could reasonably be considered inappropriate in a professional setting
- Publishing others' private information, such as a physical or email address, without
their explicit permission
- Other conduct which could reasonably be considered inappropriate in a professional
setting
## Enforcement Responsibilities
@ -52,8 +53,8 @@ offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the
community leaders responsible for enforcement at \[INSERT CONTACT METHOD\]. All
complaints will be reviewed and investigated promptly and fairly.
community leaders responsible for enforcement at \[INSERT CONTACT METHOD\]. All complaints
will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the reporter of
any incident.
@ -105,8 +106,8 @@ toward or disparagement of classes of individuals.
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0,
available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity).
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at

View File

@ -30,10 +30,10 @@ source: ## Build Python source distribution package
poetry build --format sdist
test: ## Run the project testsuite(s)
poetry run tox --recreate
poetry run tox --recreate --parallel
dev: ## Create the local dev environment
poetry install
poetry install --with dev --extras ansible --sync
poetry run pre-commit install
publish: test wheel source ## Build and upload to pypi (requires $PYPI_API_KEY be set)
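The reworked `dev` and `test` targets wrap the Poetry 1.3 command syntax and the parallel tox run. A sketch of the local workflow these targets imply:

```bash
# Create the local dev environment: the dev dependency group, the 'ansible'
# extra, and the pre-commit hooks
make dev

# Run the test suite, recreating the tox environments and running them in parallel
make test
```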

143
README.md
View File

@ -10,27 +10,29 @@ but works recursively on encrypted files and in-line variables
[![Python Supported Versions](https://img.shields.io/pypi/pyversions/vault2vault)](https://www.python.org)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
⚠️ **This project is alpha software and is under active development** ⚠️
⚠️ **This project is beta software and is under active development** ⚠️
- [What is this?](#what-is-this)
- [Installing](#installing)
- [Using](#using)
- [Usage](#usage)
- [Recovering from a failed migration](#recovering-from-a-failed-migration)
- [Roadmap](#roadmap)
- [Developing](#developer-documentation)
## What is this?
If you use [Ansible Vault](https://docs.ansible.com/ansible/latest/user_guide/vault.html)
then you may have encountered the problem of needing to role your vault password. Maybe
then you may have encountered the problem of needing to roll your vault password. Maybe
you found it written down on a sticky note, maybe a coworker who knows it left the
company, maybe you accidentally typed it into Slack when you thought the focus was on your
terminal. Whatever, these things happen.
The builtin tool Ansible provides,
The built-in tool Ansible provides,
[`ansible-vault rekey`](https://docs.ansible.com/ansible/latest/cli/ansible-vault.html#rekey),
works suffers from two main drawbacks: first, it only works on vault encrypted files and
not on vault encrypted YAML data; and second, it only works on a single vault encrypted
file at a time. To rekey everything in a large project you'd need to write a script that
goes through every file and rekeys everything in every format it can find.
suffers from two main drawbacks: first, it only works on vault encrypted files and not on
vault encrypted YAML data; and second, it only works on a single vault encrypted file at a
time. To rekey everything in a large project you'd need to write a script that recursively
goes through every file and rekeys every encrypted file and YAML variable all at once.
This is that script.
@ -55,47 +57,110 @@ install `vault2vault` using [PipX](https://pypa.github.io/pipx/) and the `ansibl
pipx install vault2vault[ansible]
```
**Note: vault2vault requires an Ansible installation to function. If you are installing to a standalone virtual environment (like with PipX) then you must install it with the `ansible` extra to ensure a version of Ansible is available to the application.**
> Note: vault2vault requires an Ansible installation to function. If you are installing to
> a standalone virtual environment (like with PipX) then you must install it with the
> `ansible` extra to ensure a version of Ansible is available to the application.
## Using
## Usage
These docs are pretty sparse, largely because this project is still under active design
and redevelopment. Here are the command line options:
> Note: the full command reference is available by running `vault2vault --help`
```
> vault2vault --help
usage: vault2vault [-h] [--version] [--interactive] [-v] [-b] [-i VAULT_ID] [--ignore-undecryptable] [--old-pass-file OLD_PASS_FILE]
[--new-pass-file NEW_PASS_FILE]
[paths ...]
Vault2Vault works with files in any arbitrary directory structures, so there is no need to
have your Ansible project(s) structured in a specific way for the tool to work. The
simplest usage of Vault2Vault is by passing the path to your Ansible project directory to
the command:
Recursively rekey ansible-vault encrypted files and in-line variables
positional arguments:
paths Paths to search for Ansible Vault encrypted content
options:
-h, --help show this help message and exit
--version Show program version and exit
--interactive Step through files and variables interactively, prompting for confirmation before making each change
-v, --verbose Increase verbosity; can be repeated
-b, --backup Write a backup of every file to be modified, suffixed with '.bak'
-i VAULT_ID, --vault-id VAULT_ID
Limit rekeying to encrypted secrets with the specified Vault ID
--ignore-undecryptable
Ignore any file or variable that is not decryptable with the provided vault secret instead of raising an error
--old-pass-file OLD_PASS_FILE
Path to a file with the old vault password to decrypt secrets with
--new-pass-file NEW_PASS_FILE
Path to a file with the new vault password to rekey secrets with
```bash
vault2vault ./my-ansible-project/
```
Please report any bugs or issues you encounter on
[Github](https://github.com/enpaul/vault2vault/issues).
The tool will prompt for the current vault password and the new vault password and then
process every file under the provided path. You can also specify multiple paths and
they'll all be processed together:
```bash
vault2vault \
./my-ansible-project/playbooks/ \
./my-ansible-project/host_vars/ \
./my-ansible-project/group_vars/
```
To skip the interactive password prompts you can put the password in a file and have the
tool read it in at runtime. The `--old-pass-file` and `--new-pass-file` parameters work
the same way as the `--vault-password-file` option from the `ansible` command:
```bash
vault2vault ./my-ansible-project/ \
--old-pass-file=./oldpass.txt \
--new-pass-file=./newpass.txt
```
If you use multiple vault passwords in your project and want to roll them you'll need to
run `vault2vault` once for each password you want to change. By default, `vault2vault`
will fail with an error if it encounters vaulted data that it cannot decrypt with the
provided current vault password. To change this behavior and instead just ignore any
vaulted data that can't be decrypted (like, for example, if you have data encrypted with
multiple vault passwords) you can pass the `--ignore-undecryptable` flag to turn the
errors into warnings.
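For example, a project encrypted with two different vault passwords could be rolled in two passes, one per password, ignoring undecryptable data on each pass. A hedged sketch (the password file names here are made up for illustration):

```bash
# First pass: rekey everything encrypted with the first password,
# warning about (rather than failing on) data encrypted with the second
vault2vault ./my-ansible-project/ \
    --old-pass-file=./oldpass-a.txt \
    --new-pass-file=./newpass-a.txt \
    --ignore-undecryptable

# Second pass: repeat for the second password
vault2vault ./my-ansible-project/ \
    --old-pass-file=./oldpass-b.txt \
    --new-pass-file=./newpass-b.txt \
    --ignore-undecryptable
```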
> Please report any bugs or issues you encounter on
> [Github](https://github.com/enpaul/vault2vault/issues).
### Recovering from a failed migration
This tool is still pretty early in its development, and to be honest it hooks into
Ansible's functionality in some fragile ways. I've tested as best I can to ensure it
covers as many edge cases as possible, but there is still the chance that you might get
partway through a password migration and then have the tool fail out, leaving half of your
data successfully rekeyed and the other half not.
In the spirit of the
[Unix philosophy](https://hackaday.com/2018/09/10/doing-one-thing-well-the-unix-philosophy/)
this tool does not include any built-in way to recover from this state. However, it can be
done very effectively using a version control tool.
If you are using Git to track your project files then you can use the command
`git reset --hard` to restore all files to the state of the currently checked out commit.
This does have the side effect of erasing any other un-committed work in the repository,
so it's recommended to always have a clean working tree when using Vault2Vault.
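In practice the recovery itself is a single Git command. A minimal sketch, assuming the project is already tracked by Git and any unrelated work has been committed or stashed first:

```bash
# Review which files the failed migration touched
git status

# Restore every tracked file to the currently checked out commit
# (this discards ALL uncommitted changes, not just vault2vault's)
git reset --hard
```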
If you are not using a version control system to track your project files then you can
create a temporary Git repository to use in the event of a migration failure:
```bash
cd my-project/
# Initialize the new repository
git init
# Add and commit all your existing files to the git tree
git add .
git commit -m "initial commit"
# Run vault migrations
vault2vault ...
# If no recovery is necessary, delete the git repository data
rm -rf .git
```
## Roadmap
This project is considered feature complete as of the
[0.1.1](https://github.com/enpaul/vault2vault/releases/tag/0.1.1) release. As a result the
roadmap focuses on stability and user experience ahead of a 1.0 release.
- [ ] Reimplement core vaulted data processing function to enable multithreading
- [ ] Implement multithreading for performance in large environments
- [ ] Add unit tests
- [ ] Add integration tests
- [ ] Redesign logging messages to improve clarity and consistency
## Developer Documentation
All project contributors and participants are expected to adhere to the
[Contributor Covenant Code of Conduct, v2](CODE_OF_CONDUCT.md) ([external link](https://www.contributor-covenant.org/version/2/0/code_of_conduct/)).
[Contributor Covenant Code of Conduct, v2](CODE_OF_CONDUCT.md)
([external link](https://www.contributor-covenant.org/version/2/0/code_of_conduct/)).
The `devel` branch has the latest (and potentially unstable) changes. The stable releases
are tracked on [Github](https://github.com/enpaul/vault2vault/releases),

3415
poetry.lock generated

File diff suppressed because it is too large

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "vault2vault"
version = "0.1.0"
version = "0.1.3"
license = "MIT"
authors = ["Ethan Paul <24588726+enpaul@users.noreply.github.com>"]
description = "Recursively rekey ansible-vault encrypted files and in-line variables"
@ -12,7 +12,7 @@ packages = [
keywords = ["ansible", "vault", "playbook", "yaml", "password"]
readme = "README.md"
classifiers = [
"Development Status :: 3 - Alpha",
"Development Status :: 4 - Beta",
"Environment :: Console",
"Framework :: Ansible",
"Intended Audience :: Developers",
@ -22,11 +22,11 @@ classifiers = [
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: Implementation :: CPython"
]
@ -37,30 +37,43 @@ vault2vault = "vault2vault:main"
ansible = ["ansible-core"]
[tool.poetry.dependencies]
python = "^3.6.1"
python = "^3.7"
"ruamel.yaml" = "^0.17.16"
ansible-core = {version = "^2.11.5", optional = true}
[tool.poetry.dev-dependencies]
bandit = "^1.6.2"
black = { version = "^21.9b0", allow-prereleases = true, python = "^3.7" }
blacken-docs = "^1.8.0"
ipython = { version = "^7.18.1", python = "^3.7" }
mypy = "^0.800"
pre-commit = "^2.7.1"
pre-commit-hooks = "^3.3.0"
pylint = "^2.4.4"
pytest = "^6.0.2"
pytest-cov = "^2.10.1"
reorder-python-imports = "^2.3.5"
safety = "^1.9.0"
toml = "^0.10.1"
tox = "^3.20.0"
tox-poetry-installer = { version = "^0.8.1", extras = ["poetry"] }
types-toml = "^0.10.4"
mdformat = "^0.6.4"
mdformat-gfm = "^0.2"
[tool.poetry.group.dev.dependencies]
black = {version = "^23.1.0", python = "^3.10"}
blacken-docs = {version = "^1.13.0", python = "^3.10"}
ipython = {version = "^8.10.1", python = "^3.10"}
mdformat = {version = "^0.7.16", python = "^3.10"}
mdformat-gfm = {version = "^0.3.5", python = "^3.10"}
mypy = {version = "^1.1.1", python = "^3.10"}
pre-commit = {version = "^2.7.1", python = "^3.10"}
pre-commit-hooks = {version = "^3.3.0", python = "^3.10"}
pylint = {version = "^2.4.4", python = "^3.10"}
reorder-python-imports = {version = "^2.3.5", python = "^3.10"}
types-toml = {version = "^0.10.4", python = "^3.10"}
# Implicit python version check fails for this one
packaging = {version = "^23.0", python = "^3.10"}
[tool.poetry.group.security.dependencies]
bandit = {version = "^1.6.2", python = "^3.10"}
safety = {version = "^2.2.0", python = "^3.10"}
poetry = {version = "^1.2.0", python = "^3.10"}
[tool.poetry.group.test.dependencies]
pytest = {version = "^6.0.2"}
pytest-cov = {version = "^2.10.1"}
toml = {version = "^0.10.1"}
typing-extensions = {version = "^4.5.0", python = "^3.8"}
[tool.poetry.group.ci.dependencies]
tox = {version = "^3.20.0"}
tox-poetry-installer = {version = "^0.10.1", extras = ["poetry"]}
# This doesn't get installed under py3.7 for some reason, but it's
# required for poetry. Will need to debug this more in the future
backports-cached-property = "^1.0.2"
[build-system]
requires = ["poetry-core>=1.0.0"]
requires = ["poetry-core>=1.1.0"]
build-backend = "poetry.core.masonry.api"
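The new dependency groups (`dev`, `security`, `test`, `ci`) are what the Makefile, the CI bootstrap script, and the tox configuration in this change select from. A rough sketch of the two main install scenarios, taken from the Makefile and `setup-env.sh` above:

```bash
# Full local dev environment, as in the Makefile's 'dev' target
poetry install --with dev --extras ansible --sync

# Minimal CI bootstrap, as in setup-env.sh: no project install, only the 'ci' group
poetry install --sync --no-root --only ci
```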

43
tox.ini
View File

@ -1,5 +1,5 @@
[tox]
envlist = py36, py37, py38, py39, py310, static, static-tests, security
envlist = py3{7-11}, static, static-tests, security
isolated_build = true
skip_missing_interpreters = true
@ -9,10 +9,8 @@ require_locked_deps = true
require_poetry = true
extras =
ansible
locked_deps =
pytest
pytest-cov
toml
poetry_dep_groups =
test
commands =
pytest {toxinidir}/tests/ \
--cov vault2vault \
@ -21,20 +19,11 @@ commands =
[testenv:static]
description = Static formatting and quality enforcement
basepython = python3.8
basepython = python3.10
platform = linux
ignore_errors = true
locked_deps =
black
blacken-docs
mdformat
mdformat-gfm
mypy
reorder-python-imports
pre-commit
pre-commit-hooks
pylint
types-toml
poetry_dep_groups =
dev
commands =
pre-commit run \
--all-files
@ -46,7 +35,7 @@ commands =
[testenv:static-tests]
description = Static formatting and quality enforcement for the tests
basepython = python3.8
basepython = python3.10
platform = linux
ignore_errors = true
locked_deps =
@ -63,14 +52,12 @@ commands =
[testenv:security]
description = Security checks
basepython = python3.8
basepython = python3.10
platform = linux
ignore_errors = true
skip_install = true
locked_deps =
bandit
safety
poetry
poetry_dep_groups =
security
commands =
bandit {toxinidir}/vault2vault.py \
--recursive \
@ -82,8 +69,14 @@ commands =
poetry export \
--format requirements.txt \
--output {envtmpdir}/requirements.txt \
--without-hashes \
--dev
--without-hashes
# For now these groups are disabled until this bug is resolved
# in poetry-plugin-export:
# https://github.com/python-poetry/poetry-plugin-export/issues/176
# --with dev \
# --with ci \
# --with security \
# --with test
safety check \
--file {envtmpdir}/requirements.txt \
--json

View File

@ -9,24 +9,26 @@ from pathlib import Path
from typing import Any
from typing import Iterable
from typing import List
from typing import Tuple
from typing import Union
from typing import Optional
import ruamel.yaml
try:
import ansible.constants
import ansible.parsing.vault
from ansible.parsing.vault import VaultSecret
from ansible.parsing.vault import VaultLib
from ansible.parsing.vault import AnsibleVaultError
except ImportError:
print(
"FATAL: No supported version of Ansible could be imported under the current python interpreter"
"FATAL: No supported version of Ansible could be imported under the current python interpreter",
file=sys.stderr,
)
sys.exit(1)
__title__ = "vault2vault"
__summary__ = "Recursively rekey ansible-vault encrypted files and in-line variables"
__version__ = "0.1.0"
__version__ = "0.1.3"
__url__ = "https://github.com/enpaul/vault2vault/"
__license__ = "MIT"
__authors__ = ["Ethan Paul <24588726+enpaul@users.noreply.github.com>"]
@ -44,8 +46,8 @@ ruamel.yaml.add_constructor(
def rekey(
old: ansible.parsing.vault.VaultLib,
new: ansible.parsing.vault.VaultLib,
old: VaultLib,
new: VaultLib,
content: bytes,
) -> bytes:
"""Rekey vaulted content to use a new vault password
@ -61,6 +63,208 @@ def rekey(
return new.encrypt(old.decrypt(content))
# This whole function needs to be rebuilt from the ground up so I don't
# feel bad about disabling this warning
def _process_file( # pylint: disable=too-many-statements
path: Path,
old: VaultLib,
new: VaultLib,
interactive: bool,
backup: bool,
ignore: bool,
) -> None:
"""Determine whether a filepath includes vaulted data and if so, rekey it
:param path: Path to the file to check
:param old: VaultLib object with the current (old) vault password encoded in it
:param new: VaultLib object with the target (new) vault password encoded in it
:param interactive: Whether to prompt interactively for confirmation before each
rekey operation
:param backup: Whether to copy the original file to a backup before making any
in-place changes
:param ignore: Whether to ignore any errors that come from failing to decrypt
any vaulted data
"""
logger = logging.getLogger(__name__)
logger.debug(f"Processing file {path}")
def _process_yaml_data( # pylint: disable=too-many-locals
content: bytes, data: Any, ignore: bool, name: str = ""
):
if isinstance(data, dict):
for key, value in data.items():
content = _process_yaml_data(
content, value, ignore, name=f"{name}.{key}"
)
elif isinstance(data, list):
for index, item in enumerate(data):
content = _process_yaml_data(
content, item, ignore, name=f"{name}.{index}"
)
elif isinstance(data, ruamel.yaml.comments.TaggedScalar) and old.is_encrypted(
data.value
):
logger.info(f"Identified vaulted content in {path} at {name}")
confirm = (
_confirm(f"Rekey vault encrypted variable {name} in file {path}?")
if interactive
else True
)
if not confirm:
logger.debug(
f"User skipped vault encrypted content in {path} at {name} via interactive mode"
)
return content
try:
new_data = rekey(old, new, data.value.encode())
except AnsibleVaultError as err:
msg = f"Failed to decrypt vault encrypted data in {path} at {name} with provided vault secret"
if ignore:
logger.warning(msg)
return content
raise RuntimeError(msg) from err
content_decoded = content.decode("utf-8")
# Ok so this next section is probably the worst possible way to do this, but I did
# it this way to solve a very specific problem that would absolutely prevent people
# from using this tool: round trip YAML format preservation. Namely, that it's impossible.
# Ruamel gets the closest to achieving this: it can do round trip format preservation
# when the starting state is in _some_ known state (this is better than competitors which
# require the starting state to be in a _specific_ known state). But given how many
# ways there are to write YAML- and by extension, how many opinions there are on the
# "correct" way to write YAML- it is not possible to configure ruamel to account for all of
# them, even if everyone's YAML style was compatible with ruamel's roundtrip formatting (note:
# they aren't). So there's the problem: to be useful, this tool would need to reformat every
# YAML file it touched, which means nobody would use it.
#
# To avoid the YAML formatting problem, we need a way to replace the target content
# in the raw text of the file without dumping the parsed YAML. We want to preserve
# indentation, remove any extra newlines that would be left over, add any necessary
# newlines without clobbering the following lines, and ideally avoid reimplementing
# a YAML formatter. The answer to this problem- as the answer to so many stupid problems
# seems to be- is a regex. If this is too janky for you (I know it is for me) go support
# the estraven project I'm trying to get off the ground: https://github.com/enpaul/estraven
#
# Ok, thanks for sticking with me as I was poetic about this. The solution below...
# is awful, I can admit that. But it does work, so I'll leave it up to
# your judgement as to whether it's worthwhile or not. Here's how it works:
#
# 1. First we take the first line of the original (unmodified) vaulted content. This line
# of text has several important qualities: 1) it exists in the raw text of the file, 2)
# it is pseudo-guaranteed to be unique, and 3) it is guaranteed to exist (vaulted content
# will be at least one line long, but possibly no more)
search_data = data.value.split("\n")[1]
try:
# 2. Next we use a regex to grab the full line of text from the file that includes the above
# string. This is important because the full line of text will include the leading
# whitespace, which ruamel helpfully strips out from the parsed data.
# 3. Next we grab the number of leading spaces on the line using the capture group from the
# regex
padding = len(
re.search(rf"\n(\s*){search_data}\n", content_decoded).groups()[0]
)
except (TypeError, AttributeError):
# This is to handle an edgecase where the vaulted content is actually a yaml anchor. For
# example, if a single vaulted secret needs to be stored under multiple variable names.
# In that case, the vaulted content itself will only appear once in the file, but the data
# parsed by ruamel will include it twice. If we fail to get a match on the first line, then
# we check whether the data is a yaml anchor and, if it is, we skip it.
if data.anchor.value:
logger.debug(
f"Content replacement for encrypted content in {path} at {name} was not found, so replacement will be skipped because target is a YAML anchor"
)
return content
raise
# 4. Now with the leading whitespace padding, we add this same number of spaces to each line
# of *both* the old vaulted data and the new vaulted data. It's important to do both because
# we'll need to do a replacement in a moment so we need to know both what we're replacing
# and what we're replacing it with.
padded_old_data = "\n".join(
[f"{' ' * padding}{item}" for item in data.value.split("\n") if item]
)
padded_new_data = "\n".join(
[
f"{' ' * padding}{item}"
for item in new_data.decode("utf-8").split("\n")
if item
]
)
# 5. Finally, we actually replace the content. This needs to have a count=1 so that if the same
# encrypted block appears twice in the same file we only replace the first occurrence of it,
# otherwise the later replacement attempts will fail. We also need to re-encode it back to
# bytes because all file operations with vault are done in bytes mode
content = content_decoded.replace(
padded_old_data, padded_new_data, 1
).encode()
return content
with path.open("rb") as infile:
raw = infile.read()
# The 'is_encrypted' check doesn't rely on the vault secret in the VaultLib matching the
# secret the data was encrypted with, it just checks that the data is encrypted with some
# vault secret. We could use either `old` or `new` for this check, it doesn't actually matter.
if old.is_encrypted(raw):
logger.info(f"Identified vault encrypted file: {path}")
confirm = (
_confirm(f"Rekey vault encrypted file {path}?") if interactive else True
)
if not confirm:
logger.debug(
f"User skipped vault encrypted file {path} via interactive mode"
)
return
if backup:
path.rename(f"{path}.bak")
try:
updated = rekey(old, new, raw)
except AnsibleVaultError:
msg = f"Failed to decrypt vault encrypted file {path} with provided vault secret"
if ignore:
logger.warning(msg)
return
raise RuntimeError(msg) from None
elif path.suffix.lower() in YAML_FILE_EXTENSIONS:
logger.debug(f"Identified YAML file: {path}")
confirm = (
_confirm(f"Search YAML file {path} for vault encrypted variables?")
if interactive
else True
)
data = yaml.load(raw)
if not confirm:
logger.debug(
f"User skipped processing YAML file {path} via interactive mode"
)
return
if backup:
shutil.copy(path, f"{path}.bak")
updated = _process_yaml_data(raw, data, ignore=ignore)
else:
logger.debug(f"Skipping non-vault file {path}")
return
logger.debug(f"Writing updated file contents to {path}")
with path.open("wb") as outfile:
outfile.write(updated)
def _get_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
prog=__title__,
@ -131,174 +335,6 @@ def _confirm(prompt: str, default: bool = True) -> bool:
print("Please input one of the specified options", file=sys.stderr)
# This whole function needs to be rebuilt from the ground up so I don't
# feel bad about disabling this warning
def _process_file( # pylint: disable=too-many-statements
path: Path,
old: ansible.parsing.vault.VaultLib,
new: ansible.parsing.vault.VaultLib,
interactive: bool,
backup: bool,
ignore: bool,
) -> None:
logger = logging.getLogger(__name__)
logger.debug(f"Processing file {path}")
def _process_yaml_data(content: bytes, data: Any, name: str = ""):
if isinstance(data, dict):
for key, value in data.items():
content = _process_yaml_data(content, value, f"{name}.{key}")
elif isinstance(data, list):
for index, item in enumerate(data):
content = _process_yaml_data(content, item, f"{name}.{index}")
elif isinstance(data, ruamel.yaml.comments.TaggedScalar) and old.is_encrypted(
data.value
):
logger.debug(f"Identified vaulted content in {path} at '{name}'")
confirm = (
_confirm(f"Rekey vault encrypted variable {name} in file {path}?")
if interactive
else True
)
if not confirm:
logger.debug(
f"User skipped vault encrypted content in {path} at '{name}' via interactive mode"
)
return content
new_data = rekey(old, new, data.value.encode())
content_decoded = content.decode("utf-8")
# Ok so this next section is probably the worst possible way to do this, but I did
# it this way to solve a very specific problem that would absolutely prevent people
# from using this tool: round trip YAML format preservation. Namely, that it's impossible.
# Ruamel gets the closest to achieving this: it can do round trip format preservation
# when the starting state is in _some_ known state (this is better than competitors which
# require the starting state to be in a _specific_ known state). But given how many
# ways there are to write YAML- and by extension, how many opinions there are on the
# "correct" way to write YAML- it is not possible to configure ruamel to account for all of
# them, even if everyones YAML style was compatible with ruamel's roundtrip formatting (note:
# they aren't). So there's the problem: to be useful, this tool would need to reformat every
# YAML file it touched, which means nobody would use it.
#
# To avoid the YAML formatting problem, we need a way to replace the target content
# in the raw text of the file without dumping the parsed YAML. We want to preserve
# indendation, remove any extra newlines that would be left over, add any necessary
# newlines without clobbering the following lines, and ideally avoid reimplementing
# a YAML formatter. The answer to this problem- as the answer to so many stupid problems
# seems to be- is a regex. If this is too janky for you (I know it is for me) go support
# the estraven project I'm trying to get off the ground: https://github.com/enpaul/estraven
#
# Ok, thanks for sticking with me as I was poetic about this. The solution below...
# is awful, I can admit that. But it does work, so I'll leave it up to
# your judgement as to whether it's worthwhile or not. Here's how it works:
#
# 1. First we take the first line of the original (unmodified) vaulted content. This line
# of text has several important qualities: 1) it exists in the raw text of the file, 2)
# it is pseudo-guaranteed to be unique, and 3) it is guaranteed to exist (vaulted content
# will be at least one line long, but possibly no more)
search_data = data.value.split("\n")[1]
try:
# 2. Next we use a regex to grab the full line of text from the file that includes the above
# string. This is important because the full line of text will include the leading
# whitespace, which ruamel helpfully strips out from the parsed data.
# 3. Next we grab the number of leading spaces on the line using the capture group from the
# regex
padding = len(
re.search(rf"\n(\s*){search_data}\n", content_decoded).groups()[0]
)
except (TypeError, AttributeError):
# This is to handle an edgecase where
if data.anchor.value:
logger.debug(
f"Content replacement for encrypted content in {path} at {name} was not found, so replacement will be skipped because target is a YAML anchor"
)
return content
raise
# 4. Now with the leading whitespace padding, we add this same number of spaces to each line
# of *both* the old vaulted data and the new vaulted data. It's important to do both because
# we'll need to do a replacement in a moment so we need to know both what we're replacing
# and what we're replacing it with.
padded_old_data = "\n".join(
[f"{' ' * padding}{item}" for item in data.value.split("\n") if item]
)
padded_new_data = "\n".join(
[
f"{' ' * padding}{item}"
for item in new_data.decode("utf-8").split("\n")
if item
]
)
# 5. Finally, we actually replace the content. We also need to re-encode it back to bytes
# because all file operations with vault are done in bytes mode
content = content_decoded.replace(padded_old_data, padded_new_data).encode()
return content
with path.open("rb") as infile:
raw = infile.read()
# The 'is_encrypted' check doesn't rely on the vault secret in the VaultLib matching the
# secret the data was encrypted with, it just checks that the data is encrypted with some
# vault secret. We could use either `old` or `new` for this check, it doesn't actually matter.
if old.is_encrypted(raw):
logger.debug(f"Identified vault encrypted file: {path}")
confirm = (
_confirm(f"Rekey vault encrypted file {path}?") if interactive else True
)
if not confirm:
logger.debug(
f"User skipped vault encrypted file {path} via interactive mode"
)
return
if backup:
path.rename(f"{path}.bak")
try:
updated = rekey(old, new, raw)
except ansible.parsing.vault.AnsibleVaultError:
msg = f"Failed to decrypt vault encrypted file {path} with provided vault secret"
if ignore:
logger.warning(msg)
return
raise RuntimeError(msg) from None
elif path.suffix.lower() in YAML_FILE_EXTENSIONS:
logger.debug(f"Identified YAML file: {path}")
confirm = (
_confirm(f"Search YAML file {path} for vault encrypted variables?")
if interactive
else True
)
data = yaml.load(raw)
if not confirm:
logger.debug(
f"User skipped processing YAML file {path} via interactive mode"
)
return
if backup:
shutil.copy(path, f"{path}.bak")
updated = _process_yaml_data(raw, data)
else:
logger.debug(f"Skipping non-vault file {path}")
return
logger.debug(f"Writing updated file contents to {path}")
with path.open("wb") as outfile:
outfile.write(updated)
def _expand_paths(paths: Iterable[Path]) -> List[Path]:
logger = logging.getLogger(__name__)
@ -309,61 +345,53 @@ def _expand_paths(paths: Iterable[Path]) -> List[Path]:
logger.debug(f"Including file {path}")
results.append(path)
elif path.is_dir():
logger.debug(f"Descending into subdirectory {path}")
logger.debug(f"Identifying files under {path}")
results += _expand_paths(path.iterdir())
else:
logger.debug(f"Discarding path {path}")
return results
def _read_vault_pass_file(path: Union[Path, str]) -> str:
logger = logging.getLogger(__name__)
try:
with Path(path).resolve().open(encoding="utf-8") as infile:
return infile.read()
except (FileNotFoundError, PermissionError):
logger.error(
f"Specified vault password file '{path}' does not exist or is unreadable"
)
sys.exit(1)
def _load_password(
fpath: Optional[str], desc: str = "", confirm: bool = True
) -> VaultSecret:
"""Load a password from a file or interactively
:param fpath: Optional path to the file containing the vault password. If not provided then
the password will be prompted for interactively.
:param desc: Description text to inject into the interactive password prompt. Useful when using
this function multiple times to identify different passwords to the user.
:param confirm: Whether to prompt twice for the input and check that the two inputs match
:returns: Populated vault secret object with the loaded password
"""
def _load_passwords(
old_file: str, new_file: str
) -> Tuple[ansible.parsing.vault.VaultSecret, ansible.parsing.vault.VaultSecret]:
logger = logging.getLogger(__name__)
if old_file:
old_vault_pass = _read_vault_pass_file(old_file)
logger.info(f"Loaded old vault password from {Path(old_file).resolve()}")
else:
logger.debug(
"No old vault password file provided, prompting for old vault password input"
)
old_vault_pass = getpass.getpass(
prompt="Old Ansible Vault password: ", stream=sys.stderr
if fpath:
try:
with Path(fpath).resolve().open("rb") as infile:
return VaultSecret(infile.read())
except (FileNotFoundError, PermissionError) as err:
raise RuntimeError(
f"Specified vault password file '{fpath}' does not exist or is unreadable"
) from err
logger.debug("No vault password file provided, prompting for interactive input")
password_1 = getpass.getpass(
prompt=f"Enter {desc} Ansible Vault password: ", stream=sys.stderr
)
if confirm:
password_2 = getpass.getpass(
prompt=f"Confirm (re-enter) {desc} Ansible Vault password: ",
stream=sys.stderr,
)
if new_file:
new_vault_pass = _read_vault_pass_file(new_file)
logger.info(f"Loaded new vault password from {Path(new_file).resolve()}")
else:
logger.debug(
"No new vault password file provided, prompting for new vault password input"
)
new_vault_pass = getpass.getpass(
prompt="New Ansible Vault password: ", stream=sys.stderr
)
confirm = getpass.getpass(
prompt="Confirm new Ansible Vault password: ", stream=sys.stderr
)
if new_vault_pass != confirm:
logger.error("New vault passwords do not match")
sys.exit(1)
if password_1 != password_2:
raise RuntimeError(f"Provided {desc} passwords do not match")
return ansible.parsing.vault.VaultSecret(
old_vault_pass.encode("utf-8")
), ansible.parsing.vault.VaultSecret(new_vault_pass.encode("utf-8"))
return VaultSecret(password_1.encode("utf-8"))
def main():
@ -383,17 +411,26 @@ def main():
sys.exit(0)
if not args.paths:
logger.warning("No path provided, nothing to do!")
logger.warning("No paths provided, nothing to do!")
sys.exit(0)
old_pass, new_pass = _load_passwords(args.old_pass_file, args.new_pass_file)
in_vault = ansible.parsing.vault.VaultLib([(args.vault_id, old_pass)])
out_vault = ansible.parsing.vault.VaultLib([(args.vault_id, new_pass)])
try:
old_pass = _load_password(args.old_pass_file, desc="existing", confirm=False)
new_pass = _load_password(args.new_pass_file, desc="new", confirm=True)
logger.debug(
in_vault = VaultLib([(args.vault_id, old_pass)])
out_vault = VaultLib([(args.vault_id, new_pass)])
except RuntimeError as err:
logger.error(str(err))
sys.exit(1)
except KeyboardInterrupt:
sys.exit(130)
logger.info(
f"Identifying all files under {len(args.paths)} input paths: {', '.join(args.paths)}"
)
files = _expand_paths(args.paths)
logger.info(f"Identified {len(files)} files for processing")
for filepath in files:
_process_file(