From a3501eaa41208b61437e18699fd94451eb5fe1b9 Mon Sep 17 00:00:00 2001 From: aidan garske Date: Tue, 23 Jun 2026 18:15:54 -0700 Subject: [PATCH] Add nightly OpenSSL command performance regression testing --- .github/workflows/README.md | 69 ++++ .github/workflows/perf-regression.yml | 98 ++++++ .github/workflows/pr-osp-select.yml | 4 + .gitignore | 2 + scripts/perf_test/clean-perf-test.sh | 29 ++ scripts/perf_test/do-perf-tests.sh | 73 ++++ scripts/perf_test/perf-baseline.fips.json | 13 + scripts/perf_test/perf-baseline.nonfips.json | 13 + scripts/perf_test/perf-cmd-test.sh | 346 +++++++++++++++++++ 9 files changed, 647 insertions(+) create mode 100644 .github/workflows/perf-regression.yml create mode 100755 scripts/perf_test/clean-perf-test.sh create mode 100755 scripts/perf_test/do-perf-tests.sh create mode 100644 scripts/perf_test/perf-baseline.fips.json create mode 100644 scripts/perf_test/perf-baseline.nonfips.json create mode 100755 scripts/perf_test/perf-cmd-test.sh diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 0c21f92b..2c6700a9 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -263,6 +263,75 @@ The scan-build and infer thresholds are baseline-based, not strict — they let pre-existing issues slide but flag obvious regressions. Bringing them to 0 is a future cleanup. +## Performance regression testing + +`perf-regression.yml` runs nightly at 07:00 UTC (and on +`workflow_dispatch`). Customers run scripts that fire many `openssl` +commands in a row, and each invocation is a fresh process paying a full +wolfProvider init (plus, in FIPS builds, the per-algorithm CAST on first +use). This job guards the per-invocation cost of that path so a repeat of +the DH-CAST init blow-up gets caught automatically. 
+ +**This is an overhead regression tripwire, not a crypto throughput +benchmark, and not a wolfProvider-vs-OpenSSL speed comparison.** It only +asks one question: did per-command load/init overhead grow versus the +committed baseline? A loadable provider inherently pays process-startup +cost the built-in default provider does not, so the measured `overhead` +is expected to sit above 1.0 — that is not a defect and not a crypto-speed +result. + +`scripts/perf_test/do-perf-tests.sh` times a small set of representative +commands — a near-no-op init probe (`list -providers`, `version`) plus +real verbs (`dgst`, `enc`, `genpkey` RSA/EC, `pkeyutl` sign, DH derive) — +taking the **minimum** of N runs to cut runner noise. Each command is +timed under both the OpenSSL default provider and wolfProvider; the +default provider serves **only as a per-run baseline to cancel +runner-speed variance**, and the `overhead` factor (wolfProvider ÷ +baseline) is checked against a committed budget +(`scripts/perf_test/perf-baseline.{nonfips,fips}.json`). The init probes +are gated on absolute ms. The job fails only when a command exceeds its +budget (× tolerance) — i.e. when overhead *regresses*, never for being +above 1.0. + +To keep the nightly from going red on a single noisy measurement, a +command that fails the gate is measured up to `PERF_CONFIRM` times total +(default 3) and only reported as a regression if it fails **every** +attempt — one passing round clears it as a fluke. This is on top of each +measurement already taking the minimum of N runs. A command that exits +non-zero is reported as an error (not a silent pass), so a broken or +removed capability fails the job instead of looking fast. + +There are two job variants. **non-FIPS** tracks general init/load +overhead. **FIPS** is the one that actually guards the CAST class — the +FIPS CAST code is compiled out of non-FIPS builds, so only the FIPS row +exercises the DH-derive CAST that originally regressed. 
+ +It runs nightly on its own cron, and can be pulled into a PR on demand by +adding the `ci:perf` label (via `pr-osp-select.yml`, same as the OSP jobs). + +Run it locally: + +```sh +# non-FIPS +source scripts/env-setup +./scripts/perf_test/do-perf-tests.sh + +# FIPS - export before sourcing so env-setup selects provider-fips.conf +export WOLFSSL_ISFIPS=1 +source scripts/env-setup +./scripts/perf_test/do-perf-tests.sh +``` + +Timing uses GNU `date +%s.%N`, so local runs need GNU coreutils (the +script errors out early on BSD/macOS `date`). CI runs on Linux. + +The committed baselines are generous seeds — regenerate them on a stable +runner once and commit the result to tighten the gate: + +```sh +./scripts/perf_test/do-perf-tests.sh --update-baseline +``` + ## Triggering manually Every nightly-capable workflow also has `workflow_dispatch:` so you diff --git a/.github/workflows/perf-regression.yml b/.github/workflows/perf-regression.yml new file mode 100644 index 00000000..f5124609 --- /dev/null +++ b/.github/workflows/perf-regression.yml @@ -0,0 +1,98 @@ +name: Performance Regression + +on: + schedule: + - cron: '0 7 * * *' + workflow_dispatch: + workflow_call: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + discover_versions: + uses: ./.github/workflows/_discover-versions.yml + + perf_nonfips: + needs: discover_versions + name: Perf regression (non-FIPS) + runs-on: ubuntu-22.04 + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + openssl_ref: + - master + - ${{ needs.discover_versions.outputs.openssl_latest_ref }} + wolfssl_ref: ${{ fromJson(needs.discover_versions.outputs.wolfssl_latest_ref_array) }} + steps: + - name: Checkout wolfProvider + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Build wolfProvider + run: | + OPENSSL_TAG=${{ matrix.openssl_ref }} WOLFSSL_TAG=${{ matrix.wolfssl_ref }} ./scripts/build-wolfprovider.sh + + - name: Run perf regression + run: | + source 
scripts/env-setup + OPENSSL_TAG=${{ matrix.openssl_ref }} WOLFSSL_TAG=${{ matrix.wolfssl_ref }} ./scripts/perf_test/do-perf-tests.sh + + - name: Upload results + if: always() + uses: actions/upload-artifact@v4 + with: + name: perf-results-nonfips-${{ matrix.wolfssl_ref }}-${{ matrix.openssl_ref }} + path: perf_outputs/results.json + retention-days: 7 + + perf_fips: + needs: discover_versions + name: Perf regression (FIPS) + runs-on: ubuntu-22.04 + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + wolfssl_bundle_ref: [ '5.8.2' ] + openssl_ref: ${{ fromJson(needs.discover_versions.outputs.openssl_latest_ref_array) }} + steps: + - name: Checkout wolfProvider + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Download FIPS Ready Bundle + run: | + BUNDLE_URL="https://www.wolfssl.com/wolfssl-${{ matrix.wolfssl_bundle_ref }}-gplv3-fips-ready.zip" + wget -O wolfssl-fips-ready.zip "$BUNDLE_URL" + unzip wolfssl-fips-ready.zip + BUNDLE_DIR=$(find . -maxdepth 1 -type d -name "*fips-ready*" | head -n 1) + if [ -z "$BUNDLE_DIR" ]; then + echo "ERROR: Could not find FIPS ready bundle directory after extraction" + ls -la + exit 1 + fi + echo "FIPS_BUNDLE_PATH=$(pwd)/$BUNDLE_DIR" >> $GITHUB_ENV + + - name: Build wolfProvider with FIPS Ready Bundle + run: | + OPENSSL_TAG=${{ matrix.openssl_ref }} ./scripts/build-wolfprovider.sh --fips-bundle="$FIPS_BUNDLE_PATH" \ + --fips-check=ready --wolfssl-ver=v${{ matrix.wolfssl_bundle_ref }}-stable + + - name: Run perf regression + run: | + export WOLFSSL_ISFIPS=1 + source scripts/env-setup + WOLFSSL_TAG=v${{ matrix.wolfssl_bundle_ref }}-stable OPENSSL_TAG=${{ matrix.openssl_ref }} ./scripts/perf_test/do-perf-tests.sh + + - name: Upload results + if: always() + uses: actions/upload-artifact@v4 + with: + name: perf-results-fips-${{ matrix.wolfssl_bundle_ref }}-${{ matrix.openssl_ref }} + path: perf_outputs/results.json + retention-days: 7 diff --git a/.github/workflows/pr-osp-select.yml 
b/.github/workflows/pr-osp-select.yml index b996bfa1..47ce06be 100644 --- a/.github/workflows/pr-osp-select.yml +++ b/.github/workflows/pr-osp-select.yml @@ -219,3 +219,7 @@ jobs: needs: select if: contains(needs.select.outputs.run, ' all ') || contains(needs.select.outputs.run, ' multi-compiler ') uses: ./.github/workflows/nightly-multi-compiler.yml + perf: + needs: select + if: contains(needs.select.outputs.run, ' all ') || contains(needs.select.outputs.run, ' perf ') + uses: ./.github/workflows/perf-regression.yml diff --git a/.gitignore b/.gitignore index 4d7f623e..b38f19ce 100644 --- a/.gitignore +++ b/.gitignore @@ -89,7 +89,9 @@ ecc_outputs hash_outputs req_outputs rsa_outputs +perf_outputs scripts/cmd_test/*.log +scripts/perf_test/*.log IDE/Android/android-ndk-r26b/ IDE/Android/openssl-source/ diff --git a/scripts/perf_test/clean-perf-test.sh b/scripts/perf_test/clean-perf-test.sh new file mode 100755 index 00000000..dd9da2f1 --- /dev/null +++ b/scripts/perf_test/clean-perf-test.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# +# Copyright (C) 2006-2025 wolfSSL Inc. +# +# This file is part of wolfProvider. +# +# wolfProvider is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# wolfProvider is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with wolfProvider. If not, see <http://www.gnu.org/licenses/>.
+ +if [ -z "${DO_CMD_TESTS:-}" ]; then + echo "This script is designed to be called from do-perf-tests.sh" + echo "Do not run this script directly - use do-perf-tests.sh instead" + exit 1 +fi + +clean_perf_test() { + rm -f "./scripts/perf_test/perf-test.log" + rm -rf "./perf_outputs" +} diff --git a/scripts/perf_test/do-perf-tests.sh b/scripts/perf_test/do-perf-tests.sh new file mode 100755 index 00000000..9f3b20af --- /dev/null +++ b/scripts/perf_test/do-perf-tests.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# do-perf-tests.sh +# Run the wolfProvider performance regression test. +# +# Copyright (C) 2006-2025 wolfSSL Inc. +# +# This file is part of wolfProvider. +# +# wolfProvider is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# wolfProvider is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with wolfProvider. If not, see <http://www.gnu.org/licenses/>. + +PERF_TEST_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +REPO_ROOT="$( cd "${PERF_TEST_DIR}/../.." &> /dev/null && pwd )" + +export DO_CMD_TESTS=1 + +show_help() { + cat << EOF +Usage: $0 [OPTIONS] + +Measure per-invocation cost of representative openssl commands under +wolfProvider and compare against the committed baseline for the active +build variant (FIPS vs non-FIPS, selected by WOLFSSL_ISFIPS).
+ +OPTIONS: + --help Show this help message + --update-baseline Regenerate the baseline JSON from this run instead of + gating against it + +ENVIRONMENT VARIABLES: + OPENSSL_BIN Path to OpenSSL binary (auto-detected if not set) + WOLFSSL_ISFIPS Set to 1 to select the FIPS baseline + PERF_ITER Measured iterations per command (default 15) + PERF_WARMUP Warmup iterations per command (default 3) + PERF_CONFIRM Total measurement attempts for a failing command before + it is reported as a regression (default 3) +EOF + exit 0 +} + +PASS_ARGS=() +while [[ $# -gt 0 ]]; do + case $1 in + --help|-h) + show_help + ;; + --update-baseline) + PASS_ARGS+=("$1") + shift + ;; + *) + echo "Unknown option: $1" + echo "Use --help for usage information" + exit 1 + ;; + esac +done + +source "${REPO_ROOT}/scripts/cmd_test/cmd-test-common.sh" +cmd_test_env_setup + +"${PERF_TEST_DIR}/perf-cmd-test.sh" "${PASS_ARGS[@]}" +exit $? diff --git a/scripts/perf_test/perf-baseline.fips.json b/scripts/perf_test/perf-baseline.fips.json new file mode 100644 index 00000000..272da0d8 --- /dev/null +++ b/scripts/perf_test/perf-baseline.fips.json @@ -0,0 +1,13 @@ +{ + "tolerance": 0.25, + "commands": { + "init-probe": { "abs_ms_max": 15 }, + "version": { "abs_ms_max": 15 }, + "dgst-sha256": { "ratio_max": 1.8 }, + "enc-aes": { "ratio_max": 4.9 }, + "genpkey-rsa": { "ratio_max": 2.8 }, + "genpkey-ec": { "ratio_max": 3.1 }, + "pkeyutl-rsa": { "ratio_max": 14.2 }, + "dh-derive": { "ratio_max": 16.4 } + } +} diff --git a/scripts/perf_test/perf-baseline.nonfips.json b/scripts/perf_test/perf-baseline.nonfips.json new file mode 100644 index 00000000..67573586 --- /dev/null +++ b/scripts/perf_test/perf-baseline.nonfips.json @@ -0,0 +1,13 @@ +{ + "tolerance": 0.25, + "commands": { + "init-probe": { "abs_ms_max": 15 }, + "version": { "abs_ms_max": 15 }, + "dgst-sha256": { "ratio_max": 1.6 }, + "enc-aes": { "ratio_max": 1.5 }, + "genpkey-rsa": { "ratio_max": 1.8 }, + "genpkey-ec": { "ratio_max": 1.3 }, +
"pkeyutl-rsa": { "ratio_max": 1.2 }, + "dh-derive": { "ratio_max": 1.5 } + } +} diff --git a/scripts/perf_test/perf-cmd-test.sh b/scripts/perf_test/perf-cmd-test.sh new file mode 100755 index 00000000..3c6c79f3 --- /dev/null +++ b/scripts/perf_test/perf-cmd-test.sh @@ -0,0 +1,346 @@ +#!/bin/bash +# perf-cmd-test.sh +# Measure per-invocation init/run cost of representative openssl commands under +# wolfProvider and gate against a committed baseline. +# +# Copyright (C) 2006-2025 wolfSSL Inc. +# +# This file is part of wolfProvider. +# +# wolfProvider is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# wolfProvider is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with wolfProvider. If not, see <http://www.gnu.org/licenses/>. + +# Own variable: cmd-test-common.sh reassigns CMD_TEST_DIR to its own location. +PERF_TEST_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +source "${PERF_TEST_DIR}/../cmd_test/cmd-test-common.sh" +source "${PERF_TEST_DIR}/clean-perf-test.sh" + +if [ -z "${DO_CMD_TESTS:-}" ]; then + echo "This script is designed to be called from do-perf-tests.sh" + echo "Do not run this script directly - use do-perf-tests.sh instead" + exit 1 +fi + +for tool in jq awk; do + if ! command -v "$tool" >/dev/null 2>&1; then + echo "ERROR: required tool '$tool' not found" + exit 1 + fi +done + +# Timing needs GNU date's nanoseconds; BSD/macOS date yields a literal 'N'.
+if [ "$(date +%N)" = "N" ]; then + echo "ERROR: GNU 'date' with %N support is required (BSD/macOS date lacks it)" + exit 1 +fi + +UPDATE_BASELINE=0 +if [ "${1:-}" = "--update-baseline" ] || [ "${PERF_UPDATE_BASELINE:-0}" = "1" ]; then + UPDATE_BASELINE=1 +fi + +PERF_ITER="${PERF_ITER:-15}" +PERF_WARMUP="${PERF_WARMUP:-3}" +# Total measurement attempts for a failing command; it is only reported as a +# regression if it fails the gate on every attempt (guards against flukes). +PERF_CONFIRM="${PERF_CONFIRM:-3}" +# Headroom added above the just-measured value when writing a fresh baseline. +PERF_MARGIN="${PERF_MARGIN:-0.30}" + +if [ "${WOLFSSL_ISFIPS:-0}" = "1" ]; then + BASELINE="${PERF_TEST_DIR}/perf-baseline.fips.json" + VARIANT="fips" +else + BASELINE="${PERF_TEST_DIR}/perf-baseline.nonfips.json" + VARIANT="nonfips" +fi + +clean_perf_test +# cmd_test_init would put the log under scripts/cmd_test; keep it with this suite. +LOG_FILE="${PERF_TEST_DIR}/perf-test.log" +touch "$LOG_FILE" +exec > >(tee -a "$LOG_FILE") 2>&1 + +OUTDIR="perf_outputs" +mkdir -p "$OUTDIR" +IN="$OUTDIR/input.bin" +# pkeyutl -sign does raw (unhashed) RSA signing, capped well under the modulus +# size, so it needs a digest-sized input, not the bulk IN blob. +SIGIN="$OUTDIR/sign_input.bin" + +# Commands measured, in display order, and the gate each is checked against: +# 'abs' commands are near no-ops (gated on absolute ms, no meaningful ratio); +# 'ratio' commands are gated on wolfProvider time relative to the default provider. 
+CMDS=(init-probe version dgst-sha256 enc-aes genpkey-rsa genpkey-ec pkeyutl-rsa dh-derive) +declare -A GATE=( + [init-probe]=abs + [version]=abs + [dgst-sha256]=ratio + [enc-aes]=ratio + [genpkey-rsa]=ratio + [genpkey-ec]=ratio + [pkeyutl-rsa]=ratio + [dh-derive]=ratio +) + +exec_cmd() { + case "$1" in + init-probe) "$OPENSSL_BIN" list -providers ;; + version) "$OPENSSL_BIN" version ;; + dgst-sha256) "$OPENSSL_BIN" dgst -sha256 "$IN" ;; + enc-aes) "$OPENSSL_BIN" enc -aes-256-cbc -pbkdf2 -k testpass -in "$IN" -out "$OUTDIR/enc.bin" ;; + genpkey-rsa) "$OPENSSL_BIN" genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 -out "$OUTDIR/rsa_tmp.pem" ;; + genpkey-ec) "$OPENSSL_BIN" genpkey -algorithm EC -pkeyopt ec_paramgen_curve:prime256v1 -out "$OUTDIR/ec_tmp.pem" ;; + pkeyutl-rsa) "$OPENSSL_BIN" pkeyutl -sign -inkey "$OUTDIR/rsa.pem" -in "$SIGIN" -out "$OUTDIR/rsa_sig.bin" ;; + dh-derive) "$OPENSSL_BIN" pkeyutl -derive -inkey "$OUTDIR/dh1.pem" -peerkey "$OUTDIR/dh2_pub.pem" -out "$OUTDIR/dh_secret.bin" ;; + *) return 1 ;; + esac +} + +gen_or_die() { + if ! "$@" >/dev/null 2>&1; then + echo "ERROR: setup step failed: $*" + exit 1 + fi +} + +# Generate all inputs/keys under the default provider so the measured +# wolfProvider runs never include setup cost. A setup failure is fatal - a +# missing key would otherwise make the dependent command fail on every run. 
+generate_inputs() { + use_default_provider + gen_or_die dd if=/dev/urandom of="$IN" bs=4096 count=1 + gen_or_die dd if=/dev/urandom of="$SIGIN" bs=32 count=1 + gen_or_die "$OPENSSL_BIN" genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 -out "$OUTDIR/rsa.pem" + gen_or_die "$OPENSSL_BIN" genpkey -algorithm DH -pkeyopt group:ffdhe2048 -out "$OUTDIR/dh1.pem" + gen_or_die "$OPENSSL_BIN" genpkey -algorithm DH -pkeyopt group:ffdhe2048 -out "$OUTDIR/dh2.pem" + gen_or_die "$OPENSSL_BIN" pkey -in "$OUTDIR/dh2.pem" -pubout -out "$OUTDIR/dh2_pub.pem" +} + +write_baseline() { + local obj name r w gate cmds="{}" all + all=$(printf '%s\n' "${RESULTS[@]}" | jq -s '.') + for name in "${CMDS[@]}"; do + r=$(jq -r --arg c "$name" '.[] | select(.name==$c) | .ratio' <<< "$all") + w=$(jq -r --arg c "$name" '.[] | select(.name==$c) | .wolf_ms' <<< "$all") + gate="${GATE[$name]}" + if [ "$gate" = "abs" ]; then + obj=$(jq -nc --argjson w "$w" --argjson m "$PERF_MARGIN" '{abs_ms_max: (($w*(1+$m))*100|round/100)}') + else + obj=$(jq -nc --argjson r "$r" --argjson m "$PERF_MARGIN" '{ratio_max: (($r*(1+$m))*100|round/100)}') + fi + cmds=$(jq -nc --argjson c "$cmds" --arg n "$name" --argjson o "$obj" '$c + {($n): $o}') + done + jq -n --argjson tol "$TOL" --argjson cmds "$cmds" \ + '{tolerance: $tol, commands: $cmds}' > "$BASELINE" +} + +# Minimum wall time in ms over PERF_ITER runs after PERF_WARMUP discarded runs. +measure() { + local name=$1 + local i start end dur best="" + for ((i=0; i/dev/null 2>&1 + done + for ((i=0; i/dev/null 2>&1 + end=$(date +%s.%N) + dur=$(awk -v s="$start" -v e="$end" 'BEGIN{printf "%.3f", (e-s)*1000}') + best=$(awk -v d="$dur" -v b="$best" 'BEGIN{ if (b=="" || d+0/dev/null 2>&1 + if ! exec_cmd "$name" >/dev/null 2>&1; then + echo " [$name] ERROR: command failed under the default provider" + cmd_error=1 + fi + default_ms=$(measure "$name") + fi + use_wolf_provider >/dev/null 2>&1 + if ! 
exec_cmd "$name" >/dev/null 2>&1; then + echo " [$name] ERROR: command failed under wolfProvider" + cmd_error=1 + fi + wolf_ms=$(measure "$name") + ratio=$(awk -v w="$wolf_ms" -v d="$default_ms" 'BEGIN{ if (d+0>0) printf "%.3f", w/d; else printf "0" }') +} + +# Apply the baseline gate to the current wolf_ms / ratio, setting verdict / limit. +gate_check() { + local name=$1 ratio_max abs_ms_max + if [ "${cmd_error:-0}" = "1" ]; then + verdict="ERROR" + limit="-" + return + fi + ratio_max=$(jq -r --arg c "$name" '.commands[$c].ratio_max // empty' "$BASELINE" 2>/dev/null) + abs_ms_max=$(jq -r --arg c "$name" '.commands[$c].abs_ms_max // empty' "$BASELINE" 2>/dev/null) + verdict="INFO" + limit="-" + if [ "${GATE[$name]}" = "abs" ] && [ -n "$abs_ms_max" ]; then + limit="${abs_ms_max}ms" + if awk -v v="$wolf_ms" -v m="$abs_ms_max" -v t="$TOL" 'BEGIN{exit !(v+0 > m*(1+t))}'; then + verdict="FAIL" + else + verdict="PASS" + fi + elif [ "${GATE[$name]}" = "ratio" ] && [ -n "$ratio_max" ] && [ "$CAN_COMPARE" = "1" ]; then + limit="${ratio_max}x" + if awk -v v="$ratio" -v m="$ratio_max" -v t="$TOL" 'BEGIN{exit !(v+0 > m*(1+t))}'; then + verdict="FAIL" + else + verdict="PASS" + fi + fi +} + +echo "==========================================" +echo "wolfProvider per-command init-overhead check" +echo "==========================================" +echo "Regression tripwire for provider load/init overhead - NOT a crypto" +echo "throughput benchmark. The OpenSSL default provider is used only as a" +echo "per-run baseline to cancel runner-speed noise; an overhead above 1.0 is" +echo "expected because a loadable provider pays startup cost the built-in" +echo "default does not. This check only flags growth beyond the committed budget." +echo "" +echo "Variant: $VARIANT" +echo "Baseline: $BASELINE" +echo "Iterations: $PERF_ITER (warmup $PERF_WARMUP), confirm fails x$PERF_CONFIRM" +echo "" + +if [ "$UPDATE_BASELINE" = "0" ] && [ ! 
-f "$BASELINE" ]; then + echo "ERROR: baseline $BASELINE not found (run with --update-baseline to create it)" + exit 1 +fi + +TOL=0.25 +if [ -f "$BASELINE" ]; then # an unreadable baseline is fatal: every gate would otherwise degrade to INFO and pass + TOL=$(jq -er '(.tolerance // 0.25) | numbers' "$BASELINE") || { echo "ERROR: $BASELINE is not valid JSON or its tolerance is not a number"; exit 1; } +fi + +generate_inputs + +CAN_COMPARE=1 +if ! can_compare_providers; then + CAN_COMPARE=0 + echo "INFO: replace-default mode - ratio gates skipped, measuring wolfProvider only" +fi + +# Regenerating ratio baselines needs the default provider to compare against; +# in replace-default mode every ratio would be 0 and poison the baseline. +if [ "$UPDATE_BASELINE" = "1" ] && [ "$CAN_COMPARE" = "0" ]; then + echo "ERROR: --update-baseline requires normal mode (cannot compare against the default provider in replace-default mode)" + exit 1 +fi + +FAIL=0 +UPDATE_FAILED=0 +RESULTS=() +printf "%-14s %12s %12s %9s %9s %s\n" "command" "base_ms" "wolfprov_ms" "overhead" "budget" "verdict" +printf -- "------------------------------------------------------------------------\n" + +for name in "${CMDS[@]}"; do + measure_pair "$name" + + verdict="INFO" + limit="-" + attempts=1 + if [ "$UPDATE_BASELINE" = "1" ]; then + if [ "${cmd_error:-0}" = "1" ]; then + echo " [$name] command failed - refusing to baseline a failing command" + UPDATE_FAILED=1 + fi + else + gate_check "$name" + # Only a failing command is re-measured. It must fail every attempt to + # be reported - a single passing round means the first FAIL was a fluke. + while [ "$verdict" = "FAIL" ] && [ "$attempts" -lt "$PERF_CONFIRM" ]; do + attempts=$((attempts + 1)) + echo " [$name] gate failed, confirming (attempt $attempts/$PERF_CONFIRM)..."
+ measure_pair "$name" + gate_check "$name" + done + if [ "$verdict" = "FAIL" ]; then + FAIL=1 + echo " [$name] regression confirmed over $attempts attempts" + elif [ "$verdict" = "ERROR" ]; then + FAIL=1 + echo " [$name] command did not run successfully - cannot measure" + fi + fi + + printf "%-14s %12s %12s %9s %9s %s\n" "$name" "$default_ms" "$wolf_ms" "$ratio" "$limit" "$verdict" + + RESULTS+=("$(jq -nc \ + --arg n "$name" --arg g "${GATE[$name]}" \ + --argjson d "$default_ms" --argjson w "$wolf_ms" --argjson r "$ratio" \ + --argjson a "$attempts" --arg v "$verdict" --arg l "$limit" \ + '{name:$n, gate:$g, default_ms:$d, wolf_ms:$w, ratio:$r, limit:$l, attempts:$a, verdict:$v}')") +done + +printf -- "------------------------------------------------------------------------\n" + +RESULTS_JSON="$OUTDIR/results.json" +printf '%s\n' "${RESULTS[@]}" | jq -s \ + --arg variant "$VARIANT" \ + --arg when "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + --arg wolfssl "${WOLFSSL_TAG:-unknown}" \ + --arg openssl "${OPENSSL_TAG:-unknown}" \ + '{variant:$variant, generated:$when, wolfssl_ref:$wolfssl, openssl_ref:$openssl, results:.}' \ + > "$RESULTS_JSON" +echo "Results written to $RESULTS_JSON" + +if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then + { + echo "### wolfProvider per-command init-overhead check ($VARIANT)" + echo "" + echo "Regression tripwire for provider load/init overhead - **not** a crypto" + echo "throughput benchmark. The OpenSSL default provider is a per-run baseline" + echo "to cancel runner-speed noise; \`overhead\` above 1.0 is expected (a loadable" + echo "provider pays startup cost the built-in default does not). Only growth" + echo "beyond \`budget\` fails the check."
+ echo "" + echo "| command | base_ms | wolfprov_ms | overhead | budget | verdict |" + echo "|---|---|---|---|---|---|" + jq -r '.results[] | "| \(.name) | \(.default_ms) | \(.wolf_ms) | \(.ratio) | \(.limit) | \(.verdict) |"' "$RESULTS_JSON" + } >> "$GITHUB_STEP_SUMMARY" +fi + +if [ "$UPDATE_BASELINE" = "1" ]; then + if [ "$UPDATE_FAILED" -ne 0 ]; then + echo "ERROR: refusing to write baseline - one or more commands failed to run" + exit 1 + fi + write_baseline + echo "Baseline written to $BASELINE" + exit 0 +fi + +if [ "$FAIL" -ne 0 ]; then + echo "=== Init-overhead regression detected (exceeded budget) ===" + exit 1 +fi +echo "=== All commands within overhead budget ===" +exit 0