From 93dcf9efe0bf28ec0cc2a1d576f76fcdda5d92af Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 17 Jun 2026 17:33:20 +0000 Subject: [PATCH 1/8] Add AI-powered plain-language to Azure Terraform translator A small app that converts plain-English infrastructure descriptions into Terraform (HCL) for Azure using the azurerm provider, powered by Claude. - translator.py: core logic (streaming + adaptive thinking) - app.py: Flask web UI with /api/translate endpoint - cli.py: command-line interface (args or stdin) - templates/index.html: single-page UI with examples and copy-to-clipboard - README + requirements Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01BAWVgYJc7xKU8iWuZJgEVk --- terraform-translator/.gitignore | 4 + terraform-translator/README.md | 69 +++++++++++++ terraform-translator/app.py | 38 ++++++++ terraform-translator/cli.py | 36 +++++++ terraform-translator/requirements.txt | 2 + terraform-translator/templates/index.html | 114 ++++++++++++++++++++++ terraform-translator/translator.py | 87 +++++++++++++++++ 7 files changed, 350 insertions(+) create mode 100644 terraform-translator/.gitignore create mode 100644 terraform-translator/README.md create mode 100644 terraform-translator/app.py create mode 100644 terraform-translator/cli.py create mode 100644 terraform-translator/requirements.txt create mode 100644 terraform-translator/templates/index.html create mode 100644 terraform-translator/translator.py diff --git a/terraform-translator/.gitignore b/terraform-translator/.gitignore new file mode 100644 index 0000000..bc4f72a --- /dev/null +++ b/terraform-translator/.gitignore @@ -0,0 +1,4 @@ +__pycache__/ +*.pyc +.venv/ +.env diff --git a/terraform-translator/README.md b/terraform-translator/README.md new file mode 100644 index 0000000..561dec6 --- /dev/null +++ b/terraform-translator/README.md @@ -0,0 +1,69 @@ +# Plain Language → Azure Terraform + +A small AI-powered app that turns a plain-English description of 
Azure +infrastructure into [Terraform](https://www.terraform.io/) (HCL) using the +official `azurerm` provider. It is powered by Anthropic's Claude (Opus 4.8). + +You can use it two ways: + +- **Web app** — a single-page UI (Flask) +- **CLI** — pipe a description in and get `main.tf` on stdout + +> ⚠️ The generated Terraform is a starting point. Always review it (and run +> `terraform validate` / `terraform plan`) before applying to a real subscription. + +## Setup + +```bash +cd terraform-translator +python -m venv .venv && source .venv/bin/activate +pip install -r requirements.txt +export ANTHROPIC_API_KEY=sk-ant-... # your Anthropic API key +``` + +## Web app + +```bash +python app.py +# open http://127.0.0.1:5000 +``` + +Type something like: + +> A Linux web app on a B1 plan with a PostgreSQL flexible server, both in West Europe + +…and click **Generate Terraform**. Use the **Copy** button to grab the HCL. + +## CLI + +```bash +python cli.py "a storage account with a private blob container" + +# or read the description from stdin +echo "an AKS cluster with 3 nodes and a container registry" | python cli.py +``` + +## How it works + +`translator.py` sends your description to Claude with a system prompt that makes +it act as an Azure + Terraform expert. It uses: + +- **Model:** `claude-opus-4-8` +- **Streaming** via `client.messages.stream(...)` so long outputs don't hit the + SDK's HTTP timeout (the final message is read with `get_final_message()`). +- **Adaptive thinking** (`thinking={"type": "adaptive"}`) so the model can reason + about which Azure resources to use before emitting code. +- **`effort: "high"`** for better code quality. + +The model is instructed to return only HCL; any surrounding Markdown code fence +is stripped before display. 
+ +## Files + +| File | Purpose | +| ---------------------- | ------------------------------------------------ | +| `translator.py` | Core logic — calls the Anthropic API | +| `app.py` | Flask web server (`/` and `/api/translate`) | +| `cli.py` | Command-line entry point | +| `templates/index.html` | Web UI | +| `requirements.txt` | Python dependencies | diff --git a/terraform-translator/app.py b/terraform-translator/app.py new file mode 100644 index 0000000..8d5ad7c --- /dev/null +++ b/terraform-translator/app.py @@ -0,0 +1,38 @@ +"""Minimal Flask web app for translating plain language into Azure Terraform. + +Run locally: + export ANTHROPIC_API_KEY=sk-ant-... + pip install -r requirements.txt + python app.py + # open http://127.0.0.1:5000 +""" + +from __future__ import annotations + +from flask import Flask, jsonify, render_template, request + +from translator import TranslationError, translate + +app = Flask(__name__) + + +@app.get("/") +def index(): + return render_template("index.html") + + +@app.post("/api/translate") +def api_translate(): + data = request.get_json(silent=True) or {} + prompt = data.get("prompt", "") + try: + terraform = translate(prompt) + except TranslationError as exc: + return jsonify(error=str(exc)), 400 + except Exception as exc: # surface unexpected SDK/network errors to the UI + return jsonify(error=f"Unexpected error: {exc}"), 500 + return jsonify(terraform=terraform) + + +if __name__ == "__main__": + app.run(debug=True) diff --git a/terraform-translator/cli.py b/terraform-translator/cli.py new file mode 100644 index 0000000..8e4afb9 --- /dev/null +++ b/terraform-translator/cli.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +"""Command-line interface for the plain-language -> Azure Terraform translator. + +Usage: + python cli.py "a linux web app with a postgres flexible server database" + echo "a storage account with a private blob container" | python cli.py + +Requires the ANTHROPIC_API_KEY environment variable to be set. 
+""" + +from __future__ import annotations + +import sys + +from translator import TranslationError, translate + + +def main(argv: list[str]) -> int: + if len(argv) > 1: + prompt = " ".join(argv[1:]) + elif not sys.stdin.isatty(): + prompt = sys.stdin.read() + else: + print(__doc__.strip(), file=sys.stderr) + return 2 + + try: + print(translate(prompt)) + except TranslationError as exc: + print(f"error: {exc}", file=sys.stderr) + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/terraform-translator/requirements.txt b/terraform-translator/requirements.txt new file mode 100644 index 0000000..83e656f --- /dev/null +++ b/terraform-translator/requirements.txt @@ -0,0 +1,2 @@ +anthropic>=0.69.0 +flask>=3.0.0 diff --git a/terraform-translator/templates/index.html b/terraform-translator/templates/index.html new file mode 100644 index 0000000..bdec33b --- /dev/null +++ b/terraform-translator/templates/index.html @@ -0,0 +1,114 @@ + + + + + + Plain Language → Azure Terraform + + + +

Plain Language → Azure Terraform

+

Describe the Azure infrastructure you want and get Terraform (HCL) using the azurerm provider.

+ + + +
+ Try: + + + +
+ +
+ + +
+ + + + + + diff --git a/terraform-translator/translator.py b/terraform-translator/translator.py new file mode 100644 index 0000000..2b28d6b --- /dev/null +++ b/terraform-translator/translator.py @@ -0,0 +1,87 @@ +"""Core translation logic: plain language -> Azure Terraform (HCL). + +Uses the Anthropic Python SDK with Claude Opus 4.8. Streaming is used so that +long generations don't hit the SDK's HTTP timeout, and adaptive thinking lets +the model reason about the right Azure resources before emitting code. +""" + +from __future__ import annotations + +import re + +from anthropic import Anthropic + +MODEL = "claude-opus-4-8" + +SYSTEM_PROMPT = """\ +You are an expert cloud infrastructure engineer specializing in Microsoft Azure \ +and Terraform (HCL). Convert the user's plain-language infrastructure \ +description into production-quality Terraform configuration using the official \ +`azurerm` provider. + +Guidelines: +- Output valid, well-formatted HCL. +- Include a `terraform` block that pins a recent `azurerm` provider version, and \ +a `provider "azurerm"` block with `features {}`. +- Create a resource group unless the user explicitly says to use an existing one. +- Use descriptive, Azure-valid resource names and locations. +- Parameterize key values with `variable` blocks (with sensible defaults) and \ +expose useful values (IDs, endpoints, connection info) via `output` blocks. +- Apply Azure + Terraform best practices: tags, secure defaults, least-privilege. +- Add concise `#` comments explaining non-obvious choices. +- If the request is ambiguous, pick reasonable defaults and note them in comments. + +Return ONLY the Terraform code. Do not add any explanation before or after it.""" + + +class TranslationError(RuntimeError): + """Raised when the model cannot produce Terraform for the request.""" + + +def _strip_code_fences(text: str) -> str: + """Remove a surrounding ```hcl ... 
``` (or ```) fence if the model added one.""" + fenced = re.match(r"^\s*```[a-zA-Z]*\n(.*?)\n```\s*$", text, re.DOTALL) + if fenced: + return fenced.group(1).strip() + return text.strip() + + +def translate(prompt: str, *, client: Anthropic | None = None) -> str: + """Translate a plain-language description into Azure Terraform HCL. + + Args: + prompt: Natural-language description of the desired Azure infrastructure. + client: Optional pre-configured Anthropic client. If omitted, a default + client is created (reads ANTHROPIC_API_KEY from the environment). + + Returns: + The generated Terraform configuration as a string. + """ + prompt = (prompt or "").strip() + if not prompt: + raise TranslationError("Please describe the infrastructure you want.") + + client = client or Anthropic() + + with client.messages.stream( + model=MODEL, + max_tokens=64000, + thinking={"type": "adaptive"}, + output_config={"effort": "high"}, + system=SYSTEM_PROMPT, + messages=[{"role": "user", "content": prompt}], + ) as stream: + message = stream.get_final_message() + + if message.stop_reason == "refusal": + raise TranslationError( + "The request was declined by the model's safety system." 
+ ) + + text = "".join( + block.text for block in message.content if block.type == "text" + ) + code = _strip_code_fences(text) + if not code: + raise TranslationError("The model did not return any Terraform code.") + return code From f8b5017f53f0cc8d502723ba84d0ee903cad3dd6 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 17 Jun 2026 17:37:28 +0000 Subject: [PATCH 2/8] Add offline tests and testing docs for the translator - Lazy-import the Anthropic client so logic can be tested without the package or an API key installed - tests/test_translator.py: fake-client tests for fence stripping, empty-prompt and refusal handling, and the happy path - README: Testing section (offline unittest + real end-to-end check) Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01BAWVgYJc7xKU8iWuZJgEVk --- terraform-translator/README.md | 28 +++++++ terraform-translator/tests/test_translator.py | 75 +++++++++++++++++++ terraform-translator/translator.py | 9 ++- 3 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 terraform-translator/tests/test_translator.py diff --git a/terraform-translator/README.md b/terraform-translator/README.md index 561dec6..175f116 100644 --- a/terraform-translator/README.md +++ b/terraform-translator/README.md @@ -43,6 +43,34 @@ python cli.py "a storage account with a private blob container" echo "an AKS cluster with 3 nodes and a container registry" | python cli.py ``` +## Testing + +There are two levels. 
+ +**Offline tests (no API key, no network).** These inject a fake client that +mimics the SDK, so they verify the parsing/error logic anywhere: + +```bash +python -m unittest discover -s tests -v # or: pytest +``` + +**Real end-to-end check (needs `ANTHROPIC_API_KEY`).** After `pip install -r +requirements.txt` and setting the key: + +```bash +# CLI +python cli.py "a resource group named demo-rg in West Europe" + +# Web app — start it, then send a request +python app.py & +curl -s localhost:5000/api/translate \ + -H 'Content-Type: application/json' \ + -d '{"prompt":"a storage account with a private blob container"}' +``` + +Then run `terraform fmt -check` / `terraform validate` on the output to confirm +it's syntactically valid HCL. + ## How it works `translator.py` sends your description to Claude with a system prompt that makes diff --git a/terraform-translator/tests/test_translator.py b/terraform-translator/tests/test_translator.py new file mode 100644 index 0000000..795fb30 --- /dev/null +++ b/terraform-translator/tests/test_translator.py @@ -0,0 +1,75 @@ +"""Offline tests for translator.py. + +These run without an ANTHROPIC_API_KEY and without the `anthropic` package, +by injecting a fake client that mimics the SDK's streaming surface. 
+ + python -m unittest discover -s tests # or: pytest +""" + +import os +import sys +import unittest +from types import SimpleNamespace + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from translator import TranslationError, _strip_code_fences, translate + + +class _FakeStream: + """Stands in for the context manager returned by client.messages.stream(...).""" + + def __init__(self, message): + self._message = message + + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + + def get_final_message(self): + return self._message + + +class _FakeClient: + """Minimal fake exposing client.messages.stream(...).""" + + def __init__(self, text, stop_reason="end_turn"): + message = SimpleNamespace( + stop_reason=stop_reason, + content=[SimpleNamespace(type="text", text=text)], + ) + self.messages = SimpleNamespace(stream=lambda **kwargs: _FakeStream(message)) + + +class StripFenceTests(unittest.TestCase): + def test_hcl_fence(self): + self.assertEqual(_strip_code_fences('```hcl\nresource "x" "y" {}\n```'), + 'resource "x" "y" {}') + + def test_bare_fence(self): + self.assertEqual(_strip_code_fences("```\nfoo\n```"), "foo") + + def test_no_fence(self): + self.assertEqual(_strip_code_fences(" plain "), "plain") + + +class TranslateTests(unittest.TestCase): + def test_returns_code_and_strips_fence(self): + client = _FakeClient('```hcl\nprovider "azurerm" {\n features {}\n}\n```') + out = translate("a resource group", client=client) + self.assertEqual(out, 'provider "azurerm" {\n features {}\n}') + + def test_empty_prompt_raises(self): + with self.assertRaises(TranslationError): + translate(" ", client=_FakeClient("ignored")) + + def test_refusal_raises(self): + client = _FakeClient("", stop_reason="refusal") + with self.assertRaises(TranslationError): + translate("something", client=client) + + +if __name__ == "__main__": + unittest.main() diff --git a/terraform-translator/translator.py 
b/terraform-translator/translator.py index 2b28d6b..783fd11 100644 --- a/terraform-translator/translator.py +++ b/terraform-translator/translator.py @@ -8,8 +8,10 @@ from __future__ import annotations import re +from typing import TYPE_CHECKING -from anthropic import Anthropic +if TYPE_CHECKING: # only needed for type checkers, not at runtime + from anthropic import Anthropic MODEL = "claude-opus-4-8" @@ -61,7 +63,10 @@ def translate(prompt: str, *, client: Anthropic | None = None) -> str: if not prompt: raise TranslationError("Please describe the infrastructure you want.") - client = client or Anthropic() + if client is None: + from anthropic import Anthropic # imported lazily so tests can inject a fake + + client = Anthropic() with client.messages.stream( model=MODEL, From 15c3626f04e6800e46f1853ed494fb841a0c9a61 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 17 Jun 2026 17:39:12 +0000 Subject: [PATCH 3/8] Add CI workflow for the terraform-translator - offline-tests job: runs the unittest suite on every push/PR (no secrets) - live-check job: gated on an ANTHROPIC_API_KEY secret; generates Terraform via the CLI and validates it with terraform fmt/init/validate, skipped cleanly when the secret is absent - ignore tf-check/ and Terraform state artifacts; document CI in the README Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01BAWVgYJc7xKU8iWuZJgEVk --- .github/workflows/terraform-translator.yml | 78 ++++++++++++++++++++++ terraform-translator/.gitignore | 6 ++ terraform-translator/README.md | 11 +++ 3 files changed, 95 insertions(+) create mode 100644 .github/workflows/terraform-translator.yml diff --git a/.github/workflows/terraform-translator.yml b/.github/workflows/terraform-translator.yml new file mode 100644 index 0000000..17dc747 --- /dev/null +++ b/.github/workflows/terraform-translator.yml @@ -0,0 +1,78 @@ +name: terraform-translator + +on: + push: + paths: + - "terraform-translator/**" + - 
".github/workflows/terraform-translator.yml" + pull_request: + paths: + - "terraform-translator/**" + - ".github/workflows/terraform-translator.yml" + +defaults: + run: + working-directory: terraform-translator + +jobs: + # Always runs — needs no secrets. The translator lazily imports the Anthropic + # SDK, so the offline suite runs without dependencies or an API key. + offline-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Run offline unit tests + run: python -m unittest discover -s tests -v + + # Runs only when an ANTHROPIC_API_KEY repository secret is configured. + # Generates real Terraform via the CLI and validates it with the Terraform CLI. + live-check: + runs-on: ubuntu-latest + needs: offline-tests + steps: + - uses: actions/checkout@v4 + + - name: Detect API key + id: detect + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + run: | + if [ -n "$ANTHROPIC_API_KEY" ]; then + echo "present=true" >> "$GITHUB_OUTPUT" + else + echo "present=false" >> "$GITHUB_OUTPUT" + echo "::notice::No ANTHROPIC_API_KEY secret set — skipping live check." 
+ fi + + - uses: actions/setup-python@v5 + if: steps.detect.outputs.present == 'true' + with: + python-version: "3.11" + + - name: Install dependencies + if: steps.detect.outputs.present == 'true' + run: pip install -r requirements.txt + + - name: Generate Terraform from a prompt + if: steps.detect.outputs.present == 'true' + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + run: | + mkdir -p tf-check + python cli.py "a resource group named demo-rg in West Europe with a storage account" > tf-check/main.tf + echo "----- generated main.tf -----" + cat tf-check/main.tf + + - uses: hashicorp/setup-terraform@v3 + if: steps.detect.outputs.present == 'true' + + - name: Validate generated HCL + if: steps.detect.outputs.present == 'true' + working-directory: terraform-translator/tf-check + run: | + terraform fmt -check + terraform init -backend=false + terraform validate diff --git a/terraform-translator/.gitignore b/terraform-translator/.gitignore index bc4f72a..b26f40d 100644 --- a/terraform-translator/.gitignore +++ b/terraform-translator/.gitignore @@ -2,3 +2,9 @@ __pycache__/ *.pyc .venv/ .env + +# Terraform / CI artifacts +tf-check/ +.terraform/ +*.tfstate +*.tfstate.* diff --git a/terraform-translator/README.md b/terraform-translator/README.md index 175f116..d2f23c1 100644 --- a/terraform-translator/README.md +++ b/terraform-translator/README.md @@ -71,6 +71,17 @@ curl -s localhost:5000/api/translate \ Then run `terraform fmt -check` / `terraform validate` on the output to confirm it's syntactically valid HCL. +### Continuous integration + +`.github/workflows/terraform-translator.yml` runs on every push/PR that touches +this directory: + +- **`offline-tests`** — always runs, no secrets required. +- **`live-check`** — runs only if an `ANTHROPIC_API_KEY` repository secret is + set (Settings → Secrets and variables → Actions). It generates Terraform via + the CLI and validates it with `terraform fmt`/`init`/`validate`. 
Without the + secret it is skipped, not failed. + ## How it works `translator.py` sends your description to Claude with a system prompt that makes From eb27cf87fcf37a89899412cb405f4fc2d24cb803 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 17 Jun 2026 18:16:06 +0000 Subject: [PATCH 4/8] Add hardened IaC Build app.py update bundle + deploy guide Drop-in replacement for the IaC-AIStudio app.py with auth/security hardening, plus security regression tests, GitLab CI, and a Cloud Shell deploy guide. Changes in app.py: - Pin the /api/generate model server-side (ignore client-supplied model) - OTP: 6-digit crypto codes, HMAC-stored, expiry + attempt lockout, single-use - Require a verified OTP before password reset and account creation - Rotate the session on login and OAuth callback (fixation) - Uniform recovery response (email enumeration); bump SESSION_VERSION 3->4 Internal infra/security docs were intentionally not committed. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01BAWVgYJc7xKU8iWuZJgEVk --- iac-aistudio-update/.gitlab-ci.yml | 19 + iac-aistudio-update/IMPLEMENTATION_GUIDE.md | 133 +++ iac-aistudio-update/README.md | 37 + iac-aistudio-update/app.py | 974 ++++++++++++++++++++ iac-aistudio-update/requirements-dev.txt | 15 + iac-aistudio-update/requirements.txt | 30 + iac-aistudio-update/tests/test_security.py | 119 +++ 7 files changed, 1327 insertions(+) create mode 100644 iac-aistudio-update/.gitlab-ci.yml create mode 100644 iac-aistudio-update/IMPLEMENTATION_GUIDE.md create mode 100644 iac-aistudio-update/README.md create mode 100644 iac-aistudio-update/app.py create mode 100644 iac-aistudio-update/requirements-dev.txt create mode 100644 iac-aistudio-update/requirements.txt create mode 100644 iac-aistudio-update/tests/test_security.py diff --git a/iac-aistudio-update/.gitlab-ci.yml b/iac-aistudio-update/.gitlab-ci.yml new file mode 100644 index 0000000..dba12d3 --- /dev/null +++ 
b/iac-aistudio-update/.gitlab-ci.yml @@ -0,0 +1,19 @@ +# GitLab CI — runs the security regression tests on every push/MR. +# No secrets required: the suite runs in LOCAL_DEV mode (SQLite, dummy secret). +stages: + - test + +security-tests: + stage: test + image: python:3.12-slim + variables: + LOCAL_DEV: "1" + FLASK_SECRET_KEY: "ci-test-secret" + before_script: + - pip install --no-cache-dir -r requirements-dev.txt + script: + # Run from the bundle root so `import app` and templates/ resolve. + - pytest -v + rules: + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + - if: $CI_COMMIT_BRANCH diff --git a/iac-aistudio-update/IMPLEMENTATION_GUIDE.md b/iac-aistudio-update/IMPLEMENTATION_GUIDE.md new file mode 100644 index 0000000..37b876a --- /dev/null +++ b/iac-aistudio-update/IMPLEMENTATION_GUIDE.md @@ -0,0 +1,133 @@ +# IaC Build — security hardening update (deploy guide) + +This bundle contains a hardened `app.py` plus tests. It is a **drop-in replacement +for `app.py`** in your existing deploy bundle — nothing else in your bundle +(Dockerfile, templates, `landing.html`, `app.html`, `requirements.txt`) needs to +change, and **no new environment variables or Azure resources are required** (all +new settings have safe built-in defaults). + +> Scope note: this pass covers the changes that are pure code and low-risk to a +> live deploy. It deliberately does **not** change the model/cost profile, touch +> your SSE streaming, or add infrastructure. See "Still open" at the bottom. + +--- + +## 1. What changed (and why) + +All changes are in `app.py`. Diff it against your current file before deploying +(`diff -u old/app.py app.py`). + +| Area | Change | Security-plan item | +|------|--------|--------------------| +| **LLM model pin** | `/api/generate` now uses a **server-pinned** model (`LLM_MODEL`, default `claude-sonnet-4-6`). The client-supplied `model` field is ignored. 
| P1-5 | +| **OTP strength** | Codes are now **6-digit, cryptographically random** (`secrets`), stored as an **HMAC** (never plaintext), with **server-side expiry** (10 min) and **lockout** after 5 wrong attempts. Single-use. | P0-1 | +| **Password-reset authorization** | `/recover_reset` now **requires a verified OTP in the session**. Previously the reset step did not check that the code had been entered, so anyone who set `recover_email` (a plain POST to `/recover_lookup`) could POST `/recover_reset` and change another account's password. This is the most important fix here. | (gap found during review) | +| **Registration authorization** | `/profile` (account creation) likewise requires a verified OTP, so accounts can't be created for an unverified email by skipping the code step. | P0-1 related | +| **Session fixation** | `login()` and the OAuth callback now rotate the session (`session.clear()` then re-issue) via a shared `_login_user()` helper. | P1-3 | +| **Email enumeration** | `/recover_lookup` returns a **uniform** "if that account exists, a code has been sent" response whether or not the email exists. | P2 (upgrades) | +| **Session version** | `SESSION_VERSION` bumped `3 → 4`, so all pre-deploy sessions are invalidated once (everyone is logged out a single time after deploy). | — | + +New env vars (all **optional**, defaults shown): + +| Env var | Default | Purpose | +|---------|---------|---------| +| `LLM_MODEL` | `claude-sonnet-4-6` | The pinned generation model. | +| `LLM_MAX_TOKENS` | `8000` | Hard clamp on output tokens (unchanged value). | +| `LLM_DEFAULT_TOKENS` | `4000` | Default when the client omits `max_tokens` (unchanged value). | +| `OTP_TTL_SECONDS` | `600` | OTP validity window. | +| `OTP_MAX_ATTEMPTS` | `5` | Wrong attempts before lockout. | + +--- + +## 2. 
Test before you ship (optional but recommended) + +From the bundle root (where `app.py` and `templates/` are): + +```bash +pip install -r requirements-dev.txt +LOCAL_DEV=1 FLASK_SECRET_KEY=test-secret pytest -v +``` + +This exercises the OTP logic, the reset-authorization fix, and the model pin +with no Azure dependencies and no network calls. + +--- + +## 3. Deploy (Azure Cloud Shell) + +This follows your existing build/deploy pattern. Replace only `app.py` in the +bundle, then rebuild and roll the revision. + +```bash +# 0) Note the current image so you can roll back instantly if needed. +PREV=$(az containerapp show -n ca-iacb-web -g rg-iacb-lean \ + --query "properties.template.containers[0].image" -o tsv) +echo "rollback image: $PREV" + +# 1) Unzip your current deploy bundle (adjust the zip name/path to yours). +rm -rf ~/iacb && mkdir -p ~/iacb && unzip -o ~/your-deploy-bundle.zip -d ~/iacb + +# 2) Drop in the updated app.py (from this update bundle). +cp /path/to/iac-aistudio-update/app.py ~/iacb/app.py +cd ~/iacb + +# 3) Build the image in ACR (Dockerfile base must already exist in ACR: +# FROM acriacblean.azurecr.io/python:3.12-slim-bookworm). +TAG=$(date +%Y%m%d-%H%M) +az acr build --registry acriacblean --image iacb-web:$TAG . + +# 4) Roll the Container App to the new image. +az containerapp update -n ca-iacb-web -g rg-iacb-lean \ + --image "acriacblean.azurecr.io/iacb-web:$TAG" +``` + +Optional — override any of the new settings (not required; defaults are baked in): + +```bash +az containerapp update -n ca-iacb-web -g rg-iacb-lean \ + --set-env-vars LLM_MODEL=claude-sonnet-4-6 OTP_TTL_SECONDS=600 OTP_MAX_ATTEMPTS=5 +``` + +--- + +## 4. Post-deploy verification + +```bash +# Health (also proves DB connectivity) +curl -fsS https://iac-aistudio.com/healthz && echo +``` + +Then click through once: + +1. **Register** a throwaway account → confirm the email code is now **6 digits**, + that a **wrong** code is rejected, and that completing the flow logs you in. 
+2. **Forgot password** for that account → confirm reset works **only after** + entering the code, and that entering an **unknown** email gives the same + "if that account exists…" message (no "no account found"). +3. **Reset bypass is closed**: a direct POST to `/recover_reset` without first + verifying a code must NOT change any password (the test asserts this too). +4. **Model pin**: in App Insights / container logs, confirm generations run on + `LLM_MODEL` regardless of what the client sends. +5. Everyone is logged out once (expected — `SESSION_VERSION` bump). + +--- + +## 5. Rollback (instant) + +```bash +az containerapp update -n ca-iacb-web -g rg-iacb-lean --image "$PREV" +``` + +--- + +## 6. Still open (not in this code drop) + +- **P0-2 — shared rate-limit store.** The limiter is still in-memory (per + replica). This needs an **Azure Cache for Redis** resource provisioned first, + then `storage_uri="redis://…"` on the `Limiter(...)`. Infrastructure, not a + pure code change — do it as a follow-up. +- **CSP `'unsafe-inline'`** on `script-src` (P1-4) — requires externalizing + inline JS to `/static/js/*.js` and moving to nonces. Large, deferred. +- **Central input-validation layer** (P1-5 cont.) — Pydantic/marshmallow on POST + bodies. Deferred. +- **Monthly LLM spend cap / WAF / image scanning** — roadmap items. diff --git a/iac-aistudio-update/README.md b/iac-aistudio-update/README.md new file mode 100644 index 0000000..aa454dc --- /dev/null +++ b/iac-aistudio-update/README.md @@ -0,0 +1,37 @@ +# IaC Build — security hardening update + +Drop-in replacement for `app.py` in the IaC-AIStudio deploy bundle, plus tests +and CI. Hardens authentication and pins the LLM model server-side, with no new +Azure resources and no required config changes. + +**Start here:** [`IMPLEMENTATION_GUIDE.md`](./IMPLEMENTATION_GUIDE.md) — what +changed, how to test, and the exact Cloud Shell deploy + rollback steps. 
+ +## Contents + +| File | What it is | +|------|------------| +| `app.py` | Hardened application (replaces your current `app.py`) | +| `IMPLEMENTATION_GUIDE.md` | Deploy guide + change log + verification checklist | +| `tests/test_security.py` | Security regression tests (OTP, reset auth, model pin) | +| `requirements-dev.txt` | Minimal deps to run the tests in `LOCAL_DEV` mode | +| `.gitlab-ci.yml` | Runs the tests on every push/MR (no secrets) | +| `requirements.txt` | Your runtime deps (unchanged copy, for reference) | + +## Quick test + +```bash +pip install -r requirements-dev.txt +LOCAL_DEV=1 FLASK_SECRET_KEY=test-secret pytest -v +``` + +## Summary of changes + +- **Model pin (P1-5):** `/api/generate` ignores the client `model`; pinned via `LLM_MODEL`. +- **OTP (P0-1):** 6-digit crypto codes, HMAC-stored, 10-min expiry, 5-attempt lockout, single-use. +- **Reset/registration authorization:** both now require a verified OTP (closes a password-reset bypass). +- **Session fixation (P1-3):** session rotates on login and OAuth callback. +- **Email enumeration:** uniform recovery response. + +See the guide for the full table and the items still left open (Redis limiter, +CSP nonces, validation layer). diff --git a/iac-aistudio-update/app.py b/iac-aistudio-update/app.py new file mode 100644 index 0000000..b1be92b --- /dev/null +++ b/iac-aistudio-update/app.py @@ -0,0 +1,974 @@ +""" +IaC Build — Flask app for Azure Container Apps deployment. + +Entra-only PostgreSQL authentication via the Container App's system-assigned +managed identity. Flask secret key is pulled from Azure Key Vault at startup. +PG-backed sessions, in-app rate limiting, security headers. 
+ +Environment variables (required in cloud mode unless noted otherwise): + KEY_VAULT_URI https://kv-iacb-lean.vault.azure.net/ + PG_HOST pg-iacb-lean.postgres.database.azure.com + PG_DB appdb + PG_USER ca-iacb-web (the MI's name, case-sensitive) + APPLICATIONINSIGHTS_CONNECTION_STRING (optional, enables AI export) + +Local-dev mode: + Set LOCAL_DEV=1 to use SQLite + an env-var-based secret key. Useful + for running on your laptop without Azure dependencies. +""" +import os +import requests +import json +import tempfile +import subprocess +import re +import html +import random +import string +import time +import hmac +import hashlib +import logging +from datetime import datetime, timedelta +from urllib.parse import quote_plus + +from flask import (Flask, render_template, request, session, redirect, + url_for, Response, make_response) +from flask_sqlalchemy import SQLAlchemy +from flask_session import Session +from flask_compress import Compress +from flask_wtf import CSRFProtect +from flask_limiter import Limiter +from flask_limiter.util import get_remote_address +from werkzeug.security import generate_password_hash, check_password_hash +import secrets +from authlib.integrations.flask_client import OAuth +from werkzeug.middleware.proxy_fix import ProxyFix +from sqlalchemy import event +from sqlalchemy.engine import Engine + +# ----- Environment ---------------------------------------------------------- + +LOCAL_DEV = os.environ.get("LOCAL_DEV", "").lower() in ("1", "true", "yes") +KV_URI = os.environ.get("KEY_VAULT_URI") +PG_HOST = os.environ.get("PG_HOST") +PG_DB = os.environ.get("PG_DB", "appdb") +PG_USER = os.environ.get("PG_USER") + +OSSRDBMS_SCOPE = "https://ossrdbms-aad.database.windows.net/.default" + +logging.basicConfig(level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s %(message)s") +log = logging.getLogger("iacb") + +# ----- Azure Identity ------------------------------------------------------- + +_cred = None +if not LOCAL_DEV: + from azure.identity import 
DefaultAzureCredential + from azure.keyvault.secrets import SecretClient + _cred = DefaultAzureCredential(exclude_interactive_browser_credential=True) + +# ----- Application Insights (auto-instrumentation) ------------------------- +# Configures the OTLP exporter to Azure Monitor and auto-instruments Flask, +# requests, urllib, logging, and SQLAlchemy. No code changes elsewhere needed. +# Must run BEFORE Flask app is created so Flask routes get instrumented. +if not LOCAL_DEV and os.environ.get("APPLICATIONINSIGHTS_CONNECTION_STRING"): + try: + from azure.monitor.opentelemetry import configure_azure_monitor + configure_azure_monitor( + logger_name="iacb", + disable_offline_storage=True, # ACA filesystem is ephemeral + ) + log.info("Application Insights instrumentation configured") + except Exception as e: + # Never let telemetry init crash the app on cold-start. + log.warning("App Insights setup failed (continuing without): %s", e) + +# Quiet Azure SDK HTTP logging: at root INFO it dumps every telemetry POST +# (URL, headers, status) to stdout, which then floods ContainerAppConsoleLogs_CL +# and App Insights traces. App's own "iacb" logger stays at INFO. +for _n in ("azure", "azure.core.pipeline.policies.http_logging_policy", + "azure.monitor.opentelemetry.exporter", "azure.identity"): + logging.getLogger(_n).setLevel(logging.WARNING) + +# ----- Flask app ------------------------------------------------------------ + +app = Flask(__name__) +Compress(app) # gzip/br responses (HTML/CSS/JS/JSON) to cut transfer size +csrf = CSRFProtect(app) # protects all HTML form POSTs; JSON proxy + logout exempted below + +# Trust ACA's TLS termination — sets request.is_secure correctly behind the +# Container Apps edge load balancer so secure cookies and url_for(_scheme) +# work as expected. 
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)

# ----- Secret key: Key Vault in cloud, env var local -----------------------

if LOCAL_DEV:
    # Local runs fall back to a fixed, publicly known key unless
    # FLASK_SECRET_KEY is exported; never deploy with LOCAL_DEV set.
    app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-only-do-not-ship")
    log.warning("LOCAL_DEV=1 — using insecure dev secret key")
else:
    if not KV_URI:
        raise RuntimeError("KEY_VAULT_URI must be set in cloud mode")
    # _sc is reused further down for the LLM key and the OAuth secrets, so it
    # must be created before those lookups run.
    _sc = SecretClient(vault_url=KV_URI, credential=_cred)
    # Stable signing key from Key Vault. Keeps sessions and CSRF tokens valid
    # across redeploys/restarts instead of logging every user out on each deploy.
    app.secret_key = _sc.get_secret("flask-secret-key").value
    log.info("Loaded stable session secret from Key Vault")

# ----- LLM API key for the server-side /api/generate proxy ------------------
# Fetched once at startup (KV in cloud, env var locally). The route returns 503
# if the key is absent, so the app still boots before the secret is created.
_LLM_KEY = os.environ.get("LLM_API_KEY")
if not LOCAL_DEV:
    try:
        _LLM_KEY = _sc.get_secret("llm-api-key").value
    except Exception as _e:
        # Deliberate best-effort: a missing secret must not block start-up.
        # Any LLM_API_KEY env value read above stays in place as a fallback.
        log.warning("LLM key not in Key Vault yet: %s", _e)

# SECURITY (P1-5): pin the generation model and output cap server-side.
# Previously /api/generate took "model" straight from the client payload
# (only max_tokens was clamped), so a caller could request an arbitrary, more
# expensive model. These are now fixed here and overridable ONLY via env vars
# on the Container App — never by the request body.
LLM_MODEL = os.environ.get("LLM_MODEL", "claude-sonnet-4-6")
LLM_MAX_TOKENS = int(os.environ.get("LLM_MAX_TOKENS", "8000"))
# The default is clamped to the cap so a misconfigured LLM_DEFAULT_TOKENS can
# never exceed LLM_MAX_TOKENS for requests that omit max_tokens.
LLM_DEFAULT_TOKENS = min(
    int(os.environ.get("LLM_DEFAULT_TOKENS", "4000")),
    LLM_MAX_TOKENS,
)

# ----- OAuth social login (Google + GitHub) --------------------------------
# Client IDs are public; secrets come from Key Vault (same vault as llm-api-key).
# TWO GitHub apps (one per domain) since a classic OAuth App locks to one host.
GOOGLE_CLIENT_ID = os.environ.get("GOOGLE_CLIENT_ID", "810180534624-mm6n7c8a50k2r98pmgrnksn983gcp05m.apps.googleusercontent.com")
GITHUB_CLIENT_ID_IAC = os.environ.get("GITHUB_CLIENT_ID_IAC", "Ov23litRKnOeOcC5YIBS")
GITHUB_CLIENT_ID_ONLINE = os.environ.get("GITHUB_CLIENT_ID_ONLINE", "Ov23ctEWg1GUBxzQtpdU")

# Secret name -> secret value, for every OAuth secret that could be read.
# Stays empty in LOCAL_DEV, which leaves social login switched off.
_OAUTH_SECRETS = {}
if not LOCAL_DEV:
    for _sn in ("google-oauth-secret", "github-oauth-secret-iac", "github-oauth-secret-online"):
        try:
            _OAUTH_SECRETS[_sn] = _sc.get_secret(_sn).value
        except Exception as _e:
            # Best-effort: a missing secret only disables that one provider.
            log.warning("OAuth secret %s not loaded: %s", _sn, _e)

oauth = OAuth(app)
if _OAUTH_SECRETS.get("google-oauth-secret"):
    oauth.register(
        name="google",
        client_id=GOOGLE_CLIENT_ID,
        client_secret=_OAUTH_SECRETS["google-oauth-secret"],
        server_metadata_url="https://accounts.google.com/.well-known/openid-configuration",
        client_kwargs={"scope": "openid email profile"},
    )
for _gk, _gid, _gname in (("github-oauth-secret-iac", GITHUB_CLIENT_ID_IAC, "github_iac"),
                          ("github-oauth-secret-online", GITHUB_CLIENT_ID_ONLINE, "github_online")):
    if _OAUTH_SECRETS.get(_gk):
        oauth.register(
            name=_gname,
            client_id=_gid,
            client_secret=_OAUTH_SECRETS[_gk],
            access_token_url="https://github.com/login/oauth/access_token",
            authorize_url="https://github.com/login/oauth/authorize",
            api_base_url="https://api.github.com/",
            client_kwargs={"scope": "read:user user:email"},
        )

# Uses the same truthiness test that gates registration above, so a secret
# that loaded with an empty value does not report OAuth as enabled.
OAUTH_ENABLED = any(_OAUTH_SECRETS.values())


def _github_client_name():
    """Return the name of the GitHub OAuth client for the current request host.

    Returns:
        "github_online" when the request host is online-shield.com or one of
        its subdomains, otherwise "github_iac".
    """
    # Drop any ":port" suffix and a leading "www." before comparing.
    host = (request.host or "").lower().split(":")[0]
    if host.startswith("www."):
        host = host[4:]
    # Exact / subdomain match rather than a substring test: the Host header is
    # client-influenced (ProxyFix trusts X-Forwarded-Host), and a substring
    # check would also accept e.g. "online-shield.com.evil.test".
    if host == "online-shield.com" or host.endswith(".online-shield.com"):
        return "github_online"
    return "github_iac"


# ----- Session policy -------------------------------------------------------

app.config["PERMANENT_SESSION_LIFETIME"] = timedelta(minutes=30)
app.config["IDLE_SECONDS"] = 1800
app.config["SESSION_COOKIE_HTTPONLY"] = True
app.config["SESSION_COOKIE_SAMESITE"] = "Lax"
# Secure cookies only in cloud (HTTPS); LOCAL_DEV runs HTTP.
app.config["SESSION_COOKIE_SECURE"] = not LOCAL_DEV

# Bump SESSION_VERSION to invalidate ALL existing sessions on next deploy.
# Bumped 3 -> 4 with the auth hardening (OTP/session-rotation) so no pre-deploy
# session lingers in a half-migrated state. Users are logged out once.
SESSION_VERSION = 4

# ----- Database -------------------------------------------------------------

if LOCAL_DEV:
    app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///users.db"
else:
    # Fail fast with a clear message, mirroring the KEY_VAULT_URI check:
    # quote_plus(None) raises an opaque TypeError and a missing host would
    # otherwise produce a "...@None/..." URL that only fails at connect time.
    _missing_pg = [_n for _n, _v in (("PG_HOST", PG_HOST), ("PG_USER", PG_USER)) if not _v]
    if _missing_pg:
        raise RuntimeError(f"{', '.join(_missing_pg)} must be set in cloud mode")
    # The URL carries no password; one is supplied per-connection by the
    # do_connect event listener below.
    app.config["SQLALCHEMY_DATABASE_URI"] = (
        f"postgresql+psycopg://{quote_plus(PG_USER)}@{PG_HOST}/{PG_DB}?sslmode=require"
    )
    app.config["SQLALCHEMY_ENGINE_OPTIONS"] = {
        "pool_pre_ping": True,
        "pool_recycle": 1800,  # < ~1h token TTL
    }
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False

db = SQLAlchemy(app)

# Per-connection Entra token injection. Fires when SQLAlchemy opens a new
# physical connection (not per-query). Only affects PostgreSQL dialect, so
# local SQLite dev mode is unaffected.
@event.listens_for(Engine, "do_connect")
def _provide_token(dialect, conn_rec, cargs, cparams):
    """Set a freshly obtained access token as the connection password.

    Does nothing in LOCAL_DEV or for any dialect other than PostgreSQL.
    """
    if LOCAL_DEV or dialect.name != "postgresql":
        return
    token = _cred.get_token(OSSRDBMS_SCOPE)
    cparams["password"] = token.token


# ----- PG-backed sessions (cloud only) -------------------------------------

if not LOCAL_DEV:
    app.config.update(
        SESSION_TYPE="sqlalchemy",
        SESSION_SQLALCHEMY=db,
    )
    Session(app)  # auto-creates a 'sessions' table on first request


# ----- User model -----------------------------------------------------------

class User(db.Model):
    """Account row: unique e-mail, password hash, name and phone."""

    id = db.Column(db.Integer, primary_key=True)
    email = db.Column(db.String(120), nullable=False, unique=True)
    password_hash = db.Column(db.String(256), nullable=False)
    first_name = db.Column(db.String(50), nullable=False)
    last_name = db.Column(db.String(50), nullable=False)
    phone = db.Column(db.String(20), nullable=False)


class GenLog(db.Model):
    """Row in "gen_log": a user id plus a timestamp defaulting to utcnow."""

    __tablename__ = "gen_log"

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, nullable=False, index=True)
    ts = db.Column(db.DateTime, nullable=False, index=True, default=datetime.utcnow)


# ----- Access control -------------------------------------------------------
# Admins (comma-separated emails in ADMIN_EMAILS) may run real AI generation.
# Everyone else is locked to DEMO mode so test users cannot spend API credits.
# Fail-closed: if ADMIN_EMAILS is unset, NO account can run live generation.
ADMIN_EMAILS = {
    addr.lower()
    for addr in map(str.strip, os.environ.get("ADMIN_EMAILS", "").split(","))
    if addr
}


def _is_admin(user):
    """Return True when *user* has an e-mail that is listed in ADMIN_EMAILS."""
    if not user or not user.email:
        return False
    return user.email.lower() in ADMIN_EMAILS


with app.app_context():
    # Existing tables are left alone; only missing ones are created, which
    # happens on the first run after a deploy.
    db.create_all()

# ----- Rate limiter ---------------------------------------------------------

limiter = Limiter(
    key_func=get_remote_address,
    app=app,
    storage_uri="memory://",  # single-replica prototype; OK per docx
    default_limits=[],
)

# ----- Security & cache headers --------------------------------------------

# Content-Security-Policy.
# HONEST CAVEAT: this is a PERMISSIVE CSP, not a strict one. The existing
# HTML has 110+ inline event handlers (onclick=..., onload=..., etc.) and 11
# inline