diff --git a/README.md b/README.md index 1168c51..09f05e1 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,7 @@ The validator returns `fail` for issues such as: Runnable examples live under `examples/`: - `examples/pass/saas-churn` +- `examples/pass/finance-revenue` - `examples/fail/schema-drift` - `examples/fail/stale-data` - `examples/fail/broken-metric` diff --git a/examples/pass/finance-revenue/contract.yaml b/examples/pass/finance-revenue/contract.yaml new file mode 100644 index 0000000..432f644 --- /dev/null +++ b/examples/pass/finance-revenue/contract.yaml @@ -0,0 +1,63 @@ +version: "0.1" +dataset: transactions +schema: + - name: transaction_id + type: string + nullable: false + classification: internal + - name: customer_id + type: string + nullable: false + classification: internal + - name: invoice_id + type: string + nullable: false + classification: internal + - name: amount + type: number + nullable: false + classification: confidential + - name: currency + type: string + nullable: false + classification: internal + - name: product_category + type: string + nullable: true + classification: public + - name: region + type: string + nullable: true + classification: public + - name: transaction_date + type: date + nullable: false + classification: internal + - name: updated_at + type: timestamp + nullable: false + classification: internal +quality_checks: + - name: transaction_id_not_null + type: not_null + column: transaction_id + - name: transaction_id_unique + type: unique + column: transaction_id + - name: customer_id_not_null + type: not_null + column: customer_id + - name: invoice_id_not_null + type: not_null + column: invoice_id + - name: currency_values + type: accepted_values + column: currency + values: ["USD", "EUR", "GBP"] + - name: non_negative_amount + type: min + column: amount + value: 0 + - name: enough_rows + type: row_count_min + value: 5 diff --git a/examples/pass/finance-revenue/data/transactions.csv b/examples/pass/finance-revenue/data/transactions.csv new file mode 100644 index 0000000..7246261 --- /dev/null +++ b/examples/pass/finance-revenue/data/transactions.csv @@ -0,0 +1,9 @@ +transaction_id,customer_id,invoice_id,amount,currency,product_category,region,transaction_date,updated_at +txn_001,cust_a,inv_101,1500.00,USD,software,NA,2026-06-01,2026-06-16T08:00:00Z +txn_002,cust_b,inv_102,2750.00,USD,consulting,EMEA,2026-06-02,2026-06-16T08:30:00Z +txn_003,cust_a,inv_103,950.00,EUR,software,NA,2026-06-05,2026-06-16T09:00:00Z +txn_004,cust_c,inv_104,5000.00,USD,subscription,APAC,2026-06-07,2026-06-16T09:30:00Z +txn_005,cust_d,inv_105,3200.00,GBP,consulting,EMEA,2026-06-10,2026-06-16T10:00:00Z +txn_006,cust_b,inv_106,1800.00,EUR,software,EMEA,2026-06-12,2026-06-16T10:30:00Z +txn_007,cust_e,inv_107,4200.00,USD,subscription,NA,2026-06-14,2026-06-16T11:00:00Z +txn_008,cust_f,inv_108,1100.00,USD,software,LATAM,2026-06-15,2026-06-16T11:30:00Z diff --git a/examples/pass/finance-revenue/dataproduct.yaml b/examples/pass/finance-revenue/dataproduct.yaml new file mode 100644 index 0000000..d945927 --- /dev/null +++ b/examples/pass/finance-revenue/dataproduct.yaml @@ -0,0 +1,18 @@ +id: finance_revenue +name: Finance Revenue Data Product +domain: finance +version: "1.0.0" +description: Trusted net revenue metric for financial reporting and forecasting. +owner: + name: Finance Analytics + email: finance-analytics@example.com + team: Finance +datasets: + - id: transactions + path: data/transactions.csv + format: csv + table: transactions + freshness: + column: updated_at + max_age_hours: 48 + reference_time: "2026-06-17T00:00:00Z" diff --git a/examples/pass/finance-revenue/policy.yaml b/examples/pass/finance-revenue/policy.yaml new file mode 100644 index 0000000..abf7ee4 --- /dev/null +++ b/examples/pass/finance-revenue/policy.yaml @@ -0,0 +1,14 @@ +allowed_purposes: + - financial_reporting + - revenue_forecasting + - agent_context +access_notes: Use aggregated net revenue metrics only; do not expose transaction-level rows to agents. +sensitive_fields: + - customer_id + - invoice_id + - amount +agent_constraints: + - Agents may use the approved net_revenue metric only. + - Agents must include freshness and quality status with answers. +bi_constraints: + - BI dashboards must use the semantic net_revenue definition. diff --git a/examples/pass/finance-revenue/semantic.yaml b/examples/pass/finance-revenue/semantic.yaml new file mode 100644 index 0000000..d41b7c2 --- /dev/null +++ b/examples/pass/finance-revenue/semantic.yaml @@ -0,0 +1,21 @@ +metrics: + - name: net_revenue + label: Net Revenue + description: Sum of all transaction amounts in the reporting period. + dataset: transactions + expression: "sum(amount)" + grain: day + dimensions: [product_category, region] +dimensions: + - name: product_category + dataset: transactions + column: product_category + type: string + - name: region + dataset: transactions + column: region + type: string +entities: + - name: customer + dataset: transactions + key: customer_id diff --git a/pyproject.toml b/pyproject.toml index 588493f..afa94af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ build-backend = "setuptools.build_meta" where = ["src"] [tool.setuptools.package-data] -dataproduct_kit = ["templates/saas_churn/**/*"] +dataproduct_kit = ["templates/saas_churn/**/*", "templates/finance_revenue/**/*"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/src/dataproduct_kit/templates.py b/src/dataproduct_kit/templates.py index d3a4bc8..390c9ad 100644 --- a/src/dataproduct_kit/templates.py +++ b/src/dataproduct_kit/templates.py @@ -5,10 +5,17 @@ from pathlib import Path +TEMPLATE_MAP = { + "saas-churn": "saas_churn", + "finance-revenue": "finance_revenue", +} + + def scaffold_template(destination: Path, template: str) -> None: - if template != "saas-churn": + dir_name = TEMPLATE_MAP.get(template) + if dir_name is None: raise ValueError(f"unknown template '{template}'") - source = resources.files("dataproduct_kit") / "templates" / "saas_churn" + source = resources.files("dataproduct_kit") / "templates" / dir_name destination.mkdir(parents=True, exist_ok=True) for item in source.rglob("*"): relative = item.relative_to(source) diff --git a/src/dataproduct_kit/templates/finance_revenue/contract.yaml b/src/dataproduct_kit/templates/finance_revenue/contract.yaml new file mode 100644 index 0000000..432f644 --- /dev/null +++ b/src/dataproduct_kit/templates/finance_revenue/contract.yaml @@ -0,0 +1,63 @@ +version: "0.1" +dataset: transactions +schema: + - name: transaction_id + type: string + nullable: false + classification: internal + - name: customer_id + type: string + nullable: false + classification: internal + - name: invoice_id + type: string + nullable: false + classification: internal + - name: amount + type: number + nullable: false + classification: confidential + - name: currency + type: string + nullable: false + classification: internal + - name: product_category + type: string + nullable: true + classification: public + - name: region + type: string + nullable: true + classification: public + - name: transaction_date + type: date + nullable: false + classification: internal + - name: updated_at + type: timestamp + nullable: false + classification: internal +quality_checks: + - name: transaction_id_not_null + type: not_null + column: transaction_id + - name: transaction_id_unique + type: unique + column: transaction_id + - name: customer_id_not_null + type: not_null + column: customer_id + - name: invoice_id_not_null + type: not_null + column: invoice_id + - name: currency_values + type: accepted_values + column: currency + values: ["USD", "EUR", "GBP"] + - name: non_negative_amount + type: min + column: amount + value: 0 + - name: enough_rows + type: row_count_min + value: 5 diff --git a/src/dataproduct_kit/templates/finance_revenue/data/transactions.csv b/src/dataproduct_kit/templates/finance_revenue/data/transactions.csv new file mode 100644 index 0000000..7246261 --- /dev/null +++ b/src/dataproduct_kit/templates/finance_revenue/data/transactions.csv @@ -0,0 +1,9 @@ +transaction_id,customer_id,invoice_id,amount,currency,product_category,region,transaction_date,updated_at +txn_001,cust_a,inv_101,1500.00,USD,software,NA,2026-06-01,2026-06-16T08:00:00Z +txn_002,cust_b,inv_102,2750.00,USD,consulting,EMEA,2026-06-02,2026-06-16T08:30:00Z +txn_003,cust_a,inv_103,950.00,EUR,software,NA,2026-06-05,2026-06-16T09:00:00Z +txn_004,cust_c,inv_104,5000.00,USD,subscription,APAC,2026-06-07,2026-06-16T09:30:00Z +txn_005,cust_d,inv_105,3200.00,GBP,consulting,EMEA,2026-06-10,2026-06-16T10:00:00Z +txn_006,cust_b,inv_106,1800.00,EUR,software,EMEA,2026-06-12,2026-06-16T10:30:00Z +txn_007,cust_e,inv_107,4200.00,USD,subscription,NA,2026-06-14,2026-06-16T11:00:00Z +txn_008,cust_f,inv_108,1100.00,USD,software,LATAM,2026-06-15,2026-06-16T11:30:00Z diff --git a/src/dataproduct_kit/templates/finance_revenue/dataproduct.yaml b/src/dataproduct_kit/templates/finance_revenue/dataproduct.yaml new file mode 100644 index 0000000..d945927 --- /dev/null +++ b/src/dataproduct_kit/templates/finance_revenue/dataproduct.yaml @@ -0,0 +1,18 @@ +id: finance_revenue +name: Finance Revenue Data Product +domain: finance +version: "1.0.0" +description: Trusted net revenue metric for financial reporting and forecasting. +owner: + name: Finance Analytics + email: finance-analytics@example.com + team: Finance +datasets: + - id: transactions + path: data/transactions.csv + format: csv + table: transactions + freshness: + column: updated_at + max_age_hours: 48 + reference_time: "2026-06-17T00:00:00Z" diff --git a/src/dataproduct_kit/templates/finance_revenue/policy.yaml b/src/dataproduct_kit/templates/finance_revenue/policy.yaml new file mode 100644 index 0000000..abf7ee4 --- /dev/null +++ b/src/dataproduct_kit/templates/finance_revenue/policy.yaml @@ -0,0 +1,14 @@ +allowed_purposes: + - financial_reporting + - revenue_forecasting + - agent_context +access_notes: Use aggregated net revenue metrics only; do not expose transaction-level rows to agents. +sensitive_fields: + - customer_id + - invoice_id + - amount +agent_constraints: + - Agents may use the approved net_revenue metric only. + - Agents must include freshness and quality status with answers. +bi_constraints: + - BI dashboards must use the semantic net_revenue definition. diff --git a/src/dataproduct_kit/templates/finance_revenue/semantic.yaml b/src/dataproduct_kit/templates/finance_revenue/semantic.yaml new file mode 100644 index 0000000..d41b7c2 --- /dev/null +++ b/src/dataproduct_kit/templates/finance_revenue/semantic.yaml @@ -0,0 +1,21 @@ +metrics: + - name: net_revenue + label: Net Revenue + description: Sum of all transaction amounts in the reporting period. + dataset: transactions + expression: "sum(amount)" + grain: day + dimensions: [product_category, region] +dimensions: + - name: product_category + dataset: transactions + column: product_category + type: string + - name: region + dataset: transactions + column: region + type: string +entities: + - name: customer + dataset: transactions + key: customer_id diff --git a/tests/test_cli.py b/tests/test_cli.py index 5a5b615..802bc91 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -109,6 +109,29 @@ def test_cli_validate_fail_on_warn_returns_nonzero_for_warning_project(tmp_path: assert "freshness.missing" in strict_result.output +def test_cli_init_finance_revenue_template(tmp_path: Path) -> None: + from dataproduct_kit.cli import app + + runner = CliRunner() + project_dir = tmp_path / "finance-demo" + + init_result = runner.invoke(app, ["init", str(project_dir), "--template", "finance-revenue"]) + assert init_result.exit_code == 0, init_result.output + assert (project_dir / "dataproduct.yaml").exists() + assert (project_dir / "data/transactions.csv").exists() + + validate_result = runner.invoke(app, ["validate", str(project_dir)]) + assert validate_result.exit_code == 0, validate_result.output + assert "status: pass" in validate_result.output + + context_result = runner.invoke( + app, + ["context", str(project_dir), "--metric", "net_revenue", "--format", "json"], + ) + assert context_result.exit_code == 0, context_result.output + assert json.loads(context_result.output)["metric"]["name"] == "net_revenue" + + def test_cli_schema_prints_single_schema_and_writes_all(tmp_path: Path) -> None: from dataproduct_kit.cli import app diff --git a/tests/test_examples.py b/tests/test_examples.py index 6a47dda..7b496eb 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -7,13 +7,17 @@ ROOT = Path(__file__).resolve().parents[1] -def test_passing_example_validates() -> None: +PASSING_EXAMPLES = ["saas-churn", "finance-revenue"] + + +def test_passing_examples_validate() -> None: from dataproduct_kit.cli import app - result = CliRunner().invoke(app, ["validate", str(ROOT / "examples/pass/saas-churn")]) + for example in PASSING_EXAMPLES: + result = CliRunner().invoke(app, ["validate", str(ROOT / f"examples/pass/{example}")]) - assert result.exit_code == 0, result.output - assert "status: pass" in result.output + assert result.exit_code == 0, f"{example}: {result.output}" + assert "status: pass" in result.output, f"{example}: {result.output}" def test_failing_examples_fail_for_expected_primary_reason() -> None: