From 4880f80beefcfefd441e948c372630c43c6c76f8 Mon Sep 17 00:00:00 2001 From: bharat941 Date: Tue, 16 Jun 2026 11:58:00 +0530 Subject: [PATCH] feat(migration): add oak index (oakIndex) pattern support Adds the oakIndex pattern to the migration skill for handling Oak index BPA findings (index.rule.violation, standard.index.modification) via the Adobe aem-cs-source-migration-index-converter tool. Co-Authored-By: Claude Opus 4.7 --- .../best-practices/references/oak-index.md | 134 ++++++++++++++++++ .../cloud-service/skills/migration/SKILL.md | 5 +- .../migration/scripts/bpa-local-parser.js | 58 +++++++- .../scripts/fixtures/minimal-oakindex-bpa.csv | 4 + .../scripts/unified-collection-reader.js | 43 +++++- 5 files changed, 234 insertions(+), 10 deletions(-) create mode 100644 plugins/aem/cloud-service/skills/best-practices/references/oak-index.md create mode 100644 plugins/aem/cloud-service/skills/migration/scripts/fixtures/minimal-oakindex-bpa.csv diff --git a/plugins/aem/cloud-service/skills/best-practices/references/oak-index.md b/plugins/aem/cloud-service/skills/best-practices/references/oak-index.md new file mode 100644 index 00000000..b2875a05 --- /dev/null +++ b/plugins/aem/cloud-service/skills/best-practices/references/oak-index.md @@ -0,0 +1,134 @@ +# Oak Index Migration Pattern + +> **Beta Skill**: This skill is in beta and under active development. +> Results should be reviewed carefully before use in production. +> Report issues at https://github.com/adobe/skills/issues + +Rewrites legacy `_oak_index/*.xml` definitions to AEM as a Cloud Service compatible Oak index definitions by invoking Adobe's official **`@adobe/aem-cs-source-migration-index-converter`** CLI tool. Covers BPA subtypes `index.rule.violation` and `standard.index.modification` (category **OID**). + +**Before transformation steps:** [aem-cloud-service-pattern-prerequisites.md](aem-cloud-service-pattern-prerequisites.md). 
+ +**Scope:** +- Custom Oak index definitions under `ui.apps/.../_oak_index/` +- OOTB index modifications (e.g. `damAssetLucene` customized in place) +- Lucene type indexes; property/ordered indexes are passed through unchanged by the tool + +**Out of scope (skill stops, agent reports to user):** +- Indexes outside `_oak_index/` (e.g. JSON definitions deployed at runtime) +- `nt:base` lucene indexes (the tool refuses to convert these) + +## How the skill runs + +The skill does **not** re-implement transformation rules. It invokes the Adobe-maintained tool, captures the output, shows the diff, and validates. + +### Step 1 — Detect + +- Locate `_oak_index/` directories under `ui.apps/src/main/content/jcr_root/`. If none exist, stop and report to user. +- Determine `aemVersion` for the config: the tool maps this value directly to a bundled baseline file (`.content_<aemVersion>.xml`). Valid values are `63`, `64`, `65`, and `Cloud_Services`. Use `Cloud_Services` for AEM as a Cloud Service / SDK projects (i.e. when `pom.xml` contains `aem.sdk.api` or `aem.sdk.api.version`). Use `65` for AEM 6.5, `64` for AEM 6.4, `63` for AEM 6.3. + +### Step 2 — Invoke Index Converter + +The package has no `bin` entry — it must be run via its executor script. Install to a temp directory and invoke with a `config.yaml`. In the commands below, replace `<project-root>` with the absolute path of the project being migrated and `<timestamp>` with any unique suffix: + +```bash +# 1. Install to a temp working directory (no project pollution) +WORK_DIR="/tmp/oak-index-tool-<timestamp>" +mkdir -p "$WORK_DIR" +cd "$WORK_DIR" +npm install @adobe/aem-cs-source-migration-index-converter + +# 2. Write config.yaml (must be in cwd when running the executor) +cat > config.yaml << 'YAML' +indexConverter: + ensureIndexDefinitionContentPackageJcrRootPath: + ensureIndexDefinitionConfigPackageJcrRootPath: + aemVersion: Cloud_Services + customOakIndexDirectoryPath: <project-root>/ui.apps/src/main/content/jcr_root/_oak_index + filterXMLPath: <project-root>/ui.apps/src/main/content/META-INF/vault/filter.xml +YAML + +# 3. 
Run the executor +node node_modules/@adobe/aem-cs-source-migration-index-converter/executors/index-converter.js +``` + +The tool writes output to `./target/index/` under the working directory: +- `./target/index/.content.xml` — the converted oak index XML +- `./target/index/filter.xml` — updated filter.xml with renamed index paths +- `./target/index/index-converter-report.md` — conversion report + +It does **not** modify the input. + +### Step 3 — Show diff in IDE + +Diff the input vs. tool output (`<project-root>` is the absolute path of the project being migrated). Show the diff to the user; do not auto-apply: + +```bash +diff <project-root>/ui.apps/src/main/content/jcr_root/_oak_index/.content.xml \ + $WORK_DIR/target/index/.content.xml +diff <project-root>/ui.apps/src/main/content/META-INF/vault/filter.xml \ + $WORK_DIR/target/index/filter.xml +``` + +Also show `$WORK_DIR/target/index/index-converter-report.md` — it lists which indexes were converted and which need manual migration. + +### Step 4 — Apply (after user confirms) + +If the user accepts: +```bash +cp $WORK_DIR/target/index/.content.xml \ + <project-root>/ui.apps/src/main/content/jcr_root/_oak_index/.content.xml +cp $WORK_DIR/target/index/filter.xml \ + <project-root>/ui.apps/src/main/content/META-INF/vault/filter.xml +``` +- Stage for commit + +### Step 5 — Validate + +Run validation in this order, gate on each: + +```bash +# Compile (catches XML / filter.xml errors) +mvn -pl ui.apps clean install + +# Cloud-readiness analyser (if pom has aemanalyser-maven-plugin) +mvn -pl all aem-analyser:project-analyse +``` + +Report PASS or FAIL with file:line evidence on FAIL. 
+ +### Step 6 — Telemetry (when enabled) + +Emit events through the migration skill's helper: +- `skill.invoked` (pattern=oakIndex) +- `tool.run` (tool=index-converter, durationMs, exitCode) +- `pattern.batch.processed` (count of indexes transformed) +- `validation.run` (passed=true|false) + +## Naming conventions produced by the tool + +The Index Converter applies these naming rules (these are the tool's behavior, documented here for reference; the skill does **not** re-implement them): + +- **OOTB extension:** `<indexName>-<productVersion>-custom-1` (e.g. `damAssetLucene-8-custom-1`) +- **New custom index:** `<indexName>-custom-1` (e.g. `wkndId-custom-1`) +- **Already conforming:** passed through unchanged + +## What the skill does NOT do + +- Does not rewrite XML by hand using rules encoded in this file +- Does not decide whether to use Lucene vs Elasticsearch +- Does not modify queries that depend on the renamed indexes (separate task) +- Does not deploy to a running AEM instance + +## Verification on `aem-guides-wknd-legacy` + +Reference test project: `aem-guides-wknd-legacy` contains 3 real OID violations: +- `damAssetLucene` modified in place (`standard.index.modification`) +- `wkndId` custom index without `-custom-` suffix (`index.rule.violation`) +- `wkndTerminationDate` custom index without `-custom-` suffix (`index.rule.violation`) + +Expected after running this skill: +- `damAssetLucene` → `<indexName>-<productVersion>-custom-1` — the tool determines the exact name from the bundled Cloud Services baseline XML. With `aemVersion: Cloud_Services` and current tool version (0.2.3) this produces `damAssetStateIndex-3-custom-1`. The [reference branch `code/oid`](https://github.com/adobe/aem-guides-wknd-legacy/tree/code/oid) (created 2021) shows `damAssetLucene-6-custom-1` because that was the OOTB name at that time — both are correct for their respective baseline versions. The content is the full merged OOTB definition plus the customer's delta properties. 
+- `wkndId` — **not converted automatically** (property type, not lucene); must be migrated manually per tool report +- `wkndTerminationDate` — **not converted automatically** (ordered type, not lucene); must be migrated manually per tool report +- `mvn -pl ui.apps clean install` passes +- `aemanalyser-maven-plugin` reports no OID-class errors diff --git a/plugins/aem/cloud-service/skills/migration/SKILL.md b/plugins/aem/cloud-service/skills/migration/SKILL.md index f066d559..a26a126b 100644 --- a/plugins/aem/cloud-service/skills/migration/SKILL.md +++ b/plugins/aem/cloud-service/skills/migration/SKILL.md @@ -23,6 +23,7 @@ This skill is **orchestration**: BPA data, CAM/MCP, **one pattern per session**, | **Just a few files** | *"Migrate **scheduler** in `core/.../MyJob.java`"* | Manual flow: no BPA required | | **OSGi → Cloud Manager** | *"**Scan my config files and create Cloud Manager environment secrets or variables.**"* | Agent **auto-reads** [references/osgi-cfg-json-cloud-manager.md](references/osgi-cfg-json-cloud-manager.md) (full Adobe-aligned rules inlined there); no BPA pattern id | | **HTL lint warnings** | *"Fix **htlLint** issues in `ui.apps`"* | Proactive discovery via `rg` → fix per reference module | +| **Oak index findings (OID)** [BETA] | *"Fix **oakIndex** findings using `./path/to/bpa.csv`"* — covers `index.rule.violation` and `standard.index.modification` | Invokes Adobe `@adobe/aem-cs-source-migration-index-converter` per [{best-practices}/references/oak-index.md]({best-practices}/references/oak-index.md); shows diff in IDE; validates with `mvn` and `aemanalyser` | **Starter prompts (copy-paste):** @@ -204,7 +205,7 @@ If the user asks to fix everything or BPA mixes patterns, **ask which pattern fi If the request is **OSGi configs → Cloud Manager** (see **Required delegation**, branch A), do **not** map to a BPA pattern — follow [references/osgi-cfg-json-cloud-manager.md](references/osgi-cfg-json-cloud-manager.md) instead. 
-Otherwise map the request to a pattern id: `scheduler`, `resourceChangeListener`, `replication`, `eventListener`, `eventHandler`, `assetApi`, `htlLint`. If unclear, use **Manual Pattern Hints** in **`{best-practices}/SKILL.md`** or ask the user to pick one of those. +Otherwise map the request to a pattern id: `scheduler`, `resourceChangeListener`, `replication`, `eventListener`, `eventHandler`, `assetApi`, `htlLint`, `oakIndex`. If unclear, use **Manual Pattern Hints** in **`{best-practices}/SKILL.md`** or ask the user to pick one of those. ### Step 2: Availability @@ -212,7 +213,7 @@ If the id is missing from the best-practices table, say the pattern is not suppo ### Step 3: Targets -**For BPA patterns** (`scheduler`, `resourceChangeListener`, `replication`, `eventListener`, `eventHandler`, `assetApi`): Run **`getBpaFindings`** (with `bpaFilePath` when provided). Internally: cache → CSV → MCP → manual **only when each step is applicable and succeeds**; if MCP fails, obey **MCP errors and fallback** (stop; no silent chain). For MCP details, [references/cam-mcp.md](references/cam-mcp.md). +**For BPA patterns** (`scheduler`, `resourceChangeListener`, `replication`, `eventListener`, `eventHandler`, `assetApi`, `oakIndex`): Run **`getBpaFindings`** (with `bpaFilePath` when provided). Internally: cache → CSV → MCP → manual **only when each step is applicable and succeeds**; if MCP fails, obey **MCP errors and fallback** (stop; no silent chain). For MCP details, [references/cam-mcp.md](references/cam-mcp.md). `getBpaFindings` returns **a batch of 5 findings** (default `limit=5`) along with a `paging` envelope. 
The agent processes that batch only; it does **not** request the next batch until diff --git a/plugins/aem/cloud-service/skills/migration/scripts/bpa-local-parser.js b/plugins/aem/cloud-service/skills/migration/scripts/bpa-local-parser.js index 04f47ad7..3cebf848 100644 --- a/plugins/aem/cloud-service/skills/migration/scripts/bpa-local-parser.js +++ b/plugins/aem/cloud-service/skills/migration/scripts/bpa-local-parser.js @@ -20,14 +20,17 @@ const path = require('path'); const PATTERN_TO_SUBTYPE = { scheduler: "sling.commons.scheduler", assetApi: "unsupported.asset.api", + oakIndex: "oak.index.definition", }; // CSV subtype to pattern mapping (based on actual CSV structure) const CSV_SUBTYPE_TO_PATTERN = { "unsupported.asset.api": "assetApi", - "javax.jcr.observation.EventListener": "eventListener", + "javax.jcr.observation.EventListener": "eventListener", "org.apache.sling.api.resource.observation.ResourceChangeListener": "resourceChangeListener", - "org.osgi.service.event.EventHandler": "eventHandler" + "org.osgi.service.event.EventHandler": "eventHandler", + "index.rule.violation": "oakIndex", + "standard.index.modification": "oakIndex" }; // Known scheduler identifier @@ -351,6 +354,38 @@ function processResourceChangeListenerFindings(findings) { }; } +/** + * Process oak index findings from CSV. + * Oak index findings differ from Java-class patterns: the `identifier` is the + * oak index path (e.g. `/oak:index/wkndId`), not a fully-qualified class name. + * Both BPA subtypes for category OID are emitted under the `oakIndex` pattern. 
+ */ +function processOakIndexFindings(findings) { + const oakIndexFindings = findings.filter(finding => + finding.subtype === 'index.rule.violation' || + finding.subtype === 'standard.index.modification' + ); + + const identifiers = {}; + + oakIndexFindings.forEach(finding => { + const subtype = finding.subtype; + const indexPath = (finding.identifier || '').trim(); + if (!indexPath) return; + if (!identifiers[subtype]) { + identifiers[subtype] = []; + } + if (!identifiers[subtype].includes(indexPath)) { + identifiers[subtype].push(indexPath); + } + }); + + return { + subtype: 'oak.index.definition', + identifiers: identifiers + }; +} + /** * Process event handler findings from CSV */ @@ -478,15 +513,30 @@ function createUnifiedCollection(bpaData, outputDir) { if (Object.keys(eventHandlerCollection.identifiers).length > 0) { const mongoSafeSubtype = toMongoSafeFieldName(eventHandlerCollection.subtype); subtypes[mongoSafeSubtype] = {}; - + Object.entries(eventHandlerCollection.identifiers).forEach(([identifier, classNames]) => { const mongoSafeIdentifier = toMongoSafeIdentifier(identifier); subtypes[mongoSafeSubtype][mongoSafeIdentifier] = classNames; totalFindings += classNames.length; }); - + console.log(`Found ${Object.values(eventHandlerCollection.identifiers).flat().length} event handler classes`); } + + // Process oak index findings (category OID) + const oakIndexCollection = processOakIndexFindings(findings); + if (Object.keys(oakIndexCollection.identifiers).length > 0) { + const mongoSafeSubtype = toMongoSafeFieldName(oakIndexCollection.subtype); + subtypes[mongoSafeSubtype] = {}; + + Object.entries(oakIndexCollection.identifiers).forEach(([identifier, indexPaths]) => { + const mongoSafeIdentifier = toMongoSafeIdentifier(identifier); + subtypes[mongoSafeSubtype][mongoSafeIdentifier] = indexPaths; + totalFindings += indexPaths.length; + }); + + console.log(`Found ${Object.values(oakIndexCollection.identifiers).flat().length} oak index paths`); + } // Create 
unified collection structure with metadata const subtypeKeys = Object.keys(subtypes); diff --git a/plugins/aem/cloud-service/skills/migration/scripts/fixtures/minimal-oakindex-bpa.csv b/plugins/aem/cloud-service/skills/migration/scripts/fixtures/minimal-oakindex-bpa.csv new file mode 100644 index 00000000..2cc57c89 --- /dev/null +++ b/plugins/aem/cloud-service/skills/migration/scripts/fixtures/minimal-oakindex-bpa.csv @@ -0,0 +1,4 @@ +code,type,subtype,importance,identifier,message,context +OID-001,issue,index.rule.violation,critical,/oak:index/wkndId,Custom index missing -custom- suffix, +OID-002,issue,index.rule.violation,critical,/oak:index/wkndTerminationDate,Custom index missing -custom- suffix, +OID-003,issue,standard.index.modification,critical,/oak:index/damAssetLucene,OOTB index modified in place, diff --git a/plugins/aem/cloud-service/skills/migration/scripts/unified-collection-reader.js b/plugins/aem/cloud-service/skills/migration/scripts/unified-collection-reader.js index 75efe978..0fa279f6 100644 --- a/plugins/aem/cloud-service/skills/migration/scripts/unified-collection-reader.js +++ b/plugins/aem/cloud-service/skills/migration/scripts/unified-collection-reader.js @@ -27,7 +27,8 @@ const MONGO_SAFE_TO_PATTERN = { "unsupported_asset_api": "assetApi", "javax_jcr_observation_EventListener": "eventListener", "org_apache_sling_api_resource_observation_ResourceChangeListener": "resourceChangeListener", - "org_osgi_service_event_EventHandler": "eventHandler" + "org_osgi_service_event_EventHandler": "eventHandler", + "oak_index_definition": "oakIndex" }; // Known scheduler identifier @@ -273,14 +274,14 @@ function processResourceChangeListenerFromUnified(subtypeData, targets) { */ function processEventHandlerFromUnified(subtypeData, targets) { let count = 0; - + // Sort identifiers alphabetically so the iteration order is deterministic // across runs, independent of how the unified-collection JSON was written. 
const identifierKeys = Object.keys(subtypeData || {}).sort(); for (const mongoSafeIdentifier of identifierKeys) { const classNames = subtypeData[mongoSafeIdentifier] || []; const identifier = fromMongoSafeFieldName(mongoSafeIdentifier); - + for (const className of classNames) { count++; targets.push(new BpaTarget( @@ -292,7 +293,38 @@ function processEventHandlerFromUnified(subtypeData, targets) { )); } } - + + return count; +} + +/** + * Process oak index data from unified collection. + * + * Oak index findings differ from Java-class patterns: each "className" slot + * carries an oak index JCR path (e.g. `/oak:index/wkndId`), not a fully + * qualified class name. The BPA subtype is preserved in the identifier + * field so the agent can pick the right fix (custom vs OOTB modification). + */ +function processOakIndexFromUnified(subtypeData, targets) { + let count = 0; + + const subtypeKeys = Object.keys(subtypeData || {}).sort(); + for (const mongoSafeSubtype of subtypeKeys) { + const indexPaths = subtypeData[mongoSafeSubtype] || []; + const subtype = fromMongoSafeFieldName(mongoSafeSubtype); + + for (const indexPath of indexPaths) { + count++; + targets.push(new BpaTarget( + "oakIndex", + indexPath, + subtype, + `Oak index requires Cloud-compatible rewrite (${subtype}): ${indexPath}`, + "critical" + )); + } + } + return count; } @@ -405,6 +437,9 @@ function fetchUnifiedBpaFindings(pattern = "all", collectionsDir = './unified-co } else if (pat === "eventHandler") { count = processEventHandlerFromUnified(subtypeData, result.targets); result.summary.eventHandlerCount = count; + } else if (pat === "oakIndex") { + count = processOakIndexFromUnified(subtypeData, result.targets); + result.summary.oakIndexCount = count; } console.log(`[Unified Collection Reader] Processed ${count} findings for pattern: ${pat}`);