From 17adea6d30a2831234e91bf0c67d93d5b3028961 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 01:53:51 +0000 Subject: [PATCH] Refactor: extract OCR/QR result post-processing into TextRecognition The OCR line-join + empty/line-count logic was duplicated across CaptureManager.runOCR and EditorView.performOCR, and the QR payload->openable-URL decision lived inline in performQRScan. Lift the pure post-processing out of the Vision closures into TextRecognition.ocrOutcome(from:) / barcodeOutcome(from:), which classify already-extracted strings. The Vision request setup and the toast/alert presentation stay at the call sites. Behavior-preserving: same join separator, same line counting (joined components), clipboard text is still all payloads joined, the alert still shows the first raw payload and opens the first URL. Adds TextRecognitionTests (10 cases). --- Sources/Stag/Capture/TextRecognition.swift | 43 +++++++++++++ Sources/Stag/CaptureManager.swift | 13 ++-- Sources/Stag/Views/Editor/EditorView.swift | 44 ++++++------- Tests/StagTests/TextRecognitionTests.swift | 74 ++++++++++++++++++++++ 4 files changed, 145 insertions(+), 29 deletions(-) create mode 100644 Sources/Stag/Capture/TextRecognition.swift create mode 100644 Tests/StagTests/TextRecognitionTests.swift diff --git a/Sources/Stag/Capture/TextRecognition.swift b/Sources/Stag/Capture/TextRecognition.swift new file mode 100644 index 0000000..25f5094 --- /dev/null +++ b/Sources/Stag/Capture/TextRecognition.swift @@ -0,0 +1,43 @@ +import Foundation + +/// What an OCR pass produced, once the per-observation strings have been pulled +/// out of Vision. +enum OCROutcome: Equatable { + case noText + case copied(text: String, lineCount: Int) +} + +/// What a barcode/QR scan produced, once payload strings have been pulled out of +/// Vision. `text` is always the clipboard text (all payloads joined); `url` is +/// set only when the first payload parses as an openable URL. +enum BarcodeOutcome: Equatable { + case none + case found(text: String, count: Int, url: URL?, firstPayload: String) +} + +/// Pure post-processing for Vision text/barcode results, lifted out of the OCR +/// closures in CaptureManager and EditorView so the classification rules are +/// testable without Vision. +enum TextRecognition { + + /// Joins recognized OCR lines and classifies the outcome. `lines` are the + /// per-observation strings already pulled from Vision. + static func ocrOutcome(from lines: [String]) -> OCROutcome { + let joined = lines.joined(separator: "\n") + guard !joined.isEmpty else { return .noText } + return .copied(text: joined, lineCount: joined.components(separatedBy: "\n").count) + } + + /// Classifies decoded barcode payloads. When the first payload parses as a + /// URL with a scheme, the URL is surfaced so the caller can offer to open it; + /// the clipboard text is always every payload joined by newlines. + static func barcodeOutcome(from payloads: [String]) -> BarcodeOutcome { + guard let first = payloads.first else { return .none } + let combined = payloads.joined(separator: "\n") + let url: URL? = { + guard let candidate = URL(string: first), candidate.scheme != nil else { return nil } + return candidate + }() + return .found(text: combined, count: payloads.count, url: url, firstPayload: first) + } +} diff --git a/Sources/Stag/CaptureManager.swift b/Sources/Stag/CaptureManager.swift index 6092014..7728906 100644 --- a/Sources/Stag/CaptureManager.swift +++ b/Sources/Stag/CaptureManager.swift @@ -458,18 +458,17 @@ final class CaptureManager { let request = VNRecognizeTextRequest { request, _ in defer { continuation.resume() } let observations = request.results as? [VNRecognizedTextObservation] ?? [] - let text = observations - .compactMap { $0.topCandidates(1).first?.string } - .joined(separator: "\n") + let lines = observations.compactMap { $0.topCandidates(1).first?.string } + let outcome = TextRecognition.ocrOutcome(from: lines) DispatchQueue.main.async { - if text.isEmpty { + switch outcome { + case .noText: ToastWindow.show("No text found", icon: "text.slash", iconColor: .secondary) - } else { + case .copied(let text, let lineCount): Clipboard.copy(text: text) - let lines = text.components(separatedBy: "\n").count - ToastWindow.show("Copied \(lines) line\(lines == 1 ? "" : "s")", + ToastWindow.show("Copied \(lineCount) line\(lineCount == 1 ? "" : "s")", icon: "doc.on.clipboard.fill", iconColor: .green) } diff --git a/Sources/Stag/Views/Editor/EditorView.swift b/Sources/Stag/Views/Editor/EditorView.swift index 6c43c4d..9b4c5d7 100644 --- a/Sources/Stag/Views/Editor/EditorView.swift +++ b/Sources/Stag/Views/Editor/EditorView.swift @@ -2117,16 +2117,16 @@ struct EditorView: View { self.ocrAlertMessage = "OCR failed: \(error.localizedDescription)" return } - let texts = (request.results as? [VNRecognizedTextObservation])?.compactMap { obs in + let lines = (request.results as? [VNRecognizedTextObservation])?.compactMap { obs in obs.topCandidates(1).first?.string } ?? [] - let result = texts.joined(separator: "\n") - guard !result.isEmpty else { + switch TextRecognition.ocrOutcome(from: lines) { + case .noText: self.ocrAlertMessage = "No text found in image." - return + case .copied(let text, let lineCount): + Clipboard.copy(text: text) + self.ocrAlertMessage = "Copied \(lineCount) line(s) to clipboard." } - Clipboard.copy(text: result) - self.ocrAlertMessage = "Copied \(texts.count) line(s) to clipboard." } } request.recognitionLevel = .accurate @@ -2147,24 +2147,24 @@ struct EditorView: View { } let payloads = (req.results as? [VNBarcodeObservation])? .compactMap { $0.payloadStringValue } ?? [] - guard !payloads.isEmpty else { + switch TextRecognition.barcodeOutcome(from: payloads) { + case .none: self.ocrAlertMessage = "No QR code or barcode found." - return - } - let combined = payloads.joined(separator: "\n") - Clipboard.copy(text: combined) - // If it looks like a URL, offer to open it - if let first = payloads.first, let url = URL(string: first), url.scheme != nil { - let alert = NSAlert() - alert.messageText = "QR Code Found" - alert.informativeText = first - alert.addButton(withTitle: "Open URL") - alert.addButton(withTitle: "Copied — Done") - if alert.runModal() == .alertFirstButtonReturn { - NSWorkspace.shared.open(url) + case .found(let text, let count, let url, let firstPayload): + Clipboard.copy(text: text) + // If it looks like a URL, offer to open it + if let url = url { + let alert = NSAlert() + alert.messageText = "QR Code Found" + alert.informativeText = firstPayload + alert.addButton(withTitle: "Open URL") + alert.addButton(withTitle: "Copied — Done") + if alert.runModal() == .alertFirstButtonReturn { + NSWorkspace.shared.open(url) + } + } else { + self.ocrAlertMessage = "Copied \(count) code(s) to clipboard." } - } else { - self.ocrAlertMessage = "Copied \(payloads.count) code(s) to clipboard." } } } diff --git a/Tests/StagTests/TextRecognitionTests.swift b/Tests/StagTests/TextRecognitionTests.swift new file mode 100644 index 0000000..d1274c7 --- /dev/null +++ b/Tests/StagTests/TextRecognitionTests.swift @@ -0,0 +1,74 @@ +import XCTest +@testable import Stag + +/// Post-processing rules extracted from the OCR / QR closures in CaptureManager +/// and EditorView. +final class TextRecognitionTests: XCTestCase { + + // MARK: OCR + + func testOCRNoLinesIsNoText() { + XCTAssertEqual(TextRecognition.ocrOutcome(from: []), .noText) + } + + func testOCREmptyStringsAreNoText() { + XCTAssertEqual(TextRecognition.ocrOutcome(from: [""]), .noText) + } + + func testOCRSingleLine() { + XCTAssertEqual(TextRecognition.ocrOutcome(from: ["hello"]), .copied(text: "hello", lineCount: 1)) + } + + func testOCRMultipleLinesJoinAndCount() { + XCTAssertEqual( + TextRecognition.ocrOutcome(from: ["one", "two", "three"]), + .copied(text: "one\ntwo\nthree", lineCount: 3) + ) + } + + // MARK: Barcode / QR + + func testBarcodeNoneWhenEmpty() { + XCTAssertEqual(TextRecognition.barcodeOutcome(from: []), .none) + } + + func testBarcodeURLPayloadSurfacesURL() { + let outcome = TextRecognition.barcodeOutcome(from: ["https://example.com"]) + XCTAssertEqual( + outcome, + .found(text: "https://example.com", count: 1, + url: URL(string: "https://example.com"), firstPayload: "https://example.com") + ) + } + + func testBarcodeNonURLPayloadHasNilURL() { + let outcome = TextRecognition.barcodeOutcome(from: ["just some text"]) + XCTAssertEqual( + outcome, + .found(text: "just some text", count: 1, url: nil, firstPayload: "just some text") + ) + } + + func testBarcodeMailtoSchemeIsOpenable() { + let outcome = TextRecognition.barcodeOutcome(from: ["mailto:a@b.com"]) + guard case .found(_, _, let url, _) = outcome else { return XCTFail("expected .found") } + XCTAssertEqual(url?.scheme, "mailto") + } + + func testBarcodeMultiplePayloadsJoinTextAndKeepFirstForURL() { + let outcome = TextRecognition.barcodeOutcome(from: ["https://x.com", "extra"]) + XCTAssertEqual( + outcome, + .found(text: "https://x.com\nextra", count: 2, + url: URL(string: "https://x.com"), firstPayload: "https://x.com") + ) + } + + func testBarcodeNonURLFirstWithMultipleHasNilURL() { + let outcome = TextRecognition.barcodeOutcome(from: ["plain", "https://x.com"]) + guard case .found(_, let count, let url, let first) = outcome else { return XCTFail("expected .found") } + XCTAssertNil(url) + XCTAssertEqual(count, 2) + XCTAssertEqual(first, "plain") + } +}