Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ app/src/main/java/com/pledgerio/app/
- [Account types](docs/ACCOUNTS.md) — Owned vs counterparty accounts, type codes, transaction mapping
- [Budgets](docs/BUDGETS.md) — Initial setup, expense groups, API mapping
- [Transaction form redesign](docs/TRANSACTION_FORM_REDESIGN.md) — Planned UX for creating transactions (type-first flow, amount hero, contextual account labels)
- [Invoice scan to transaction plan](docs/INVOICE_SCAN_TRANSACTION_PLAN.md) — Proposed OCR + text extraction pipeline to prefill new transactions
- [Usability modes](docs/USABILITY_MODES.md) — Guided mode for novices and Power mode for advanced users
- [Localization](docs/LOCALIZATION.md) — English, Dutch, and German (extensible)
- [Architecture Decision Records](docs/adr/README.md) — Rationale for major technical choices
Expand Down
1 change: 1 addition & 0 deletions app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ dependencies {
implementation(libs.vico.compose.m3)
implementation(libs.coil.compose)
implementation(libs.androidx.core.splashscreen)
implementation(libs.mlkit.text.recognition)
implementation(libs.androidx.compose.ui.text.google.fonts)

testImplementation(libs.junit)
Expand Down
10 changes: 10 additions & 0 deletions app/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@
</intent-filter>
</activity>

<!--
FileProvider registration. The authority is derived from the application id
(`${applicationId}.fileprovider`), the provider is not exported, and access is
handed out per-URI via `grantUriPermissions`. The shareable paths are declared
in `@xml/file_paths`.
-->
<provider
android:name="androidx.core.content.FileProvider"
android:authorities="${applicationId}.fileprovider"
android:exported="false"
android:grantUriPermissions="true">
<meta-data
android:name="android.support.FILE_PROVIDER_PATHS"
android:resource="@xml/file_paths" />
</provider>

<!--
Disable the default `WorkManagerInitializer` so WorkManager is initialized
on-demand from `PledgerApp.onCreate()` AFTER Hilt has injected
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package com.pledgerio.app.data.ocr

import android.content.Context
import android.net.Uri
import com.google.mlkit.vision.text.Text
import com.google.android.gms.tasks.Task
import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.text.TextRecognition
import com.google.mlkit.vision.text.latin.TextRecognizerOptions
import com.pledgerio.app.R
import com.pledgerio.app.util.Resource
import dagger.hilt.android.qualifiers.ApplicationContext
import javax.inject.Inject
import javax.inject.Singleton
import kotlinx.coroutines.cancel
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlin.coroutines.resume

/**
 * Runs ML Kit text recognition on an image and returns the recognized text in a
 * layout-aware form produced by [OcrTextFormatter].
 */
@Singleton
class InvoiceTextExtractor @Inject constructor(
    @ApplicationContext private val context: Context,
) {
    /**
     * Recognizes the text in the image behind [uri].
     *
     * @param uri location of the image to read; it is resolved through the application context.
     * @return [Resource.Success] with the formatted text, or [Resource.Error] with a localized
     * message when nothing readable was found or the image could not be processed.
     * @throws kotlin.coroutines.cancellation.CancellationException when the calling coroutine
     * (or the underlying recognition task) is cancelled; cancellation is never reported as an error.
     */
    suspend fun extractText(uri: Uri): Resource<String> {
        return try {
            val image = InputImage.fromFilePath(context, uri)
            val recognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)
            val text = try {
                formatResultText(recognizer.process(image).awaitResult())
            } finally {
                // Release the recognizer on every path, including failure and cancellation.
                recognizer.close()
            }
            if (text.isBlank()) {
                Resource.Error(context.getString(R.string.invoice_scan_error_no_readable_text))
            } else {
                Resource.Success(text)
            }
        } catch (e: kotlin.coroutines.cancellation.CancellationException) {
            // Cancellation must propagate so structured concurrency keeps working.
            throw e
        } catch (e: Exception) {
            Resource.Error(context.getString(R.string.invoice_scan_error_read_failed))
        }
    }

    /**
     * Converts a recognition [result] into plain text.
     *
     * Lines that carry a bounding box are handed to [OcrTextFormatter]; when no line has one,
     * the recognizer's own text is used instead. The returned string is trimmed.
     */
    private fun formatResultText(result: Text): String {
        val lines = result.textBlocks
            .flatMap { block -> block.lines }
            .mapNotNull { line ->
                // Lines without geometry cannot be placed on the page, so they are dropped here.
                val box = line.boundingBox ?: return@mapNotNull null
                OcrTextFormatter.OcrLine(
                    text = line.text,
                    left = box.left,
                    top = box.top,
                    right = box.right,
                    bottom = box.bottom,
                )
            }
        if (lines.isEmpty()) return result.text.trim()
        return OcrTextFormatter.format(lines).trim()
    }
}

/**
 * Suspends until this [Task] finishes.
 *
 * Returns the task's result on success, throws the task's error on failure, and cancels the
 * suspended continuation when the task reports cancellation. Each callback checks that the
 * continuation is still active before touching it.
 */
private suspend fun <T> Task<T>.awaitResult(): T = suspendCancellableCoroutine { cont ->
    addOnSuccessListener { value -> if (cont.isActive) cont.resume(value) }
        .addOnFailureListener { cause -> if (cont.isActive) cont.resumeWith(Result.failure(cause)) }
        .addOnCanceledListener { if (cont.isActive) cont.cancel() }
}
144 changes: 144 additions & 0 deletions app/src/main/java/com/pledgerio/app/data/ocr/OcrTextFormatter.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package com.pledgerio.app.data.ocr

import kotlin.math.abs
import kotlin.math.max

/**
* Rebuilds OCR lines into a transaction-extraction friendly text stream.
*
* Invoices often contain a left description column and a right amount column.
* When OCR returns those as separate reading-order blocks, this formatter pairs
* rows by Y-position and emits tab-separated "description<TAB>amount" rows.
*/
object OcrTextFormatter {

    /**
     * One recognized line of text together with the edges of its bounding box.
     *
     * The derived values are computed once at construction: the trimmed text, the box centre
     * on both axes, and a height that is never smaller than 1.
     */
    data class OcrLine(
        val text: String,
        val left: Int,
        val top: Int,
        val right: Int,
        val bottom: Int,
    ) {
        val cleanText: String = text.trim()
        val centerX: Float = (left + right) / 2f
        val centerY: Float = (top + bottom) / 2f
        val height: Int = max(1, bottom - top)
    }

    /** Matches strings that look like a monetary amount (optional currency symbol / ISO code). */
    private val amountLikeRegex = Regex(
        """^[\p{Sc}]?\s*[-+]?\d{1,3}(?:[.,\s]\d{3})*(?:[.,]\d{2})?(?:\s?[A-Z]{3})?$|^[\p{Sc}]?\s*[-+]?\d+(?:[.,]\d{2})?(?:\s?[A-Z]{3})?$""",
    )

    /**
     * Formats [lines] into newline-separated rows.
     *
     * Blank lines are discarded first. The two-column layout is attempted before falling back
     * to plain row grouping. Returns an empty string when nothing remains.
     */
    fun format(lines: List<OcrLine>): String {
        val usable = lines
            .filterNot { it.cleanText.isBlank() }
            .map { it.copy(text = it.cleanText) }
        if (usable.isEmpty()) return ""

        val rows = tryFormatTwoColumnRows(usable) ?: formatByRows(usable)
        return rows.joinToString("\n")
    }

    /**
     * Tries to read [lines] as a description column on the left and an amount column on the
     * right. Returns `null` when the input does not look like that layout.
     */
    private fun tryFormatTwoColumnRows(lines: List<OcrLine>): List<String>? {
        if (lines.size < 4) return null

        val byCenterX = lines.sortedBy { it.centerX }
        val span = max(1, byCenterX.maxOf { it.right } - byCenterX.minOf { it.left })

        // Locate the widest horizontal gap between neighbouring line centres.
        var widestGap = 0f
        var gapIndex = -1
        byCenterX.zipWithNext().forEachIndexed { index, (current, next) ->
            val gap = next.centerX - current.centerX
            if (gap > widestGap) {
                widestGap = gap
                gapIndex = index
            }
        }
        if (gapIndex < 0 || widestGap < span * 0.22f) return null

        val splitX = (byCenterX[gapIndex].centerX + byCenterX[gapIndex + 1].centerX) / 2f
        val (leftSide, rightSide) = lines.partition { it.centerX < splitX }
        if (leftSide.isEmpty() || rightSide.isEmpty()) return null

        val leftColumn = leftSide.sortedBy { it.centerY }
        val rightColumn = rightSide.sortedBy { it.centerY }

        // The right side must be mostly amounts and the left side mostly not.
        val rightAmountShare = amountShare(rightColumn)
        val leftAmountShare = amountShare(leftColumn)
        if (rightAmountShare < 0.6f || leftAmountShare > 0.45f) return null

        val tolerance = max(8f, medianHeight(lines) * 0.95f)
        val claimed = mutableSetOf<Int>()
        val rows = mutableListOf<Pair<Float, String>>()

        for (left in leftColumn) {
            // Closest still-unclaimed right-hand line within the vertical tolerance.
            val partnerIndex = rightColumn.indices
                .filter { it !in claimed && abs(rightColumn[it].centerY - left.centerY) <= tolerance }
                .minByOrNull { abs(rightColumn[it].centerY - left.centerY) }

            if (partnerIndex == null) {
                rows.add(left.centerY to left.cleanText)
                continue
            }
            claimed.add(partnerIndex)
            val partner = rightColumn[partnerIndex]
            rows.add(
                ((left.centerY + partner.centerY) / 2f) to "${left.cleanText}\t${partner.cleanText}",
            )
        }

        // Right-hand lines that found no partner are kept as rows of their own.
        rightColumn.forEachIndexed { index, line ->
            if (index !in claimed) rows.add(line.centerY to line.cleanText)
        }

        return rows.sortedBy { it.first }.map { it.second }
    }

    /** Fraction of [column] whose text looks like an amount. */
    private fun amountShare(column: List<OcrLine>): Float =
        column.count { isAmountLike(it.cleanText) }.toFloat() / column.size

    /**
     * Groups [lines] into visual rows by vertical position and joins each row's cells,
     * ordered left to right, with tabs.
     */
    private fun formatByRows(lines: List<OcrLine>): List<String> {
        val tolerance = max(8f, medianHeight(lines) * 0.65f)
        val ordered = lines.sortedWith(compareBy<OcrLine> { it.centerY }.thenBy { it.left })

        val grouped = mutableListOf<MutableList<OcrLine>>()
        for (line in ordered) {
            // A line joins the most recent row only if it sits close to that row's mean centre.
            val openRow = grouped.lastOrNull()?.takeIf { row ->
                abs(line.centerY - row.map { it.centerY }.average().toFloat()) <= tolerance
            }
            if (openRow != null) {
                openRow.add(line)
            } else {
                grouped.add(mutableListOf(line))
            }
        }

        return grouped.map { row ->
            row.sortedBy { it.left }.joinToString(separator = "\t") { it.cleanText }
        }
    }

    /** Median of the line heights; 12 when [lines] is empty. */
    private fun medianHeight(lines: List<OcrLine>): Float {
        if (lines.isEmpty()) return 12f
        val heights = lines.map { it.height }.sorted()
        val middle = heights.size / 2
        return if (heights.size % 2 == 0) {
            (heights[middle - 1] + heights[middle]) / 2f
        } else {
            heights[middle].toFloat()
        }
    }

    private fun isAmountLike(text: String): Boolean = amountLikeRegex.matches(text.trim())
}
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ interface PledgerApiService {
@Query("destination") destination: String? = null,
): Response<TransactionClassificationSuggestionDto>

/**
 * POSTs [request] to `v2/api/ai/extract`.
 *
 * The response body is exposed as an untyped JSON object (string keys, arbitrary values)
 * rather than a dedicated DTO, so callers pick out the keys they need.
 */
@POST("v2/api/ai/extract")
suspend fun extractTransactionFromText(
@Body request: TransactionExtract,
): Response<Map<String, @JvmSuppressWildcards Any?>>

// Categories
@GET("v2/api/categories")
suspend fun getCategories(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.pledgerio.app.data.remote.dto

import com.squareup.moshi.Json
import com.squareup.moshi.JsonClass

/**
 * Request payload carrying the text to extract a transaction from.
 *
 * Serialized by Moshi (generated adapter) as a JSON object with a single `text` field.
 */
@JsonClass(generateAdapter = true)
data class TransactionExtract(
@Json(name = "text") val text: String
)
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ import com.pledgerio.app.data.local.entity.TransactionEntity
import com.pledgerio.app.data.remote.api.PledgerApiService
import com.pledgerio.app.data.remote.dto.CreateTransactionRequest
import com.pledgerio.app.data.remote.dto.TransactionDto
import com.pledgerio.app.data.remote.dto.TransactionExtract
import com.pledgerio.app.data.remote.dto.TransactionSplitDto
import com.pledgerio.app.domain.model.TransactionSplit
import com.pledgerio.app.domain.model.Transaction
import com.pledgerio.app.domain.model.TransactionClassificationSuggestion
import com.pledgerio.app.domain.model.TransactionExtractionDraft
import com.pledgerio.app.domain.model.TransactionFilters
import com.pledgerio.app.domain.model.TransactionType
import com.pledgerio.app.domain.repository.PagedResult
Expand Down Expand Up @@ -235,6 +237,46 @@ class TransactionRepositoryImpl @Inject constructor(
}
}

/**
 * Sends [text] to the extraction endpoint and maps the loosely-typed response onto a
 * [TransactionExtractionDraft].
 *
 * Several alternative keys are accepted for the same field (for example `description` or
 * `title`); the first one present wins. Fields the response does not provide stay `null`.
 * A successful response with no body yields a draft that only carries [text] as `rawText`.
 *
 * @param text the text to analyse; blank input is rejected without a network call.
 * @return [Resource.Success] with the draft, or [Resource.Error] for blank input, a
 * non-successful HTTP status, or a failure while calling the API.
 * @throws kotlin.coroutines.cancellation.CancellationException when the calling coroutine is
 * cancelled; cancellation is propagated instead of being reported as a network error.
 */
override suspend fun extractTransactionFromText(text: String): Resource<TransactionExtractionDraft> {
    if (text.isBlank()) return Resource.Error("No text found in document")
    return try {
        val response = apiService.extractTransactionFromText(
            request = TransactionExtract(text),
        )
        if (response.isSuccessful) {
            val body = response.body().orEmpty()
            // Nested objects are optional; a missing one behaves like an empty map.
            val sourceMap = body.readMap("source")
            val targetMap = body.readMap("target")
            val metadata = body.readMap("metadata")
            val rawDate = body.readString("date") ?: body.readString("transactionDate")
            Resource.Success(
                TransactionExtractionDraft(
                    description = body.readString("description") ?: body.readString("title"),
                    amount = body.readDouble("amount") ?: metadata.readDouble("amount"),
                    currency = body.readString("currency"),
                    date = rawDate?.let(::parseLocalDateOrNull),
                    type = body.readString("type")?.let(::parseExtractionType),
                    sourceName = body.readString("sourceName")
                        ?: sourceMap.readString("name")
                        ?: sourceMap.readString("label"),
                    targetName = body.readString("targetName")
                        ?: body.readString("counterparty")
                        ?: targetMap.readString("name")
                        ?: targetMap.readString("label")
                        ?: body.readString("merchant"),
                    confidence = body.readDouble("confidence")
                        ?: metadata.readDouble("confidence"),
                    rawText = text,
                ),
            )
        } else {
            Resource.Error("Failed to extract transaction from text: ${response.code()}")
        }
    } catch (e: kotlin.coroutines.cancellation.CancellationException) {
        // Never convert cancellation into an error result; let it unwind the coroutine.
        throw e
    } catch (e: Exception) {
        Resource.Error(e.message ?: "Network error")
    }
}

override fun getRecentTransactions(limit: Int): Flow<Resource<List<Transaction>>> = flow {
emit(Resource.Loading)
try {
Expand Down Expand Up @@ -286,4 +328,37 @@ class TransactionRepositoryImpl @Inject constructor(
?: emptyList(),
)
}

/** Parses [value] with [LocalDate.parse]; any failure yields `null` instead of throwing. */
private fun parseLocalDateOrNull(value: String): LocalDate? =
    try {
        LocalDate.parse(value)
    } catch (ignored: Throwable) {
        null
    }

/**
 * Maps a type label to a [TransactionType], ignoring surrounding whitespace and letter case.
 * Unknown labels yield `null`.
 */
private fun parseExtractionType(value: String): TransactionType? =
    when (value.trim().uppercase()) {
        "TRANSFER" -> TransactionType.TRANSFER
        "DEBIT", "INCOME" -> TransactionType.DEBIT
        "CREDIT", "EXPENSE" -> TransactionType.CREDIT
        else -> null
    }

/**
 * Returns the trimmed string stored under [key], or `null` when the key is absent, the value
 * is not a string, or the string is blank.
 */
private fun Map<String, Any?>.readString(key: String): String? =
    (this[key] as? String)
        ?.trim()
        ?.takeIf { it.isNotBlank() }

/**
 * Returns the value under [key] as a [Double]: numbers are converted, strings are parsed.
 * Absent keys, unparsable strings, and any other value type yield `null`.
 */
private fun Map<String, Any?>.readDouble(key: String): Double? =
    when (val raw = this[key]) {
        is Number -> raw.toDouble()
        is String -> raw.toDoubleOrNull()
        else -> null
    }

/**
 * Returns the nested map stored under [key], or an empty map when the key is absent or the
 * value is not a map. The key/value types of the nested map are not checked.
 */
@Suppress("UNCHECKED_CAST")
private fun Map<String, Any?>.readMap(key: String): Map<String, Any?> =
    (this[key] as? Map<String, Any?>) ?: emptyMap()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.pledgerio.app.domain.model

import java.time.LocalDate

/**
 * Partially filled transaction produced by text extraction.
 *
 * Every field is optional and defaults to `null`, so a draft may carry any subset of values.
 *
 * @property confidence score reported alongside the extraction; the scale is not defined here
 * — TODO confirm against the backend.
 * @property rawText presumably the text the draft was extracted from; verify against the producer.
 */
data class TransactionExtractionDraft(
val description: String? = null,
val amount: Double? = null,
val currency: String? = null,
val date: LocalDate? = null,
val type: TransactionType? = null,
val sourceName: String? = null,
val targetName: String? = null,
val confidence: Double? = null,
val rawText: String? = null,
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.pledgerio.app.domain.repository

import com.pledgerio.app.domain.model.Transaction
import com.pledgerio.app.domain.model.TransactionClassificationSuggestion
import com.pledgerio.app.domain.model.TransactionExtractionDraft
import com.pledgerio.app.domain.model.TransactionFilters
import com.pledgerio.app.domain.model.TransactionSplit
import com.pledgerio.app.domain.model.TransactionType
Expand Down Expand Up @@ -42,5 +43,6 @@ interface TransactionRepository {
source: String? = null,
destination: String? = null,
): Resource<TransactionClassificationSuggestion>
/**
 * Derives a [TransactionExtractionDraft] from free-form [text].
 *
 * The outcome is reported through [Resource] rather than by throwing.
 */
suspend fun extractTransactionFromText(text: String): Resource<TransactionExtractionDraft>
/**
 * Streams recent transactions as a [Flow] of [Resource] states.
 *
 * [limit] defaults to 5; presumably it caps the number of transactions returned — confirm
 * against the implementation.
 */
fun getRecentTransactions(limit: Int = 5): Flow<Resource<List<Transaction>>>
}
Loading
Loading