From b9df189a11f2ed14fed606448c5214c96e5d43fa Mon Sep 17 00:00:00 2001 From: Carlos Garnacho Date: Fri, 8 May 2026 20:10:51 +0200 Subject: [PATCH] Support parsing doubly quoted strings as identifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per https://www.sqlite.org/lang_keywords.html, SQLite supports identifiers expressed as strings enclosed in double quotes: $ sqlite3 SQLite version 3.52.0 2026-03-06 16:01:44 Enter ".help" for usage hints. Connected to a transient in-memory database. Use ".open FILENAME" to reopen on a persistent database. sqlite> create table a("oh,boy!" TEXT); sqlite> pragma table_info(a); ╭─────┬─────────┬──────┬─────────┬────────────┬────╮ │ cid │ name │ type │ notnull │ dflt_value │ pk │ ╞═════╪═════════╪══════╪═════════╪════════════╪════╡ │ 0 │ oh,boy! │ TEXT │ 0 │ NULL │ 0 │ ╰─────┴─────────┴──────┴─────────┴────────────┴────╯ When identifiers are parsed this way, no escaping applies, i.e. a backslash does not affect the character that follows it: sqlite> create table b("oh\tno\" TEXT); sqlite> pragma table_info(b); ╭─────┬─────────┬──────┬─────────┬────────────┬────╮ │ cid │ name │ type │ notnull │ dflt_value │ pk │ ╞═════╪═════════╪══════╪═════════╪════════════╪════╡ │ 0 │ oh\tno\ │ TEXT │ 0 │ NULL │ 0 │ ╰─────┴─────────┴──────┴─────────┴────────────┴────╯ Make sqlite-vec's internal scanner handle these identifiers, so that column names in a vec0 virtual table are subject to the same (reduced) limitations as in any other SQLite table. Note: SQLite also accepts single-quoted strings as identifiers. This is done for compatibility with other SQL implementations rather than for standards conformance, so it is perhaps more dubious to support and is not handled here. 
--- sqlite-vec.c | 72 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/sqlite-vec.c b/sqlite-vec.c index dc33c67..9963ca9 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -2072,6 +2072,7 @@ static void _static_text_func(sqlite3_context *context, int argc, enum Vec0TokenType { TOKEN_TYPE_IDENTIFIER, + TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER, TOKEN_TYPE_DIGIT, TOKEN_TYPE_LBRACKET, TOKEN_TYPE_RBRACKET, @@ -2157,6 +2158,17 @@ int vec0_token_next(char *start, char *end, struct Vec0Token *out) { out->end = ptr; out->token_type = TOKEN_TYPE_IDENTIFIER; return VEC0_TOKEN_RESULT_SOME; + } else if (curr == '"') { + char *start = ptr; + int match = 0; + do { + match = ptr > start && (*ptr == '"'); + ptr++; + } while (ptr < end && !match); + out->start = start; + out->end = ptr; + out->token_type = TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER; + return VEC0_TOKEN_RESULT_SOME; } else if (is_digit(curr)) { char *start = ptr; while (ptr < end && (is_digit(*ptr))) { @@ -2263,12 +2275,18 @@ int vec0_parse_partition_key_definition(const char *source, int source_length, // Check first token is identifier, will be the column name int rc = vec0_scanner_next(&scanner, &token); if (rc != VEC0_TOKEN_RESULT_SOME && - token.token_type != TOKEN_TYPE_IDENTIFIER) { + !((token.token_type == TOKEN_TYPE_IDENTIFIER) || + (token.token_type == TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER))) { return SQLITE_EMPTY; } - column_name = token.start; - column_name_length = token.end - token.start; + if (token.token_type == TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER) { + column_name = token.start + 1; + column_name_length = token.end - token.start - 2; + } else { + column_name = token.start; + column_name_length = token.end - token.start; + } // Check the next token matches "text" or "integer", as column type rc = vec0_scanner_next(&scanner, &token); @@ -2346,12 +2364,18 @@ int vec0_parse_auxiliary_column_definition(const char *source, int source_length rc = 
vec0_scanner_next(&scanner, &token); if (rc != VEC0_TOKEN_RESULT_SOME && - token.token_type != TOKEN_TYPE_IDENTIFIER) { + !((token.token_type == TOKEN_TYPE_IDENTIFIER) || + (token.token_type == TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER))) { return SQLITE_EMPTY; } - column_name = token.start; - column_name_length = token.end - token.start; + if (token.token_type == TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER) { + column_name = token.start + 1; + column_name_length = token.end - token.start - 2; + } else { + column_name = token.start; + column_name_length = token.end - token.start; + } // Check the next token matches "text" or "integer", as column type rc = vec0_scanner_next(&scanner, &token); @@ -2418,12 +2442,18 @@ int vec0_parse_metadata_column_definition(const char *source, int source_length, rc = vec0_scanner_next(&scanner, &token); if (rc != VEC0_TOKEN_RESULT_SOME || - token.token_type != TOKEN_TYPE_IDENTIFIER) { + !((token.token_type == TOKEN_TYPE_IDENTIFIER) || + (token.token_type == TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER))) { return SQLITE_EMPTY; } - column_name = token.start; - column_name_length = token.end - token.start; + if (token.token_type == TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER) { + column_name = token.start + 1; + column_name_length = token.end - token.start - 2; + } else { + column_name = token.start; + column_name_length = token.end - token.start; + } // Check the next token matches a valid metadata type rc = vec0_scanner_next(&scanner, &token); @@ -2478,12 +2508,18 @@ int vec0_parse_primary_key_definition(const char *source, int source_length, // Check first token is identifier, will be the column name int rc = vec0_scanner_next(&scanner, &token); if (rc != VEC0_TOKEN_RESULT_SOME && - token.token_type != TOKEN_TYPE_IDENTIFIER) { + !((token.token_type == TOKEN_TYPE_IDENTIFIER) || + (token.token_type == TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER))) { return SQLITE_EMPTY; } - column_name = token.start; - column_name_length = token.end - token.start; + if (token.token_type 
== TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER) { + column_name = token.start + 1; + column_name_length = token.end - token.start - 2; + } else { + column_name = token.start; + column_name_length = token.end - token.start; + } // Check the next token matches "text" or "integer", as column type rc = vec0_scanner_next(&scanner, &token); @@ -2998,12 +3034,18 @@ int vec0_parse_vector_column(const char *source, int source_length, rc = vec0_scanner_next(&scanner, &token); if (rc != VEC0_TOKEN_RESULT_SOME && - token.token_type != TOKEN_TYPE_IDENTIFIER) { + !((token.token_type == TOKEN_TYPE_IDENTIFIER) || + (token.token_type == TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER))) { return SQLITE_EMPTY; } - name = token.start; - nameLength = token.end - token.start; + if (token.token_type == TOKEN_TYPE_DOUBLY_QUOTED_IDENTIFIER) { + name = token.start + 1; + nameLength = token.end - token.start - 2; + } else { + name = token.start; + nameLength = token.end - token.start; + } // vector column type comes next: float, int, or bit rc = vec0_scanner_next(&scanner, &token);