Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions graphile/graphile-search/src/adapters/pgvector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ export function createPgvectorAdapter(

filterPrefix,

// pgvector operates on embedding vectors, not text search — its presence
// alone should NOT trigger supplementary adapters like trgm.
isIntentionalSearch: false,

supportsTextSearch: false,
// pgvector requires a vector array, not plain text — no buildTextSearchInput

Expand Down
26 changes: 25 additions & 1 deletion graphile/graphile-search/src/adapters/trgm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,40 @@ export interface TrgmAdapterOptions {
* @default 0.3
*/
defaultThreshold?: number;

/**
* When true, trgm only activates on tables that have an "intentional"
* search column detected by another adapter (e.g. a tsvector column or
* a BM25 index). This prevents trgm similarity fields from being added
* to every table with text columns.
*
* The plugin's `getAdapterColumns` orchestrates this by running
* non-supplementary adapters first, then running supplementary adapters
* only on codecs where an adapter whose `isIntentionalSearch` is not
* `false` found columns (pgvector columns alone do not qualify).
*
* @default true
*/
requireIntentionalSearch?: boolean;
}

export function createTrgmAdapter(
options: TrgmAdapterOptions = {}
): SearchAdapter {
const { filterPrefix = 'trgm', defaultThreshold = 0.3 } = options;
const {
filterPrefix = 'trgm',
defaultThreshold = 0.3,
requireIntentionalSearch = true,
} = options;

return {
name: 'trgm',

/**
* When true, this adapter is "supplementary" — it only activates on
* tables where a non-supplementary adapter whose `isIntentionalSearch`
* is not `false` has already detected columns.
*/
isSupplementary: requireIntentionalSearch,

scoreSemantics: {
metric: 'similarity',
lowerIsBetter: false,
Expand Down
42 changes: 38 additions & 4 deletions graphile/graphile-search/src/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,52 @@ export function createUnifiedSearchPlugin(

/**
 * Get (or compute) the adapter columns for a given codec.
 *
 * Results are memoized in `codecCache`, keyed by `codec.name`, so detection
 * runs at most once per codec name.
 *
 * Detection happens in two phases:
 *
 * 1. Every non-supplementary adapter (e.g. tsvector, BM25, pgvector) runs
 *    `detectColumns`. Adapters that find at least one column are recorded.
 * 2. Supplementary adapters (e.g. trgm with `requireIntentionalSearch`) run
 *    only if at least one phase-1 adapter that found columns has
 *    `isIntentionalSearch` not equal to `false` (the flag defaults to true
 *    when unset).
 *
 * This distinction matters because pgvector (embeddings) is NOT intentional
 * text search — its presence alone should not trigger trgm similarity fields.
 * Only tsvector and BM25, which represent explicit search infrastructure,
 * count as intentional search.
 *
 * @param codec - The codec whose attributes are inspected by each adapter.
 * @param build - The build object, passed through to `detectColumns`.
 * @returns One entry per adapter that detected at least one column; primary
 *   adapters come first, followed by any supplementary adapters.
 */
function getAdapterColumns(codec: PgCodecWithAttributes, build: any): AdapterColumnCache[] {
  const cacheKey = codec.name;
  if (codecCache.has(cacheKey)) {
    return codecCache.get(cacheKey)!;
  }

  const primaryAdapters = adapters.filter((a) => !a.isSupplementary);
  const supplementaryAdapters = adapters.filter((a) => a.isSupplementary);

  // Phase 1: Run non-supplementary adapters (tsvector, BM25, pgvector, etc.)
  const results: AdapterColumnCache[] = [];
  let hasIntentionalSearch = false;
  for (const adapter of primaryAdapters) {
    const columns = adapter.detectColumns(codec, build);
    if (columns.length > 0) {
      results.push({ adapter, columns });
      // Track whether any "intentional search" adapter found columns.
      // isIntentionalSearch defaults to true when not explicitly set.
      if (adapter.isIntentionalSearch !== false) {
        hasIntentionalSearch = true;
      }
    }
  }

  // Phase 2: Only run supplementary adapters if at least one primary
  // adapter whose isIntentionalSearch is not false found columns on this
  // codec. pgvector (isIntentionalSearch: false) alone won't trigger trgm.
  if (hasIntentionalSearch) {
    for (const adapter of supplementaryAdapters) {
      const columns = adapter.detectColumns(codec, build);
      if (columns.length > 0) {
        results.push({ adapter, columns });
      }
    }
  }

  codecCache.set(cacheKey, results);
  return results;
}
Expand Down Expand Up @@ -170,9 +202,11 @@ export function createUnifiedSearchPlugin(
provides: ['default'],
before: ['inferred', 'override', 'PgAttributesPlugin'],
callback(behavior, [codec, attributeName], build) {
// Check if any adapter claims this column
for (const adapter of adapters) {
const columns = adapter.detectColumns(codec, build);
// Use getAdapterColumns which respects isSupplementary logic,
// so trgm columns only appear when intentional search exists
if (!codec?.attributes) return behavior;
const adapterColumns = getAdapterColumns(codec as PgCodecWithAttributes, build);
for (const { columns } of adapterColumns) {
if (columns.some((c) => c.attributeName === attributeName)) {
return [
'unifiedSearch:orderBy',
Expand Down
29 changes: 29 additions & 0 deletions graphile/graphile-search/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,35 @@ export interface SearchAdapter {
/** Score semantics for this algorithm. */
scoreSemantics: ScoreSemantics;

/**
* When true, this adapter is "supplementary" — it only activates on
* tables that already have at least one column detected by an adapter
* whose `isIntentionalSearch` is true (e.g. tsvector or BM25).
*
* This prevents adapters like pg_trgm from adding similarity fields
* to every table with text columns when there is no intentional search setup.
*
* pgvector (embeddings) does NOT count as intentional search because it
* operates on vector columns, not text search — so its presence alone
* won't trigger supplementary adapters.
*
* @default false
*/
isSupplementary?: boolean;

/**
* When true, this adapter represents "intentional search" — its presence
* on a table signals that the table was explicitly set up for search and
* should trigger supplementary adapters (e.g. trgm).
*
* Adapters that check for real infrastructure (tsvector columns, BM25
* indexes) should set this to true. Adapters that operate on a different
* domain (pgvector embeddings) should set this to false.
*
* @default true
*/
isIntentionalSearch?: boolean;

/**
* The filter prefix used for filter field names on the connection filter input.
* The field name is: `{filterPrefix}{ColumnName}` (camelCase).
Expand Down
Loading
Loading