Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions PR_DESCRIPTION.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# feat(pipeline): add verification module

Adds a standalone verification library at `packages/pipeline/verification/`.

It checks a clinical note against the source transcript using token matching, to see whether each claim in the note is actually supported by what was said.

## What's in here

- `types.ts` - types for claims, verdicts, etc
- `verifier.ts` - core matching logic (tokenize, overlap calc)
- `note-verifier.ts` - main `verifyNote()` function
- tests for both

## What's NOT touched

Nothing. This is new code only, no changes to existing files.

- no tsconfig changes
- no storage type changes
- no pipeline wiring

## safe to merge

It's completely isolated. Just a library sitting in its own folder.

## testing

```bash
npx tsx --test packages/pipeline/verification/src/__tests__/*.test.ts
```

13 tests, all pass.
36 changes: 36 additions & 0 deletions packages/pipeline/verification/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# verification

Validates clinical notes against source transcripts using token matching.

Not wired into the pipeline yet - just a standalone lib.

## quick example

```typescript
import { verifyNote } from './src/note-verifier'

const result = await verifyNote(
'Patient has headache for 3 days.',
'Patient reported headache lasting 3 days.'
)

console.log(result.status) // 'verified' | 'partial' | 'failed'
```

## how it works

1. Split note into sentences (claims)
2. Classify each (fact, inference, opinion, etc)
3. Match against transcript chunks
4. Score based on token overlap + number coverage

## exports

- `verifyNote(note, transcript, opts?)` - main api
- `tokenize`, `extractNumbers`, `calculateOverlap`, `classifyClaim` - utils

## run tests

```bash
npx tsx --test packages/pipeline/verification/src/__tests__/*.test.ts
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { describe, it } from 'node:test'
import assert from 'node:assert'
import { verifyNote } from '../note-verifier'

// Shared fixture: a short doctor/patient exchange used as the source of truth in every test below.
// It starts and ends with a newline, so the first and last split lines are blank.
const sampleTranscript = `
Doctor: Good morning, what brings you in today?
Patient: I've been having this really bad headache for the past 3 days.
Doctor: Pain severity?
Patient: About 7 or 8 out of 10.
Doctor: Blood pressure is 128/82, temperature 98.4.
`

// Note whose symptom, duration and vitals all appear in sampleTranscript.
const goodNote = `Patient presents with headache for 3 days. Pain severity 7-8/10. Vitals: BP 128/82.`
// Note whose symptom, duration and blood pressure all differ from sampleTranscript.
const badNote = `Patient presents with chest pain for 5 days. BP 180/110.`

// End-to-end checks for verifyNote() against the shared doctor/patient transcript fixture.
describe('verifyNote', () => {
  it('verifies matching note', async () => {
    const result = await verifyNote(goodNote, sampleTranscript)
    assert.ok(['verified', 'partial'].includes(result.status))
    assert.ok(result.summary.overallConfidence > 0.3)
    assert.ok(result.claims.length > 0)
  })

  it('flags mismatch', async () => {
    const result = await verifyNote(badNote, sampleTranscript)
    assert.ok(result.summary.overallConfidence < 0.3)
    // Both claims in badNote contradict the transcript, so the overall status must be a failure,
    // not merely a low confidence number.
    assert.strictEqual(result.status, 'failed')
  })

  it('handles empty note', async () => {
    const result = await verifyNote('', sampleTranscript)
    assert.strictEqual(result.claims.length, 0)
    assert.strictEqual(result.status, 'verified')
  })

  it('handles empty transcript', async () => {
    const result = await verifyNote(goodNote, '')
    assert.ok(result.summary.overallConfidence < 0.5)
  })

  it('respects factsOnly', async () => {
    // goodNote contains only fact claims, which would make this test pass even if the filter
    // were a no-op. Append an inference so there is something for factsOnly to remove.
    const mixedNote = `${goodNote} I think this might be migraine.`

    const unfiltered = await verifyNote(mixedNote, sampleTranscript)
    assert.ok(unfiltered.claims.some(claim => claim.kind !== 'fact'))

    const result = await verifyNote(mixedNote, sampleTranscript, { factsOnly: true })
    assert.ok(result.claims.length > 0)
    for (const claim of result.claims) {
      assert.strictEqual(claim.kind, 'fact')
    }
  })
})
47 changes: 47 additions & 0 deletions packages/pipeline/verification/src/__tests__/verifier.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { describe, it } from 'node:test'
import assert from 'node:assert'
import { tokenize, extractNumbers, calculateOverlap, classifyClaim } from '../verifier'

// Unit checks for tokenize(): lower-casing, stop-word removal and short-token removal.
describe('tokenize', () => {
  it('extracts tokens, filters stopwords', () => {
    const tokens = tokenize('Patient reports headache for 3 days')
    assert.ok(tokens.includes('headache'))
    assert.ok(!tokens.includes('for'))
    // Pin the full result: the domain stop words ("patient", "reports") and the
    // single-character token "3" must be dropped as well as the generic stop word "for".
    assert.deepStrictEqual(tokens, ['headache', 'days'])
  })

  it('handles empty', () => {
    assert.deepStrictEqual(tokenize(''), [])
    // Whitespace-only input normalizes to the empty string and must behave the same way.
    assert.deepStrictEqual(tokenize('   '), [])
  })
})

// Unit checks for extractNumbers(): integers, decimals and text without digits.
describe('extractNumbers', () => {
  it('extracts numbers and decimals', () => {
    const numbers = extractNumbers('BP 120/80, temp 98.6')
    assert.ok(numbers.includes('120'))
    assert.ok(numbers.includes('98.6'))
    // Pin order and completeness: both halves of "120/80" are reported, and the comma after
    // "80" is punctuation rather than a decimal separator.
    assert.deepStrictEqual(numbers, ['120', '80', '98.6'])
    // Text without digits yields an empty array rather than null.
    assert.deepStrictEqual(extractNumbers('no digits here'), [])
  })
})

// Unit checks for calculateOverlap(): the ratio is measured against the claim's tokens.
describe('calculateOverlap', () => {
  it('returns 1.0 for same text', () => {
    assert.strictEqual(calculateOverlap('severe headache', 'severe headache'), 1.0)
    // Extra evidence tokens do not dilute the score: the denominator is the claim's token count.
    assert.strictEqual(calculateOverlap('severe headache', 'severe headache nausea'), 1.0)
    // Swapping the arguments does: only two of the three claim tokens are found.
    const reversed = calculateOverlap('severe headache nausea', 'severe headache')
    assert.ok(reversed > 0.66 && reversed < 0.67)
  })

  it('returns 0 for no match', () => {
    assert.strictEqual(calculateOverlap('headache pain', 'cardiac issues'), 0)
    // Either side tokenizing to nothing short-circuits to 0 instead of dividing by zero.
    assert.strictEqual(calculateOverlap('', 'cardiac issues'), 0)
    assert.strictEqual(calculateOverlap('headache pain', ''), 0)
  })
})

// Unit checks for classifyClaim(): keyword-based sentence classification.
describe('classifyClaim', () => {
  it('identifies facts', () => {
    assert.strictEqual(classifyClaim('Patient has hypertension.'), 'fact')
    // A sentence with no trigger phrase falls through to 'fact'.
    assert.strictEqual(classifyClaim('Vitals: BP 128/82.'), 'fact')
  })

  it('identifies questions', () => {
    assert.strictEqual(classifyClaim('Does the patient smoke?'), 'question')
    // Surrounding whitespace is trimmed before the trailing "?" check.
    assert.strictEqual(classifyClaim('  Does the patient smoke?  '), 'question')
  })

  it('identifies inferences', () => {
    assert.strictEqual(classifyClaim('I think this might be migraine.'), 'inference')
    // Hedging keywords are matched anywhere in the sentence, not only at the start.
    assert.strictEqual(classifyClaim('This is likely a tension headache.'), 'inference')
  })
})
3 changes: 3 additions & 0 deletions packages/pipeline/verification/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
export type { Claim, ClaimKind, Evidence, Verdict, VerificationResult, VerificationSummary, VerificationOptions } from './types'
export { verifyNote } from './note-verifier'
export { tokenize, extractNumbers, calculateOverlap, classifyClaim } from './verifier'
73 changes: 73 additions & 0 deletions packages/pipeline/verification/src/note-verifier.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import type { Claim, Evidence, VerificationResult, VerificationSummary, VerificationOptions } from './types'
import { looksSupported, classifyClaim, determineVerdict } from './verifier'

/**
 * Splits note text into sentence-level claim strings.
 *
 * Newline runs are flattened to a single space, the text is split on whitespace that follows
 * `.`, `!` or `?`, and each piece is trimmed. Pieces of 10 characters or fewer are discarded,
 * so very short sentences (e.g. "Pain 8/10.") never become claims.
 */
function extractClaims(text: string): string[] {
  const flattened = text.replace(/\n+/g, ' ')
  const claims: string[] = []
  for (const piece of flattened.split(/(?<=[.!?])\s+/)) {
    const sentence = piece.trim()
    if (sentence.length > 10) claims.push(sentence)
  }
  return claims
}

/**
 * Breaks a transcript into one chunk per non-blank line.
 *
 * Each chunk carries the trimmed line text and a `line:<n>` reference, where `<n>` is the
 * 1-based position of that line in the original transcript. The number is taken before blank
 * lines are dropped, so a reference still points at the right line when the transcript has
 * leading or interleaved blank lines.
 *
 * @param transcript Raw transcript text, lines separated by `\n` (a trailing `\r` is trimmed away).
 * @returns Chunks in transcript order; empty when the transcript has no non-blank line.
 */
function chunkTranscript(transcript: string): { text: string; ref: string }[] {
  const chunks: { text: string; ref: string }[] = []
  transcript.split('\n').forEach((line, index) => {
    const text = line.trim()
    // Skip blank lines, but keep counting them so refs stay aligned with the source.
    if (text) chunks.push({ text, ref: `line:${index + 1}` })
  })
  return chunks
}

/**
 * Scores a claim against every transcript chunk and keeps the most relevant ones.
 *
 * Chunks scoring 0.1 or less are ignored. The returned evidence list holds at most the three
 * highest-scoring chunks, best first; `bestScore` is the highest retained score, or 0 when
 * nothing cleared the cut-off.
 *
 * NOTE(review): only the numeric score from `looksSupported` is used here. Its boolean result,
 * which is the only thing `minTokenOverlap` / `minNumberCoverage` influence, is discarded.
 */
function findEvidence(claim: string, chunks: { text: string; ref: string }[], opts: VerificationOptions): { evidence: Evidence[]; bestScore: number } {
  const candidates: Evidence[] = chunks
    .map(chunk => ({
      ref: chunk.ref,
      text: chunk.text,
      score: looksSupported(claim, chunk.text, opts.minTokenOverlap, opts.minNumberCoverage)[1]
    }))
    .filter(candidate => candidate.score > 0.1)

  const bestScore = candidates.reduce((best, candidate) => (candidate.score > best ? candidate.score : best), 0)
  const evidence = candidates.sort((left, right) => right.score - left.score).slice(0, 3)
  return { evidence, bestScore }
}

/**
 * Aggregates per-claim results into summary counters.
 *
 * `totalClaims` counts every claim; the supported/unsupported counters and the confidence
 * average look at `fact` claims only. `overallConfidence` is the mean fact confidence rounded
 * to two decimals, and is 1.0 when there are no fact claims at all.
 */
function calculateSummary(claims: Claim[]): VerificationSummary {
  let factCount = 0
  let supportedCount = 0
  let unsupportedCount = 0
  let confidenceSum = 0

  for (const claim of claims) {
    if (claim.kind !== 'fact') continue
    factCount += 1
    confidenceSum += claim.confidence
    if (claim.verdict === 'supported') supportedCount += 1
    else if (claim.verdict === 'unsupported') unsupportedCount += 1
  }

  let overallConfidence = 1.0
  if (factCount > 0) {
    overallConfidence = Math.round((confidenceSum / factCount) * 100) / 100
  }

  return {
    totalClaims: claims.length,
    supportedClaims: supportedCount,
    unsupportedClaims: unsupportedCount,
    overallConfidence
  }
}

/**
 * Checks every claim in a clinical note against the source transcript.
 *
 * The note is split into sentence-level claims, each claim is classified, scored against all
 * transcript chunks, and given a verdict. The overall status is then derived from the fact
 * claims:
 * - `failed`   when more than 30% of the decided (supported + unsupported) facts are unsupported;
 * - `partial`  when fewer than 80% of ALL fact claims are supported, or any fact is unsupported;
 * - `verified` otherwise, including when the note has no fact claims.
 *
 * The support rate is measured against all fact claims, not just the decided ones. Otherwise
 * `uncertain` facts would vanish from the denominator, and a note whose facts are all merely
 * uncertain would be reported as `verified`.
 *
 * @param noteText   The clinical note to verify.
 * @param transcript The source transcript, one utterance per line.
 * @param options    Optional thresholds and the `factsOnly` filter; see `VerificationOptions`.
 * @returns Status, summary counters, per-claim results and elapsed time in milliseconds.
 */
export async function verifyNote(noteText: string, transcript: string, options: VerificationOptions = {}): Promise<VerificationResult> {
  const startTime = performance.now()
  const { minTokenOverlap = 0.25, minNumberCoverage = 1.0, factsOnly = false } = options

  const claimTexts = extractClaims(noteText)
  const chunks = chunkTranscript(transcript)
  const claims: Claim[] = []

  claimTexts.forEach((text, index) => {
    const kind = classifyClaim(text)
    if (factsOnly && kind !== 'fact') return

    const { evidence, bestScore } = findEvidence(text, chunks, { minTokenOverlap, minNumberCoverage })
    claims.push({
      // Ids follow the sentence position in the note, so they stay stable when factsOnly skips claims.
      id: `claim_${index + 1}`,
      text,
      kind,
      verdict: determineVerdict(bestScore, kind),
      confidence: Math.round(bestScore * 100) / 100,
      evidence
    })
  })

  const summary = calculateSummary(claims)
  const factCount = claims.filter(claim => claim.kind === 'fact').length
  const decidedCount = summary.supportedClaims + summary.unsupportedClaims

  let status: 'verified' | 'partial' | 'failed' = 'verified'
  if (factCount > 0) {
    // Uncertain facts count against the support rate but not towards the failure rate.
    const supportRate = summary.supportedClaims / factCount
    const unsupportRate = decidedCount > 0 ? summary.unsupportedClaims / decidedCount : 0
    if (unsupportRate > 0.3) status = 'failed'
    else if (supportRate < 0.8 || summary.unsupportedClaims > 0) status = 'partial'
  }

  return { status, summary, claims, processingTimeMs: Math.round(performance.now() - startTime) }
}
39 changes: 39 additions & 0 deletions packages/pipeline/verification/src/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// types for note verification

/** Category assigned to a note sentence by `classifyClaim`; anything without a trigger phrase is a `fact`. */
export type ClaimKind = 'fact' | 'inference' | 'opinion' | 'instruction' | 'question'
/**
 * Outcome of checking one claim against the transcript.
 * Only `fact` claims can be `supported` or `unsupported`; every other kind is always `uncertain`.
 */
export type Verdict = 'supported' | 'uncertain' | 'unsupported'

/** One sentence extracted from the note, together with its verification outcome. */
export interface Claim {
/** Identifier of the form `claim_<n>`, where `<n>` is the sentence's 1-based position in the note. */
id: string
/** The trimmed sentence text. */
text: string
/** Category of the sentence. */
kind: ClaimKind
/** Whether the transcript supports the claim. */
verdict: Verdict
/** Best evidence score found for this claim, rounded to two decimals. */
confidence: number
/** Up to three transcript chunks, highest score first. */
evidence: Evidence[]
}

/** A transcript chunk that was matched against a claim. */
export interface Evidence {
/** Chunk reference of the form `line:<n>`, as assigned when the transcript was chunked. */
ref: string
/** The trimmed transcript line. */
text: string
/** Combined token-overlap / number-coverage score for this chunk. */
score: number
}

/** Value returned by `verifyNote`. */
export interface VerificationResult {
/** Overall outcome derived from the fact claims' verdicts. */
status: 'verified' | 'partial' | 'failed'
/** Aggregate counters over `claims`. */
summary: VerificationSummary
/** Per-claim results, in note order. */
claims: Claim[]
/** Wall-clock duration of the verification call, rounded to whole milliseconds. */
processingTimeMs: number
}

/** Aggregate counters computed over a verification run's claims. */
export interface VerificationSummary {
/** Number of claims of any kind. */
totalClaims: number
/** Number of `fact` claims with verdict `supported`. */
supportedClaims: number
/** Number of `fact` claims with verdict `unsupported`. */
unsupportedClaims: number
/** Mean confidence of the `fact` claims, rounded to two decimals; 1.0 when there are no fact claims. */
overallConfidence: number
}

/** Optional tuning knobs accepted by `verifyNote`. */
export interface VerificationOptions {
/** Minimum token-overlap ratio passed to `looksSupported`; `verifyNote` defaults it to 0.25. */
minTokenOverlap?: number
/** Minimum number-coverage ratio passed to `looksSupported`; `verifyNote` defaults it to 1.0. */
minNumberCoverage?: number
/** When true, claims that are not classified as `fact` are left out of the result; defaults to false. */
factsOnly?: boolean
}
65 changes: 65 additions & 0 deletions packages/pipeline/verification/src/verifier.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import type { ClaimKind, Verdict } from './types'

// Lower-case words that tokenize() discards because they carry no evidential weight.
const STOP_WORDS: Set<string> = new Set([
  // articles, conjunctions and prepositions
  ...['a', 'an', 'the', 'and', 'or', 'but', 'if', 'then'],
  ...['of', 'to', 'in', 'on', 'for', 'with', 'by', 'as'],
  // verbs, determiners and other function words
  ...['is', 'are', 'was', 'were', 'be', 'been', 'it', 'this', 'that'],
  ...['at', 'from', 'not', 'can', 'do', 'does'],
  // pronouns and auxiliaries
  ...['we', 'you', 'they', 'i', 'he', 'she', 'has', 'have', 'had', 'will'],
  // words that appear in almost every clinical sentence
  ...['patient', 'reports', 'denies']
])

/**
 * Turns free text into a list of comparable tokens.
 *
 * The text is lower-cased, every run of characters other than word characters and `-` becomes
 * a single space, and the result is split on whitespace. Tokens shorter than two characters
 * and stop words are dropped. Order and duplicates are preserved.
 */
export function tokenize(text: string): string[] {
  const cleaned = (text || '').toLowerCase().replace(/[^\w\-]+/g, ' ').trim()
  if (cleaned === '') return []

  const tokens: string[] = []
  for (const word of cleaned.split(/\s+/)) {
    if (word.length < 2 || STOP_WORDS.has(word)) continue
    tokens.push(word)
  }
  return tokens
}

/**
 * Pulls standalone numeric tokens out of free text, in order of appearance.
 *
 * A token is a run of digits, optionally followed by one `.` or `,` and more digits. It is
 * only reported when it is not glued to a word character on either side, so `abc123` and
 * `500mg` yield nothing.
 *
 * The trailing guard also rejects a match that is directly followed by a separator plus a
 * digit. Without it the regex backtracks on input like `7.5mg` and reports the truncated
 * value `7`, which could then falsely "match" an unrelated 7 in the transcript.
 *
 * @param text Text to scan; a falsy value is treated as the empty string.
 * @returns The matched number strings exactly as written (separators are not normalized).
 */
export function extractNumbers(text: string): string[] {
  return (text || '').match(/(?<![\w])\d+(?:[.,]\d+)?(?![\w]|[.,]\d)/g) ?? []
}

/**
 * Fraction (0-1) of the claim's distinct tokens that also occur in the evidence.
 * Returns 0 when either side has no tokens after tokenization.
 */
export function calculateOverlap(claim: string, evidence: string): number {
  const claimSet = new Set(tokenize(claim))
  const evidenceSet = new Set(tokenize(evidence))
  if (claimSet.size === 0 || evidenceSet.size === 0) return 0

  const shared = [...claimSet].filter(token => evidenceSet.has(token)).length
  return shared / claimSet.size
}

/**
 * Fraction (0-1) of the numbers in the claim that also appear in the evidence.
 *
 * Numbers are compared as strings after replacing a `,` separator with `.`. A claim without
 * numbers counts as fully covered (1.0); a claim with numbers against evidence without any
 * scores 0. Repeated numbers in the claim are counted once per occurrence.
 */
function numberCoverage(claim: string, evidence: string): number {
  const normalize = (raw: string): string => raw.replace(',', '.')

  const wanted = extractNumbers(claim).map(raw => normalize(raw))
  if (wanted.length === 0) return 1.0

  const available = new Set(extractNumbers(evidence).map(raw => normalize(raw)))
  if (available.size === 0) return 0

  const found = wanted.filter(value => available.has(value)).length
  return found / wanted.length
}

/**
 * Decides whether a piece of evidence supports a claim and how strongly.
 *
 * The boolean is true when token overlap and number coverage both reach their thresholds.
 *
 * The score blends overlap (70%) and number coverage (30%) only when the claim actually
 * contains numbers. A claim without numbers has vacuous coverage of 1.0; crediting that would
 * hand it a flat 0.3 against any evidence, even with zero shared tokens, so such a claim
 * could never be scored as unsupported. For number-free claims the score is the overlap alone.
 *
 * @param claim      Sentence from the note.
 * @param evidence   Transcript chunk to compare against.
 * @param minOverlap Minimum token-overlap ratio for the boolean result.
 * @param minNumCov  Minimum number-coverage ratio for the boolean result.
 * @returns `[supported, score]` with `score` in the range 0-1.
 */
export function looksSupported(claim: string, evidence: string, minOverlap = 0.25, minNumCov = 1.0): [boolean, number] {
  const overlap = calculateOverlap(claim, evidence)
  const numCov = numberCoverage(claim, evidence)
  const claimHasNumbers = extractNumbers(claim).length > 0
  const score = claimHasNumbers ? overlap * 0.7 + numCov * 0.3 : overlap
  return [overlap >= minOverlap && numCov >= minNumCov, score]
}

/**
 * Classifies a sentence by simple keyword rules, checked in this order:
 * trailing `?` gives question; a hedging phrase anywhere gives inference; an opinion phrase
 * anywhere gives opinion; an imperative prefix gives instruction; anything else is a fact.
 * Matching is done on the lower-cased, trimmed text.
 */
export function classifyClaim(text: string): ClaimKind {
  const normalized = text.toLowerCase().trim()
  const containsAny = (phrases: string[]): boolean => phrases.some(phrase => normalized.includes(phrase))
  const startsWithAny = (prefixes: string[]): boolean => prefixes.some(prefix => normalized.startsWith(prefix))

  // First matching rule wins, so the order of this table is significant.
  const rules: [ClaimKind, () => boolean][] = [
    ['question', () => normalized.endsWith('?')],
    ['inference', () => containsAny(['i think', 'i believe', 'probably', 'likely'])],
    ['opinion', () => containsAny(['in my opinion', 'i feel'])],
    ['instruction', () => startsWithAny(['do ', 'please ', 'recommend ', 'consider '])]
  ]

  for (const [kind, matches] of rules) {
    if (matches()) return kind
  }
  return 'fact'
}

/**
 * Maps a claim's best evidence score to a verdict.
 * Non-fact claims are always `uncertain`. For facts, a score of 0.5 or more is `supported`,
 * 0.25 or more is `uncertain`, and anything lower is `unsupported`.
 */
export function determineVerdict(score: number, kind: ClaimKind): Verdict {
  if (kind !== 'fact') return 'uncertain'
  return score >= 0.5 ? 'supported' : score >= 0.25 ? 'uncertain' : 'unsupported'
}