diff --git a/PR_DESCRIPTION.md b/PR_DESCRIPTION.md new file mode 100644 index 0000000..d5af99e --- /dev/null +++ b/PR_DESCRIPTION.md @@ -0,0 +1,32 @@ +# feat(pipeline): add verification module + +Adds a standalone verification library at `packages/pipeline/verification/`. + +It checks clinical notes against the source transcript using token matching, to see whether the claims in the note are actually supported by what was said. + +## what's in here + +- `types.ts` - types for claims, verdicts, etc. +- `verifier.ts` - core matching logic (tokenize, overlap calc) +- `note-verifier.ts` - main `verifyNote()` function +- tests for both + +## what's NOT touched + +Nothing. This is new code only, no changes to existing files. + +- no tsconfig changes +- no storage type changes +- no pipeline wiring + +## safe to merge + +It's completely isolated. Just a library sitting in its own folder. + +## testing + +```bash +npx tsx --test packages/pipeline/verification/src/__tests__/*.test.ts +``` + +13 tests, all pass. diff --git a/packages/pipeline/verification/README.md b/packages/pipeline/verification/README.md new file mode 100644 index 0000000..90a7bfd --- /dev/null +++ b/packages/pipeline/verification/README.md @@ -0,0 +1,36 @@ +# verification + +Validates clinical notes against source transcripts using token matching. + +Not wired into the pipeline yet - just a standalone lib. + +## quick example + +```typescript +import { verifyNote } from './src/note-verifier' + +const result = await verifyNote( + 'Patient has headache for 3 days.', + 'Patient reported headache lasting 3 days.' +) + +console.log(result.status) // 'verified' | 'partial' | 'failed' +``` + +## how it works + +1. Split note into sentences (claims) +2. Classify each (fact, inference, opinion, etc.) +3. Match against transcript chunks +4.
Score based on token overlap + number coverage + +## exports + +- `verifyNote(note, transcript, opts?)` - main api +- `tokenize`, `extractNumbers`, `calculateOverlap`, `classifyClaim` - utils + +## run tests + +```bash +npx tsx --test packages/pipeline/verification/src/__tests__/*.test.ts +``` diff --git a/packages/pipeline/verification/src/__tests__/note-verifier.test.ts b/packages/pipeline/verification/src/__tests__/note-verifier.test.ts new file mode 100644 index 0000000..213fa30 --- /dev/null +++ b/packages/pipeline/verification/src/__tests__/note-verifier.test.ts @@ -0,0 +1,46 @@ +import { describe, it } from 'node:test' +import assert from 'node:assert' +import { verifyNote } from '../note-verifier' + +const sampleTranscript = ` +Doctor: Good morning, what brings you in today? +Patient: I've been having this really bad headache for the past 3 days. +Doctor: Pain severity? +Patient: About 7 or 8 out of 10. +Doctor: Blood pressure is 128/82, temperature 98.4. +` + +const goodNote = `Patient presents with headache for 3 days. Pain severity 7-8/10. Vitals: BP 128/82.` +const badNote = `Patient presents with chest pain for 5 days. 
BP 180/110.` + +describe('verifyNote', () => { + it('verifies matching note', async () => { + const result = await verifyNote(goodNote, sampleTranscript) + assert.ok(['verified', 'partial'].includes(result.status)) + assert.ok(result.summary.overallConfidence > 0.3) + assert.ok(result.claims.length > 0) + }) + + it('flags mismatch', async () => { + const result = await verifyNote(badNote, sampleTranscript) + assert.ok(result.summary.overallConfidence < 0.3) + }) + + it('handles empty note', async () => { + const result = await verifyNote('', sampleTranscript) + assert.strictEqual(result.claims.length, 0) + assert.strictEqual(result.status, 'verified') + }) + + it('handles empty transcript', async () => { + const result = await verifyNote(goodNote, '') + assert.ok(result.summary.overallConfidence < 0.5) + }) + + it('respects factsOnly', async () => { + const result = await verifyNote(goodNote, sampleTranscript, { factsOnly: true }) + for (const claim of result.claims) { + assert.strictEqual(claim.kind, 'fact') + } + }) +}) diff --git a/packages/pipeline/verification/src/__tests__/verifier.test.ts b/packages/pipeline/verification/src/__tests__/verifier.test.ts new file mode 100644 index 0000000..b16d2e8 --- /dev/null +++ b/packages/pipeline/verification/src/__tests__/verifier.test.ts @@ -0,0 +1,47 @@ +import { describe, it } from 'node:test' +import assert from 'node:assert' +import { tokenize, extractNumbers, calculateOverlap, classifyClaim } from '../verifier' + +describe('tokenize', () => { + it('extracts tokens, filters stopwords', () => { + const tokens = tokenize('Patient reports headache for 3 days') + assert.ok(tokens.includes('headache')) + assert.ok(!tokens.includes('for')) + }) + + it('handles empty', () => { + assert.deepStrictEqual(tokenize(''), []) + }) +}) + +describe('extractNumbers', () => { + it('extracts numbers and decimals', () => { + const numbers = extractNumbers('BP 120/80, temp 98.6') + assert.ok(numbers.includes('120')) + 
assert.ok(numbers.includes('98.6')) + }) +}) + +describe('calculateOverlap', () => { + it('returns 1.0 for same text', () => { + assert.strictEqual(calculateOverlap('severe headache', 'severe headache'), 1.0) + }) + + it('returns 0 for no match', () => { + assert.strictEqual(calculateOverlap('headache pain', 'cardiac issues'), 0) + }) +}) + +describe('classifyClaim', () => { + it('identifies facts', () => { + assert.strictEqual(classifyClaim('Patient has hypertension.'), 'fact') + }) + + it('identifies questions', () => { + assert.strictEqual(classifyClaim('Does the patient smoke?'), 'question') + }) + + it('identifies inferences', () => { + assert.strictEqual(classifyClaim('I think this might be migraine.'), 'inference') + }) +}) diff --git a/packages/pipeline/verification/src/index.ts b/packages/pipeline/verification/src/index.ts new file mode 100644 index 0000000..e3e878f --- /dev/null +++ b/packages/pipeline/verification/src/index.ts @@ -0,0 +1,3 @@ +export type { Claim, ClaimKind, Evidence, Verdict, VerificationResult, VerificationSummary, VerificationOptions } from './types' +export { verifyNote } from './note-verifier' +export { tokenize, extractNumbers, calculateOverlap, classifyClaim } from './verifier' diff --git a/packages/pipeline/verification/src/note-verifier.ts b/packages/pipeline/verification/src/note-verifier.ts new file mode 100644 index 0000000..bd3ddfd --- /dev/null +++ b/packages/pipeline/verification/src/note-verifier.ts @@ -0,0 +1,73 @@ +import type { Claim, Evidence, VerificationResult, VerificationSummary, VerificationOptions } from './types' +import { looksSupported, classifyClaim, determineVerdict } from './verifier' + +function extractClaims(text: string): string[] { + return text.replace(/\n+/g, ' ').split(/(?<=[.!?])\s+/).map(s => s.trim()).filter(s => s.length > 10) +} + +function chunkTranscript(transcript: string): { text: string; ref: string }[] { + return transcript.split('\n').filter(l => l.trim()).map((text, i) => ({ text: 
text.trim(), ref: `line:${i + 1}` }))
+}
+
+/**
+ * Scores one claim against every transcript chunk and keeps the strongest matches.
+ *
+ * Only chunks scoring above 0.1 are treated as evidence.
+ *
+ * NOTE(review): only the score returned by `looksSupported` is used here; its
+ * boolean result is discarded, so `opts.minTokenOverlap` and
+ * `opts.minNumberCoverage` do not influence what this function returns.
+ *
+ * @param claim - Claim text taken from the note.
+ * @param chunks - Transcript chunks, each with its text and a `line:N` reference.
+ * @param opts - Thresholds forwarded to `looksSupported`.
+ * @returns Up to three evidence entries, highest score first, together with
+ *   the best score seen (0 when no chunk scores above 0.1).
+ */
+function findEvidence(claim: string, chunks: { text: string; ref: string }[], opts: VerificationOptions): { evidence: Evidence[]; bestScore: number } {
+  const scored: Evidence[] = chunks
+    .map(({ ref, text }) => {
+      const [, score] = looksSupported(claim, text, opts.minTokenOverlap, opts.minNumberCoverage)
+      return { ref, text, score }
+    })
+    .filter(candidate => candidate.score > 0.1)
+
+  const bestScore = scored.reduce((best, candidate) => (candidate.score > best ? candidate.score : best), 0)
+  const evidence = scored.sort((a, b) => b.score - a.score).slice(0, 3)
+  return { evidence, bestScore }
+}
+
+function calculateSummary(claims: Claim[]): VerificationSummary {
+  const facts = claims.filter(c => c.kind === 'fact')
+  const supported = facts.filter(c => c.verdict === 'supported').length
+  const unsupported = facts.filter(c => c.verdict === 'unsupported').length
+  const totalConf = facts.reduce((sum, c) => sum + c.confidence, 0)
+  return {
+    totalClaims: claims.length,
+    supportedClaims: supported,
+    unsupportedClaims: unsupported,
+    overallConfidence: facts.length > 0 ?
Math.round((totalConf / facts.length) * 100) / 100 : 1.0
+  }
+}
+
+/**
+ * Verifies a clinical note against its source transcript.
+ *
+ * The note is split into claims and the transcript into non-empty lines. Each
+ * claim is classified, scored against every transcript line and given a
+ * verdict; the per-claim results are then rolled up into a summary and an
+ * overall status.
+ *
+ * Status rules, computed over facts whose verdict is 'supported' or
+ * 'unsupported':
+ * - 'failed' when more than 30% of them are unsupported,
+ * - 'partial' when any of them is unsupported,
+ * - 'verified' otherwise, including when there are no such facts.
+ *
+ * @param noteText - Note text to verify.
+ * @param transcript - Source transcript the note is checked against.
+ * @param options - Optional settings. Defaults: `minTokenOverlap` 0.25,
+ *   `minNumberCoverage` 1.0, `factsOnly` false.
+ * @returns The status, summary, per-claim details and the elapsed time in
+ *   whole milliseconds.
+ */
+export async function verifyNote(noteText: string, transcript: string, options: VerificationOptions = {}): Promise<VerificationResult> {
+  const startTime = performance.now()
+  const { minTokenOverlap = 0.25, minNumberCoverage = 1.0, factsOnly = false } = options
+
+  const claimTexts = extractClaims(noteText)
+  const chunks = chunkTranscript(transcript)
+  const claims: Claim[] = []
+
+  // entries() yields each claim text together with its position, so no
+  // unchecked index lookup is needed.
+  for (const [index, text] of claimTexts.entries()) {
+    const kind = classifyClaim(text)
+    if (factsOnly && kind !== 'fact') continue
+
+    const { evidence, bestScore } = findEvidence(text, chunks, { minTokenOverlap, minNumberCoverage })
+    claims.push({
+      // Ids follow the claim's position in the note, so numbering keeps gaps
+      // when factsOnly skips a claim.
+      id: `claim_${index + 1}`,
+      text,
+      kind,
+      verdict: determineVerdict(bestScore, kind),
+      confidence: Math.round(bestScore * 100) / 100,
+      evidence
+    })
+  }
+
+  const summary = calculateSummary(claims)
+  // Facts with an 'uncertain' verdict are in neither count, so they do not
+  // affect the status.
+  // NOTE(review): a note whose facts are all 'uncertain' is therefore reported
+  // as 'verified', and `supportRate < 0.8` can only hold when
+  // unsupportedClaims > 0 - confirm this is the intended policy.
+  const factTotal = summary.supportedClaims + summary.unsupportedClaims
+  let status: 'verified' | 'partial' | 'failed' = 'verified'
+  if (factTotal > 0) {
+    const supportRate = summary.supportedClaims / factTotal
+    const unsupportRate = summary.unsupportedClaims / factTotal
+    if (unsupportRate > 0.3) status = 'failed'
+    else if (supportRate < 0.8 || summary.unsupportedClaims > 0) status = 'partial'
+  }
+
+  return { status, summary, claims, processingTimeMs: Math.round(performance.now() - startTime) }
+}
diff --git a/packages/pipeline/verification/src/types.ts b/packages/pipeline/verification/src/types.ts
new file mode 100644
index 0000000..4beddd6
--- /dev/null
+++ b/packages/pipeline/verification/src/types.ts
@@ -0,0 +1,39 @@
+// types for note verification
+
+export type ClaimKind = 'fact' | 'inference' | 'opinion' | 'instruction' | 'question'
+export type Verdict = 'supported' | 'uncertain' | 'unsupported'
+
+export interface Claim {
+  id: string
+  text: string
+  kind: ClaimKind
+  verdict: Verdict
+  confidence: number
+  evidence: Evidence[]
+}
+
+export interface
Evidence {
+  ref: string
+  text: string
+  score: number
+}
+
+/** Outcome of verifying one note; this is the shape `verifyNote` returns. */
+export interface VerificationResult {
+  status: 'verified' | 'partial' | 'failed'
+  summary: VerificationSummary
+  claims: Claim[]
+  processingTimeMs: number
+}
+
+/**
+ * Roll-up over the verified claims.
+ *
+ * `totalClaims` counts every claim. The supported/unsupported counts and
+ * `overallConfidence` are computed from claims of kind 'fact' only;
+ * `overallConfidence` is their mean confidence rounded to two decimals, or 1.0
+ * when there are no facts.
+ */
+export interface VerificationSummary {
+  totalClaims: number
+  supportedClaims: number
+  unsupportedClaims: number
+  overallConfidence: number
+}
+
+/**
+ * Optional settings for `verifyNote`.
+ *
+ * `minTokenOverlap` (default 0.25) and `minNumberCoverage` (default 1.0) are
+ * passed on as the thresholds of `looksSupported`. When `factsOnly` is true
+ * (default false), claims not classified as 'fact' are left out of the result.
+ */
+export interface VerificationOptions {
+  minTokenOverlap?: number
+  minNumberCoverage?: number
+  factsOnly?: boolean
+}
diff --git a/packages/pipeline/verification/src/verifier.ts b/packages/pipeline/verification/src/verifier.ts
new file mode 100644
index 0000000..5b9c1a4
--- /dev/null
+++ b/packages/pipeline/verification/src/verifier.ts
@@ -0,0 +1,65 @@
+import type { ClaimKind, Verdict } from './types'
+
+/**
+ * Words dropped during tokenization: common English function words plus the
+ * terms 'patient', 'reports' and 'denies'.
+ */
+const STOP_WORDS = new Set([
+  'a', 'an', 'the', 'and', 'or', 'but', 'if', 'then', 'of', 'to', 'in', 'on', 'for', 'with', 'by', 'as',
+  'is', 'are', 'was', 'were', 'be', 'been', 'it', 'this', 'that', 'at', 'from', 'not', 'can', 'do', 'does',
+  'we', 'you', 'they', 'i', 'he', 'she', 'has', 'have', 'had', 'will', 'patient', 'reports', 'denies'
+])
+
+/**
+ * Breaks text into lower-case tokens for overlap matching.
+ *
+ * Every run of characters other than word characters and hyphens becomes a
+ * single space. Tokens shorter than two characters and stop words are dropped.
+ *
+ * @param text - Text to tokenize; an empty or missing value yields `[]`.
+ * @returns The remaining tokens in their original order.
+ */
+export function tokenize(text: string): string[] {
+  const cleaned = (text || '').toLowerCase().replace(/[^\w\-]+/g, ' ').trim()
+  if (cleaned.length === 0) return []
+
+  const isKept = (word: string): boolean => word.length >= 2 && !STOP_WORDS.has(word)
+  return cleaned.split(/\s+/).filter(isKept)
+}
+
+export function extractNumbers(text: string): string[] {
+  return (text || '').match(/(?
n.replace(',', '.'))
+  if (claimNums.length === 0) return 1.0
+
+  const evidenceNums = new Set(extractNumbers(evidence).map(n => n.replace(',', '.')))
+  if (evidenceNums.size === 0) return 0
+
+  let hits = 0
+  for (const n of claimNums) if (evidenceNums.has(n)) hits++
+  return hits / claimNums.length
+}
+
+/**
+ * Judges how well a piece of evidence backs a claim.
+ *
+ * @param claim - Claim text taken from the note.
+ * @param evidence - Candidate supporting text from the transcript.
+ * @param minOverlap - Minimum token overlap required for support (default 0.25).
+ * @param minNumCov - Minimum number coverage required for support (default 1.0).
+ * @returns A pair `[supported, score]`. `supported` is true when both
+ *   thresholds are met. `score` is 70% token overlap plus 30% number coverage
+ *   when the claim contains numbers, and the token overlap alone otherwise.
+ */
+export function looksSupported(claim: string, evidence: string, minOverlap = 0.25, minNumCov = 1.0): [boolean, number] {
+  const overlap = calculateOverlap(claim, evidence)
+  const numCov = numberCoverage(claim, evidence)
+
+  // numberCoverage() reports 1.0 for a claim without numbers. Blending that in
+  // would give every such claim a score of at least 0.3 against any text, even
+  // with no shared tokens, so the number term only counts when the claim
+  // actually has numbers to cover.
+  const claimHasNumbers = extractNumbers(claim).length > 0
+  const score = claimHasNumbers ? overlap * 0.7 + numCov * 0.3 : overlap
+
+  return [overlap >= minOverlap && numCov >= minNumCov, score]
+}
+
+/**
+ * Classifies a claim by simple phrase matching on its lower-cased, trimmed text.
+ *
+ * Checks run in this order and the first match wins: a trailing '?' gives
+ * 'question'; hedging phrases give 'inference'; opinion phrases give
+ * 'opinion'; a leading directive word gives 'instruction'; anything else is
+ * a 'fact'.
+ *
+ * @param text - Claim text.
+ * @returns The kind assigned to the claim.
+ */
+export function classifyClaim(text: string): ClaimKind {
+  const lower = text.toLowerCase().trim()
+  const mentions = (phrases: string[]): boolean => phrases.some(p => lower.includes(p))
+
+  if (lower.endsWith('?')) return 'question'
+  if (mentions(['i think', 'i believe', 'probably', 'likely'])) return 'inference'
+  if (mentions(['in my opinion', 'i feel'])) return 'opinion'
+  if (['do ', 'please ', 'recommend ', 'consider '].some(p => lower.startsWith(p))) return 'instruction'
+  return 'fact'
+}
+
+/**
+ * Maps a match score to a verdict.
+ *
+ * Claims that are not facts are always 'uncertain'. For facts, a score of at
+ * least 0.5 is 'supported', at least 0.25 is 'uncertain', and anything lower
+ * is 'unsupported'.
+ *
+ * @param score - Best match score found for the claim.
+ * @param kind - Kind assigned to the claim.
+ * @returns The verdict for the claim.
+ */
+export function determineVerdict(score: number, kind: ClaimKind): Verdict {
+  if (kind !== 'fact') return 'uncertain'
+  if (score >= 0.5) return 'supported'
+  if (score >= 0.25) return 'uncertain'
+  return 'unsupported'
+}