From b016b6090f32be5e4a385cb5ffb5c83a4af36818 Mon Sep 17 00:00:00 2001 From: Robert Carroll Date: Mon, 23 Feb 2026 12:30:02 -0600 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=9A=A7=20Create=20Study=20metadata?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pulls out certain fields into study metadata. --- .../datamodel/include_access_model.py | 146 +++++++++++------- .../include_access_model_pydantic.py | 41 +++-- .../schema/include_access_model.yaml | 37 +++-- 3 files changed, 138 insertions(+), 86 deletions(-) diff --git a/src/include_access_model/datamodel/include_access_model.py b/src/include_access_model/datamodel/include_access_model.py index 930faf0..8d8a963 100644 --- a/src/include_access_model/datamodel/include_access_model.py +++ b/src/include_access_model/datamodel/include_access_model.py @@ -1,5 +1,5 @@ # Auto generated from include_access_model.yaml by pythongen.py version: 0.0.1 -# Generation date: 2026-02-23T11:50:56 +# Generation date: 2026-02-23T12:29:43 # Schema: include-access-model # # id: https://includedcc.org/include-access-model @@ -89,6 +89,10 @@ class StudyStudyId(extended_str): pass +class StudyMetadataStudyId(StudyStudyId): + pass + + class DOIDoId(extended_str): pass @@ -144,24 +148,15 @@ class Study(Record): class_model_uri: ClassVar[URIRef] = INCLUDEDCC.Study study_id: Union[str, StudyStudyId] = None - principal_investigator: Union[Union[dict, "Investigator"], list[Union[dict, "Investigator"]]] = None - contact: Union[Union[dict, "Investigator"], list[Union[dict, "Investigator"]]] = None study_title: str = None study_code: str = None program: Union[Union[str, "EnumProgram"], list[Union[str, "EnumProgram"]]] = None + principal_investigator: Union[Union[dict, "Investigator"], list[Union[dict, "Investigator"]]] = None + contact: Union[Union[dict, "Investigator"], list[Union[dict, "Investigator"]]] = None study_description: str = None - research_domain: Union[Union[str, "EnumResearchDomain"], 
list[Union[str, "EnumResearchDomain"]]] = None - participant_lifespan_stage: Union[Union[str, "EnumParticipantLifespanStage"], list[Union[str, "EnumParticipantLifespanStage"]]] = None - study_design: Union[Union[str, "EnumStudyDesign"], list[Union[str, "EnumStudyDesign"]]] = None - clinical_data_source_type: Union[Union[str, "EnumClinicalDataSourceType"], list[Union[str, "EnumClinicalDataSourceType"]]] = None - data_category: Union[Union[str, "EnumDataCategory"], list[Union[str, "EnumDataCategory"]]] = None - expected_number_of_participants: int = None - actual_number_of_participants: int = None parent_study: Optional[Union[str, StudyStudyId]] = None - funding_source: Optional[Union[str, list[str]]] = empty_list() study_short_name: Optional[str] = None - vbr: Optional[Union[dict, "VirtualBiorepository"]] = None - selection_criteria: Optional[str] = None + funding_source: Optional[Union[str, list[str]]] = empty_list() website: Optional[Union[str, URI]] = None publication: Optional[Union[Union[dict, "Publication"], list[Union[dict, "Publication"]]]] = empty_list() acknowledgments: Optional[str] = None @@ -174,18 +169,6 @@ def __post_init__(self, *_: str, **kwargs: Any): if not isinstance(self.study_id, StudyStudyId): self.study_id = StudyStudyId(self.study_id) - if self._is_empty(self.principal_investigator): - self.MissingRequiredField("principal_investigator") - if not isinstance(self.principal_investigator, list): - self.principal_investigator = [self.principal_investigator] if self.principal_investigator is not None else [] - self.principal_investigator = [v if isinstance(v, Investigator) else Investigator(**as_dict(v)) for v in self.principal_investigator] - - if self._is_empty(self.contact): - self.MissingRequiredField("contact") - if not isinstance(self.contact, list): - self.contact = [self.contact] if self.contact is not None else [] - self.contact = [v if isinstance(v, Investigator) else Investigator(**as_dict(v)) for v in self.contact] - if 
self._is_empty(self.study_title): self.MissingRequiredField("study_title") if not isinstance(self.study_title, str): @@ -202,16 +185,80 @@ def __post_init__(self, *_: str, **kwargs: Any): self.program = [self.program] if self.program is not None else [] self.program = [v if isinstance(v, EnumProgram) else EnumProgram(v) for v in self.program] + if self._is_empty(self.principal_investigator): + self.MissingRequiredField("principal_investigator") + if not isinstance(self.principal_investigator, list): + self.principal_investigator = [self.principal_investigator] if self.principal_investigator is not None else [] + self.principal_investigator = [v if isinstance(v, Investigator) else Investigator(**as_dict(v)) for v in self.principal_investigator] + + if self._is_empty(self.contact): + self.MissingRequiredField("contact") + if not isinstance(self.contact, list): + self.contact = [self.contact] if self.contact is not None else [] + self.contact = [v if isinstance(v, Investigator) else Investigator(**as_dict(v)) for v in self.contact] + if self._is_empty(self.study_description): self.MissingRequiredField("study_description") if not isinstance(self.study_description, str): self.study_description = str(self.study_description) - if self._is_empty(self.research_domain): - self.MissingRequiredField("research_domain") - if not isinstance(self.research_domain, list): - self.research_domain = [self.research_domain] if self.research_domain is not None else [] - self.research_domain = [v if isinstance(v, EnumResearchDomain) else EnumResearchDomain(v) for v in self.research_domain] + if self.parent_study is not None and not isinstance(self.parent_study, StudyStudyId): + self.parent_study = StudyStudyId(self.parent_study) + + if self.study_short_name is not None and not isinstance(self.study_short_name, str): + self.study_short_name = str(self.study_short_name) + + if not isinstance(self.funding_source, list): + self.funding_source = [self.funding_source] if self.funding_source is 
not None else [] + self.funding_source = [v if isinstance(v, str) else str(v) for v in self.funding_source] + + if self.website is not None and not isinstance(self.website, URI): + self.website = URI(self.website) + + if not isinstance(self.publication, list): + self.publication = [self.publication] if self.publication is not None else [] + self.publication = [v if isinstance(v, Publication) else Publication(**as_dict(v)) for v in self.publication] + + if self.acknowledgments is not None and not isinstance(self.acknowledgments, str): + self.acknowledgments = str(self.acknowledgments) + + if self.citation_statement is not None and not isinstance(self.citation_statement, str): + self.citation_statement = str(self.citation_statement) + + if self.do_id is not None and not isinstance(self.do_id, DOIDoId): + self.do_id = DOIDoId(self.do_id) + + super().__post_init__(**kwargs) + + +@dataclass(repr=False) +class StudyMetadata(Record): + """ + Additional features about studies that may not apply to all studies + """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = INCLUDEDCC["StudyMetadata"] + class_class_curie: ClassVar[str] = "includedcc:StudyMetadata" + class_name: ClassVar[str] = "StudyMetadata" + class_model_uri: ClassVar[URIRef] = INCLUDEDCC.StudyMetadata + + study_id: Union[str, StudyMetadataStudyId] = None + participant_lifespan_stage: Union[Union[str, "EnumParticipantLifespanStage"], list[Union[str, "EnumParticipantLifespanStage"]]] = None + study_design: Union[Union[str, "EnumStudyDesign"], list[Union[str, "EnumStudyDesign"]]] = None + clinical_data_source_type: Union[Union[str, "EnumClinicalDataSourceType"], list[Union[str, "EnumClinicalDataSourceType"]]] = None + data_category: Union[Union[str, "EnumDataCategory"], list[Union[str, "EnumDataCategory"]]] = None + research_domain: Union[Union[str, "EnumResearchDomain"], list[Union[str, "EnumResearchDomain"]]] = None + expected_number_of_participants: int = None + 
actual_number_of_participants: int = None + selection_criteria: Optional[str] = None + vbr: Optional[Union[dict, "VirtualBiorepository"]] = None + + def __post_init__(self, *_: str, **kwargs: Any): + if self._is_empty(self.study_id): + self.MissingRequiredField("study_id") + if not isinstance(self.study_id, StudyMetadataStudyId): + self.study_id = StudyMetadataStudyId(self.study_id) if self._is_empty(self.participant_lifespan_stage): self.MissingRequiredField("participant_lifespan_stage") @@ -237,6 +284,12 @@ def __post_init__(self, *_: str, **kwargs: Any): self.data_category = [self.data_category] if self.data_category is not None else [] self.data_category = [v if isinstance(v, EnumDataCategory) else EnumDataCategory(v) for v in self.data_category] + if self._is_empty(self.research_domain): + self.MissingRequiredField("research_domain") + if not isinstance(self.research_domain, list): + self.research_domain = [self.research_domain] if self.research_domain is not None else [] + self.research_domain = [v if isinstance(v, EnumResearchDomain) else EnumResearchDomain(v) for v in self.research_domain] + if self._is_empty(self.expected_number_of_participants): self.MissingRequiredField("expected_number_of_participants") if not isinstance(self.expected_number_of_participants, int): @@ -247,37 +300,11 @@ def __post_init__(self, *_: str, **kwargs: Any): if not isinstance(self.actual_number_of_participants, int): self.actual_number_of_participants = int(self.actual_number_of_participants) - if self.parent_study is not None and not isinstance(self.parent_study, StudyStudyId): - self.parent_study = StudyStudyId(self.parent_study) - - if not isinstance(self.funding_source, list): - self.funding_source = [self.funding_source] if self.funding_source is not None else [] - self.funding_source = [v if isinstance(v, str) else str(v) for v in self.funding_source] - - if self.study_short_name is not None and not isinstance(self.study_short_name, str): - self.study_short_name = 
str(self.study_short_name) - - if self.vbr is not None and not isinstance(self.vbr, VirtualBiorepository): - self.vbr = VirtualBiorepository(**as_dict(self.vbr)) - if self.selection_criteria is not None and not isinstance(self.selection_criteria, str): self.selection_criteria = str(self.selection_criteria) - if self.website is not None and not isinstance(self.website, URI): - self.website = URI(self.website) - - if not isinstance(self.publication, list): - self.publication = [self.publication] if self.publication is not None else [] - self.publication = [v if isinstance(v, Publication) else Publication(**as_dict(v)) for v in self.publication] - - if self.acknowledgments is not None and not isinstance(self.acknowledgments, str): - self.acknowledgments = str(self.acknowledgments) - - if self.citation_statement is not None and not isinstance(self.citation_statement, str): - self.citation_statement = str(self.citation_statement) - - if self.do_id is not None and not isinstance(self.do_id, DOIDoId): - self.do_id = DOIDoId(self.do_id) + if self.vbr is not None and not isinstance(self.vbr, VirtualBiorepository): + self.vbr = VirtualBiorepository(**as_dict(self.vbr)) super().__post_init__(**kwargs) @@ -1212,6 +1239,9 @@ class slots: slots.Study_study_id = Slot(uri=INCLUDEDCC.study_id, name="Study_study_id", curie=INCLUDEDCC.curie('study_id'), model_uri=INCLUDEDCC.Study_study_id, domain=Study, range=Union[str, StudyStudyId]) +slots.StudyMetadata_study_id = Slot(uri=INCLUDEDCC.study_id, name="StudyMetadata_study_id", curie=INCLUDEDCC.curie('study_id'), + model_uri=INCLUDEDCC.StudyMetadata_study_id, domain=StudyMetadata, range=Union[str, StudyMetadataStudyId]) + slots.DOI_do_id = Slot(uri=INCLUDEDCC.do_id, name="DOI_do_id", curie=INCLUDEDCC.curie('do_id'), model_uri=INCLUDEDCC.DOI_do_id, domain=DOI, range=Union[str, DOIDoId]) diff --git a/src/include_access_model/datamodel/include_access_model_pydantic.py b/src/include_access_model/datamodel/include_access_model_pydantic.py 
index 2026dde..9e9f441 100644 --- a/src/include_access_model/datamodel/include_access_model_pydantic.py +++ b/src/include_access_model/datamodel/include_access_model_pydantic.py @@ -382,33 +382,47 @@ class Study(Record): 'required': True}}, 'title': 'Study'}) - study_id: str = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) + study_id: str = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'StudyMetadata']} }) parent_study: Optional[str] = Field(default=None, title="Parent Study", description="""The parent study for this study, if it is a nested study.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - funding_source: Optional[list[str]] = Field(default=[], title="Funding Source", description="""The funding source(s) of the study.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - principal_investigator: list[Investigator] = Field(default=..., title="Principal Investigator", description="""The Principal Investigator(s) responsible for the study.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - contact: list[Investigator] = Field(default=..., title="Contact Person", description="""The individual to contact with questions about this record.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'VirtualBiorepository']} }) study_title: str = Field(default=..., description="""Full Study Title""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) study_code: str = Field(default=..., title="Study Code", description="""Unique identifier for the study (generally a short acronym)""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) study_short_name: Optional[str] = Field(default=None, title="Study Code", description="""Short name for the study""", json_schema_extra = { 
"linkml_meta": {'domain_of': ['Study']} }) program: list[EnumProgram] = Field(default=..., title="Program", description="""Funding source(s) for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) + funding_source: Optional[list[str]] = Field(default=[], title="Funding Source", description="""The funding source(s) of the study.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) + principal_investigator: list[Investigator] = Field(default=..., title="Principal Investigator", description="""The Principal Investigator(s) responsible for the study.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) + contact: list[Investigator] = Field(default=..., title="Contact Person", description="""The individual to contact with questions about this record.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'VirtualBiorepository']} }) study_description: str = Field(default=..., title="Study Description", description="""Brief description of the study (2-4 sentences)""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - vbr: Optional[VirtualBiorepository] = Field(default=None, title="Virtual Biorepository", description="""Information about the study's Virtual Biorepository, if participating""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - research_domain: list[EnumResearchDomain] = Field(default=..., description="""Main research domain(s) of the study, other than Down syndrome""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - participant_lifespan_stage: list[EnumParticipantLifespanStage] = Field(default=..., title="Participant Lifespan Stage", description="""Focus age group(s) of the study population""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - selection_criteria: Optional[str] = Field(default=None, title="Selection Criteria", description="""Brief description of inclusion and/or exclusion criteria for the study""", 
json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - study_design: list[EnumStudyDesign] = Field(default=..., title="Study Design", description="""Overall design of study, including whether it is longitudinal and whether family members/unrelated controls are also enrolled""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - clinical_data_source_type: list[EnumClinicalDataSourceType] = Field(default=..., title="Clinical Data Source Type", description="""Source(s) of data collected from study participants""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - data_category: list[EnumDataCategory] = Field(default=..., title="Data Category", description="""General category of data in this Record (e.g. Clinical, Genomics, etc)""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) website: Optional[str] = Field(default=None, title="Website", description="""Website for the Record.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'VirtualBiorepository', 'Publication']} }) publication: Optional[list[Publication]] = Field(default=[], title="Publication", description="""Publications associated with this Record.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - expected_number_of_participants: int = Field(default=..., title="Expected Number of Participants", description="""Total expected number of participants to be recruited.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) - actual_number_of_participants: int = Field(default=..., title="Actual Number of Participants", description="""Total participants included at this time.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) acknowledgments: Optional[str] = Field(default=None, title="Acknowledgments", description="""Funding statement and acknowledgments for this study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) citation_statement: Optional[str] = Field(default=None, 
title="Citation Statement", description="""Statement that secondary data users should use to acknowledge use of this study or dataset. E.g., \"The results analyzed and here are based in whole or in part upon data generated by the INCLUDE (INvestigation of Co-occurring conditions across the Lifespan to Understand Down syndromE) Project , and were accessed from the INCLUDE Data Hub and .\"""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) do_id: Optional[str] = Field(default=None, title="DOI", description="""Digital Object Identifier (DOI) for this Record.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'DOI']} }) external_id: Optional[list[str]] = Field(default=[], title="External Identifiers", description="""Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP""", json_schema_extra = { "linkml_meta": {'domain_of': ['Record']} }) +class StudyMetadata(Record): + """ + Additional features about studies that may not apply to all studies + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://includedcc.org/include-access-model', + 'slot_usage': {'study_id': {'identifier': True, + 'name': 'study_id', + 'required': True}}, + 'title': 'Study Metadata'}) + + study_id: str = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'StudyMetadata']} }) + participant_lifespan_stage: list[EnumParticipantLifespanStage] = Field(default=..., title="Participant Lifespan Stage", description="""Focus age group(s) of the study population""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) + selection_criteria: Optional[str] = Field(default=None, title="Selection Criteria", description="""Brief description of inclusion and/or exclusion criteria for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) + study_design: list[EnumStudyDesign] = 
Field(default=..., title="Study Design", description="""Overall design of study, including whether it is longitudinal and whether family members/unrelated controls are also enrolled""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) + clinical_data_source_type: list[EnumClinicalDataSourceType] = Field(default=..., title="Clinical Data Source Type", description="""Source(s) of data collected from study participants""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) + data_category: list[EnumDataCategory] = Field(default=..., title="Data Category", description="""General category of data in this Record (e.g. Clinical, Genomics, etc)""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) + vbr: Optional[VirtualBiorepository] = Field(default=None, title="Virtual Biorepository", description="""Information about the study's Virtual Biorepository, if participating""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) + research_domain: list[EnumResearchDomain] = Field(default=..., description="""Main research domain(s) of the study, other than Down syndrome""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) + expected_number_of_participants: int = Field(default=..., title="Expected Number of Participants", description="""Total expected number of participants to be recruited.""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) + actual_number_of_participants: int = Field(default=..., title="Actual Number of Participants", description="""Total participants included at this time.""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) + external_id: Optional[list[str]] = Field(default=[], title="External Identifiers", description="""Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP""", json_schema_extra = { "linkml_meta": {'domain_of': ['Record']} }) + + class 
VirtualBiorepository(Record): """ An organization that can provide access to specimen for further analysis. @@ -549,6 +563,7 @@ class Concept(ConfiguredBaseModel): # see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model Record.model_rebuild() Study.model_rebuild() +StudyMetadata.model_rebuild() VirtualBiorepository.model_rebuild() DOI.model_rebuild() Investigator.model_rebuild() diff --git a/src/include_access_model/schema/include_access_model.yaml b/src/include_access_model/schema/include_access_model.yaml index de9b5fe..7ab6521 100644 --- a/src/include_access_model/schema/include_access_model.yaml +++ b/src/include_access_model/schema/include_access_model.yaml @@ -48,35 +48,45 @@ classes: #TODO: Split out core Study items and additional study metadata? - study_id - parent_study - - funding_source - - principal_investigator - - contact - study_title - study_code - study_short_name - program + - funding_source + - principal_investigator + - contact - study_description - - vbr - - research_domain + - website +# - dbgap : Should we call this out specifically or just use an external id? + - publication + - acknowledgments + - citation_statement + - do_id + slot_usage: + study_id: + range: string + required: true + identifier: true + StudyMetadata: + title: Study Metadata + description: Additional features about studies that may not apply to all studies + is_a: Record + slots: + - study_id - participant_lifespan_stage - selection_criteria - study_design - clinical_data_source_type - data_category - - website -# - dbgap : Should we call this out specifically or just use an external id? - - publication + - vbr + - research_domain - expected_number_of_participants - actual_number_of_participants # Do we need this info, or should it be documented elsewhere? 
# - guidType # - guidMapped - - acknowledgments - - citation_statement - - do_id slot_usage: study_id: - range: string required: true identifier: true VirtualBiorepository: @@ -165,11 +175,8 @@ classes: - age_at_event - age_at_resolution - concept -# Moved the displays into the concept -# - display - concept_source - value_concept -# - value_display - value_number - value_source - value_units From 8b7f7cafe0dee8a4462f4c4b1382e1a3d4e5e1f4 Mon Sep 17 00:00:00 2001 From: Robert Carroll Date: Mon, 23 Feb 2026 16:50:15 -0600 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=8D=B1=20Add=20Biospecimen=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This includes a baseline 3-tier biospecimen model. --- .../datamodel/include_access_model.py | 291 +++++++++++++++++- .../include_access_model_pydantic.py | 122 +++++++- .../schema/include_access_model.yaml | 174 ++++++++++- 3 files changed, 569 insertions(+), 18 deletions(-) diff --git a/src/include_access_model/datamodel/include_access_model.py b/src/include_access_model/datamodel/include_access_model.py index 8d8a963..8ff9fce 100644 --- a/src/include_access_model/datamodel/include_access_model.py +++ b/src/include_access_model/datamodel/include_access_model.py @@ -1,5 +1,5 @@ # Auto generated from include_access_model.yaml by pythongen.py version: 0.0.1 -# Generation date: 2026-02-23T12:29:43 +# Generation date: 2026-02-23T16:48:29 # Schema: include-access-model # # id: https://includedcc.org/include-access-model @@ -113,6 +113,18 @@ class ConceptConceptCurie(URIorCURIE): pass +class SampleSampleId(extended_str): + pass + + +class BiospecimenCollectionBiospecimenCollectionId(extended_str): + pass + + +class AliquotAliquotId(extended_str): + pass + + @dataclass(repr=False) class Record(YAMLRoot): """ @@ -551,8 +563,8 @@ class SubjectAssertion(Record): value_concept: Optional[Union[Union[str, ConceptConceptCurie], list[Union[str, ConceptConceptCurie]]]] = empty_list() value_number: 
Optional[float] = None value_source: Optional[str] = None - value_units: Optional[Union[str, ConceptConceptCurie]] = None - value_units_source: Optional[str] = None + value_unit: Optional[Union[str, ConceptConceptCurie]] = None + value_unit_source: Optional[str] = None def __post_init__(self, *_: str, **kwargs: Any): if self._is_empty(self.assertion_id): @@ -592,11 +604,11 @@ def __post_init__(self, *_: str, **kwargs: Any): if self.value_source is not None and not isinstance(self.value_source, str): self.value_source = str(self.value_source) - if self.value_units is not None and not isinstance(self.value_units, ConceptConceptCurie): - self.value_units = ConceptConceptCurie(self.value_units) + if self.value_unit is not None and not isinstance(self.value_unit, ConceptConceptCurie): + self.value_unit = ConceptConceptCurie(self.value_unit) - if self.value_units_source is not None and not isinstance(self.value_units_source, str): - self.value_units_source = str(self.value_units_source) + if self.value_unit_source is not None and not isinstance(self.value_unit_source, str): + self.value_unit_source = str(self.value_unit_source) super().__post_init__(**kwargs) @@ -628,6 +640,143 @@ def __post_init__(self, *_: str, **kwargs: Any): super().__post_init__(**kwargs) +@dataclass(repr=False) +class Sample(Record): + """ + A functionally equivalent specimen taken from a participant or processed from such a sample. 
+ """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = INCLUDEDCC["Sample"] + class_class_curie: ClassVar[str] = "includedcc:Sample" + class_name: ClassVar[str] = "Sample" + class_model_uri: ClassVar[URIRef] = INCLUDEDCC.Sample + + sample_id: Union[str, SampleSampleId] = None + sample_type: Union[str, URIorCURIE] = None + biospecimen_collection_id: Optional[Union[str, BiospecimenCollectionBiospecimenCollectionId]] = None + parent_sample_id: Optional[Union[str, SampleSampleId]] = None + processing: Optional[Union[Union[str, URIorCURIE], list[Union[str, URIorCURIE]]]] = empty_list() + availablity_status: Optional[Union[str, "EnumAvailabilityStatus"]] = None + storage_method: Optional[Union[Union[str, URIorCURIE], list[Union[str, URIorCURIE]]]] = empty_list() + quantity_number: Optional[float] = None + quantity_unit: Optional[Union[str, ConceptConceptCurie]] = None + + def __post_init__(self, *_: str, **kwargs: Any): + if self._is_empty(self.sample_id): + self.MissingRequiredField("sample_id") + if not isinstance(self.sample_id, SampleSampleId): + self.sample_id = SampleSampleId(self.sample_id) + + if self._is_empty(self.sample_type): + self.MissingRequiredField("sample_type") + if not isinstance(self.sample_type, URIorCURIE): + self.sample_type = URIorCURIE(self.sample_type) + + if self.biospecimen_collection_id is not None and not isinstance(self.biospecimen_collection_id, BiospecimenCollectionBiospecimenCollectionId): + self.biospecimen_collection_id = BiospecimenCollectionBiospecimenCollectionId(self.biospecimen_collection_id) + + if self.parent_sample_id is not None and not isinstance(self.parent_sample_id, SampleSampleId): + self.parent_sample_id = SampleSampleId(self.parent_sample_id) + + if not isinstance(self.processing, list): + self.processing = [self.processing] if self.processing is not None else [] + self.processing = [v if isinstance(v, URIorCURIE) else URIorCURIE(v) for v in self.processing] + + if 
self.availablity_status is not None and not isinstance(self.availablity_status, EnumAvailabilityStatus): + self.availablity_status = EnumAvailabilityStatus(self.availablity_status) + + if not isinstance(self.storage_method, list): + self.storage_method = [self.storage_method] if self.storage_method is not None else [] + self.storage_method = [v if isinstance(v, URIorCURIE) else URIorCURIE(v) for v in self.storage_method] + + if self.quantity_number is not None and not isinstance(self.quantity_number, float): + self.quantity_number = float(self.quantity_number) + + if self.quantity_unit is not None and not isinstance(self.quantity_unit, ConceptConceptCurie): + self.quantity_unit = ConceptConceptCurie(self.quantity_unit) + + super().__post_init__(**kwargs) + + +@dataclass(repr=False) +class BiospecimenCollection(Record): + """ + A biospecimen collection event which yields one or more Samples. + """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = INCLUDEDCC["BiospecimenCollection"] + class_class_curie: ClassVar[str] = "includedcc:BiospecimenCollection" + class_name: ClassVar[str] = "BiospecimenCollection" + class_model_uri: ClassVar[URIRef] = INCLUDEDCC.BiospecimenCollection + + biospecimen_collection_id: Union[str, BiospecimenCollectionBiospecimenCollectionId] = None + age_at_collection: Optional[float] = None + method: Optional[Union[str, "EnumSampleCollectionMethod"]] = None + site: Optional[Union[str, "EnumSite"]] = None + spatial_qualifier: Optional[Union[str, "EnumSpatialQualifiers"]] = None + laterality: Optional[Union[str, "EnumLaterality"]] = None + + def __post_init__(self, *_: str, **kwargs: Any): + if self._is_empty(self.biospecimen_collection_id): + self.MissingRequiredField("biospecimen_collection_id") + if not isinstance(self.biospecimen_collection_id, BiospecimenCollectionBiospecimenCollectionId): + self.biospecimen_collection_id = BiospecimenCollectionBiospecimenCollectionId(self.biospecimen_collection_id) + + if 
self.age_at_collection is not None and not isinstance(self.age_at_collection, float): + self.age_at_collection = float(self.age_at_collection) + + super().__post_init__(**kwargs) + + +@dataclass(repr=False) +class Aliquot(Record): + """ + A specific tube or amount of a biospecimen associated with a Sample. + """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = INCLUDEDCC["Aliquot"] + class_class_curie: ClassVar[str] = "includedcc:Aliquot" + class_name: ClassVar[str] = "Aliquot" + class_model_uri: ClassVar[URIRef] = INCLUDEDCC.Aliquot + + aliquot_id: Union[str, AliquotAliquotId] = None + sample_id: Optional[Union[str, SampleSampleId]] = None + availablity_status: Optional[Union[str, "EnumAvailabilityStatus"]] = None + quantity_number: Optional[float] = None + quantity_unit: Optional[Union[str, ConceptConceptCurie]] = None + concentration_number: Optional[float] = None + concentration_unit: Optional[Union[str, ConceptConceptCurie]] = None + + def __post_init__(self, *_: str, **kwargs: Any): + if self._is_empty(self.aliquot_id): + self.MissingRequiredField("aliquot_id") + if not isinstance(self.aliquot_id, AliquotAliquotId): + self.aliquot_id = AliquotAliquotId(self.aliquot_id) + + if self.sample_id is not None and not isinstance(self.sample_id, SampleSampleId): + self.sample_id = SampleSampleId(self.sample_id) + + if self.availablity_status is not None and not isinstance(self.availablity_status, EnumAvailabilityStatus): + self.availablity_status = EnumAvailabilityStatus(self.availablity_status) + + if self.quantity_number is not None and not isinstance(self.quantity_number, float): + self.quantity_number = float(self.quantity_number) + + if self.quantity_unit is not None and not isinstance(self.quantity_unit, ConceptConceptCurie): + self.quantity_unit = ConceptConceptCurie(self.quantity_unit) + + if self.concentration_number is not None and not isinstance(self.concentration_number, float): + self.concentration_number = 
float(self.concentration_number) + + if self.concentration_unit is not None and not isinstance(self.concentration_unit, ConceptConceptCurie): + self.concentration_unit = ConceptConceptCurie(self.concentration_unit) + + super().__post_init__(**kwargs) + + # Enumerations class EnumProgram(EnumDefinitionImpl): """ @@ -1067,6 +1216,63 @@ class EnumAssertionProvenance(EnumDefinitionImpl): description="Possible data sources for assertions.", ) +class EnumAvailabilityStatus(EnumDefinitionImpl): + """ + Is the Thing available for use? + """ + available = PermissibleValue( + text="available", + title="Available", + description="Biospecimen is Available", + meaning=IG2_BIOSPECIMEN_AVAILABILITY["available"]) + unavailable = PermissibleValue( + text="unavailable", + title="Unavailable", + description="Biospecimen is Unavailable", + meaning=IG2_BIOSPECIMEN_AVAILABILITY["unavailable"]) + + _defn = EnumDefinition( + name="EnumAvailabilityStatus", + description="Is the Thing available for use?", + ) + +class EnumSampleCollectionMethod(EnumDefinitionImpl): + """ + The approach used to collect the biospecimen. Recommend ontology: [LOINC](https://loinc.org). + """ + _defn = EnumDefinition( + name="EnumSampleCollectionMethod", + description="""The approach used to collect the biospecimen. Recommend ontology: [LOINC](https://loinc.org).""", + ) + +class EnumSite(EnumDefinitionImpl): + """ + The location of the specimen collection. Recommended ontology: [SNOMED Body + Site](https://hl7.org/fhir/R4B/valueset-body-site.html) + """ + _defn = EnumDefinition( + name="EnumSite", + description="""The location of the specimen collection. Recommended ontology: [SNOMED Body Site](https://hl7.org/fhir/R4B/valueset-body-site.html)""", + ) + +class EnumSpatialQualifiers(EnumDefinitionImpl): + """ + Any spatial/location qualifiers. 
+ """ + _defn = EnumDefinition( + name="EnumSpatialQualifiers", + description="""Any spatial/location qualifiers.""", + ) + +class EnumLaterality(EnumDefinitionImpl): + """ + Laterality information for the site + """ + _defn = EnumDefinition( + name="EnumLaterality", + description="Laterality information for the site", + ) + # Slots class slots: pass @@ -1230,11 +1436,62 @@ class slots: slots.value_source = Slot(uri=INCLUDEDCC.value_source, name="value_source", curie=INCLUDEDCC.curie('value_source'), model_uri=INCLUDEDCC.value_source, domain=None, range=Optional[str]) -slots.value_units = Slot(uri=INCLUDEDCC.value_units, name="value_units", curie=INCLUDEDCC.curie('value_units'), - model_uri=INCLUDEDCC.value_units, domain=None, range=Optional[Union[str, ConceptConceptCurie]]) +slots.value_unit = Slot(uri=INCLUDEDCC.value_unit, name="value_unit", curie=INCLUDEDCC.curie('value_unit'), + model_uri=INCLUDEDCC.value_unit, domain=None, range=Optional[Union[str, ConceptConceptCurie]]) + +slots.value_unit_source = Slot(uri=INCLUDEDCC.value_unit_source, name="value_unit_source", curie=INCLUDEDCC.curie('value_unit_source'), + model_uri=INCLUDEDCC.value_unit_source, domain=None, range=Optional[str]) + +slots.sample_id = Slot(uri=INCLUDEDCC.sample_id, name="sample_id", curie=INCLUDEDCC.curie('sample_id'), + model_uri=INCLUDEDCC.sample_id, domain=None, range=Optional[Union[str, SampleSampleId]]) + +slots.parent_sample_id = Slot(uri=INCLUDEDCC.parent_sample_id, name="parent_sample_id", curie=INCLUDEDCC.curie('parent_sample_id'), + model_uri=INCLUDEDCC.parent_sample_id, domain=None, range=Optional[Union[str, SampleSampleId]]) + +slots.biospecimen_collection_id = Slot(uri=INCLUDEDCC.biospecimen_collection_id, name="biospecimen_collection_id", curie=INCLUDEDCC.curie('biospecimen_collection_id'), + model_uri=INCLUDEDCC.biospecimen_collection_id, domain=None, range=Optional[Union[str, BiospecimenCollectionBiospecimenCollectionId]]) + +slots.aliquot_id = Slot(uri=INCLUDEDCC.aliquot_id, 
name="aliquot_id", curie=INCLUDEDCC.curie('aliquot_id'), + model_uri=INCLUDEDCC.aliquot_id, domain=None, range=Optional[Union[str, AliquotAliquotId]]) + +slots.sample_type = Slot(uri=INCLUDEDCC.sample_type, name="sample_type", curie=INCLUDEDCC.curie('sample_type'), + model_uri=INCLUDEDCC.sample_type, domain=None, range=Union[str, URIorCURIE]) + +slots.processing = Slot(uri=INCLUDEDCC.processing, name="processing", curie=INCLUDEDCC.curie('processing'), + model_uri=INCLUDEDCC.processing, domain=None, range=Optional[Union[Union[str, URIorCURIE], list[Union[str, URIorCURIE]]]]) + +slots.availablity_status = Slot(uri=INCLUDEDCC.availablity_status, name="availablity_status", curie=INCLUDEDCC.curie('availablity_status'), + model_uri=INCLUDEDCC.availablity_status, domain=None, range=Optional[Union[str, "EnumAvailabilityStatus"]]) -slots.value_units_source = Slot(uri=INCLUDEDCC.value_units_source, name="value_units_source", curie=INCLUDEDCC.curie('value_units_source'), - model_uri=INCLUDEDCC.value_units_source, domain=None, range=Optional[str]) +slots.storage_method = Slot(uri=INCLUDEDCC.storage_method, name="storage_method", curie=INCLUDEDCC.curie('storage_method'), + model_uri=INCLUDEDCC.storage_method, domain=None, range=Optional[Union[Union[str, URIorCURIE], list[Union[str, URIorCURIE]]]]) + +slots.quantity_number = Slot(uri=INCLUDEDCC.quantity_number, name="quantity_number", curie=INCLUDEDCC.curie('quantity_number'), + model_uri=INCLUDEDCC.quantity_number, domain=None, range=Optional[float]) + +slots.quantity_unit = Slot(uri=INCLUDEDCC.quantity_unit, name="quantity_unit", curie=INCLUDEDCC.curie('quantity_unit'), + model_uri=INCLUDEDCC.quantity_unit, domain=None, range=Optional[Union[str, ConceptConceptCurie]]) + +slots.concentration_number = Slot(uri=INCLUDEDCC.concentration_number, name="concentration_number", curie=INCLUDEDCC.curie('concentration_number'), + model_uri=INCLUDEDCC.concentration_number, domain=None, range=Optional[float]) + +slots.concentration_unit = 
Slot(uri=INCLUDEDCC.concentration_unit, name="concentration_unit", curie=INCLUDEDCC.curie('concentration_unit'), + model_uri=INCLUDEDCC.concentration_unit, domain=None, range=Optional[Union[str, ConceptConceptCurie]]) + +slots.age_at_collection = Slot(uri=INCLUDEDCC.age_at_collection, name="age_at_collection", curie=INCLUDEDCC.curie('age_at_collection'), + model_uri=INCLUDEDCC.age_at_collection, domain=None, range=Optional[float]) + +slots.method = Slot(uri=INCLUDEDCC.method, name="method", curie=INCLUDEDCC.curie('method'), + model_uri=INCLUDEDCC.method, domain=None, range=Optional[Union[str, "EnumSampleCollectionMethod"]]) + +slots.site = Slot(uri=INCLUDEDCC.site, name="site", curie=INCLUDEDCC.curie('site'), + model_uri=INCLUDEDCC.site, domain=None, range=Optional[Union[str, "EnumSite"]]) + +slots.spatial_qualifier = Slot(uri=INCLUDEDCC.spatial_qualifier, name="spatial_qualifier", curie=INCLUDEDCC.curie('spatial_qualifier'), + model_uri=INCLUDEDCC.spatial_qualifier, domain=None, range=Optional[Union[str, "EnumSpatialQualifiers"]]) + +slots.laterality = Slot(uri=INCLUDEDCC.laterality, name="laterality", curie=INCLUDEDCC.curie('laterality'), + model_uri=INCLUDEDCC.laterality, domain=None, range=Optional[Union[str, "EnumLaterality"]]) slots.Study_study_id = Slot(uri=INCLUDEDCC.study_id, name="Study_study_id", curie=INCLUDEDCC.curie('study_id'), model_uri=INCLUDEDCC.Study_study_id, domain=Study, range=Union[str, StudyStudyId]) @@ -1256,3 +1513,15 @@ class slots: slots.Concept_concept_curie = Slot(uri=INCLUDEDCC.concept_curie, name="Concept_concept_curie", curie=INCLUDEDCC.curie('concept_curie'), model_uri=INCLUDEDCC.Concept_concept_curie, domain=Concept, range=Union[str, ConceptConceptCurie]) + +slots.Sample_sample_id = Slot(uri=INCLUDEDCC.sample_id, name="Sample_sample_id", curie=INCLUDEDCC.curie('sample_id'), + model_uri=INCLUDEDCC.Sample_sample_id, domain=Sample, range=Union[str, SampleSampleId]) + +slots.Sample_biospecimen_collection_id = 
Slot(uri=INCLUDEDCC.biospecimen_collection_id, name="Sample_biospecimen_collection_id", curie=INCLUDEDCC.curie('biospecimen_collection_id'), + model_uri=INCLUDEDCC.Sample_biospecimen_collection_id, domain=Sample, range=Optional[Union[str, BiospecimenCollectionBiospecimenCollectionId]]) + +slots.BiospecimenCollection_biospecimen_collection_id = Slot(uri=INCLUDEDCC.biospecimen_collection_id, name="BiospecimenCollection_biospecimen_collection_id", curie=INCLUDEDCC.curie('biospecimen_collection_id'), + model_uri=INCLUDEDCC.BiospecimenCollection_biospecimen_collection_id, domain=BiospecimenCollection, range=Union[str, BiospecimenCollectionBiospecimenCollectionId]) + +slots.Aliquot_aliquot_id = Slot(uri=INCLUDEDCC.aliquot_id, name="Aliquot_aliquot_id", curie=INCLUDEDCC.curie('aliquot_id'), + model_uri=INCLUDEDCC.Aliquot_aliquot_id, domain=Aliquot, range=Union[str, AliquotAliquotId]) diff --git a/src/include_access_model/datamodel/include_access_model_pydantic.py b/src/include_access_model/datamodel/include_access_model_pydantic.py index 9e9f441..55f7713 100644 --- a/src/include_access_model/datamodel/include_access_model_pydantic.py +++ b/src/include_access_model/datamodel/include_access_model_pydantic.py @@ -359,6 +359,51 @@ class EnumAssertionProvenance(str, Enum): """ +class EnumAvailabilityStatus(str, Enum): + """ + Is the Thing available for use? + """ + Available = "available" + """ + Biospecimen is Available + """ + Unavailable = "unavailable" + """ + Biospecimen is Unavailable + """ + + +class EnumSampleCollectionMethod(str): + """ + The approach used to collect the biospecimen. Recommend ontology: [LOINC](https://loinc.org). + + """ + pass + + +class EnumSite(str): + """ + The location of the specimen collection. Recommended ontology: [SNOMED Body Site](https://hl7.org/fhir/R4B/valueset-body-site.html) + + """ + pass + + +class EnumSpatialQualifiers(str): + """ + Any spatial/location qualifiers. 
+ + """ + pass + + +class EnumLaterality(str): + """ + Laterality information for the site + """ + pass + + class Record(ConfiguredBaseModel): """ @@ -540,8 +585,8 @@ class SubjectAssertion(Record): value_concept: Optional[list[str]] = Field(default=[], title="Value concept", description="""The structured term defining the value of the assertion.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion']} }) value_number: Optional[float] = Field(default=None, title="Value Number", description="""The numeric value of the assertion.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion']} }) value_source: Optional[str] = Field(default=None, title="Value Source Text", description="""The source text yielding the value.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion']} }) - value_units: Optional[str] = Field(default=None, title="Value Units", description="""The structured term defining the units of the value.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion']} }) - value_units_source: Optional[str] = Field(default=None, title="Value Units Source Text", description="""The source text yielding the value's units.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion']} }) + value_unit: Optional[str] = Field(default=None, title="Value Units", description="""The structured term defining the units of the value.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion']} }) + value_unit_source: Optional[str] = Field(default=None, title="Value Units Source Text", description="""The source text yielding the value's units.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion']} }) external_id: Optional[list[str]] = Field(default=[], title="External Identifiers", description="""Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP""", json_schema_extra = { "linkml_meta": {'domain_of': ['Record']} }) 
@@ -559,6 +604,76 @@ class Concept(ConfiguredBaseModel): display: Optional[str] = Field(default=None, title="Display String", description="""The friendly display string of the coded term.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Concept']} }) +class Sample(Record): + """ + A functionally equivalent specimen taken from a participant or processed from such a sample. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://includedcc.org/include-access-model', + 'slot_usage': {'biospecimen_collection_id': {'description': 'Biospecimen ' + 'Collection ' + 'during which ' + 'this sample was ' + 'generated.', + 'name': 'biospecimen_collection_id'}, + 'sample_id': {'identifier': True, + 'name': 'sample_id', + 'range': 'string', + 'required': True}}, + 'title': 'Sample'}) + + sample_id: str = Field(default=..., title="Sample ID", description="""The unique identifier for this Sample.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) + biospecimen_collection_id: Optional[str] = Field(default=None, title="Biospecimen Collection ID", description="""Biospecimen Collection during which this sample was generated.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'BiospecimenCollection']} }) + parent_sample_id: Optional[str] = Field(default=None, title="Parent Sample ID", description="""Sample from which this sample is derived""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample']} }) + sample_type: str = Field(default=..., title="Sample Type", description="""Type of material of which this Sample is comprised""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample']} }) + processing: Optional[list[str]] = Field(default=[], title="Sample Processing", description="""Processing that was applied to the Parent Sample or from the Biospecimen Collection that yielded this distinct sample""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample']} }) + availablity_status: 
Optional[EnumAvailabilityStatus] = Field(default=None, title="Sample Availability", description="""Can this Sample be requested for further analysis?""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) + storage_method: Optional[list[str]] = Field(default=[], title="Sample Storage Method", description="""Sample storage method, eg, Frozen or with additives""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample']} }) + quantity_number: Optional[float] = Field(default=None, title="Quantity", description="""The total quantity of the specimen""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) + quantity_unit: Optional[str] = Field(default=None, title="Quantity Units", description="""The structured term defining the units of the quantity.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) + external_id: Optional[list[str]] = Field(default=[], title="External Identifiers", description="""Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP""", json_schema_extra = { "linkml_meta": {'domain_of': ['Record']} }) + + +class BiospecimenCollection(Record): + """ + A biospecimen collection event which yields one or more Samples. 
+ """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://includedcc.org/include-access-model', + 'slot_usage': {'biospecimen_collection_id': {'identifier': True, + 'name': 'biospecimen_collection_id', + 'range': 'string', + 'required': True}}, + 'title': 'BiospecimenCollection'}) + + biospecimen_collection_id: str = Field(default=..., title="Biospecimen Collection ID", description="""Unique identifier for this Biospecimen Collection.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'BiospecimenCollection']} }) + age_at_collection: Optional[float] = Field(default=None, title="Age at Biospecimen Collection", description="""The age at which this biospecimen was collected in decimal years.""", json_schema_extra = { "linkml_meta": {'domain_of': ['BiospecimenCollection'], 'unit': {'ucum_code': 'a'}} }) + method: Optional[EnumSampleCollectionMethod] = Field(default=None, title="Biospecimen Collection Method", description="""The approach used to collect the biospecimen.""", json_schema_extra = { "linkml_meta": {'domain_of': ['BiospecimenCollection']} }) + site: Optional[EnumSite] = Field(default=None, title="Biospecimen Collection Site", description="""The location of the specimen collection.""", json_schema_extra = { "linkml_meta": {'domain_of': ['BiospecimenCollection']} }) + spatial_qualifier: Optional[EnumSpatialQualifiers] = Field(default=None, title="Spatial Qualifier", description="""Qualifier that further refine the specific location of biospecimen collection""", json_schema_extra = { "linkml_meta": {'domain_of': ['BiospecimenCollection']} }) + laterality: Optional[EnumLaterality] = Field(default=None, title="Location Laterality", description="""Laterality that further refine the specific location of biospecimen collection""", json_schema_extra = { "linkml_meta": {'domain_of': ['BiospecimenCollection']} }) + external_id: Optional[list[str]] = Field(default=[], title="External Identifiers", description="""Other identifiers for 
this entity, eg, from the submitting study or in systems like dbGaP""", json_schema_extra = { "linkml_meta": {'domain_of': ['Record']} }) + + +class Aliquot(Record): + """ + A specific tube or amount of a biospecimen associated with a Sample. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://includedcc.org/include-access-model', + 'slot_usage': {'aliquot_id': {'identifier': True, + 'name': 'aliquot_id', + 'range': 'string', + 'required': True}}, + 'title': 'Aliquot'}) + + aliquot_id: str = Field(default=..., title="Aliquot ID", description="""Unique identifier for an Aliquot.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Aliquot']} }) + sample_id: Optional[str] = Field(default=None, title="Sample ID", description="""The unique identifier for this Sample.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) + availablity_status: Optional[EnumAvailabilityStatus] = Field(default=None, title="Sample Availability", description="""Can this Sample be requested for further analysis?""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) + quantity_number: Optional[float] = Field(default=None, title="Quantity", description="""The total quantity of the specimen""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) + quantity_unit: Optional[str] = Field(default=None, title="Quantity Units", description="""The structured term defining the units of the quantity.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) + concentration_number: Optional[float] = Field(default=None, title="Concentration", description="""What is the concentration of the analyte in the Aliquot?""", json_schema_extra = { "linkml_meta": {'domain_of': ['Aliquot']} }) + concentration_unit: Optional[str] = Field(default=None, title="Concentration Units", description="""Units associated with the concentration of the analyte in the Aliquot.""", json_schema_extra = 
{ "linkml_meta": {'domain_of': ['Aliquot']} }) + external_id: Optional[list[str]] = Field(default=[], title="External Identifiers", description="""Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP""", json_schema_extra = { "linkml_meta": {'domain_of': ['Record']} }) + + # Model rebuild # see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model Record.model_rebuild() @@ -572,3 +687,6 @@ class Concept(ConfiguredBaseModel): Demographics.model_rebuild() SubjectAssertion.model_rebuild() Concept.model_rebuild() +Sample.model_rebuild() +BiospecimenCollection.model_rebuild() +Aliquot.model_rebuild() diff --git a/src/include_access_model/schema/include_access_model.yaml b/src/include_access_model/schema/include_access_model.yaml index 7ab6521..6d56adf 100644 --- a/src/include_access_model/schema/include_access_model.yaml +++ b/src/include_access_model/schema/include_access_model.yaml @@ -179,8 +179,8 @@ classes: - value_concept - value_number - value_source - - value_units - - value_units_source + - value_unit + - value_unit_source slot_usage: assertion_id: range: string @@ -196,6 +196,60 @@ classes: concept_curie: required: true identifier: true + Sample: + title: Sample + description: A functionally equivalent specimen taken from a participant or processed from such a sample. + is_a: Record + slots: + - sample_id + - biospecimen_collection_id + - parent_sample_id + - sample_type + - processing + - availablity_status + - storage_method + - quantity_number + - quantity_unit + slot_usage: + sample_id: + range: string + required: true + identifier: true + biospecimen_collection_id: + description: Biospecimen Collection during which this sample was generated. + BiospecimenCollection: + title: BiospecimenCollection + description: A biospecimen collection event which yields one or more Samples. 
+ is_a: Record + slots: + - biospecimen_collection_id + - age_at_collection + - method + - site + - spatial_qualifier + - laterality + slot_usage: + biospecimen_collection_id: + range: string + required: true + identifier: true + Aliquot: + title: Aliquot + description: A specific tube or amount of a biospecimen associated with a Sample. + is_a: Record + slots: + - aliquot_id + - sample_id + - availablity_status + - quantity_number + - quantity_unit + - concentration_number + - concentration_unit + slot_usage: + aliquot_id: + range: string + required: true + identifier: true slots: study_id: @@ -476,15 +530,88 @@ slots: title: Value Source Text description: The source text yielding the value. range: string - value_units: + value_unit: title: Value Units description: The structured term defining the units of the value. range: Concept - value_units_source: + value_unit_source: title: Value Units Source Text description: The source text yielding the value's units. range: string - + sample_id: + title: Sample ID + description: The unique identifier for this Sample. + range: Sample + parent_sample_id: + title: Parent Sample ID + description: Sample from which this sample is derived + range: Sample + inlined: false + biospecimen_collection_id: + title: Biospecimen Collection ID + description: Unique identifier for this Biospecimen Collection. + range: BiospecimenCollection + aliquot_id: + title: Aliquot ID + description: Unique identifier for an Aliquot. + range: Aliquot + sample_type: + title: Sample Type + description: Type of material of which this Sample is comprised + required: true + range: uriorcurie + processing: + title: Sample Processing + description: Processing that was applied to the Parent Sample or from the Biospecimen Collection that yielded this distinct sample + range: uriorcurie + multivalued: true + availablity_status: + title: Sample Availability + description: Can this Sample be requested for further analysis? 
+ range: EnumAvailabilityStatus + storage_method: + title: Sample Storage Method + description: Sample storage method, eg, Frozen or with additives + range: uriorcurie + multivalued: true + quantity_number: + title: Quantity + description: The total quantity of the specimen + range: float + quantity_unit: + title: Quantity Units + description: The structured term defining the units of the quantity. + range: Concept + concentration_number: + title: Concentration + description: What is the concentration of the analyte in the Aliquot? + range: float + concentration_unit: + title: Concentration Units + description: Units associated with the concentration of the analyte in the Aliquot. + range: Concept + age_at_collection: + title: Age at Biospecimen Collection + description: The age at which this biospecimen was collected in decimal years. + range: float + unit: + ucum_code: a + method: + title: Biospecimen Collection Method + description: The approach used to collect the biospecimen. + range: EnumSampleCollectionMethod + site: + title: Biospecimen Collection Site + description: The location of the specimen collection. + range: EnumSite + spatial_qualifier: + title: Spatial Qualifier + description: Qualifier that further refines the specific location of biospecimen collection + range: EnumSpatialQualifiers + laterality: + title: Location Laterality + description: Laterality that further refines the specific location of biospecimen collection + range: EnumLaterality enums: EnumProgram: @@ -764,3 +891,40 @@ enums: other: title: Other description: Data obtained from other source, such as tissue bank +#TODO: Revisit these bindings / enumerations + EnumAvailabilityStatus: + description: Is the Thing available for use?
+ permissible_values: + available: + title: Available + meaning: ig2_biospecimen_availability:available + description: Biospecimen is Available + unavailable: + title: Unavailable + meaning: ig2_biospecimen_availability:unavailable + description: Biospecimen is Unavailable + EnumSampleCollectionMethod: + description: | + The approach used to collect the biospecimen. Recommended ontology: [LOINC](https://loinc.org). + EnumSite: + description: | + The location of the specimen collection. Recommended ontology: [SNOMED Body Site](https://hl7.org/fhir/R4B/valueset-body-site.html) + EnumSpatialQualifiers: + description: | + Any spatial/location qualifiers. + enum_uri: http://hl7.org/fhir/us/mcode/ValueSet/mcode-body-location-qualifier-vs + reachable_from: + source_ontology: bioregistry:snomedct + source_nodes: + - snomedct:106233006 + - snomedct:272424004 + - snomedct:51440002 + - snomedct:399488007 + - snomedct:24028007 + - snomedct:7771000 + is_direct: false + relationship_types: + - rdfs:subClassOf + EnumLaterality: + description: | + Laterality information for the site \ No newline at end of file