diff --git a/.gitignore b/.gitignore index b7a512c..c7bda76 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,6 @@ dist/ # Coverage .coverage + +# project +output.log diff --git a/README.md b/README.md index 4e08fe6..9852cca 100644 --- a/README.md +++ b/README.md @@ -134,14 +134,15 @@ scop3p --accession O95755 --no-cache # Set custom cache TTL (in seconds) scop3p --accession O95755 --cache-ttl 600 -# Include structures and peptides in stdout JSON (no saved files) -scop3p --accession O95755 --include-structures --include-peptides +# Include structures, peptides, and mutations in stdout JSON (no saved files) +scop3p --accession O95755 --include-structures --include-peptides --include-mutations # Save multiple outputs in one invocation (TARGET:FORMAT:PATH) scop3p --accession O95755 \ --save modifications:tsv:modifications.tsv \ --save structures:tsv:structures.tsv \ - --save peptides:json:peptides.json + --save peptides:json:peptides.json \ + --save mutations:tsv:mutations.tsv # Save one output file (same --save syntax) scop3p --accession O95755 --save modifications:json:results.json @@ -156,8 +157,9 @@ PYTHONPATH=./src python -m scop3p_api_client phospho --accession O95755 **Important Notes:** - **Structures TSV Format**: The structures data is nested in the JSON response (each structure contains a `structureModificationsList`). When exporting to TSV, the data is automatically flattened - one row per modification with structure-level fields (pdbId, resolution, etc.) repeated for each modification. -- **Automatic endpoint selection**: requesting `structures` or `peptides` via `--save` automatically fetches those datasets. -- **Stdout enrichment**: use `--include-structures` and/or `--include-peptides` to include them in stdout JSON when not saving files. +- **Mutations columns and sort**: mutations tabular output uses `position`, `pdbIds`, `referenceAA`, `altAA`, `type`, `disease` and sorts rows by `position`, then `referenceAA`, then `altAA`, then `type`. 
+- **Automatic endpoint selection**: requesting `structures`, `peptides`, or `mutations` via `--save` automatically fetches those datasets. +- **Stdout enrichment**: use `--include-structures`, `--include-peptides`, and/or `--include-mutations` to include them in stdout JSON when not saving files. - **Dataset JSON saves**: `--save TARGET:json:PATH` writes the normalized dataset payload for that target only (not the full `apiResult + metadata` envelope). **CLI Arguments:** @@ -166,8 +168,9 @@ PYTHONPATH=./src python -m scop3p_api_client phospho --accession O95755 - `--api-version` / `-v`: Optional API version query parameter - `--include-structures`: Include structures in stdout JSON output - `--include-peptides`: Include peptides in stdout JSON output +- `--include-mutations`: Include mutations in stdout JSON output - `--save`: Repeatable output specification `TARGET:FORMAT:PATH` - - `TARGET`: `modifications`, `structures`, `peptides` + - `TARGET`: `modifications`, `structures`, `peptides`, `mutations` - `FORMAT`: `json`, `tsv` - `--raw`: Output compact JSON (used for stdout and `json` saves) - `--indent`: JSON indentation size (default: 2, ignored when `--raw` is set) @@ -254,11 +257,12 @@ result = Scop3pResult.from_api( accession="O95755" ) -# Include structures and peptides +# Include structures, peptides, and mutations result = Scop3pResult.from_api( accession="O95755", include_structures=True, - include_peptides=True + include_peptides=True, + include_mutations=True ) # With API version and custom cache TTL @@ -278,6 +282,7 @@ result = Scop3pResult.from_api( print(result.modifications) print(result.structures) # None if not requested print(result.peptides) # None if not requested +print(result.mutations) # None if not requested print(result.metadata) # Convert to dictionary @@ -301,6 +306,7 @@ from scop3p_api_client.result import Scop3pResult from scop3p_api_client.output import ( Scop3pResultJSONOutput, Scop3pResultModificationsTabularOutput, + 
Scop3pResultMutationsTabularOutput, Scop3pResultStructuresTabularOutput, Scop3pResultPeptidesTabularOutput, ) @@ -309,7 +315,8 @@ from scop3p_api_client.output import ( result = Scop3pResult.from_api( accession="O95755", include_structures=True, - include_peptides=True + include_peptides=True, + include_mutations=True ) # JSON output @@ -341,6 +348,14 @@ pep_formatter = Scop3pResultPeptidesTabularOutput( include_header=False ) pep_formatter.print_to_console() + +# Mutations as TSV +mut_formatter = Scop3pResultMutationsTabularOutput( + result, + separator="\t", + include_header=True +) +mut_formatter.write_to_file("mutations.tsv") ``` **Low-level API usage:** @@ -351,6 +366,7 @@ The procedural helpers now wrap an underlying `Scop3pRestApi` instance. You can from scop3p_api_client.api import ( Scop3pRestApi, fetch_modifications, + fetch_mutations, fetch_structures, fetch_peptides, ) @@ -358,9 +374,11 @@ from scop3p_api_client.api import ( api = Scop3pRestApi() data = api.fetch_modifications("O95755") peptides, peptides_meta = api.fetch_peptides("O95755", return_metadata=True) +mutations, mutations_meta = api.fetch_mutations("O95755", return_metadata=True) # Or keep using the functional wrappers structures = fetch_structures("O95755") +mutations = fetch_mutations("O95755") ``` --- diff --git a/docs/examples.md b/docs/examples.md index 10aecab..1fcbc04 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -75,6 +75,14 @@ ABCDEFG 1 7 None 5 None D None None None None HIJKLMN 10 16 None 12 None L None None None None ``` +## Mutations TSV Example + +```tsv +position pdbIds referenceAA altAA type disease +326 [] R A Disease Other disease +326 ['1ABC', '2XYZ'] R H Disease Mental retardation +``` + ## FAIR Provenance Log Example Example from `output.log`: @@ -110,7 +118,8 @@ Example from `output.log`: "api_endpoints": [ "https://iomics.ugent.be/scop3p/api/modifications", "https://iomics.ugent.be/scop3p/api/get-structures-modifications", - 
"https://iomics.ugent.be/scop3p/api/get-peptides-modifications" + "https://iomics.ugent.be/scop3p/api/get-peptides-modifications", + "https://iomics.ugent.be/scop3p/api/get-mutations" ] }, "interoperable": { @@ -127,8 +136,9 @@ Example from `output.log`: ## Reproduce Locally ```bash -scop3p --accession O95755 --include-structures --include-peptides +scop3p --accession O95755 --include-structures --include-peptides --include-mutations scop3p --accession O95755 --save modifications:tsv:modifications.tsv scop3p --accession O95755 --save structures:tsv:structures.tsv scop3p --accession O95755 --save peptides:tsv:peptides.tsv +scop3p --accession O95755 --save mutations:tsv:mutations.tsv ``` diff --git a/docs/getting-started.md b/docs/getting-started.md index f9a9c2b..3220a3b 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -39,10 +39,10 @@ Use a specific API version for modifications: scop3p --accession O95755 --api-version 1 ``` -Include structures and peptides in standard output JSON: +Include structures, peptides, and mutations in standard output JSON: ```bash -scop3p --accession O95755 --include-structures --include-peptides +scop3p --accession O95755 --include-structures --include-peptides --include-mutations ``` Save additional files in one run: @@ -51,7 +51,8 @@ Save additional files in one run: scop3p --accession O95755 \ --save modifications:tsv:modifications.tsv \ --save structures:tsv:structures.tsv \ - --save peptides:json:peptides.json + --save peptides:json:peptides.json \ + --save mutations:tsv:mutations.tsv ``` ## Cache Controls @@ -87,8 +88,9 @@ scop3p --accession O95755 --log-file run-fair.log --raw ## What `--save` Supports -- Targets: `modifications`, `structures`, `peptides` +- Targets: `modifications`, `structures`, `peptides`, `mutations` - Formats: `json`, `tsv` +- Mutations TSV columns: `position`, `pdbIds`, `referenceAA`, `altAA`, `type`, `disease` Examples: diff --git a/docs/index.md b/docs/index.md index b1810bb..ac1734f 
100644 --- a/docs/index.md +++ b/docs/index.md @@ -37,3 +37,4 @@ This documentation covers: - `https://iomics.ugent.be/scop3p/api/modifications` - `https://iomics.ugent.be/scop3p/api/get-structures-modifications` - `https://iomics.ugent.be/scop3p/api/get-peptides-modifications` +- `https://iomics.ugent.be/scop3p/api/get-mutations` diff --git a/docs/methodology.md b/docs/methodology.md index ad026e5..ee85a82 100644 --- a/docs/methodology.md +++ b/docs/methodology.md @@ -27,11 +27,12 @@ flowchart LR - Modifications: `https://iomics.ugent.be/scop3p/api/modifications` - Structures: `https://iomics.ugent.be/scop3p/api/get-structures-modifications` - Peptides: `https://iomics.ugent.be/scop3p/api/get-peptides-modifications` +- Mutations: `https://iomics.ugent.be/scop3p/api/get-mutations` URL behavior: - Modifications URL is built as `?accession=` and optionally `&version=`. -- Structures and peptides use `?accession=`. +- Structures, peptides, and mutations use `?accession=`. ## `requests` Behavior @@ -47,7 +48,7 @@ Cache key material: - `accession` - `api_version` (modifications only) -- dataset suffix (`modifications`, `structures`, `peptides`) +- dataset suffix (`modifications`, `structures`, `peptides`, `mutations`) Cache file name: @@ -91,6 +92,7 @@ Before serialization, payloads are normalized for stable order: - predictable column/key ordering for known datasets - deterministic row sorting by dataset primary keys +- mutations sorted by `position`, then `referenceAA`, then `altAA`, then `type` Structures TSV behavior: diff --git a/docs/python-api.md b/docs/python-api.md index ae991c7..6db1d6a 100644 --- a/docs/python-api.md +++ b/docs/python-api.md @@ -19,6 +19,7 @@ result = Scop3pResult.from_api( ttl=600, # cache TTL in seconds include_structures=True, # optional include_peptides=True, # optional + include_mutations=True, # optional ) ``` @@ -36,7 +37,8 @@ Envelope shape: "apiResult": { "modifications": {}, "structures": [], - "peptides": [] + "peptides": [], + 
"mutations": [] }, "metadata": {} } @@ -66,6 +68,11 @@ peptides, peptides_meta = api.fetch_peptides( accession="O95755", return_metadata=True, ) + +mutations, mutations_meta = api.fetch_mutations( + accession="O95755", + return_metadata=True, +) ``` `return_metadata=True` includes source and cache metadata for each fetch. @@ -77,6 +84,7 @@ The module also exports backward-compatible wrappers: - `fetch_modifications(...)` - `fetch_structures(...)` - `fetch_peptides(...)` +- `fetch_mutations(...)` These forward arguments to a shared default `Scop3pRestApi` instance. diff --git a/examples/output_formats_demo.py b/examples/output_formats_demo.py index 6307f6c..c7b4d1e 100644 --- a/examples/output_formats_demo.py +++ b/examples/output_formats_demo.py @@ -5,6 +5,7 @@ from scop3p_api_client.output import ( Scop3pResultJSONOutput, Scop3pResultModificationsTabularOutput, + Scop3pResultMutationsTabularOutput, Scop3pResultStructuresTabularOutput, Scop3pResultPeptidesTabularOutput, ) @@ -19,6 +20,7 @@ def main(): accession="O95755", include_structures=True, include_peptides=True, + include_mutations=True, ttl=300 ) @@ -67,6 +69,19 @@ def main(): pep_lines = pep_output.split("\n")[:10] print("\n".join(pep_lines)) + if result.mutations: + print("\n" + "="*80) + print("Mutations TSV (first 10 lines):") + print("="*80) + mut_formatter = Scop3pResultMutationsTabularOutput( + result, + separator="\t", + include_header=True + ) + mut_output = mut_formatter.format() + mut_lines = mut_output.split("\n")[:10] + print("\n".join(mut_lines)) + print("\n" + "="*80) print("Example: Saving to files") print("="*80) @@ -76,6 +91,7 @@ def main(): # mods_formatter.write_to_file("modifications.tsv") # struct_formatter.write_to_file("structures.tsv") # pep_formatter.write_to_file("peptides.tsv") + # mut_formatter.write_to_file("mutations.tsv") print("To save outputs, uncomment the write_to_file() calls in the script.") print("\nDone!") diff --git a/pyproject.toml b/pyproject.toml index 
08aa0f6..06c7770 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "scop3p" -version = "1.0.1" +version = "1.1.0" description = "The official Scop3P REST API Python client" readme = "README.md" requires-python = ">=3.6,<4" diff --git a/src/scop3p_api_client/api.py b/src/scop3p_api_client/api.py index 300c794..cc20c56 100644 --- a/src/scop3p_api_client/api.py +++ b/src/scop3p_api_client/api.py @@ -11,6 +11,7 @@ BASE_URL = "https://iomics.ugent.be/scop3p/api/modifications" STRUCTURES_URL = "https://iomics.ugent.be/scop3p/api/get-structures-modifications" PEPTIDES_URL = "https://iomics.ugent.be/scop3p/api/get-peptides-modifications" +MUTATIONS_URL = "https://iomics.ugent.be/scop3p/api/get-mutations" # default cache time-to-live (seconds) DEFAULT_CACHE_TTL = 300 @@ -352,6 +353,97 @@ def get_cache_stats(): pass raise + def fetch_mutations( + self, + accession: str, + session: Optional[requests.Session] = None, + timeout: Optional[int] = None, + cache_dir: Optional[str | Path] = None, + ttl: Optional[int] = None, + return_metadata: bool = False, + ) -> Any: + """Fetch mutations from Scop3P API with caching.""" + if not accession: + raise ValueError("accession must be provided") + + cache_dir = self._resolve_cache_dir(cache_dir) + session = self._resolve_session(session) + timeout = self._resolve_timeout(timeout) + ttl = self._resolve_ttl(ttl) + + cache_file = _cache_path_for(accession, None, cache_dir, suffix="mutations") + cache_path = str(cache_file) + + def get_cache_stats(): + if not cache_file.exists(): + return {} + stat = cache_file.stat() + mtime = stat.st_mtime + ctime = getattr(stat, "st_birthtime", stat.st_ctime) + size = stat.st_size + return { + "size_bytes": size, + "size_kilobytes": size / 1024, + "size_megabytes": size / (1024 * 1024), + "modified_at_utc": datetime.datetime.fromtimestamp( + mtime, datetime.timezone.utc + ).isoformat(), + "modified_at_localtime": 
datetime.datetime.fromtimestamp(mtime) + .astimezone() + .isoformat(), + "created_at_utc": datetime.datetime.fromtimestamp( + ctime, datetime.timezone.utc + ).isoformat(), + "created_at_localtime": datetime.datetime.fromtimestamp(ctime) + .astimezone() + .isoformat(), + } + + if cache_file.exists(): + mtime = cache_file.stat().st_mtime + if time.time() - mtime <= ttl: + try: + data = json.loads(cache_file.read_text(encoding="utf-8")) + if return_metadata: + meta = {"source": "cache", "cache_file": cache_path} + meta.update(get_cache_stats()) + return data, meta + return data + except Exception: + pass + + url = f"{MUTATIONS_URL}?accession={accession}" + + try: + resp = session.get(url, timeout=timeout) + resp.raise_for_status() + data = resp.json() + + try: + tmp = cache_file.with_suffix(".tmp") + tmp.write_text(json.dumps(data), encoding="utf-8") + tmp.replace(cache_file) + except Exception: + pass + + if return_metadata: + meta = {"source": "api", "cache_file": cache_path} + meta.update(get_cache_stats()) + return data, meta + return data + except Exception: + if cache_file.exists(): + try: + data = json.loads(cache_file.read_text(encoding="utf-8")) + if return_metadata: + meta = {"source": "cache_fallback", "cache_file": cache_path} + meta.update(get_cache_stats()) + return data, meta + return data + except Exception: + pass + raise + _DEFAULT_SCOP3P_API = Scop3pRestApi() @@ -413,3 +505,22 @@ def fetch_peptides( ttl=ttl, return_metadata=return_metadata, ) + + +def fetch_mutations( + accession: str, + session: Optional[requests.Session] = None, + timeout: int = 10, + cache_dir: Optional[str | Path] = None, + ttl: int = DEFAULT_CACHE_TTL, + return_metadata: bool = False, +) -> Any: + """Backward-compatible wrapper over Scop3pRestApi.fetch_mutations.""" + return _DEFAULT_SCOP3P_API.fetch_mutations( + accession=accession, + session=session, + timeout=timeout, + cache_dir=cache_dir, + ttl=ttl, + return_metadata=return_metadata, + ) diff --git 
a/src/scop3p_api_client/cli.py b/src/scop3p_api_client/cli.py index 57ad090..201a101 100644 --- a/src/scop3p_api_client/cli.py +++ b/src/scop3p_api_client/cli.py @@ -13,6 +13,7 @@ Scop3pResultFairLogOutput, Scop3pResultJSONOutput, Scop3pResultModificationsTabularOutput, + Scop3pResultMutationsTabularOutput, Scop3pResultStructuresTabularOutput, Scop3pResultPeptidesTabularOutput, ) @@ -30,9 +31,9 @@ def _parse_save_spec(spec: str) -> tuple[str, str, pathlib.Path]: output_format = output_format.strip().lower() path = output_path.strip() - if target not in {"modifications", "structures", "peptides"}: + if target not in {"modifications", "structures", "peptides", "mutations"}: raise argparse.ArgumentTypeError( - f"Invalid --save target '{target}'. Choose: modifications, structures, peptides" + f"Invalid --save target '{target}'. Choose: modifications, structures, peptides, mutations" ) if output_format not in {"json", "tsv"}: raise argparse.ArgumentTypeError( @@ -76,6 +77,13 @@ def _build_formatter( include_header=include_header, null_value=null_value, ) + if output_format == "tsv-mutations": + return Scop3pResultMutationsTabularOutput( + result, + separator=separator, + include_header=include_header, + null_value=null_value, + ) raise ValueError(f"Unknown format '{output_format}'") @@ -92,6 +100,7 @@ def _save_tsv_target( "modifications": "tsv-modifications", "structures": "tsv-structures", "peptides": "tsv-peptides", + "mutations": "tsv-mutations", }[target] formatter = _build_formatter( result=result, @@ -113,6 +122,7 @@ def _format_dataset_json( "modifications": result.modifications, "structures": result.structures, "peptides": result.peptides, + "mutations": result.mutations, }[target], ) if indent is None: @@ -213,7 +223,7 @@ def _resolve_version() -> str: metavar="TARGET:FORMAT:PATH", help=( "Save additional outputs in a single run. 
" - "TARGET: modifications|structures|peptides, FORMAT: json|tsv" + "TARGET: modifications|structures|peptides|mutations, FORMAT: json|tsv" ), ) parser.add_argument( @@ -226,6 +236,11 @@ def _resolve_version() -> str: action="store_true", help="Include peptides in stdout JSON output (also implied by --save peptides:...)", ) + parser.add_argument( + "--include-mutations", + action="store_true", + help="Include mutations in stdout JSON output (also implied by --save mutations:...)", + ) args = parser.parse_args(argv) @@ -235,6 +250,7 @@ def _resolve_version() -> str: include_structures = args.include_structures or ("structures" in save_targets) include_peptides = args.include_peptides or ("peptides" in save_targets) + include_mutations = args.include_mutations or ("mutations" in save_targets) # determine ttl: if no-cache requested, set ttl=0 to bypass reading cache if args.no_cache: @@ -257,6 +273,7 @@ def _resolve_version() -> str: ttl=ttl, include_structures=include_structures, include_peptides=include_peptides, + include_mutations=include_mutations, cli_args=cli_args_payload, ) run_log_messages.append("API fetch completed.") diff --git a/src/scop3p_api_client/output.py b/src/scop3p_api_client/output.py index c36989e..ebc9d8f 100644 --- a/src/scop3p_api_client/output.py +++ b/src/scop3p_api_client/output.py @@ -4,10 +4,12 @@ import datetime import json -from .api import BASE_URL, PEPTIDES_URL, STRUCTURES_URL +from .api import BASE_URL, MUTATIONS_URL, PEPTIDES_URL, STRUCTURES_URL from .sorting import ( MODIFICATIONS_COLUMNS, MODIFICATIONS_PRIMARY_KEY, + MUTATIONS_COLUMNS, + MUTATIONS_PRIMARY_KEY, PEPTIDES_COLUMNS, PEPTIDES_PRIMARY_KEY, STRUCTURES_COLUMNS, @@ -120,6 +122,8 @@ def _api_endpoints(self) -> List[str]: endpoints.append(STRUCTURES_URL) if self.result.peptides is not None: endpoints.append(PEPTIDES_URL) + if self.result.mutations is not None: + endpoints.append(MUTATIONS_URL) return endpoints def format(self) -> str: @@ -403,3 +407,33 @@ def get_data(self) 
-> List[Dict[str, Any]]: def get_sort_columns(self) -> List[str]: return list(PEPTIDES_PRIMARY_KEY) + + +class Scop3pResultMutationsTabularOutput(Scop3pResultTabularOutput): + """Tabular output for mutations data.""" + + def get_columns(self) -> List[str]: + """Get mutation columns. + + Returns: + List of column names for mutations + """ + return list(MUTATIONS_COLUMNS) + + def get_data(self) -> List[Dict[str, Any]]: + """Get mutations data. + + Returns: + List of mutation records + """ + if not self.result.mutations: + return [] + + if isinstance(self.result.mutations, list): + return self.result.mutations + if isinstance(self.result.mutations, dict): + return self.result.mutations.get("mutations", []) + return [] + + def get_sort_columns(self) -> List[str]: + return list(MUTATIONS_PRIMARY_KEY) diff --git a/src/scop3p_api_client/result.py b/src/scop3p_api_client/result.py index 1d3aa46..53e27ba 100644 --- a/src/scop3p_api_client/result.py +++ b/src/scop3p_api_client/result.py @@ -20,12 +20,14 @@ class Scop3pResult: modifications: Modifications data from the API structures: Optional structures data from the API peptides: Optional peptides data from the API + mutations: Optional mutations data from the API metadata: Execution metadata including caching info """ modifications: Any structures: Optional[Any] = None peptides: Optional[Any] = None + mutations: Optional[Any] = None metadata: Dict[str, Any] = field(default_factory=dict) @classmethod @@ -36,6 +38,7 @@ def from_api( ttl: int = DEFAULT_CACHE_TTL, include_structures: bool = False, include_peptides: bool = False, + include_mutations: bool = False, cli_args: Optional[Dict[str, Any]] = None, ) -> Scop3pResult: """Fetch data from Scop3P API and construct a Scop3pResult. 
@@ -46,6 +49,7 @@ def from_api( ttl: Cache time-to-live in seconds include_structures: Whether to fetch structures data include_peptides: Whether to fetch peptides data + include_mutations: Whether to fetch mutations data cli_args: Optional CLI arguments to include in metadata Returns: @@ -85,6 +89,18 @@ def from_api( peptides_data = peptides_response.get("peptides") cache_info["peptides"] = peptides_cache_info + # Fetch mutations if requested + mutations_data = None + if include_mutations: + mutations_response, mutations_cache_info = api_wrapper.fetch_mutations( + accession, ttl=ttl, return_metadata=True + ) + if isinstance(mutations_response, dict): + mutations_data = mutations_response.get("mutations", mutations_response) + else: + mutations_data = mutations_response + cache_info["mutations"] = mutations_cache_info + # Build metadata metadata = { "execution_datetime": datetime.datetime.now( @@ -108,6 +124,7 @@ def from_api( modifications=modifications_data, structures=structures_data, peptides=peptides_data, + mutations=mutations_data, metadata=metadata, ) @@ -133,6 +150,11 @@ def to_dict(self) -> Dict[str, Any]: "peptides", self.peptides ) + if self.mutations is not None: + api_result["mutations"] = normalize_dataset_payload( + "mutations", self.mutations + ) + return { "apiResult": api_result, "metadata": self.metadata, diff --git a/src/scop3p_api_client/sorting.py b/src/scop3p_api_client/sorting.py index 4336f19..31d8e48 100644 --- a/src/scop3p_api_client/sorting.py +++ b/src/scop3p_api_client/sorting.py @@ -67,6 +67,15 @@ def to_sort_key(value: Any) -> tuple[Any, ...]: "conservedScale", ) +MUTATIONS_COLUMNS: tuple[str, ...] = ( + "position", + "pdbIds", + "referenceAA", + "altAA", + "type", + "disease", +) + MODIFICATIONS_PRIMARY_KEY: tuple[str, ...] = ("position", "residue") PEPTIDES_PRIMARY_KEY: tuple[str, ...] = ( "peptideSequence", @@ -77,6 +86,7 @@ def to_sort_key(value: Any) -> tuple[Any, ...]: "score", ) STRUCTURES_PRIMARY_KEY: tuple[str, ...] 
= ("pdbId",) +MUTATIONS_PRIMARY_KEY: tuple[str, ...] = ("position", "referenceAA", "altAA", "type") def reorder_dict_keys( @@ -180,4 +190,23 @@ def normalize_dataset_payload(target: str, payload: Any) -> Any: return normalized_payload return payload + if target == "mutations": + if isinstance(payload, list): + return normalize_rows( + payload, + ordered_columns=MUTATIONS_COLUMNS, + primary_key=MUTATIONS_PRIMARY_KEY, + ) + if isinstance(payload, dict): + normalized_payload = dict(payload) + rows = normalized_payload.get("mutations") + if isinstance(rows, list): + normalized_payload["mutations"] = normalize_rows( + rows, + ordered_columns=MUTATIONS_COLUMNS, + primary_key=MUTATIONS_PRIMARY_KEY, + ) + return normalized_payload + return payload + return payload diff --git a/tests/test_api.py b/tests/test_api.py index 16e36df..1087724 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -20,6 +20,7 @@ _cache_path_for, build_url, fetch_modifications, + fetch_mutations, fetch_peptides, fetch_structures, ) @@ -257,6 +258,27 @@ def test_fetch_peptides_cache_fallback_returns_metadata(self) -> None: self.assertEqual(data, cached_data) self.assertEqual(meta["source"], "cache_fallback") + def test_fetch_mutations_returns_metadata_on_api_response(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + temp_cache_dir = Path(tmp) + payload = [{"position": 326, "referenceAA": "R", "altAA": "H", "type": "Disease"}] + session = MockSession( + { + "https://iomics.ugent.be/scop3p/api/get-mutations?accession=P12345": (200, payload), + } + ) + api = Scop3pRestApi() + data, meta = api.fetch_mutations( + "P12345", + session=session, + cache_dir=temp_cache_dir, + ttl=0, + return_metadata=True, + ) + self.assertEqual(data, payload) + self.assertEqual(meta["source"], "api") + self.assertIn("cache_file", meta) + def test_fetch_structures_wrapper_forwards_arguments(self) -> None: sentinel = {"structures": []} with patch( @@ -303,6 +325,29 @@ def 
test_fetch_peptides_wrapper_forwards_arguments(self) -> None: return_metadata=True, ) + def test_fetch_mutations_wrapper_forwards_arguments(self) -> None: + sentinel = [{"position": 1}] + with patch( + "scop3p_api_client.api._DEFAULT_SCOP3P_API.fetch_mutations", + return_value=sentinel, + ) as mocked: + result = fetch_mutations( + "P12345", + timeout=11, + cache_dir="/tmp/z", + ttl=45, + return_metadata=True, + ) + self.assertIs(result, sentinel) + mocked.assert_called_once_with( + accession="P12345", + session=None, + timeout=11, + cache_dir="/tmp/z", + ttl=45, + return_metadata=True, + ) + def test_cache_dir_falls_back_to_tempdir_when_mkdir_fails(self) -> None: api = Scop3pRestApi() with patch("pathlib.Path.mkdir", side_effect=OSError("nope")): diff --git a/tests/test_cli.py b/tests/test_cli.py index e029332..1e4562a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -75,6 +75,7 @@ def fake_from_api( ttl, include_structures, include_peptides, + include_mutations, cli_args, ): calls.append( @@ -84,6 +85,7 @@ def fake_from_api( "ttl": ttl, "include_structures": include_structures, "include_peptides": include_peptides, + "include_mutations": include_mutations, } ) return _mock_result(cli_args) @@ -106,6 +108,7 @@ def fake_from_api( ttl, include_structures, include_peptides, + include_mutations, cli_args, ): calls.append(ttl) @@ -126,6 +129,7 @@ def fake_from_api( ttl, include_structures, include_peptides, + include_mutations, cli_args, ): return _mock_result(cli_args) @@ -161,6 +165,7 @@ def fake_from_api( ttl, include_structures, include_peptides, + include_mutations, cli_args, ): raise RuntimeError("network down") @@ -179,6 +184,7 @@ def fake_from_api( ttl, include_structures, include_peptides, + include_mutations, cli_args, ): return _mock_result(cli_args) @@ -204,30 +210,41 @@ def fake_from_api( ttl, include_structures, include_peptides, + include_mutations, cli_args, ): calls.append( { "include_structures": include_structures, "include_peptides": 
include_peptides, + "include_mutations": include_mutations, } ) result = _mock_result(cli_args) result.structures = [{"pdbId": "1ABC", "structureModificationsList": []}] result.peptides = [{"peptideSequence": "ABCDE", "uniprotPosition": 10}] + result.mutations = [{"position": 326, "referenceAA": "R", "altAA": "H", "type": "Disease"}] return result with patch("scop3p_api_client.cli.Scop3pResult.from_api", new=classmethod(fake_from_api)): code, stdout, _ = self._run_main( - ["--accession", "O00571", "--include-structures", "--include-peptides"] + [ + "--accession", + "O00571", + "--include-structures", + "--include-peptides", + "--include-mutations", + ] ) self.assertIsNone(code) self.assertTrue(calls[0]["include_structures"]) self.assertTrue(calls[0]["include_peptides"]) + self.assertTrue(calls[0]["include_mutations"]) payload = json.loads(stdout) self.assertIn("structures", payload["apiResult"]) self.assertIn("peptides", payload["apiResult"]) + self.assertIn("mutations", payload["apiResult"]) def test_cli_save_multi_output_auto_enables_related_fetches(self) -> None: calls: list[dict] = [] @@ -240,12 +257,14 @@ def fake_from_api( ttl, include_structures, include_peptides, + include_mutations, cli_args, ): calls.append( { "include_structures": include_structures, "include_peptides": include_peptides, + "include_mutations": include_mutations, } ) result = _mock_result(cli_args) @@ -269,11 +288,26 @@ def fake_from_api( "uniprotPosition": 99, } ] + result.mutations = [ + { + "position": 326, + "referenceAA": "R", + "altAA": "H", + "type": "Disease", + }, + { + "position": 326, + "referenceAA": "R", + "altAA": "A", + "type": "Disease", + }, + ] return result mods_file = self.tmp_path / "mods.tsv" structures_file = self.tmp_path / "structures.tsv" peptides_file = self.tmp_path / "peptides.json" + mutations_file = self.tmp_path / "mutations.tsv" os.chdir(self.tmp_path) with patch("scop3p_api_client.cli.Scop3pResult.from_api", new=classmethod(fake_from_api)): code, stdout, 
_ = self._run_main( @@ -286,17 +320,22 @@ def fake_from_api( f"structures:tsv:{structures_file}", "--save", f"peptides:json:{peptides_file}", + "--save", + f"mutations:tsv:{mutations_file}", ] ) self.assertIsNone(code) self.assertTrue(calls[0]["include_structures"]) self.assertTrue(calls[0]["include_peptides"]) + self.assertTrue(calls[0]["include_mutations"]) self.assertIn("Saved structures (tsv)", stdout) self.assertIn("Saved peptides (json)", stdout) + self.assertIn("Saved mutations (tsv)", stdout) self.assertTrue(mods_file.exists()) self.assertTrue(structures_file.exists()) self.assertTrue(peptides_file.exists()) + self.assertTrue(mutations_file.exists()) self.assertTrue(structures_file.read_text(encoding="utf-8").startswith("pdbId")) peptides_payload = json.loads(peptides_file.read_text(encoding="utf-8")) self.assertEqual( @@ -307,6 +346,9 @@ def fake_from_api( list(peptides_payload[0].keys()), ["peptideSequence", "peptideStart", "peptideEnd", "uniprotPosition"], ) + mutation_lines = mutations_file.read_text(encoding="utf-8").splitlines() + self.assertEqual(mutation_lines[0], "position\tpdbIds\treferenceAA\taltAA\ttype\tdisease") + self.assertTrue(mutation_lines[1].startswith("326\t")) def test_cli_save_invalid_spec_fails(self) -> None: code, _, stderr = self._run_main( @@ -360,6 +402,7 @@ def fake_from_api( ttl, include_structures, include_peptides, + include_mutations, cli_args, ): return _mock_result(cli_args) diff --git a/tests/test_output.py b/tests/test_output.py index c3cb710..e21ab9b 100644 --- a/tests/test_output.py +++ b/tests/test_output.py @@ -14,6 +14,7 @@ Scop3pResultFairLogOutput, Scop3pResultJSONOutput, Scop3pResultModificationsTabularOutput, + Scop3pResultMutationsTabularOutput, Scop3pResultPeptidesTabularOutput, Scop3pResultStructuresTabularOutput, ) @@ -182,11 +183,42 @@ def test_structures_and_peptides_tabular_output_accept_dict_wrappers(self) -> No self.assertIn("1ABC", structures_output) self.assertIn("ABCDE", peptides_output) + def 
test_mutations_tabular_output(self) -> None: + result = Scop3pResult( + modifications={}, + mutations=[ + { + "position": 326, + "pdbIds": ["1ABC", "2XYZ"], + "referenceAA": "R", + "altAA": "H", + "type": "Disease", + "disease": "Mental retardation", + }, + { + "position": 326, + "pdbIds": [], + "referenceAA": "R", + "altAA": "A", + "type": "Disease", + "disease": "Other disease", + }, + ], + metadata={}, + ) + output = Scop3pResultMutationsTabularOutput(result, separator="\t", include_header=True).format() + lines = output.split("\n") + self.assertEqual(len(lines), 3) + self.assertIn("referenceAA", lines[0]) + self.assertEqual(lines[1].split("\t")[3], "A") + self.assertEqual(lines[2].split("\t")[3], "H") + def test_empty_data(self) -> None: result = Scop3pResult(modifications={}, metadata={}) self.assertEqual(Scop3pResultModificationsTabularOutput(result).format(), "") self.assertEqual(Scop3pResultStructuresTabularOutput(result).format(), "") self.assertEqual(Scop3pResultPeptidesTabularOutput(result).format(), "") + self.assertEqual(Scop3pResultMutationsTabularOutput(result).format(), "") def test_fair_log_formatter_contains_fair_sections(self) -> None: result = Scop3pResult( @@ -220,6 +252,7 @@ def test_fair_log_formatter_includes_optional_endpoints(self) -> None: modifications={"modifications": []}, structures=[{"pdbId": "1ABC", "structureModificationsList": []}], peptides=[{"peptideSequence": "ABC"}], + mutations=[{"position": 1, "referenceAA": "A", "altAA": "S", "type": "Disease"}], metadata={ "execution_datetime": "2026-03-02T00:00:00+00:00", "cli_arguments": {"accession": "O00571", "modifications": "phospho"}, @@ -227,9 +260,10 @@ def test_fair_log_formatter_includes_optional_endpoints(self) -> None: ) payload = json.loads(Scop3pResultFairLogOutput(result).format()) endpoints = payload["fair"]["accessible"]["api_endpoints"] - self.assertEqual(len(endpoints), 3) + self.assertEqual(len(endpoints), 4) self.assertTrue(any("get-structures-modifications" in 
endpoint for endpoint in endpoints)) self.assertTrue(any("get-peptides-modifications" in endpoint for endpoint in endpoints)) + self.assertTrue(any("get-mutations" in endpoint for endpoint in endpoints)) if __name__ == "__main__": diff --git a/tests/test_result.py b/tests/test_result.py index 8e46f37..d1c362f 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -43,7 +43,7 @@ def fake_fetch_modifications(self, accession, api_version, ttl, return_metadata) self.assertEqual(result.metadata["cli_arguments"]["output"], str(out_path)) self.assertIn("execution_datetime", result.metadata) - def test_from_api_includes_structures_and_peptides(self) -> None: + def test_from_api_includes_structures_peptides_and_mutations(self) -> None: def fake_fetch_modifications(self, accession, api_version, ttl, return_metadata): return {"modifications": []}, {"source": "cache"} @@ -53,6 +53,9 @@ def fake_fetch_structures(self, accession, ttl, return_metadata): def fake_fetch_peptides(self, accession, ttl, return_metadata): return {"peptides": [{"peptideSequence": "ABC"}]}, {"source": "api"} + def fake_fetch_mutations(self, accession, ttl, return_metadata): + return [{"position": 326, "referenceAA": "R", "altAA": "H", "type": "Disease"}], {"source": "api"} + with patch( "scop3p_api_client.result.Scop3pRestApi.fetch_modifications", new=fake_fetch_modifications, @@ -62,17 +65,23 @@ def fake_fetch_peptides(self, accession, ttl, return_metadata): ), patch( "scop3p_api_client.result.Scop3pRestApi.fetch_peptides", new=fake_fetch_peptides, + ), patch( + "scop3p_api_client.result.Scop3pRestApi.fetch_mutations", + new=fake_fetch_mutations, ): result = Scop3pResult.from_api( accession="O00571", include_structures=True, include_peptides=True, + include_mutations=True, ) self.assertEqual(result.structures[0]["pdbId"], "1ABC") self.assertEqual(result.peptides[0]["peptideSequence"], "ABC") + self.assertEqual(result.mutations[0]["position"], 326) self.assertIn("structures", 
result.metadata["caching"]) self.assertIn("peptides", result.metadata["caching"]) + self.assertIn("mutations", result.metadata["caching"]) def test_to_dict_and_dump_json_include_optional_sections(self) -> None: result = Scop3pResult( @@ -128,12 +137,31 @@ def test_to_dict_and_dump_json_include_optional_sections(self) -> None: "uniprotPosition": 10, }, ], + mutations=[ + { + "type": "Disease", + "position": 326, + "altAA": "H", + "referenceAA": "R", + "pdbIds": ["1ABC", "2XYZ"], + "disease": "Mental retardation", + }, + { + "position": 326, + "referenceAA": "R", + "altAA": "A", + "type": "Disease", + "pdbIds": [], + "disease": "Other disease", + }, + ], metadata={"a": "b"}, ) as_dict = result.to_dict() self.assertIn("modifications", as_dict["apiResult"]) self.assertIn("structures", as_dict["apiResult"]) self.assertIn("peptides", as_dict["apiResult"]) + self.assertIn("mutations", as_dict["apiResult"]) compact = result.dump_json(indent=None) pretty = result.dump_json(indent=2) @@ -173,6 +201,14 @@ def test_to_dict_and_dump_json_include_optional_sections(self) -> None: list(parsed["apiResult"]["structures"][0].keys()), ["pdbId", "resolution", "stoichiometry", "interfacingMolecule", "method"], ) + self.assertEqual( + [item["altAA"] for item in parsed["apiResult"]["mutations"]], + ["A", "H"], + ) + self.assertEqual( + list(parsed["apiResult"]["mutations"][0].keys()), + ["position", "pdbIds", "referenceAA", "altAA", "type", "disease"], + ) if __name__ == "__main__": diff --git a/tests/test_sorting.py b/tests/test_sorting.py index bc473f0..1f0d59a 100644 --- a/tests/test_sorting.py +++ b/tests/test_sorting.py @@ -36,6 +36,17 @@ def test_normalize_dataset_payload_handles_known_and_unknown_targets(self) -> No normalized = normalize_dataset_payload("modifications", mods) self.assertEqual([row["position"] for row in normalized], [10, 20]) + mutations = [ + {"position": 326, "referenceAA": "R", "altAA": "H", "type": "Disease"}, + {"type": "Disease", "position": 326, "altAA": 
"A", "referenceAA": "R"}, + ] + normalized_mutations = normalize_dataset_payload("mutations", mutations) + self.assertEqual([row["altAA"] for row in normalized_mutations], ["A", "H"]) + self.assertEqual( + list(normalized_mutations[0].keys()), + ["position", "referenceAA", "altAA", "type"], + ) + payload = {"anything": "kept"} self.assertIs(normalize_dataset_payload("unknown-target", payload), payload) self.assertEqual(normalize_dataset_payload("peptides", "raw"), "raw")