From c77de4716ea6085707b7d94a7596c19fc30ec780 Mon Sep 17 00:00:00 2001 From: VsevolodX Date: Fri, 27 Feb 2026 13:35:08 -0800 Subject: [PATCH 1/3] update: add utils from code --- src/py/mat3ra/__init__.py | 1 + src/py/mat3ra/utils/__init__.py | 3 +++ src/py/mat3ra/utils/object.py | 23 +++++++++++++++++++++++ src/py/mat3ra/utils/string.py | 10 ++++++++++ 4 files changed, 37 insertions(+) diff --git a/src/py/mat3ra/__init__.py b/src/py/mat3ra/__init__.py index e69de29..8db66d3 100644 --- a/src/py/mat3ra/__init__.py +++ b/src/py/mat3ra/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/src/py/mat3ra/utils/__init__.py b/src/py/mat3ra/utils/__init__.py index e6a4962..3446458 100644 --- a/src/py/mat3ra/utils/__init__.py +++ b/src/py/mat3ra/utils/__init__.py @@ -1,2 +1,5 @@ from .search import find_by_key_or_regex from .string import camel_to_snake, snake_to_camel + +from .object import calculate_hash_from_object, remove_timestampable_keys, sort_keys_deep +from .string import remove_comments_from_source_code, remove_empty_lines_from_string diff --git a/src/py/mat3ra/utils/object.py b/src/py/mat3ra/utils/object.py index 0e5e011..7c71ef6 100644 --- a/src/py/mat3ra/utils/object.py +++ b/src/py/mat3ra/utils/object.py @@ -1,4 +1,5 @@ import copy +import hashlib import json from typing import Any, Dict, List, Optional @@ -22,6 +23,28 @@ def clone_deep(obj: Any) -> Any: return copy.deepcopy(obj) +def sort_keys_deep(obj: Any) -> Any: + """Recursively sort object keys alphabetically.""" + if callable(getattr(obj, "model_dump", None)): + return sort_keys_deep(obj.model_dump(mode="json", exclude_none=True)) + if isinstance(obj, list): + return [sort_keys_deep(item) for item in obj] + if isinstance(obj, dict): + return {k: sort_keys_deep(obj[k]) for k in sorted(obj.keys())} + return obj + + +def calculate_hash_from_object(obj: Any) -> str: + """MD5 of JSON.stringify(sort_keys_deep(obj)).""" + message = json.dumps(sort_keys_deep(obj), separators=(",", ":")) + return hashlib.md5(message.encode()).hexdigest() + + +def remove_timestampable_keys(config: Dict[str, Any]) -> Dict[str, Any]: + """Removes createdAt, updatedAt, removedAt.""" + return {k: v for k, v in config.items() if k not in ("createdAt", "updatedAt", "removedAt")} + + def get(config: Dict, path: str = "", separator: str = "/") -> Any: """ Get value by deep/nested path with separator "/ or "." diff --git a/src/py/mat3ra/utils/string.py b/src/py/mat3ra/utils/string.py index cfc53cd..4d75756 100644 --- a/src/py/mat3ra/utils/string.py +++ b/src/py/mat3ra/utils/string.py @@ -57,3 +57,13 @@ def snake_to_camel(snake_case_str: str) -> str: """ parts = snake_case_str.split("_") return "".join(x.title() for x in parts) + + +def remove_comments_from_source_code(text: str, language: str = "shell") -> str: + """Removes lines starting with # (except shebang).""" + return re.sub(r"^(\s+)?#(?!!).*$", "", text, flags=re.MULTILINE) + + +def remove_empty_lines_from_string(text: str) -> str: + """Removes empty lines and trims.""" + return re.sub(r"^\s*[\r\n]", "", text, flags=re.MULTILINE).strip() From 0c91579b01ab48f4902e7c742689576f90be73c1 Mon Sep 17 00:00:00 2001 From: VsevolodX Date: Fri, 27 Feb 2026 19:57:54 -0800 Subject: [PATCH 2/3] update: sync with js --- src/py/mat3ra/utils/object.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/py/mat3ra/utils/object.py b/src/py/mat3ra/utils/object.py index 7c71ef6..879ff9f 100644 --- a/src/py/mat3ra/utils/object.py +++ b/src/py/mat3ra/utils/object.py @@ -26,7 +26,10 @@ def clone_deep(obj: Any) -> Any: def sort_keys_deep(obj: Any) -> Any: """Recursively sort object keys alphabetically.""" if callable(getattr(obj, "model_dump", None)): - return sort_keys_deep(obj.model_dump(mode="json", exclude_none=True)) + # Match JS behavior: + # - include explicit nulls (Python `None`) if provided + # - exclude fields that were never set (so defaults don't affect hashes) + return sort_keys_deep(obj.model_dump(mode="json", exclude_unset=True)) if isinstance(obj, list): return [sort_keys_deep(item) for item in obj] if isinstance(obj, dict): @@ -36,7 +39,8 @@ def sort_keys_deep(obj: Any) -> Any: def calculate_hash_from_object(obj: Any) -> str: """MD5 of JSON.stringify(sort_keys_deep(obj)).""" - message = json.dumps(sort_keys_deep(obj), separators=(",", ":")) + # JS JSON.stringify does not ASCII-escape Unicode characters. + message = json.dumps(sort_keys_deep(obj), separators=(",", ":"), ensure_ascii=False) return hashlib.md5(message.encode()).hexdigest() From afec60d3d2be05dde155aa95b1bcc9b887f34799 Mon Sep 17 00:00:00 2001 From: VsevolodX Date: Fri, 27 Feb 2026 20:24:11 -0800 Subject: [PATCH 3/3] update: add hash tests --- tests/py/unit/test_hash.py | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 tests/py/unit/test_hash.py diff --git a/tests/py/unit/test_hash.py b/tests/py/unit/test_hash.py new file mode 100644 index 0000000..f8430cf --- /dev/null +++ b/tests/py/unit/test_hash.py @@ -0,0 +1,54 @@ +import pytest +from pydantic import BaseModel +from typing import Optional + +from mat3ra.utils.object import ( + calculate_hash_from_object, + remove_timestampable_keys, + sort_keys_deep, +) +from mat3ra.utils.string import remove_comments_from_source_code, remove_empty_lines_from_string + + +@pytest.mark.parametrize( + "obj,expected", + [ + ({"b": 2, "a": 1}, "608de49a4600dbb5b173492759792e4a"), + ({"z": {"b": 2, "a": 1}, "a": [{"d": 4, "c": 3}, 2]}, "a3f68f53e4bcec2a1b1a058b4a5c12ba"), + ({"a": None, "b": 1}, "b0ef272d275f68e05645af0b4fac87da"), + ({"msg": "Δ", "a": 1}, "f838bafaab72679f8607ebc879ab24a7"), + ], +) +def test_calculate_hash_from_object_matches_js(obj, expected): + assert calculate_hash_from_object(obj) == expected + + +def test_sort_keys_deep_sorts_dict_keys_recursively(): + assert sort_keys_deep({"b": 2, "a": 1, "z": {"b": 2, "a": 1}}) == {"a": 1, "b": 2, "z": {"a": 1, "b": 2}} + + +def test_sort_keys_deep_pydantic_exclude_unset_include_none(): + class Model(BaseModel): + a: int = 1 + b: Optional[int] = None + c: Optional[int] = None + + m = Model(c=None) + assert sort_keys_deep(m) == {"c": None} + assert calculate_hash_from_object(m) == "adcbf5dd65a518bdff3a02349d151b25" + + +def test_remove_timestampable_keys(): + assert remove_timestampable_keys( + {"a": 1, "createdAt": "x", "updatedAt": "y", "removedAt": "z"} + ) == {"a": 1} + + +def test_comment_and_empty_line_stripping_matches_js(): + text = "# comment\n\nx=1\n # indented\n#!/bin/bash\n echo hi # inline\n" + without_comments = remove_comments_from_source_code(text) + assert "#!/" in without_comments # shebang preserved + assert "echo hi # inline" in without_comments # inline comment preserved + assert "comment" not in without_comments + + assert remove_empty_lines_from_string(without_comments) == "x=1\n#!/bin/bash\n echo hi # inline"