Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/py/mat3ra/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Make this package a pkgutil-style namespace package: extend_path() adds to
# __path__ every same-named package directory found on sys.path.
# NOTE(review): presumably this lets several separately installed
# distributions contribute sub-packages under the same top-level name — confirm.
# __import__ is used inline so no extra module name is bound in this package.
__path__ = __import__("pkgutil").extend_path(__path__, __name__)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests??

3 changes: 3 additions & 0 deletions src/py/mat3ra/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
from .search import find_by_key_or_regex
from .string import camel_to_snake, snake_to_camel

from .object import calculate_hash_from_object, remove_timestampable_keys, sort_keys_deep
from .string import remove_comments_from_source_code, remove_empty_lines_from_string
27 changes: 27 additions & 0 deletions src/py/mat3ra/utils/object.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import hashlib
import json
from typing import Any, Dict, List, Optional

Expand All @@ -22,6 +23,32 @@ def clone_deep(obj: Any) -> Any:
return copy.deepcopy(obj)


def sort_keys_deep(obj: Any) -> Any:
    """Recursively sort object keys alphabetically.

    Args:
        obj: Any value. An object exposing a callable ``model_dump`` (for
            example a pydantic model) is first dumped to plain data with
            ``mode="json"`` and ``exclude_unset=True`` and then processed.

    Returns:
        A new structure in which every dict is rebuilt with its keys in
        sorted order. Lists and tuples are rebuilt element by element
        (a tuple stays a tuple); any other value is returned unchanged.

    Raises:
        TypeError: Propagated from ``sorted`` when the keys of a dict are not
            mutually comparable (e.g. a mix of ``int`` and ``str`` keys).
    """
    dump = getattr(obj, "model_dump", None)
    if callable(dump):
        # Match JS behavior:
        # - include explicit nulls (Python `None`) if provided
        # - exclude fields that were never set (so defaults don't affect hashes)
        return sort_keys_deep(dump(mode="json", exclude_unset=True))
    if isinstance(obj, list):
        return [sort_keys_deep(item) for item in obj]
    if isinstance(obj, tuple):
        # Tuples are serialized by ``json.dumps`` exactly like lists, so dicts
        # nested inside them must be sorted as well; otherwise the serialized
        # form would depend on key insertion order.
        return tuple(sort_keys_deep(item) for item in obj)
    if isinstance(obj, dict):
        return {key: sort_keys_deep(obj[key]) for key in sorted(obj)}
    return obj


def calculate_hash_from_object(obj: Any) -> str:
    """Return the MD5 hex digest of the compact JSON form of ``sort_keys_deep(obj)``.

    The object is normalized with ``sort_keys_deep``, serialized without any
    whitespace between tokens, encoded to bytes and hashed.

    Args:
        obj: Value to hash; must be JSON-serializable after normalization.

    Returns:
        The 32-character hexadecimal MD5 digest.
    """
    normalized = sort_keys_deep(obj)
    # Compact separators and ensure_ascii=False mirror JS JSON.stringify, which
    # emits no padding and does not ASCII-escape Unicode characters.
    # NOTE(review): json.dumps renders floats such as 1.0 as "1.0"; confirm
    # whether callers need those to agree with the JS output.
    serialized = json.dumps(normalized, ensure_ascii=False, separators=(",", ":"))
    digest = hashlib.md5()
    digest.update(serialized.encode("utf-8"))
    return digest.hexdigest()


def remove_timestampable_keys(config: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``config`` without the timestamp entries.

    Only the top-level keys ``createdAt``, ``updatedAt`` and ``removedAt`` are
    dropped; nested structures are passed through untouched and the input
    dict is not modified.

    Args:
        config: Mapping to filter.

    Returns:
        A new dict holding every other key/value pair in its original order.
    """
    timestamp_keys = ("createdAt", "updatedAt", "removedAt")
    cleaned: Dict[str, Any] = {}
    for key, value in config.items():
        if key in timestamp_keys:
            continue
        cleaned[key] = value
    return cleaned


def get(config: Dict, path: str = "", separator: str = "/") -> Any:
"""
Get value by deep/nested path with separator "/" or "."
Expand Down
10 changes: 10 additions & 0 deletions src/py/mat3ra/utils/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,13 @@ def snake_to_camel(snake_case_str: str) -> str:
"""
parts = snake_case_str.split("_")
return "".join(x.title() for x in parts)


def remove_comments_from_source_code(text: str, language: str = "shell") -> str:
    """Blank out whole-line ``#`` comments, keeping ``#!`` lines.

    A line is treated as a comment when ``#`` is its first non-whitespace
    character and is not immediately followed by ``!``. The comment text
    (including its leading whitespace) is replaced by an empty string; the
    line's newline is left in place. Trailing inline comments after code are
    not touched.

    Args:
        text: Source text to process.
        language: Accepted for interface purposes; the body does not use it.

    Returns:
        ``text`` with the matching comment lines emptied.
    """
    # ``\s`` also matches newlines, so the optional leading-whitespace group
    # may absorb blank lines that directly precede a comment.
    comment_line = re.compile(r"^(\s+)?#(?!!).*$", flags=re.MULTILINE)
    return comment_line.sub("", text)


def remove_empty_lines_from_string(text: str) -> str:
    """Drop empty and whitespace-only lines, then strip the result.

    Args:
        text: Text to clean up.

    Returns:
        ``text`` without blank lines and without leading or trailing
        whitespace (the final ``strip`` also removes any indentation of the
        first remaining line).
    """
    # Matches from a line start through the newline that ends a blank line.
    blank_line = re.compile(r"^\s*[\r\n]", flags=re.MULTILINE)
    without_blanks = blank_line.sub("", text)
    return without_blanks.strip()
54 changes: 54 additions & 0 deletions tests/py/unit/test_hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import pytest
from pydantic import BaseModel
from typing import Optional

from mat3ra.utils.object import (
calculate_hash_from_object,
remove_timestampable_keys,
sort_keys_deep,
)
from mat3ra.utils.string import remove_comments_from_source_code, remove_empty_lines_from_string


@pytest.mark.parametrize(
    "obj,expected",
    [
        # Flat dict with keys given out of order.
        ({"b": 2, "a": 1}, "608de49a4600dbb5b173492759792e4a"),
        # Nested dict plus a list that itself contains a dict.
        ({"z": {"b": 2, "a": 1}, "a": [{"d": 4, "c": 3}, 2]}, "a3f68f53e4bcec2a1b1a058b4a5c12ba"),
        # Explicit None value.
        ({"a": None, "b": 1}, "b0ef272d275f68e05645af0b4fac87da"),
        # Non-ASCII string value.
        ({"msg": "Δ", "a": 1}, "f838bafaab72679f8607ebc879ab24a7"),
    ],
)
def test_calculate_hash_from_object_matches_js(obj, expected):
    """Each object hashes to its pinned digest.

    NOTE(review): the expected digests are presumably taken from the JS
    implementation, as the test name suggests — confirm their origin.
    """
    digest = calculate_hash_from_object(obj)
    assert digest == expected


def test_sort_keys_deep_sorts_dict_keys_recursively():
    """sort_keys_deep orders keys at the top level and inside nested dicts."""
    result = sort_keys_deep({"b": 2, "a": 1, "z": {"b": 2, "a": 1}})

    # Content is preserved.
    assert result == {"a": 1, "b": 2, "z": {"a": 1, "b": 2}}
    # Dict equality ignores key order, so the ordering itself has to be
    # checked explicitly; otherwise this test would pass without any sorting.
    assert list(result) == ["a", "b", "z"]
    assert list(result["z"]) == ["a", "b"]


def test_sort_keys_deep_pydantic_exclude_unset_include_none():
    """Unset fields are dropped from a model; an explicitly passed None is kept."""

    class Model(BaseModel):
        a: int = 1
        b: Optional[int] = None
        c: Optional[int] = None

    # Only ``c`` is supplied, so ``a`` and ``b`` remain unset defaults.
    model = Model(c=None)

    normalized = sort_keys_deep(model)
    assert normalized == {"c": None}

    digest = calculate_hash_from_object(model)
    assert digest == "adcbf5dd65a518bdff3a02349d151b25"


def test_remove_timestampable_keys():
    """createdAt, updatedAt and removedAt are removed; other keys are kept."""
    config = {"a": 1, "createdAt": "x", "updatedAt": "y", "removedAt": "z"}
    cleaned = remove_timestampable_keys(config)
    assert cleaned == {"a": 1}


def test_comment_and_empty_line_stripping_matches_js():
    """Comment removal followed by empty-line removal yields the pinned text."""
    source = "# comment\n\nx=1\n # indented\n#!/bin/bash\n echo hi # inline\n"

    stripped = remove_comments_from_source_code(source)
    # The shebang line must survive comment removal.
    assert "#!/" in stripped
    # A comment that trails code on the same line is left alone.
    assert "echo hi # inline" in stripped
    # The whole-line comments are gone.
    assert "comment" not in stripped

    expected = "x=1\n#!/bin/bash\n echo hi # inline"
    assert remove_empty_lines_from_string(stripped) == expected
Loading