diff --git a/other/materials_designer/workflows/total_energy.ipynb b/other/materials_designer/workflows/total_energy.ipynb new file mode 100644 index 00000000..7f5fefc0 --- /dev/null +++ b/other/materials_designer/workflows/total_energy.ipynb @@ -0,0 +1,597 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Total Energy\n", + "\n", + "Calculate the total energy of a material using a DFT workflow on the Mat3ra platform.\n", + "\n", + "

Usage

\n", + "\n", + "1. Set material and calculation parameters in cell 1.2. below (or use the default values).\n", + "1. Click \"Run\" > \"Run All\" to run all cells.\n", + "1. Wait for the job to complete.\n", + "1. Scroll down to view the result.\n", + "\n", + "## Summary\n", + "\n", + "1. Set up the environment and parameters: install packages (JupyterLite only) and configure parameters for material, workflow, compute resources, and job.\n", + "1. Authenticate and initialize API client: authenticate via browser, initialize the client, then select account and project.\n", + "1. Create material: materials are read from the `../uploads` folder — place files there manually or run a material creation notebook first. If the material is not found by name, Standata is used as a fallback. The material is then saved to the platform.\n", + "1. Create workflow and set its parameters: select application, load total energy workflow from Standata, optionally add relaxation or adjust model/method parameters, and save the workflow to the platform.\n", + "1. Configure compute: get list of clusters and create compute configuration with selected cluster, queue, and number of processors.\n", + "1. Create the job with material and workflow configuration: assemble the job from material, workflow, project, and compute configuration.\n", + "1. Submit the job and monitor the status: submit the job and wait for completion.\n", + "1. Retrieve results: get and display the properties." + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "## 1. Set up the environment and parameters\n", + "### 1.1. 
Install packages (JupyterLite)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "if sys.platform == \"emscripten\":\n", + " import micropip\n", + "\n", + " await micropip.install(\"mat3ra-api-examples\", deps=False)\n", + " await micropip.install(\"mat3ra-utils\")\n", + " from mat3ra.utils.jupyterlite.packages import install_packages\n", + "\n", + " await install_packages(\"api_examples\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "### 1.2. Set parameters and configurations for the workflow and job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from mat3ra.ide.compute import QueueName\n", + "\n", + "# 2. Auth and organization parameters\n", + "# Set organization name to use it as the owner, otherwise your personal account is used\n", + "ORGANIZATION_NAME = None\n", + "\n", + "# 3. Material parameters\n", + "FOLDER = \"../uploads\"\n", + "MATERIAL_NAME = \"Silicon\" # Name of the material to load from local file or Standata\n", + "\n", + "# 4. Workflow parameters\n", + "WORKFLOW_SEARCH_TERM = \"total_energy.json\" # Search term for Workflows Standata\n", + "APPLICATION_NAME = \"espresso\" # Specify application name (e.g., \"espresso\", \"vasp\", \"nwchem\")\n", + "ADD_RELAXATION = True # Whether to add relaxation subworkflow as first unit\n", + "\n", + "RELAXATION_KGRID = [1, 1, 1]\n", + "SCF_KGRID = [1, 1, 1]\n", + "\n", + "MY_WORKFLOW_NAME = \"Total Energy\" + (\" (relax)\" if ADD_RELAXATION else \"\")\n", + "\n", + "# 5. Compute parameters\n", + "CLUSTER_NAME = None # specify full or partial name i.e. \"cluster-001\" to select\n", + "QUEUE_NAME = QueueName.D\n", + "PPN = 1\n", + "\n", + "# 6. 
Job parameters\n", + "timestamp = datetime.now().strftime(\"%Y-%m-%d %H:%M\")\n", + "POLL_INTERVAL = 30 # seconds\n" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "## 2. Authenticate and initialize API client\n", + "### 2.1. Authenticate\n", + "Authenticate in the browser and have credentials stored in environment variable \"OIDC_ACCESS_TOKEN\".\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.auth import authenticate\n", + "\n", + "\n", + "await authenticate()" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "### 2.2. Initialize API Client\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.api_client import APIClient\n", + "\n", + "client = APIClient.authenticate()\n", + "client" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "### 2.3. Select account to work under" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "client.list_accounts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "selected_account = client.my_account\n", + "\n", + "if ORGANIZATION_NAME:\n", + " selected_account = client.get_account(name=ORGANIZATION_NAME)\n", + "\n", + "ACCOUNT_ID = selected_account.id\n", + "print(f\"✅ Selected account ID: {ACCOUNT_ID}, name: {selected_account.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "### 2.4. 
Select project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "projects = client.projects.list({\"isDefault\": True, \"owner._id\": ACCOUNT_ID})\n", + "project_id = projects[0][\"_id\"]\n", + "print(f\"✅ Using project: {projects[0]['name']} ({project_id})\")" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "## 3. Create material\n", + "### 3.1. Load material from local file (or Standata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.made.material import Material\n", + "from mat3ra.standata.materials import Materials\n", + "from utils.visualize import visualize_materials as visualize\n", + "from utils.jupyterlite import load_material_from_folder\n", + "\n", + "material = load_material_from_folder(FOLDER, MATERIAL_NAME) or Material.create(\n", + " Materials.get_by_name_first_match(MATERIAL_NAME))\n", + "\n", + "visualize(material)" + ] + }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "### 3.2. Save material to the platform" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.api import get_or_create_material\n", + "\n", + "saved_material_response = get_or_create_material(client, material, ACCOUNT_ID)\n", + "saved_material = Material.create(saved_material_response)" + ] + }, + { + "cell_type": "markdown", + "id": "18", + "metadata": {}, + "source": [ + "## 4. Create workflow and set its parameters\n", + "### 4.1. 
Get list of applications and select one" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.standata.applications import ApplicationStandata\n", + "from mat3ra.ade.application import Application\n", + "\n", + "app_config = ApplicationStandata.get_by_name_first_match(APPLICATION_NAME)\n", + "app = Application(**app_config)\n", + "print(f\"Using application: {app.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": {}, + "source": [ + "### 4.2. Create workflow from standard workflows and preview it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.standata.workflows import WorkflowStandata\n", + "from mat3ra.wode.workflows import Workflow\n", + "from utils.visualize import visualize_workflow\n", + "\n", + "workflow_config = WorkflowStandata.filter_by_application(app.name).get_by_name_first_match(WORKFLOW_SEARCH_TERM)\n", + "workflow = Workflow.create(workflow_config)\n", + "workflow.name = MY_WORKFLOW_NAME\n", + "\n", + "visualize_workflow(workflow)" + ] + }, + { + "cell_type": "markdown", + "id": "22", + "metadata": {}, + "source": [ + "### 4.3. Modify workflow (Optional)\n", + "#### 4.3.1. Add relaxation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "if ADD_RELAXATION:\n", + " workflow.add_relaxation()\n" + ] + }, + { + "cell_type": "markdown", + "id": "24", + "metadata": {}, + "source": [ + "#### 4.3.2. Modify model and method parameters (Optional)\n", + "Uncomment the code below and adjust selection of model parameters as needed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25", + "metadata": {}, + "outputs": [], + "source": [ + "# # Example: Change model parameters\n", + "#\n", + "# from mat3ra.mode import Model\n", + "# from mat3ra.standata.model_tree import ModelTreeStandata\n", + "#\n", + "# model_config = ModelTreeStandata.get_model_by_parameters(\n", + "# type=\"dft\",\n", + "# subtype=\"gga\",\n", + "# functional=\"pbe\"\n", + "# )\n", + "# model_config[\"method\"] = {\"type\": \"pseudopotential\", \"subtype\": \"us\"}\n", + "# model = Model.create(model_config)\n", + "#\n", + "# for subworkflow in workflow.subworkflows:\n", + "# subworkflow.model = model\n", + "#\n", + "# if ADD_RELAXATION:\n", + "# # If relaxation subworkflow is added, set the same model for it\n", + "# workflow.relaxation_subworkflow.model = model\n", + "#\n", + "# # Preview modified workflow\n", + "# visualize_workflow(workflow)" + ] + }, + { + "cell_type": "markdown", + "id": "26", + "metadata": {}, + "source": [ + "#### 4.3.3. Modify important settings\n", + "Set k-grid." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.wode.context.providers import PointsGridDataProvider\n", + "\n", + "new_context_relax = PointsGridDataProvider(dimensions=RELAXATION_KGRID, isEdited=True).yield_data() if ADD_RELAXATION else None\n", + "new_context_scf = PointsGridDataProvider(dimensions=SCF_KGRID, isEdited=True).yield_data()\n", + "\n", + "if ADD_RELAXATION:\n", + " relaxation_subworkflow = workflow.subworkflows[0]\n", + " unit_to_modify_relax = relaxation_subworkflow.get_unit_by_name(name_regex=\"relax\")\n", + " unit_to_modify_relax.add_context(new_context_relax)\n", + " relaxation_subworkflow.set_unit(unit_to_modify_relax)\n", + "\n", + "band_gap_subworkflow = workflow.subworkflows[1 if ADD_RELAXATION else 0]\n", + "unit_to_modify_scf = band_gap_subworkflow.get_unit_by_name(name=\"pw_scf\")\n", + "unit_to_modify_scf.add_context(new_context_scf)\n", + "band_gap_subworkflow.set_unit(unit_to_modify_scf)\n", + "\n", + "# Preview modified workflow\n", + "visualize_workflow(workflow)" + ] + }, + { + "cell_type": "markdown", + "id": "28", + "metadata": {}, + "source": [ + "### 4.4. Save workflow to collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.generic import dict_to_namespace\n", + "from utils.api import get_or_create_workflow\n", + "\n", + "workflow_id_or_dict = None\n", + "\n", + "saved_workflow_response = get_or_create_workflow(client, workflow, ACCOUNT_ID)\n", + "saved_workflow = Workflow.create(saved_workflow_response)\n", + "print(f\"Workflow ID: {saved_workflow.id}\")" + ] + }, + { + "cell_type": "markdown", + "id": "30", + "metadata": {}, + "source": [ + "## 5. Create the compute configuration\n", + "### 5.1. 
Get list of clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31", + "metadata": {}, + "outputs": [], + "source": [ + "clusters = client.clusters.list()\n", + "print(f\"Available clusters: {[c['hostname'] for c in clusters]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "32", + "metadata": {}, + "source": [ + "### 5.2. Create compute configuration for the job\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.ide.compute import Compute\n", + "\n", + "# Select cluster: use specified name if provided, otherwise use first available\n", + "if CLUSTER_NAME:\n", + " cluster = next((c for c in clusters if CLUSTER_NAME in c[\"hostname\"]), None)\n", + "else:\n", + " cluster = clusters[0]\n", + "\n", + "compute = Compute(\n", + " cluster=cluster,\n", + " queue=QUEUE_NAME,\n", + " ppn=PPN\n", + ")\n", + "print(f\"Using cluster: {compute.cluster.hostname}, queue: {QUEUE_NAME}, ppn: {PPN}\")" + ] + }, + { + "cell_type": "markdown", + "id": "34", + "metadata": {}, + "source": [ + "## 6. Create the job with material and workflow configuration\n", + "### 6.1. 
Create job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.api import create_job\n", + "from utils.visualize import display_JSON\n", + "\n", + "print(f\"Material: {saved_material.id}\")\n", + "print(f\"Workflow: {saved_workflow.id}\")\n", + "print(f\"Project: {project_id}\")\n", + "\n", + "job_name = MY_WORKFLOW_NAME + \" \" + saved_material.formula + \" \" + timestamp\n", + "job_response = create_job(\n", + " api_client=client,\n", + " materials=[saved_material],\n", + " workflow=saved_workflow,\n", + " project_id=project_id,\n", + " owner_id=ACCOUNT_ID,\n", + " prefix=job_name,\n", + " compute=compute.to_dict()\n", + ")\n", + "\n", + "job = dict_to_namespace(job_response)\n", + "job_id = job._id\n", + "print(\"✅ Job created successfully!\")\n", + "print(f\"Job ID: {job_id}\")\n", + "display_JSON(job_response)" + ] + }, + { + "cell_type": "markdown", + "id": "36", + "metadata": {}, + "source": [ + "## 7. Submit the job and monitor the status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37", + "metadata": {}, + "outputs": [], + "source": [ + "client.jobs.submit(job_id)\n", + "print(f\"✅ Job {job_id} submitted successfully!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.api import wait_for_jobs_to_finish_async\n", + "\n", + "await wait_for_jobs_to_finish_async(client.jobs, [job_id], poll_interval=POLL_INTERVAL)" + ] + }, + { + "cell_type": "markdown", + "id": "39", + "metadata": {}, + "source": [ + "## 8. 
Retrieve results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.prode import PropertyName\n", + "from utils.visualize import visualize_properties\n", + "\n", + "property_data = client.properties.get_for_job(job_id, property_name=PropertyName.scalar.total_energy.value)\n", + "visualize_properties(property_data, title=\"Total Energy\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/utils/api.py b/utils/api.py index a42a2944..6eae007e 100644 --- a/utils/api.py +++ b/utils/api.py @@ -3,11 +3,14 @@ import os import time import urllib.request -from typing import List +from typing import List, Optional, Union +from mat3ra.api_client import APIClient from mat3ra.api_client.endpoints.bank_workflows import BankWorkflowEndpoints from mat3ra.api_client.endpoints.jobs import JobEndpoints from mat3ra.api_client.endpoints.properties import PropertiesEndpoints +from mat3ra.made.material import Material +from mat3ra.wode import Workflow from tabulate import tabulate @@ -123,3 +126,102 @@ def get_property_by_subworkflow_and_unit_indicies( def get_cluster_name(name: str = "cluster-001") -> str: clusters = json.loads(os.environ.get("CLUSTERS", "[]") or "[]") return clusters[0] if clusters else name + + +def get_or_create_material(api_client: APIClient, material, owner_id: str) -> dict: + """ + Returns an existing material from the collection if one with the same structural hash + 
exists under the given owner, otherwise creates a new one. + Uses the client-side hash (mat3ra-made Material.hash) to avoid unnecessary DB writes. + + Args: + api_client (APIClient): API client instance carrying the authorization context. + material: mat3ra-made Material object (must have a .hash property). + owner_id (str): Account ID under which to search and create. + + Returns: + dict: The material dict (existing or newly created). + """ + existing = api_client.materials.list({"hash": material.hash, "owner._id": owner_id}) + if existing: + print(f"♻️ Reusing already existing Material: {existing[0]['_id']}") + return existing[0] + created = api_client.materials.create(material.to_dict(), owner_id=owner_id) + print(f"✅ Material created: {created['_id']}") + return created + + +def get_or_create_workflow(api_client: APIClient, workflow, owner_id: str) -> dict: + """ + Creates a workflow from the given mat3ra-wode Workflow object if a workflow doesn't exist. + Returns an existing workflow from the collection if one with the same hash exists under the given owner. + Important settings are preserved on the workflow. + + Args: + api_client (APIClient): API client instance carrying the authorization context. + workflow: mat3ra-wode Workflow object with a .to_dict() method. + owner_id (str): Account ID under which to search and create. + + Returns: + dict: The workflow dict (existing or newly created). 
+ """ + existing = api_client.workflows.list({"hash": workflow.hash, "owner._id": owner_id}) + if existing: + print(f"♻️ Reusing already existing Workflow: {existing[0]['_id']}") + # We only add reference to the existing workflow ID, keeping any client changes to the WF + workflow.id = existing[0]["id"] + return workflow + created = api_client.workflows.create(workflow.to_dict(), owner_id=owner_id) + print(f"✅ Workflow created: {created['_id']}") + return created + + +def create_job( + api_client: APIClient, + materials: List[Union[dict, Material]], + workflow: Union[dict, Workflow], + project_id: str, + owner_id: str, + prefix: str, + compute: Optional[dict] = None, +) -> List[dict]: + """ + Creates jobs for each material using either collection references or an embedded workflow. + + Args: + api_client (APIClient): API client instance carrying the authorization context. + materials (list): List of material dicts or mat3ra-made Material objects to create jobs for. + workflow: Workflow dictionaru or mat3ra-wode Workflow object to use for the jobs. + project_id (str): Project ID. + owner_id (str): Account ID. + prefix (str): Job name prefix. + compute (dict, optional): Compute configuration dict. + + Returns: + list[dict]: List of created job dicts. + """ + material_dicts = [] + for material in materials: + if isinstance(material, Material): + material_dicts.append(material.to_dict()) + else: + material_dicts.append(material) + + workflow_dict = workflow.to_dict() if isinstance(workflow, Workflow) else workflow + is_multimaterial = workflow_dict.get("isMultimaterial", False) + + config = { + "_project": {"_id": project_id}, + "workflow": workflow_dict, + "owner": {"_id": owner_id}, + "name": prefix, + } + + if is_multimaterial: + config["_materials"] = [{"_id": mid} for mid in {md["_id"] for md in material_dicts}] + else: + config["_material"] = {"_id": material_dicts[0]["_id"]} + + if compute: + config["compute"] = compute + return api_client.jobs.create(config)