From 6599f13d8ba131d036ae31407af02e6b2a49299b Mon Sep 17 00:00:00 2001 From: Henri Casanova Date: Thu, 26 Feb 2026 16:28:36 -1000 Subject: [PATCH 1/2] tmp commit --- .../translators_loggers/Dockerfile.streamflow | 47 +++++++++++++++++++ .../test_translators_loggers.py | 41 +++++++++++----- 2 files changed, 77 insertions(+), 11 deletions(-) create mode 100644 tests/translators_loggers/Dockerfile.streamflow diff --git a/tests/translators_loggers/Dockerfile.streamflow b/tests/translators_loggers/Dockerfile.streamflow new file mode 100644 index 00000000..636fc637 --- /dev/null +++ b/tests/translators_loggers/Dockerfile.streamflow @@ -0,0 +1,47 @@ +# docker build --platform amd64 -t wfcommons-dev-streamflow -f Dockerfile.streamflow . +# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-streamflow /bin/bash + +FROM alphaunito/streamflow + +LABEL org.containers.image.authors="henric@hawaii.edu" + +# update repositories +RUN apt-get update + +# set timezone +RUN echo "America/Los_Angeles" > /etc/timezone && export DEBIAN_FRONTEND=noninteractive && apt-get install -y tzdata + +# install useful stuff +RUN apt-get -y install pkg-config +RUN apt-get -y install git +RUN apt-get -y install wget +RUN apt-get -y install make +RUN apt-get -y install cmake +RUN apt-get -y install cmake-data +RUN apt-get -y install sudo +RUN apt-get -y install vim --fix-missing +RUN apt-get -y install gcc +RUN apt-get -y install gcc-multilib + +# Python stuff +RUN apt-get -y install python3 python3-pip +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 +RUN python3 -m pip install --break-system-packages pathos pandas filelock +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests +RUN python3 -m pip install --break-system-packages --upgrade setuptools + +# Stress-ng +RUN apt-get -y install stress-ng + +# Add wfcommons user +RUN useradd -ms /bin/bash wfcommons +RUN adduser wfcommons sudo +RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers +ENV PATH="$PATH:/home/wfcommons/.local/bin/" + +USER wfcommons +WORKDIR /home/wfcommons +# Making this directory world rwx to facilitate testing +RUN chmod -R 777 /home/wfcommons + diff --git a/tests/translators_loggers/test_translators_loggers.py b/tests/translators_loggers/test_translators_loggers.py index 135a6a4b..4baa6c84 100644 --- a/tests/translators_loggers/test_translators_loggers.py +++ b/tests/translators_loggers/test_translators_loggers.py @@ -114,6 +114,7 @@ def _additional_setup_swiftt(container): "taskvine": _additional_setup_taskvine, "makeflow": noop, "cwl": noop, + "streamflow": noop, "pegasus": _additional_setup_pegasus, "swiftt": _additional_setup_swiftt, } @@ -189,6 +190,19 @@ def run_workflow_cwl(container, num_tasks, str_dirpath): # and there is a 2* because there is a message for the job and for the step) assert (output.decode().count("completed success") == 3 + 2 *num_tasks) +def run_workflow_streamflow(container, num_tasks, str_dirpath): + # Run the workflow! + # Note that the input file is hardcoded and Blast-specific + sys.stderr.write("TODO: RUN THE STREAMFLOW WORKFLOW!!!") + time.sleep(100000) + # exit_code, output = container.exec_run(cmd="cwltool ./main.cwl --split_fasta_00000001_input ./data/workflow_infile_0001 ", + # user="wfcommons", stdout=True, stderr=True) + # # Check sanity + # assert (exit_code == 0) + # # this below is ugly (the 3 is for "workflow", "compile_output_files" and "compile_log_files", + # # and there is a 2* because there is a message for the job and for the step) + # assert (output.decode().count("completed success") == 3 + 2 *num_tasks) + def run_workflow_pegasus(container, num_tasks, str_dirpath): # Run the workflow! exit_code, output = container.exec_run(cmd="bash /home/wfcommons/run_workflow.sh", @@ -217,6 +231,7 @@ def run_workflow_swiftt(container, num_tasks, str_dirpath): "taskvine": run_workflow_taskvine, "makeflow": run_workflow_makeflow, "cwl": run_workflow_cwl, + "streamflow": run_workflow_streamflow, "pegasus": run_workflow_pegasus, "swiftt": run_workflow_swiftt, } @@ -231,6 +246,7 @@ def run_workflow_swiftt(container, num_tasks, str_dirpath): "taskvine": TaskVineTranslator, "makeflow": MakeflowTranslator, "cwl": CWLTranslator, + "streamflow": CWLTranslator, "pegasus": PegasusTranslator, "swiftt": SwiftTTranslator, } @@ -241,17 +257,18 @@ class TestTranslators: @pytest.mark.parametrize( "backend", [ - "swiftt", - "dask", - "parsl", - "nextflow", - "nextflow_subworkflow", - "airflow", - "bash", - "taskvine", - "makeflow", - "cwl", - "pegasus", + # "swiftt", + # "dask", + # "parsl", + # "nextflow", + # "nextflow_subworkflow", + # "airflow", + # "bash", + # "taskvine", + # "makeflow", + # "cwl", + "streamflow", + # "pegasus", ]) @pytest.mark.unit # @pytest.mark.skip(reason="tmp") @@ -305,6 +322,8 @@ def test_translator(self, backend) -> None: parser = TaskVineLogsParser(dirpath / "vine-run-info/most-recent/vine-logs", filenames_to_ignore=["cpu-benchmark","stress-ng", "wfbench"]) elif backend == "makeflow": parser = MakeflowLogsParser(execution_dir = dirpath, resource_monitor_logs_dir = dirpath / "monitor_data/") + elif backend == "streamflow": + parsed = else: parser = None From 72914684e6b78f1e52edf7f1aee662fbe6954438 Mon Sep 17 00:00:00 2001 From: Henri Casanova Date: Fri, 27 Feb 2026 16:26:29 -1000 Subject: [PATCH 2/2] Implemented a Streamflow Executor Modified the CWL executor so that it's Streamflow/RO-Crate friendly Updated Dockerfiles (images have been pushed) Minor fixes here and there --- pyproject.toml | 1 + tests/translators_loggers/Dockerfile.airflow | 8 +- tests/translators_loggers/Dockerfile.bash | 4 +- tests/translators_loggers/Dockerfile.cwl | 7 +- tests/translators_loggers/Dockerfile.dask | 4 +- tests/translators_loggers/Dockerfile.makeflow | 8 +- tests/translators_loggers/Dockerfile.nextflow | 4 +- tests/translators_loggers/Dockerfile.parsl | 8 +- tests/translators_loggers/Dockerfile.pegasus | 8 +- tests/translators_loggers/Dockerfile.pycompss | 8 +- .../translators_loggers/Dockerfile.streamflow | 12 ++- tests/translators_loggers/Dockerfile.swiftt | 3 +- tests/translators_loggers/Dockerfile.taskvine | 8 +- .../build_docker_docker_images.sh | 2 +- .../test_translators_loggers.py | 58 +++++++------- wfcommons/common/workflow.py | 4 +- wfcommons/wfbench/__init__.py | 13 +++- wfcommons/wfbench/translator/__init__.py | 1 + wfcommons/wfbench/translator/cwl.py | 21 +++++- wfcommons/wfbench/translator/streamflow.py | 75 +++++++++++++++++++ .../translator/templates/cwl/folder.cwl | 2 +- .../translator/templates/cwl/shell.cwl | 13 +++- .../translator/templates/cwl/wfbench.cwl | 42 ----------- .../templates/streamflow/streamflow.yml | 11 +++ wfcommons/wfinstances/logs/taskvine.py | 4 +- 25 files changed, 219 insertions(+), 110 deletions(-) create mode 100644 wfcommons/wfbench/translator/streamflow.py delete mode 100644 wfcommons/wfbench/translator/templates/cwl/wfbench.cwl create mode 100644 wfcommons/wfbench/translator/templates/streamflow/streamflow.yml diff --git a/pyproject.toml b/pyproject.toml index b3ceed01..47a72e57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ license = {text = "GNU General Public License v3 (GPLv3)"} dependencies = [ "jsonschema", "matplotlib", + "pygraphviz", "networkx", "numpy", "pandas", diff --git a/tests/translators_loggers/Dockerfile.airflow b/tests/translators_loggers/Dockerfile.airflow index 1b8e6510..2e433e7f 100644 --- a/tests/translators_loggers/Dockerfile.airflow +++ b/tests/translators_loggers/Dockerfile.airflow @@ -1,5 +1,5 @@ -# docker build -t wfcommons-dev -f Dockerfile.airflow . -# docker run -it --rm -v .:/home/wfcommons/mount wfcommons-dev /bin/bash +# docker build -t wfcommons-dev-airflow -f Dockerfile.airflow . +# docker run -it --rm -v .:/home/wfcommons/mount wfcommons-dev-airflow /bin/bash FROM amd64/ubuntu:noble @@ -21,12 +21,14 @@ RUN apt-get -y install cmake-data RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc +RUN apt-get -y install graphviz libgraphviz-dev + #RUN apt-get -y install gcc-multilib # Python stuff RUN apt-get -y install python3 python3-pip RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 diff --git a/tests/translators_loggers/Dockerfile.bash b/tests/translators_loggers/Dockerfile.bash index 6063921e..d999041b 100644 --- a/tests/translators_loggers/Dockerfile.bash +++ b/tests/translators_loggers/Dockerfile.bash @@ -22,12 +22,14 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev + # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools diff --git a/tests/translators_loggers/Dockerfile.cwl b/tests/translators_loggers/Dockerfile.cwl index 9f43e5bc..61b37023 100644 --- a/tests/translators_loggers/Dockerfile.cwl +++ b/tests/translators_loggers/Dockerfile.cwl @@ -1,5 +1,5 @@ -# docker build --platform amd64 -t wfcommons-dev-dask -f Dockerfile.dask . -# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-dask /bin/bash +# docker build --platform amd64 -t wfcommons-dev-cwl -f Dockerfile.cwl . +# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-cwl /bin/bash FROM amd64/ubuntu:noble @@ -22,12 +22,13 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools diff --git a/tests/translators_loggers/Dockerfile.dask b/tests/translators_loggers/Dockerfile.dask index 48b4f9da..5cf570a6 100644 --- a/tests/translators_loggers/Dockerfile.dask +++ b/tests/translators_loggers/Dockerfile.dask @@ -22,12 +22,14 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev + # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools diff --git a/tests/translators_loggers/Dockerfile.makeflow b/tests/translators_loggers/Dockerfile.makeflow index 3bb8f2d7..05346524 100644 --- a/tests/translators_loggers/Dockerfile.makeflow +++ b/tests/translators_loggers/Dockerfile.makeflow @@ -1,5 +1,5 @@ -# docker build --platform amd64 -t wfcommons-dev -f Dockerfile.parsl . -# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev /bin/bash +# docker build --platform amd64 -t wfcommons-dev-makeflow -f Dockerfile.makeflow . +# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-makeflow /bin/bash FROM amd64/ubuntu:noble @@ -23,12 +23,14 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev + # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools diff --git a/tests/translators_loggers/Dockerfile.nextflow b/tests/translators_loggers/Dockerfile.nextflow index c13e91df..e70b3cb5 100644 --- a/tests/translators_loggers/Dockerfile.nextflow +++ b/tests/translators_loggers/Dockerfile.nextflow @@ -23,12 +23,14 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev + # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN pip install --break-system-packages pathos pandas filelock -RUN pip install --break-system-packages networkx scipy matplotlib +RUN pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools diff --git a/tests/translators_loggers/Dockerfile.parsl b/tests/translators_loggers/Dockerfile.parsl index 1bcde454..9b6138fd 100644 --- a/tests/translators_loggers/Dockerfile.parsl +++ b/tests/translators_loggers/Dockerfile.parsl @@ -1,5 +1,5 @@ -# docker build --platform amd64 -t wfcommons-dev -f Dockerfile.parsl . -# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev /bin/bash +# docker build --platform amd64 -t wfcommons-dev-parsl -f Dockerfile.parsl . +# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-parsl /bin/bash FROM amd64/ubuntu:noble @@ -22,12 +22,14 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev + # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools diff --git a/tests/translators_loggers/Dockerfile.pegasus b/tests/translators_loggers/Dockerfile.pegasus index 921df750..3457f664 100644 --- a/tests/translators_loggers/Dockerfile.pegasus +++ b/tests/translators_loggers/Dockerfile.pegasus @@ -1,5 +1,5 @@ -# docker build --platform amd64 -t wfcommons-dev-dask -f Dockerfile.dask . -# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-dask /bin/bash +# docker build --platform amd64 -t wfcommons-dev-pegasus -f Dockerfile.pegasus . +# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-pegasus /bin/bash FROM amd64/ubuntu:noble @@ -22,12 +22,14 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev + # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools diff --git a/tests/translators_loggers/Dockerfile.pycompss b/tests/translators_loggers/Dockerfile.pycompss index 8cacf929..fc89390d 100644 --- a/tests/translators_loggers/Dockerfile.pycompss +++ b/tests/translators_loggers/Dockerfile.pycompss @@ -1,5 +1,5 @@ -# docker build --platform amd64 -t wfcommons-dev-dask -f Dockerfile.dask . -# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-dask /bin/bash +# docker build --platform amd64 -t wfcommons-dev-pycompss -f Dockerfile.pycompss . +# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-pycompss /bin/bash FROM compss/pycompss @@ -22,12 +22,14 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev + # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install pathos pandas filelock -RUN python3 -m pip install networkx scipy matplotlib +RUN python3 -m pip install networkx scipy matplotlib pygraphviz RUN python3 -m pip install pyyaml jsonschema requests #RUN python3 -m pip install --upgrade setuptools #RUN python3 -m pip install --upgrade wheels diff --git a/tests/translators_loggers/Dockerfile.streamflow b/tests/translators_loggers/Dockerfile.streamflow index 636fc637..26a340f4 100644 --- a/tests/translators_loggers/Dockerfile.streamflow +++ b/tests/translators_loggers/Dockerfile.streamflow @@ -1,7 +1,8 @@ # docker build --platform amd64 -t wfcommons-dev-streamflow -f Dockerfile.streamflow . # docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-streamflow /bin/bash -FROM alphaunito/streamflow +# FROM alphaunito/streamflow +FROM amd64/ubuntu:noble LABEL org.containers.image.authors="henric@hawaii.edu" @@ -22,18 +23,25 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev + + # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools # Stress-ng RUN apt-get -y install stress-ng +# Streamflow +RUN apt-get -y install nodejs +RUN python3 -m pip install --break-system-packages streamflow==0.2.0.dev14 + # Add wfcommons user RUN useradd -ms /bin/bash wfcommons RUN adduser wfcommons sudo diff --git a/tests/translators_loggers/Dockerfile.swiftt b/tests/translators_loggers/Dockerfile.swiftt index fddac958..98dc56a6 100644 --- a/tests/translators_loggers/Dockerfile.swiftt +++ b/tests/translators_loggers/Dockerfile.swiftt @@ -22,12 +22,13 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools diff --git a/tests/translators_loggers/Dockerfile.taskvine b/tests/translators_loggers/Dockerfile.taskvine index ab80f08a..7ca94ef4 100644 --- a/tests/translators_loggers/Dockerfile.taskvine +++ b/tests/translators_loggers/Dockerfile.taskvine @@ -1,5 +1,5 @@ -# docker build --platform amd64 -t wfcommons-dev -f Dockerfile.parsl . -# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev /bin/bash +# docker build --platform amd64 -t wfcommons-dev-taskvine -f Dockerfile.taskvine . +# docker run -it --rm -v `pwd`:/home/wfcommons wfcommons-dev-taskvine /bin/bash FROM amd64/ubuntu:noble @@ -23,12 +23,14 @@ RUN apt-get -y install sudo RUN apt-get -y install vim --fix-missing RUN apt-get -y install gcc RUN apt-get -y install gcc-multilib +RUN apt-get -y install graphviz libgraphviz-dev + # Python stuff RUN apt-get -y install python3 python3-pip RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 RUN python3 -m pip install --break-system-packages pathos pandas filelock -RUN python3 -m pip install --break-system-packages networkx scipy matplotlib +RUN python3 -m pip install --break-system-packages networkx scipy matplotlib pygraphviz RUN python3 -m pip install --break-system-packages pyyaml jsonschema requests RUN python3 -m pip install --break-system-packages --upgrade setuptools diff --git a/tests/translators_loggers/build_docker_docker_images.sh b/tests/translators_loggers/build_docker_docker_images.sh index 23986a54..bac4b5ea 100755 --- a/tests/translators_loggers/build_docker_docker_images.sh +++ b/tests/translators_loggers/build_docker_docker_images.sh @@ -2,7 +2,7 @@ set -e -for backend in "dask" "parsl" "nextflow" "airflow" "bash" "taskvine" "makeflow" "cwl" "pegasus" "swiftt"; do +for backend in "dask" "parsl" "nextflow" "airflow" "bash" "taskvine" "makeflow" "cwl" "pegasus" "swiftt" "streamflow"; do echo "Building $backend Docker image..." docker build --platform linux/amd64 -t wfcommons/wfcommons-testing-$backend -f Dockerfile.$backend . done diff --git a/tests/translators_loggers/test_translators_loggers.py b/tests/translators_loggers/test_translators_loggers.py index 4baa6c84..5c633a37 100644 --- a/tests/translators_loggers/test_translators_loggers.py +++ b/tests/translators_loggers/test_translators_loggers.py @@ -34,12 +34,14 @@ from wfcommons.wfbench import TaskVineTranslator from wfcommons.wfbench import MakeflowTranslator from wfcommons.wfbench import CWLTranslator +from wfcommons.wfbench import StreamflowTranslator from wfcommons.wfbench import PegasusTranslator from wfcommons.wfbench import SwiftTTranslator from wfcommons.wfinstances import PegasusLogsParser from wfcommons.wfinstances.logs import TaskVineLogsParser from wfcommons.wfinstances.logs import MakeflowLogsParser +from wfcommons.wfinstances.logs import ROCrateLogsParser def _create_workflow_benchmark() -> (WorkflowBenchmark, int): @@ -193,15 +195,20 @@ def run_workflow_cwl(container, num_tasks, str_dirpath): def run_workflow_streamflow(container, num_tasks, str_dirpath): # Run the workflow! # Note that the input file is hardcoded and Blast-specific - sys.stderr.write("TODO: RUN THE STREAMFLOW WORKFLOW!!!") - time.sleep(100000) - # exit_code, output = container.exec_run(cmd="cwltool ./main.cwl --split_fasta_00000001_input ./data/workflow_infile_0001 ", - # user="wfcommons", stdout=True, stderr=True) - # # Check sanity - # assert (exit_code == 0) - # # this below is ugly (the 3 is for "workflow", "compile_output_files" and "compile_log_files", - # # and there is a 2* because there is a message for the job and for the step) - # assert (output.decode().count("completed success") == 3 + 2 *num_tasks) + exit_code, output = container.exec_run(cmd="streamflow run ./streamflow.yml", + user="wfcommons", stdout=True, stderr=True) + # print(output.decode()) + # Check sanity + assert (exit_code == 0) + # 2 extra "COMPLETED Step" ("COMPLETED Step /compile_output_files", "COMPLETED Step /compile_log_files") + assert (output.decode().count("COMPLETED Step") == num_tasks + 2) + + # Generate RO-Crate now that the workflow has completed (Fails for now) + exit_code, output = container.exec_run(cmd="streamflow list", + user="wfcommons", stdout=True, stderr=True) + uuid = output.decode().splitlines()[1].strip().split(" ")[0] + exit_code, output = container.exec_run(cmd=f"streamflow prov {uuid}", + user="wfcommons", stdout=True, stderr=True) def run_workflow_pegasus(container, num_tasks, str_dirpath): # Run the workflow! @@ -246,7 +253,7 @@ def run_workflow_swiftt(container, num_tasks, str_dirpath): "taskvine": TaskVineTranslator, "makeflow": MakeflowTranslator, "cwl": CWLTranslator, - "streamflow": CWLTranslator, + "streamflow": StreamflowTranslator, "pegasus": PegasusTranslator, "swiftt": SwiftTTranslator, } @@ -257,18 +264,18 @@ class TestTranslators: @pytest.mark.parametrize( "backend", [ - # "swiftt", - # "dask", - # "parsl", - # "nextflow", - # "nextflow_subworkflow", - # "airflow", - # "bash", - # "taskvine", - # "makeflow", - # "cwl", + "swiftt", + "dask", + "parsl", + "nextflow", + "nextflow_subworkflow", + "airflow", + "bash", + "taskvine", + "makeflow", + "cwl", "streamflow", - # "pegasus", + "pegasus", ]) @pytest.mark.unit # @pytest.mark.skip(reason="tmp") @@ -316,18 +323,17 @@ def test_translator(self, backend) -> None: stdout=True, stderr=True) # Run the log parser if any + parser = None if backend == "pegasus": parser = PegasusLogsParser(dirpath / "work/wfcommons/pegasus/Blast-Benchmark/run0001/") elif backend == "taskvine": parser = TaskVineLogsParser(dirpath / "vine-run-info/most-recent/vine-logs", filenames_to_ignore=["cpu-benchmark","stress-ng", "wfbench"]) elif backend == "makeflow": parser = MakeflowLogsParser(execution_dir = dirpath, resource_monitor_logs_dir = dirpath / "monitor_data/") - elif backend == "streamflow": - parsed = - else: - parser = None + # elif backend == "streamflow": + # parser =ROCrateLogsParser(dirpath / "work/wfcommons/most-recent/wfbench") - if parser: + if parser is not None: sys.stderr.write(f"[{backend}] Parsing the logs...\n") reconstructed_workflow : Workflow = parser.build_workflow(f"reconstructed_workflow_{backend}") reconstructed_workflow.write_json(pathlib.Path("/tmp/reconstructed_workflow.json")) diff --git a/wfcommons/common/workflow.py b/wfcommons/common/workflow.py index 3a0af603..947e6e96 100644 --- a/wfcommons/common/workflow.py +++ b/wfcommons/common/workflow.py @@ -259,8 +259,8 @@ def to_nx_digraph(self) -> nx.DiGraph: self.write_json(pathlib.Path(temp.name)) return create_graph(pathlib.Path(temp.name)) - def roots(self) -> List[Task]: + def roots(self) -> List[str]: return [n for n,d in self.in_degree() if d==0] - def leaves(self) -> List[Task]: + def leaves(self) -> List[str]: return [n for n,d in self.out_degree() if d==0] diff --git a/wfcommons/wfbench/__init__.py b/wfcommons/wfbench/__init__.py index 4bcfd734..42d62c75 100644 --- a/wfcommons/wfbench/__init__.py +++ b/wfcommons/wfbench/__init__.py @@ -9,4 +9,15 @@ # (at your option) any later version. from .bench import WorkflowBenchmark -from .translator import AirflowTranslator, DaskTranslator, NextflowTranslator, ParslTranslator, PegasusTranslator, SwiftTTranslator, TaskVineTranslator, MakeflowTranslator, CWLTranslator, BashTranslator, PyCompssTranslator +from .translator import (AirflowTranslator, + BashTranslator, + DaskTranslator, + NextflowTranslator, + ParslTranslator, + PegasusTranslator, + SwiftTTranslator, + TaskVineTranslator, + MakeflowTranslator, + CWLTranslator, + StreamflowTranslator, + PyCompssTranslator) diff --git a/wfcommons/wfbench/translator/__init__.py b/wfcommons/wfbench/translator/__init__.py index ee7231e5..1e3b2a15 100644 --- a/wfcommons/wfbench/translator/__init__.py +++ b/wfcommons/wfbench/translator/__init__.py @@ -11,6 +11,7 @@ from .airflow import AirflowTranslator from .bash import BashTranslator from .cwl import CWLTranslator +from .streamflow import StreamflowTranslator from .dask import DaskTranslator from .nextflow import NextflowTranslator from .parsl import ParslTranslator diff --git a/wfcommons/wfbench/translator/cwl.py b/wfcommons/wfbench/translator/cwl.py index 1f0c65ea..1374fd02 100644 --- a/wfcommons/wfbench/translator/cwl.py +++ b/wfcommons/wfbench/translator/cwl.py @@ -17,7 +17,7 @@ from collections import defaultdict, deque from .abstract_translator import Translator -from ...common import Workflow +from ...common import Workflow, Task, File this_dir = pathlib.Path(__file__).resolve().parent @@ -35,7 +35,7 @@ def __init__(self, workflow: Union[Workflow, pathlib.Path], logger: Optional[logging.Logger] = None) -> None: super().__init__(workflow, logger) - self.cwl_script = ["cwlVersion: v1.2", + self.cwl_script = ["cwlVersion: v1.0", "class: Workflow", "requirements:", " MultipleInputFeatureRequirement: {}", @@ -77,7 +77,7 @@ def translate(self, output_folder: pathlib.Path) -> None: # Parsing the inputs and outputs of the workflow self._parse_inputs_outputs() - # Parsing the steos + # Parsing the steps self._parse_steps() # additional files @@ -87,6 +87,9 @@ def translate(self, output_folder: pathlib.Path) -> None: # Writing the CWL files to the output folder self._write_cwl_files(output_folder) + # Write README file + self._write_readme_file(output_folder) + def _parse_steps(self) -> None: self.cwl_script.append("steps:") @@ -223,7 +226,6 @@ def _write_cwl_files(self, output_folder: pathlib.Path) -> None: clt_folder = cwl_folder.joinpath("clt") clt_folder.mkdir(exist_ok=True) - shutil.copy(this_dir.joinpath("templates/cwl/wfbench.cwl"), clt_folder) shutil.copy(this_dir.joinpath("templates/cwl/folder.cwl"), clt_folder) shutil.copy(this_dir.joinpath("templates/cwl/shell.cwl"), clt_folder) @@ -232,3 +234,14 @@ def _write_cwl_files(self, output_folder: pathlib.Path) -> None: with (open(cwl_folder.joinpath("config.yml"), "w", encoding="utf-8")) as f: f.write("\n".join(self.yml_script)) + + def _write_readme_file(self, output_folder: pathlib.Path) -> None: + readme_file_path = output_folder.joinpath("README") + + with open(readme_file_path, "w") as out: + out.write(f"In directory {str(output_folder)}: cwltool ./main.cwl ") + for task_id in self.workflow.roots(): + task = self.workflow.tasks[task_id] + for input_file in task.input_files: + out.write(f"--{task_id}_input ./data/{input_file} ") + out.write("\n") \ No newline at end of file diff --git a/wfcommons/wfbench/translator/streamflow.py b/wfcommons/wfbench/translator/streamflow.py new file mode 100644 index 00000000..ee7afc14 --- /dev/null +++ b/wfcommons/wfbench/translator/streamflow.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024-2025 The WfCommons Team. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +import shutil +import logging +import pathlib +import ast +import json +from typing import Union, Optional +from collections import defaultdict, deque + + +from .abstract_translator import Translator +from ...common import Workflow, Task, File + +this_dir = pathlib.Path(__file__).resolve().parent + + +class StreamflowTranslator(Translator): + """ + A WfFormat parser for creating Streamflow workflow benchmarks. + + :param workflow: Workflow benchmark object or path to the workflow benchmark JSON instance. + :type workflow: Union[Workflow, pathlib.Path], + :param logger: The logger where to log information/warning or errors (optional). + :type logger: Logger + """ + def __init__(self, + workflow: Union[Workflow, pathlib.Path], + logger: Optional[logging.Logger] = None) -> None: + super().__init__(workflow, logger) + + def translate(self, output_folder: pathlib.Path) -> None: + # Perform the CWL translation (which will create the output folder) + from wfcommons.wfbench import CWLTranslator + cwl_translator = CWLTranslator(workflow=self.workflow, logger=self.logger) + cwl_translator.translate(output_folder) + + # Generate the streamflow.yml file + self._generate_streamflow_file(output_folder) + + # Generate the inputs.yml file + self._generate_inputs_file(output_folder) + + # Generate the README file + self._write_readme_file(output_folder) + + def _generate_streamflow_file(self, output_folder: pathlib.Path) -> None: + shutil.copy(this_dir.joinpath("templates/streamflow/streamflow.yml"), output_folder) + + def _generate_inputs_file(self, output_folder: pathlib.Path) -> None: + + file_path = output_folder.joinpath("inputs.yml") + with open(file_path, "w") as out: + for task_id in self.workflow.roots(): + task = self.workflow.tasks[task_id] + out.write(f"{task.task_id}_input:\n") + for f in task.input_files: + out.write(f" - class: File\n") + out.write(f" path: ./data/{f}") + + + + + def _write_readme_file(self, output_folder: pathlib.Path) -> None: + readme_file_path = output_folder.joinpath("README") + with open(readme_file_path, "w") as out: + out.write(f"In directory {str(output_folder)}: streamflow run ./streamflow.yml\n") \ No newline at end of file diff --git a/wfcommons/wfbench/translator/templates/cwl/folder.cwl b/wfcommons/wfbench/translator/templates/cwl/folder.cwl index eba208bd..23cb4200 100644 --- a/wfcommons/wfbench/translator/templates/cwl/folder.cwl +++ b/wfcommons/wfbench/translator/templates/cwl/folder.cwl @@ -7,7 +7,7 @@ # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. -cwlVersion: v1.2 +cwlVersion: v1.0 class: ExpressionTool requirements: InlineJavascriptRequirement: {} diff --git a/wfcommons/wfbench/translator/templates/cwl/shell.cwl b/wfcommons/wfbench/translator/templates/cwl/shell.cwl index 208c742a..1ff21740 100644 --- a/wfcommons/wfbench/translator/templates/cwl/shell.cwl +++ b/wfcommons/wfbench/translator/templates/cwl/shell.cwl @@ -3,8 +3,6 @@ class: CommandLineTool requirements: InlineJavascriptRequirement: {} ShellCommandRequirement: {} -stdout: $(inputs.step_name + ".out") -stderr: $(inputs.step_name + ".err") arguments: - position: 1 @@ -16,6 +14,9 @@ arguments: cmd = cmd.replace(new RegExp(inputs.input_files[i].basename, 'g'), inputs.input_files[i].path); } } + cmd = cmd + " > " + runtime.outdir + "/" + inputs.step_name + ".out 2> " + runtime.outdir + "/" + inputs.step_name + ".err"; + cmd = cmd + " ; echo '-- end of stdout for " + inputs.step_name + " --' >> " + runtime.outdir + "/" + inputs.step_name + ".out"; + cmd = cmd + " ; echo '-- end of stderr for " + inputs.step_name + " --' >> " + runtime.outdir + "/" + inputs.step_name + ".err"; return cmd; } shellQuote: false @@ -32,9 +33,13 @@ inputs: outputs: out: - type: stdout + type: File + outputBinding: + glob: $(inputs.step_name + ".out") err: - type: stderr + type: File + outputBinding: + glob: $(inputs.step_name + ".err") output_files: type: File[] outputBinding: diff --git a/wfcommons/wfbench/translator/templates/cwl/wfbench.cwl b/wfcommons/wfbench/translator/templates/cwl/wfbench.cwl deleted file mode 100644 index 9aaafa90..00000000 --- a/wfcommons/wfbench/translator/templates/cwl/wfbench.cwl +++ /dev/null @@ -1,42 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023-2024 The WfCommons Team. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -cwlVersion: v1.2 -class: CommandLineTool -requirements: - InlineJavascriptRequirement: {} -baseCommand: wfbench -arguments: - - valueFrom: $(inputs.input_params) -stdout: $(inputs.step_name + ".out") -stderr: $(inputs.step_name + ".err") -inputs: - step_name: - type: string - input_params: - type: string[]? - input_files: - type: File[]? - inputBinding: - position: 0 - itemSeparator: " " - output_filenames: - type: string[]? -outputs: - out: - type: stdout - err: - type: stderr - output_files: - type: - type: array - items: - - File - outputBinding: - glob: $(inputs.output_filenames) diff --git a/wfcommons/wfbench/translator/templates/streamflow/streamflow.yml b/wfcommons/wfbench/translator/templates/streamflow/streamflow.yml new file mode 100644 index 00000000..0915d977 --- /dev/null +++ b/wfcommons/wfbench/translator/templates/streamflow/streamflow.yml @@ -0,0 +1,11 @@ +version: "v1.0" +workflows: + head-tail: + type: cwl + config: + file: main.cwl + settings: inputs.yml +deployments: + local: + type: local + config: {} \ No newline at end of file diff --git a/wfcommons/wfinstances/logs/taskvine.py b/wfcommons/wfinstances/logs/taskvine.py index cf367c25..2003d853 100644 --- a/wfcommons/wfinstances/logs/taskvine.py +++ b/wfcommons/wfinstances/logs/taskvine.py @@ -119,7 +119,7 @@ def build_workflow(self, workflow_name: Optional[str] = None) -> Workflow: if not "size" in self.files_map[file_key]: sys.stderr.write(f"Warning: Could not determine size for file with key {file_key}: assuming zero bytes.\n") self.files_map[file_key]["size"] = 0 - sys.stderr.write(f"Identified {len(self.files_map)} valid files\n") + # sys.stderr.write(f"Identified {len(self.files_map)} valid files\n") # Construct the task runtimes self._construct_task_runtimes() @@ -134,7 +134,7 @@ def build_workflow(self, workflow_name: Optional[str] = None) -> Workflow: for victim in to_remove: self.known_task_ids.remove(victim) - sys.stderr.write(f"Identified {len(self.known_task_ids)} valid tasks\n") + # sys.stderr.write(f"Identified {len(self.known_task_ids)} valid tasks\n") # Construct the input and output file for each task self._construct_task_input_output_files()