From d958a7d06d76097b7b07445f10bd00f5ec090e22 Mon Sep 17 00:00:00 2001
From: Brummans <nick.brummans@wur.nl>
Date: Tue, 6 Jun 2023 14:36:40 +0200
Subject: [PATCH] Working test example: run the pipeline controller locally

Add clearml_example.conf as a ClearML SDK configuration template, switch
controller.py to run the pipeline and its steps locally via
start_locally(run_pipeline_steps_locally=True), set output_uri=True on the
step tasks in stage_one.py and stage_two.py, require clearml[s3], and drop
the decorator-based pipeline.py, preprocess.py, stage_three.py, test.jpg
and the committed __pycache__ artifacts.
---
 __pycache__/model.cpython-39.pyc | Bin 1036 -> 0 bytes
 __pycache__/utils.cpython-39.pyc | Bin 583 -> 0 bytes
 clearml_example.conf             | 229 +++++++++++++++++++++++++++++
 controller.py                    |  12 +-
 pipeline.py                      | 245 -------------------------------
 preprocess.py                    |  43 ------
 requirements.txt                 |   2 +-
 stage_one.py                     |   3 +-
 stage_three.py                   |  11 --
 stage_two.py                     |   3 +-
 test.jpg                         | Bin 4004 -> 0 bytes
 11 files changed, 239 insertions(+), 309 deletions(-)
 delete mode 100644 __pycache__/model.cpython-39.pyc
 delete mode 100644 __pycache__/utils.cpython-39.pyc
 create mode 100644 clearml_example.conf
 delete mode 100644 pipeline.py
 delete mode 100644 preprocess.py
 delete mode 100644 stage_three.py
 delete mode 100644 test.jpg

diff --git a/__pycache__/model.cpython-39.pyc b/__pycache__/model.cpython-39.pyc
deleted file mode 100644
index 4320d7de9ba253882f078b871ef8dc31a137d2a9..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1036
zcmYe~<>g{vU|_iR>P>PXGXuk85C<8vFfcGUFfcF_D={!Iq%fo~<}l<kMlmunxHF_M
zr7*WJq%fy4W-&E0M=_@`1~X`~yaeg?O9qk1m>J3`R$*XZNM(p(Oks#(YG+7eOkqr6
zYT<}tPGL@AX<>+BX=h+zh+++9&}6;E;q2$<o1c=JlgtR!4WdA7P6h@Bkcq`53=9k<
z3?+;;49!dnm=-cHGL$ecU@2kFVg;ECGBbrWg{_yhgn0qmLWUCNEcO)E6!u=mh0HY!
z@f;Af3s_T_QaB)L7s6F8U{7IP2vNsb!n}Yhg)@a~AyW-QJa-C1FoPzy-%F4KUJ5WU
zFuY_15g>QHWMW_dxtu#QFFrXVF)uGQr}!36erXAU$DN&8l$V+lUz}N$dW)^Nq$o2b
z^%i?UVoFM8Ub-e@5hyxdDljlGWGb*RFuVkDHF<8a7MB*J78QYvzr_(BpP83g5+8qy
zDKGC9S8!@!X=+|cW@64QHs}1jGNY7REJ3M0p|?1F6DtDp^K*<+Zm}lk=am^kD5G0!
zKACx`iAA@V^Gi!K8E>%^r{<)sWW2>0AD^6)SX>+*zmnmXp|e#?XmM&$aZFNCX>P88
zk!g%eesXDUYF<fkOma?YVo`36Zc1uyeoQVXhV%+5i$FmEN-p3iW?<xFWMO1sWP-up
zEbMHIT;JKaSr`Qv*%&$gvawbP!Xj7?=A~qi4?(dE!k_>K1+6nE{h2T@Fw`(CV5nhS
z$XLr*!?1u6%3@r=RKvJ{IfZc{V+zwkCa@d}SdOWNDVRZ%*{=v3HUIzr|F6kfB*wtN
zP$bU4z;KJDEHkzI77HjMiX=ht##|%?5@)Qq#g<=MQczk_BnwguCgeae>}mN$<%vZp
z5+DbGVupc{i;;^_h>?r2N&xN`s3J`^P)y||=BCES-{Ojo&&^LM%>l7_;^PZT6LTOk
z;7}_9Wr|z8(C|-A&B=+6N3vTQ<O+}j!0`_Xm10mhb1?FVfJHP}ZgG_47bR!t<>eI#
ef*i``3$_d_i)0YOCmc2q=h%UQrWh3HJWK%19{FJa

diff --git a/__pycache__/utils.cpython-39.pyc b/__pycache__/utils.cpython-39.pyc
deleted file mode 100644
index 4baf7c40eed4d077b7dafe81aef6c84a5949c341..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 583
zcmYe~<>g{vU|<Mf`IdZ$k%8ech=Yuo85kHG7#J9eO&Ay$QW#Pga~Pr^G-DJan9r2U
z9L2)Okjj|Fx_~W(X(1ycLke>WOAA93dkSMPgC^@ska>PlZ0YF*Ir$~YEFdlvgV^j0
z3=Ga78<ZFr7)lrxFxD{CFfL$P$WX%sCYe(h=dh$OEo1_j8_b}|Qf11eprD}OR+^Vw
zl9`{UP?E1uoLH8sP?C|VP*RkbnU|WPkei>9ngfza$t=#+1FP3$xy71Vke{3pB><L<
zFD^+eNsUj*OfHEM$uB6$%+0JyEkY8##gda>TwKKoR;^c1lHsSxe2cXtzbHB577NJ6
zTU-buG+A!3flV*I#R}GNixY0<Ezabk%#zfi%>2BS3`N`w3=F>vovmU*i&Kk=W0HzW
zb8`)hOk-T~lS^|`^Gb?il5<iMi*j>xQ&MyDV@gXhbBgr}DsORttc(Yn4GOYi0Z<sS
zurM+)vN5tTGW}s;W%^yj%fP^($x#Ff*IQh<i6x*A&&f=>C5T|_6;y&mZ?QpX=7OA(
zTdc(;l{u-mm`jUO!DfJ6iA3-)FfiQWu*uC&Da}c>V+2JP2Ll5G2O|d~4-*d)0M6H+
A@&Et;

diff --git a/clearml_example.conf b/clearml_example.conf
new file mode 100644
index 000000000..d33981514
--- /dev/null
+++ b/clearml_example.conf
@@ -0,0 +1,229 @@
+# ClearML SDK configuration file
+api {
+    api_server: https://api.clearml.containers.wurnet.nl
+    web_server: https://app.clearml.containers.wurnet.nl
+    files_server: "s3://endpoint:443/bucket"
+    # Credentials are generated using the webapp, https://app.clearml.containers.wurnet.nl/settings
+    credentials {"access_key": "ACCESSKEY", "secret_key": "SECRETKEY"}
+}
+sdk {
+    storage {
+        cache {
+            # Defaults to system temp folder / cache
+            default_base_dir: "~/.clearml/cache"
+            # default_cache_manager_size: 100
+        }
+
+        direct_access: [
+            # Objects matching these entries are considered available for direct access, i.e. they will not be downloaded
+            # or cached, and any download request will return a direct reference.
+            # Objects are specified in glob format, available for url and content_type.
+            { url: "file://*" }  # file-urls are always directly referenced
+        ]
+    }
+
+    metrics {
+        # History size for debug files per metric/variant. For each metric/variant combination with an attached file
+        # (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
+        # X files are stored in the upload destination for each metric/variant combination.
+        file_history_size: 100
+
+        # Max history size for matplotlib imshow files per plot title.
+        # File names for the uploaded images will be recycled in such a way that no more than
+        # X images are stored in the upload destination for each matplotlib plot title.
+        matplotlib_untitled_history_size: 100
+
+        # Limit the number of digits after the dot in plot reporting (reducing plot report size)
+        # plot_max_num_digits: 5
+
+        # Settings for generated debug images
+        images {
+            format: JPEG
+            quality: 87
+            subsampling: 0
+        }
+
+        # Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to true, each series should have its own graph)
+        tensorboard_single_series_per_graph: false
+    }
+
+    network {
+        # Number of retries before failing to upload file
+        file_upload_retries: 3
+
+        metrics {
+            # Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
+            # a specific iteration
+            file_upload_threads: 4
+
+            # Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
+            # being sent for upload
+            file_upload_starvation_warning_sec: 120
+        }
+
+        iteration {
+            # Max number of retries when getting frames if the server returned an error (http code 500)
+            max_retries_on_server_error: 5
+            # Backoff factor for consecutive retry attempts.
+            # SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
+            retry_backoff_factor_sec: 10
+        }
+    }
+    aws {
+       s3 {
+           # S3 credentials, used for read/write access by various SDK elements
+
+           # The following settings will be used for any bucket not specified below in the "credentials" section
+           # ---------------------------------------------------------------------------------------------------
+           # Specify explicit keys
+           key: "S3ACCESSKEY"
+           secret: "S3SECRETKEY"
+           # ---------------------------------------------------------------------------------------------------
+
+           credentials: [
+           ]
+       }
+   }
+    google.storage {
+        # # Default project and credentials file
+        # # Will be used when no bucket configuration is found
+        # project: "clearml"
+        # credentials_json: "/path/to/credentials.json"
+        # pool_connections: 512
+        # pool_maxsize: 1024
+
+        # # Specific credentials per bucket and sub directory
+        # credentials = [
+        #     {
+        #         bucket: "my-bucket"
+        #         subdir: "path/in/bucket" # Not required
+        #         project: "clearml"
+        #         credentials_json: "/path/to/credentials.json"
+        #     },
+        # ]
+    }
+    azure.storage {
+        # max_connections: 2
+
+        # containers: [
+        #     {
+        #         account_name: "clearml"
+        #         account_key: "secret"
+        #         # container_name:
+        #     }
+        # ]
+    }
+
+    log {
+        # debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
+        null_log_propagate: false
+        task_log_buffer_capacity: 66
+
+        # disable urllib info and lower levels
+        disable_urllib3_info: true
+    }
+
+    development {
+        # Development-mode options
+
+        # dev task reuse window
+        task_reuse_time_window_in_hours: 72.0
+
+        # Run VCS repository detection asynchronously
+        vcs_repo_detect_async: true
+
+        # Store uncommitted git/hg source code diff in experiment manifest when training in development mode
+        # This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
+        store_uncommitted_code_diff: true
+
+        # Support stopping an experiment in case it was externally stopped, status was changed or task was reset
+        support_stopping: true
+
+        # Default Task output_uri. If output_uri is not provided to Task.init, default_output_uri will be used instead.
+        default_output_uri: ""
+
+        # Default auto generated requirements optimize for smaller requirements
+        # If True, analyze the entire repository regardless of the entry point.
+        # If False, first analyze the entry point script; if it does not reference other local files,
+        # do not analyze the entire repository.
+        force_analyze_entire_repo: false
+
+        # If set to true, the *clearml* update message will not be printed to the console.
+        # This value can be overridden with the OS environment variable CLEARML_SUPPRESS_UPDATE_MESSAGE=1
+        suppress_update_message: false
+
+        # If this flag is true (default is false), instead of analyzing the code with Pigar, analyze with `pip freeze`
+        detect_with_pip_freeze: false
+
+        # Log specific environment variables. OS environments are listed in the "Environment" section
+        # of the Hyper-Parameters.
+        # multiple selected variables are supported including the suffix '*'.
+        # For example: "AWS_*" will log any OS environment variable starting with 'AWS_'.
+        # This value can be overwritten with os environment variable CLEARML_LOG_ENVIRONMENT="[AWS_*, CUDA_VERSION]"
+        # Example: log_os_environments: ["AWS_*", "CUDA_VERSION"]
+        log_os_environments: []
+
+        # Development mode worker
+        worker {
+            # Status report period in seconds
+            report_period_sec: 2
+
+            # Number of buffered events that triggers a report flush
+            report_event_flush_threshold: 100
+
+            # Ping the server to check connectivity
+            ping_period_sec: 30
+
+            # Log all stdout & stderr
+            log_stdout: true
+
+            # Carriage return (\r) support. If zero (0), \r is treated as \n and flushed to the backend.
+            # Otherwise, consecutive carriage returns (\r) are flushed every X seconds (default: 10).
+            console_cr_flush_period: 10
+
+            # Compatibility feature: report memory usage for the entire machine.
+            # Default (false): report only for the running process and its sub-processes.
+            report_global_mem_used: false
+        }
+    }
+
+    # Apply top-level environment section from configuration into os.environ
+    apply_environment: false
+    # Top-level environment section is in the form of:
+    #   environment {
+    #     key: value
+    #     ...
+    #   }
+    # and is applied to the OS environment as `key=value` for each key/value pair
+
+    # Apply top-level files section from configuration into local file system
+    apply_files: false
+    # Top-level files section allows auto-generating files at designated paths with a predefined contents
+    # and target format. Options include:
+    #  contents: the target file's content, typically a string (or any base type int/float/list/dict etc.)
+    #  format: a custom format for the contents. Currently supported value is `base64` to automatically decode a
+    #          base64-encoded contents string, otherwise ignored
+    #  path: the target file's path, may include ~ and inplace env vars
+    #  target_format: format used to encode contents before writing into the target file. Supported values are json,
+    #                 yaml, yml and bytes (in which case the file will be written in binary mode). Default is text mode.
+    #  overwrite: overwrite the target file in case it exists. Default is true.
+    #
+    # Example:
+    #   files {
+    #     myfile1 {
+    #       contents: "The quick brown fox jumped over the lazy dog"
+    #       path: "/tmp/fox.txt"
+    #     }
+    #     myjsonfile {
+    #       contents: {
+    #         some {
+    #           nested {
+    #             value: [1, 2, 3, 4]
+    #           }
+    #         }
+    #       }
+    #       path: "/tmp/test.json"
+    #       target_format: json
+    #     }
+    #   }
+}
diff --git a/controller.py b/controller.py
index bef163e8d..96ba1482f 100644
--- a/controller.py
+++ b/controller.py
@@ -17,7 +17,8 @@ pipe.add_parameter(
     'data/mnist_png/testing'
 )
 
-pipe.set_default_execution_queue('test')
+pipe.set_default_execution_queue("default")
+# pipe.set_default_execution_queue('test')
 
  # Adding the first stage to the pipeline, a clone of the base tasks will be created and used
 pipe.add_step(name='stage_data',
@@ -38,10 +39,7 @@ pipe.add_step(name='stage_train',
                 )
 
 # Starting the pipeline
-# pipe.start_locally()
-pipe.start(queue='test')
-# Wait until pipeline terminates
-pipe.wait()
-# cleanup everything
-pipe.stop()
+pipe.start_locally(run_pipeline_steps_locally=True)
+# pipe.start(queue='test')
+
 print('done')
diff --git a/pipeline.py b/pipeline.py
deleted file mode 100644
index 0439de2c2..000000000
--- a/pipeline.py
+++ /dev/null
@@ -1,245 +0,0 @@
-from clearml.automation.controller import PipelineDecorator
-from clearml import TaskTypes
-
-@PipelineDecorator.component(
-        return_values=['dataset_name_training, dataset_name_test, dataset_project'],
-        cache=True,
-        repo="https://git.wur.nl/mdt-research-it-solutions/clearml-demo.git",
-        task_type=TaskTypes.data_processing,
-        packages="./requirements.txt"
-        )
-def step_one(training_path: str = 'data/mnist_png/training',
-            test_path: str = 'data/mnist_png/testing',
-            dataset_project: str = "pipeline",
-            dataset_name_training: str = "training_dataset",
-            dataset_name_test: str = "testing_dataset"):
-
-    print('step_one')
-    # make sure we have scikit-learn for this step, we need it to use to unpickle the object
-    from clearml import Dataset
-
-    dataset_train = Dataset.create(
-        dataset_name=dataset_name_training, dataset_project=dataset_project
-    )
-    dataset_test = Dataset.create(
-        dataset_name=dataset_name_test, dataset_project=dataset_project
-    )
-
-    dataset_train.add_files(path=training_path)
-    dataset_test.add_files(path=test_path)
-    dataset_train.upload()
-    dataset_test.upload()
-    dataset_train.finalize()
-    dataset_test.finalize()
-
-    return dataset_name_training, dataset_name_test, dataset_project
-
-@PipelineDecorator.component(
-        return_values=['model'],
-        cache=True,
-        repo="https://git.wur.nl/mdt-research-it-solutions/clearml-demo.git",
-        task_type=TaskTypes.training,
-        packages="./requirements.txt"
-    )
-def step_two(dataset_name_training,
-            dataset_name_test,
-            dataset_project,
-            epochs: int = 10,
-            train_batch_size: int = 256,
-            validation_batch_size: int = 256,
-            train_num_workers: int = 0,
-            validation_num_workers: int = 0,
-            resize: int = 28,
-            lr: float = 1e-3
-        ):
-
-    print('step_two')
-    # make sure we have pandas for this step, we need it to use the data_frame
-    import pickle
-    import torchvision.transforms as transforms
-    import torchvision.datasets as datasets
-    from torch.utils.data import DataLoader
-    import torch
-    import torch.nn as nn
-    import torch.optim as optim
-    import time
-    from tqdm.auto import tqdm
-    from model import CNNModel
-    from model_utils import train, validate
-    from clearml import Logger
-    from clearml import StorageManager
-    from clearml import Dataset
-
-    mnist_train = Dataset.get(
-        dataset_name=dataset_name_training, dataset_project=dataset_project
-    ).get_local_copy()
-    mnist_test = Dataset.get(
-        dataset_name=dataset_name_test, dataset_project=dataset_project
-    ).get_local_copy()
-
-    # get logger
-    logger = Logger.current_logger()
-
-    # the training transforms
-    train_transform = transforms.Compose([
-        transforms.Resize(resize),
-        #transforms.RandomHorizontalFlip(p=0.5),
-        #transforms.RandomVerticalFlip(p=0.5),
-        #transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
-        #transforms.RandomRotation(degrees=(30, 70)),
-        transforms.ToTensor(),
-        transforms.Normalize(
-            mean=[0.5, 0.5, 0.5],
-            std=[0.5, 0.5, 0.5]
-        )
-    ])
-    # the validation transforms
-    valid_transform = transforms.Compose([
-        transforms.Resize(resize),
-        transforms.ToTensor(),
-        transforms.Normalize(
-            mean=[0.5, 0.5, 0.5],
-            std=[0.5, 0.5, 0.5]
-        )
-    ])
-
-    # training dataset
-    train_dataset = datasets.ImageFolder(
-        root=mnist_train,
-        transform=train_transform
-    )
-    # validation dataset
-    valid_dataset = datasets.ImageFolder(
-        root=mnist_test,
-        transform=valid_transform
-    )
-    # training data loaders
-    train_loader = DataLoader(
-        train_dataset, batch_size=train_batch_size, shuffle=True,
-        num_workers=train_num_workers, pin_memory=True
-    )
-    # validation data loaders
-    valid_loader = DataLoader(
-        valid_dataset, batch_size=validation_batch_size, shuffle=False,
-        num_workers=validation_num_workers, pin_memory=True
-    )
-
-    device = ('cuda' if torch.cuda.is_available() else 'cpu')
-
-    print(f"Computation device: {device}\n")
-
-    model = CNNModel().to(device)
-    print(model)
-
-    # total parameters and trainable parameters
-    total_params = sum(p.numel() for p in model.parameters())
-    print(f"{total_params:,} total parameters.")
-
-    total_trainable_params = sum(
-        p.numel() for p in model.parameters() if p.requires_grad)
-    print(f"{total_trainable_params:,} training parameters.")
-
-    # optimizer
-    optimizer = optim.Adam(model.parameters(), lr=lr)
-
-    # loss function
-    criterion = nn.CrossEntropyLoss()
-
-    # lists to keep track of losses and accuracies
-    train_loss, valid_loss = [], []
-    train_acc, valid_acc = [], []
-
-    # start the training
-    for epoch in range(epochs):
-        print(f"[INFO]: Epoch {epoch+1} of {epochs}")
-        train_epoch_loss, train_epoch_acc = train(model, train_loader,
-                                                  optimizer, criterion, device)
-        valid_epoch_loss, valid_epoch_acc = validate(model, valid_loader,
-                                                     criterion, device)
-        train_loss.append(train_epoch_loss)
-        valid_loss.append(valid_epoch_loss)
-        train_acc.append(train_epoch_acc)
-        valid_acc.append(valid_epoch_acc)
-        print(f"Training loss: {train_epoch_loss:.3f}, training acc: {train_epoch_acc:.3f}")
-        logger.report_scalar(
-                "loss", "train", iteration=epoch, value=train_epoch_loss
-            )
-        logger.report_scalar(
-                "accuracy", "train", iteration=epoch, value=train_epoch_acc
-            )
-        print(f"Validation loss: {valid_epoch_loss:.3f}, validation acc: {valid_epoch_acc:.3f}")
-        logger.report_scalar(
-                "loss", "validation", iteration=epoch, value=valid_epoch_loss
-            )
-        logger.report_scalar(
-                "accuracy", "validation", iteration=epoch, value=valid_epoch_acc
-            )
-    return model
-
-# The actual pipeline execution context
-# notice that all pipeline component function calls are actually executed remotely
-# Only when a return value is used, the pipeline logic will wait for the component execution to complete
-@PipelineDecorator.pipeline(
-        name='pipeline test',
-        pipeline_execution_queue="test",
-        repo="https://git.wur.nl/mdt-research-it-solutions/clearml-demo.git",
-        project='pipeline_deco',
-        version='0.0.5',
-        add_pipeline_tags=True
-    )
-def executing_pipeline(
-        training_path='data/mnist_png/training',
-        test_path='data/mnist_png/testing'
-    ):
-    from utils import save_model
-    from clearml import OutputModel
-    import torch
-
-    # Use the pipeline argument to start the pipeline and pass it ot the first step
-    print('launch step one')
-
-    dataset_name_training, dataset_name_test, dataset_project = step_one(
-                                            training_path=training_path,
-                                            test_path=test_path,
-                                            dataset_project="pipeline",
-                                            dataset_name_training="training_dataset",
-                                            dataset_name_test="testing_dataset"
-                                        )
-    # Use the returned data from the first step (`step_one`), and pass it to the next step (`step_two`)
-    # Notice! unless we actually access the `data_frame` object,
-    # the pipeline logic does not actually load the artifact itself.
-    # When actually passing the `data_frame` object into a new step,
-    # It waits for the creating step/function (`step_one`) to complete the execution
-    print('launch step two')
-
-    model = step_two(
-                    dataset_name_training=dataset_name_training,
-                    dataset_name_test=dataset_name_test,
-                    dataset_project=dataset_project,
-                    epochs=10,
-                    train_batch_size=256,
-                    validation_batch_size=256,
-                    train_num_workers=0,
-                    validation_num_workers=0,
-                    resize=28,
-                    lr=1e-3
-                )
-
-    # store in a way we can easily load into triton without having to have the model class
-    torch.jit.script(model).save('serving_model.pt')
-    OutputModel().update_weights('serving_model.pt')
-
-if __name__ == '__main__':
-    # set the pipeline steps default execution queue (per specific step we can override it with the decorator)
-    PipelineDecorator.set_default_execution_queue('test')
-    # Run the pipeline steps as subprocesses on the current machine, great for local executions
-    # (for easy development / debugging, use `PipelineDecorator.debug_pipeline()` to execute steps as regular functions)
-    #PipelineDecorator.run_locally()
-
-    # Start the pipeline execution logic.
-    executing_pipeline(
-        training_path='data/mnist_png/training',
-        test_path='data/mnist_png/testing'
-    )
-
-    print('process completed')
diff --git a/preprocess.py b/preprocess.py
deleted file mode 100644
index 23e840ba6..000000000
--- a/preprocess.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import io
-from typing import Any, Union
-
-import numpy as np
-from PIL import Image, ImageOps
-import traceback
-
-from clearml import StorageManager
-
-
-# Notice Preprocess class Must be named "Preprocess"
-class Preprocess(object):
-    def __init__(self):
-        # set internal state, this will be called only once. (i.e. not per request)
-        pass
-
-    def preprocess(self, body: Union[bytes, dict], state: dict, collect_custom_statistics_fn=None) -> Any:
-        # we expect to get two valid on the dict x0, and x1
-        try:
-            if isinstance(body, bytes):
-                # we expect to get a stream of encoded image bytes
-                image = Image.open(io.BytesIO(body)).convert("RGB")
-                image = ImageOps.grayscale(image).resize((28, 28))
-
-            if isinstance(body, dict) and "url" in body.keys():
-                # image is given as url, and is fetched
-                url = body.get("url")
-                local_file = StorageManager.get_local_copy(remote_url=url)
-                image = Image.open(local_file)
-                image = ImageOps.grayscale(image).resize((28, 28))
-        except Exception:
-            traceback.print_exc()
-        return np.array([np.array(image).flatten()])
-
-    def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict:
-        # post process the data returned from the model inference engine
-        # data is the return value from model.predict we will put is inside a return value as Y
-        if not isinstance(data, np.ndarray):
-            # this should not happen
-            return dict(digit=-1)
-
-        # data is returned as probability per class (10 class/digits)
-        return dict(digit=int(data.flatten().argmax()))
diff --git a/requirements.txt b/requirements.txt
index e71c1b517..546bbdaa0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 matplotlib
-clearml
+clearml[s3]
 torchvision
 torch
 torchaudio
diff --git a/stage_one.py b/stage_one.py
index 13bae51a7..d0423f457 100644
--- a/stage_one.py
+++ b/stage_one.py
@@ -2,7 +2,8 @@ from clearml import Task, Dataset
 
 Task.add_requirements("-r ./requirements.txt")
 task = Task.init(project_name="pipeline",
-                task_name="pipeline step 1 dataset artifact")
+                task_name="pipeline step 1 dataset artifact",
+                output_uri=True)
 
 # only create the task, it will be executed remotely later
 task.execute_remotely()
diff --git a/stage_three.py b/stage_three.py
deleted file mode 100644
index fbd716e9b..000000000
--- a/stage_three.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from clearml import Task, Dataset, InputModel
-task = Task.init(project_name="pipeline", task_name="pipeline step 3 model deployment")
-
-# only create the task, it will be executed remotely later
-task.execute_remotely()
-
-# Create an input model using the ClearML ID of a model already registered in the ClearML platform
-input_model = InputModel(model_id="fd8b402e874549d6944eebd49e37eb7b")
-
-print('uploading datasets in the background')
-print('Done')
diff --git a/stage_two.py b/stage_two.py
index 0cd951323..c485bf1ef 100644
--- a/stage_two.py
+++ b/stage_two.py
@@ -77,7 +77,8 @@ def validate(model, testloader, criterion):
 
 Task.add_requirements("-r ./requirements.txt")
 task = Task.init(project_name="pipeline",
-                task_name="pipeline step 2 train model")
+                task_name="pipeline step 2 train model",
+                output_uri=True)
 
 # only create the task, we will actually execute it later
 task.execute_remotely()
diff --git a/test.jpg b/test.jpg
deleted file mode 100644
index 12f4d983b5c408b5467ff43211828678280b4b08..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4004
zcmex=<NpH&0WUXCHwH#V1_nk3Mh1rew;7xnIM~?O*;qN)+1WWcIk<R4czL+Fc_f8|
z`9)-<<mF_gWMmXn^wbrUbd+UeG|V-13=B<7Oyt!qZ7qy!^o&i6{vTrC<^UPT$SBD0
z{|JLL10w?yGsq4G7+_^%WM*Mv|9^{t11!$?^9M`{g_f=^eqDAZ^V!qu4Ghfpebyen
z_R~|=&_MLXIei8OxuyAHbI)urz5b#=fx-N?UE{U2KM$x`C9E_(ufRCl_FnlJ@pA<U
zrRTiDSIs)WRP{2=;Zk;jZP~X^$A2<#KfPSSYqfI?0|WEw7uR-Fspdyti}=LAkao6u
z&dQq8@%!$*YqzQAU|>$awC49Kd&~RR^ERG%X13Jp83V(sUwhu9R?oV7dFM3c7hfhY
z@W;DvE#2IrQ#LcQ=g6Pw6Bw9|*UmdG!<u9sKfCn&>gJ~c4;W;{IBklKxW3Zces9P7
z$wwO)%-&>QF^|)id$2Eb0Rsc0efj31D|QtO3=FUKN0%K6SiOLOL0b2EX2z^o^AijV
zEdLoUso8|u6`x>WV5?duzV_$Zz}F8LnD?)rEM_fpzJ7&OYVsTg2H$9Fp-s|16<?K}
zGhkqt@r$+F@^Rlf7pG4Q49x4U8qcjUG?1!kKn`K%|3?_~1Q-}u7@1gD*qE3ha?A{j
zOf0OT4uV3$A_j)WCTs#?3Pz@i$|?y3&?sYMW%|!B{mYXrzqS3hY`LAcxMQ#Bq7>nW
zn|RK5?>KvNZppH_JM7Lb*0jl%pTA|(E|pj%jh;DLM<0LE|77G;Yu7kEGgWMB-;#wI
zc1NemSLjC{+Wh+I-fIW4Io~V(tSJ1GUv)FKO>INp%F7JCr_L<<!+vLKR@t?sxmjVT
z#PtbAW=jP9!jxxp8%>|y?KR^KySPhIfUx6PbBX?v-uMvn=A%w`jx#>>v$(m(+q++0
znuBkx)xnp_;)}Uw@<lB<`MG;`DT7zxLC%M6x(8GFrM|9>7tH?n^Fz$)sh79&uJu}`
zniH{1>+~b}g!g~+!ee_|Y(j4y-0=Ncf33J^nc!UKK53hf)Mc%67ktWC-(kP0nc-5|
z^6dWf%o48EZh`CN)x(uFxjuN`+~IL{)8V?6(P4YF>fY}8_j=!jfA>?n|CZMKzf)ha
za>dWnA9udl7Md$j`q6b})^hhb3xw|2IIq4Jwf4$S>AYX9OItQ2EAHMib;tJ9ld@BM
zHI7XS`P#hhmjB!>-Lp3<daGm}Uh$tH{qT$U$v>lKe~Wp)x#wr}s$DCeAKiE2@XQTN
z4;IStc^38E{LgUsRo;t>suLcqoU|z|Y{`YSNmo?EOSz7!6l^^$mDGLIXQFh9F8fB+
zi5A;!CWkbHuk34C7#lw)iTkL`%6A{@*9Ar1PqjaucQ5hlzratCsnr*6@f1!`V%!}m
z<F=|#MK>$-Sw+&zEx)BsPu&;J=P0h3^D13YI)mdQFBj7Wria>s%<K`xau!arIcHuE
zy|{i#>WP%I*KKFsxXo6>+E$k}$6{F?m;Y(Wh^KwawjRk@ZkZ#!-gXPu#Xf7pxTW<g
zR_U*JrTND>C~#ihmW3-?i#L@&`p@w8{{JHkHUbQcOiV1CT&&EjkOT}Wa0G=EjTZ?R
z8aX%yCQjTatYlDFRNOe};6(*x6Q_kNDnZT(1x*j3$(hZb=|6+`qp<o3Yu2--R_Nt$
zAFQ?g_Iu?F@k2A4D|Gr#L`><+RNFFnBmZP!`H%7b;wM69f7dq8i{!gA*T9Ww?pcei
z+Uh^UFCYCRQs?5Y8C31<cX8(C<4x%qC3(JY(p2qtf0Z?Ovm(y2B`<LEl%-L6$toGI
zEo9u5PW=(K-D&GC(X|)W=Cx**Xh$!My32L#Mc8$xOvCIqtzMc-3N`HP=2{kaFZT-7
z*y6o<nyH~ljEJ+^lc|&W%Uz_HLz)xc`7V>vc=hsLruVa~8^Sv;&swT^ZC<5rd{K3!
zOj2azT?MBKqb*+~q@FWt6|J=F&3;z;yr($Q@Y)<XmCOg}RTlD1=RXM)CtSV7b?8Hw
z_@UVg?s4pWGeNr8BUIKVwCZXK%Yk!+PucVqEm5?YcS?R+?-T8piz~S{)lW4Ma6F{Z
zlzn0Tr~8ZDt{U_H-fUrK<Hx3WZRX>{3+mfNg*O#j#MuPC{&9}|M`Hio-EC>twtiwx
zDxQ~?`LK9aw!h-nwz-Vace!r82sQ-yUgOW=<yux&ACAAfdobzD5|wU=8%0a44xMP;
z@q0~&b^k7fO=6GVb$drfOw(`wD*yU*n|;|k$z`gYNkP*O_tq`i|M+#oRQcI|%?lzn
zZ~7+I=oPwR%V}TDqyx`IMLe%=cwRE=X5W?v(cfR+5S%r;r>XP&)x}u_d~13muX)~?
z;iCTXciAGDOP9U<7-Ak+-twvtl=6)#dp==L#l!T<7sV@T7k)VO(Ru5vCa05smR%Hb
zvGDOUUDqBS>1)$B|4HclCl49>CQMt(J%7E^<hh2kwu)PxpQ{<ta$jui;+sBpdb-Is
z+1u?BGY>8lYun_=mu*z4c8Pa&SPskN-F3}87flLt!@{RuzW<-0WB0^8%_~l=O#8If
zvOmSid-++$$m4fxug<AGeAjz&?~dw48m?=y<X)@(XSiqo{|JM&00R>v6Dt!NGb6Yr
zWnf?wWKc{rbPT-s!J$yVpmF2Dj|vkPK17t<_KN=*PB)}5cFs9g_}<U=1jBOmif4<C
zzyILyQo!Od=L&}O_j7(<ZaI`Lt8lZMf#)!j!OZ76KjvpP*6Z<xnXT+BOcLA2;J`TB
zsg2L`_}%3yKbm>k`LAj|mI`<6D_l2c*}nMA-{v3tZY%oNBjF#ryfRn4^qq6|9tEEp
z=56~SX65cZaaT_q^9<3pr5r};7Jjy!=Qmn@Q@1<v{KlN$!t;}g-}uGucy;7ILwbh8
zqJX9Tp6;Ctas~MhLfx7!Evs0gpK;14v_V2h%=SUbhaA4S&+nUmij&gdb!>HS)LXje
z+#j#vuUpjQ%Dh@c1KuR7iX>e$HT<&oU}n4eHaBa(dn-&iuN+P|m&w&pb&3B%LY%+^
z9oL7Vk9gYUE^p<MRWfXy)|Hhk)e>CE&+Om8EHPF0obHQ5td0K}Dgu_vS5(H&P;1DZ
zIrBwe?4uVGFNp*s=6*Q9DDE11)~ki{wby3GNzWX5_VtC#>3&qZiswd&(le$HY)6Zl
zPn@qk*Wtmza8mu!-p@6DzCBN34XXduyjJI3?tXCpsm_zHA0+!3hr2wDKFM<B=xYuW
zHYW~`AQc9c&_~G=*sf=u-EL9#OiRXS&TOGusZ4y5rvy0M63=h6<aaSqkvh~V7@DD<
zq3XI+WLucjgpI;Nx=$zmcxIpBxh&v6gUc2xtJKhI#RdC+_y0YAY_Ds{Y*zl$B~zBJ
zI9PY|^6Xa!(?o+8Y<)H{^_<`J4#QUp8{R3j9Vn10O<2D}q$4cmL94y5V|mkt`8ra{
z+a|F_m6oYDoZ${hS*aoBB%v=ADk%{i%pg2@&GY*YC)Jxt?Dag5{kWOu@t67MzS(NO
z5U%)9oHcWrdBmE-j|-&^%of{y@X`YInR8}eI>N}J{EBbFv&8$d*^Z`7A7nL7t*iTx
z+z`svbZPe2L`K6;L7Rl82lOsbEUAcW-lr^3@?O_`c5tPy*QM8&%YV*&92I)#OXZG_
zQ8U&%g%tj(NRyXZo}W2+E!(exARa|wcMgxZ3;YLqxg3`{^ga=&^|%*hcvAlBe+FoI
z&%hu6sVLA$rWXeoIEo%HaGyWGAaC%2L79I6gK>iagE|8PLx}+cgHsd(!-@$E3<c*H
z7<QX8FdWlnU`Uo^U@*~SU|=v{U|>jLU|@)0U|>*SU|@Q{z>v9tfq}n(fq|=mfk7Rm
z;RR|KYY8xd8(J(VdDqY}Fj2svaN`Gsi3cwlEd1DrlywFEGn}4qWC?4<@%!wpDvRd%
zBu0DR<5TA;`|#PcD|JnJ%nZhUemTbP{!DBtpAvR(2On4&a_*q7(gN1oUmi?8wol{m
zng0x$C&a_AMj1pd{kKDNNe9c0DgFx?wGPMCe>}KoV-IJ_t?u0p$7c9gMt*y;@mIm(
zBN<;re(j0fGvz-+|MLdF_uCgsb=Ah+ud`y``?FIo{QVu(AH7_2q^w&yc3oH`c#LPu
zUvG~Yj$R-C+Q>)#ku^EYV{q*8M>d&bAyXG0abXr@7G0xw_IrjR&wmDkV?hj!#^Jl8
z?=Cv5!<%WuEl`^qb~eH&M&4va(sIoY%JUCy37N1?%sf2y<%Xkc)0lO)Zj^qxDJ`t4
zL$T-f=kjs`Keer2`d!|=jx3Q3omNn?u9<aa$g9<F4;uDMeV&&QqTZ->CUdLJr(K3`
z-g*}4O9a{l38vOux|a35xivXXY|0Yn;?4yVF6{ihy83QVt5VXPWmi)U7|&2UZ825m
zO~NM2O)oCE3SSY)XWZ|_!lunydAV^q!*7;DGvBThS#n*@q)7cl${wrS)it8mRGE%!
z^qMwj+UHvndp1f1t%@{IR@#=ZX!>3u#^fXJ2C2pOHV7*}w_)0r^1Xtw;{4G#Zt>17
zmlZl|Ug$VFluh;O@ecO+<R)@Yf`zeb<7uAwRSyDxt#|a7u{pOdaQ=;N7IO3MZ#-&f
z6`p>7vY>9Zsnr~BbI+q$YSs@XA8ihsa^|wo!da&mAM9jH`&ylC$T9KJe+F@(FnyNk
z8$yH27#BJO3m4p3?x^f7Uvu$!%%t3t9BFS}d$W5@3pjD>l*pASr7PuJ4{IG<aCU<y
zOPl7O>F+OP-}$b2!)4Y5F)J<mS${tjRh;yl;(q?oBG-9kYZ(0Mro599EI*sQ^3%P|
zMlCLBVH3H1(nQw@{PX(s&vMIo6=9{6wW;|QN``4}U7k}8^?a@4zOyf5VbdC+&2f2e
zCnwHG+;mRW<CUT6HmTbkJI+~!HYB9n6IV{#!~aE#RZ(%O<8;%%q6Yd&$~#564~Sis
zlUZ|#C*ZZunh6X$gsv_<QE=x#rbqrcrSOvn*XQ4#abUgb;{MWm8;f2odiqtT=kCir
z+s?D?zo(O?WLUnkwNWLGXYQ>N8y_0X*(%Dup^r(ObDrt~hK3dTr;jL{Dbe~e)#|9i
z#5iH)@~Ix1ib@0HA|ya*GRlDC#<AU+F^)Y-?z&TLH#9yyaw6MVa>AtfG4^x*UhQs(
zxnr64peF9i4dIkC>)LgDP8_|mW;vf)#*4*S(|2YZ+ENjfY_V=qmyhsrdHu>C9c;Fy
z)B0yRZ8pe0z5UB%Mu~G<99L*f_-wvwxq9p8xqk!=ADeJHXp1Z<5c`+;-PNeg@a2Do
H`2RNn^iks?

-- 
GitLab
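
With this patch the controller no longer enqueues the pipeline on the 'test'
queue; it runs the controller logic and both steps as local subprocesses.
Below is a minimal sketch of the resulting flow. It assumes a recent ClearML
SDK, a valid ~/clearml.conf (for example a filled-in copy of
clearml_example.conf), and that the base tasks created by stage_one.py and
stage_two.py already exist in the "pipeline" project; the pipeline name,
version and parameter name are illustrative and not taken from the patch.

# Illustrative sketch only -- mirrors the local-execution flow in controller.py
from clearml import PipelineController

pipe = PipelineController(
    name='pipeline demo',   # hypothetical name, not part of the patch
    project='pipeline',
    version='0.0.1',        # hypothetical version
)

# controller.py exposes a parameter defaulting to the test-set path;
# the parameter name is not visible in the hunk above, so it is invented here
pipe.add_parameter('dataset_path', 'data/mnist_png/testing')

# Registered as the default queue, but a purely local run never uses it
pipe.set_default_execution_queue('default')

# Clone the base tasks created by stage_one.py / stage_two.py
pipe.add_step(name='stage_data',
              base_task_project='pipeline',
              base_task_name='pipeline step 1 dataset artifact')
pipe.add_step(name='stage_train',
              parents=['stage_data'],
              base_task_project='pipeline',
              base_task_name='pipeline step 2 train model')

# Run the controller and every step as local subprocesses (what this patch switches to)
pipe.start_locally(run_pipeline_steps_locally=True)
print('done')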