Study Notes
Automated machine learning capabilities allow you to determine the best-performing algorithm for your data.
- Classification
- Regression
- Time Series Forecasting
- Select algorithm or algorithms used in experiment.
- Preprocessing and featurization
- Scaling and normalization
- Run automated machine learning experiments
- Configure an automated machine learning experiment
- Specify data for training
- Specify the primary metric
- Submit an automated machine learning experiment
- Retrieve the best run and its model
- Explore preprocessing steps
Check :
pip show azureml-train-automl
AutoML is different from running an experiment.
There is no script to be executed (pipelines steps)
There is a configuration file only (automl config)
We need to set workspace, set dataset and compute anyway.
The AutoML-specific piece is the AutoMLConfig class.
Represents configuration for submitting an automated ML experiment in Azure Machine Learning
This configuration object contains and persists the parameters for configuring the experiment run, as well as the training data to be used at run time.
Feb 2023 - the code below works with Python versions >=3.5,<3.8
Set workspace
import azureml.core
from azureml.core import Workspace

# Connect to the workspace described by the saved config file.
ws = Workspace.from_config()

# Confirm which SDK version and workspace are in use.
ready_message = 'Ready to use Azure ML {} to work with {}'
print(ready_message.format(azureml.core.VERSION, ws.name))
Prepare data
from azureml.core import Dataset

default_ds = ws.get_default_datastore()

# Upload and register the data only when the dataset is not in the workspace yet.
if 'diabetes dataset' not in ws.datasets:
    default_ds.upload_files(files=['./data/diabetes.csv', './data/diabetes2.csv'],  # Upload the diabetes csv files in /data
                            target_path='diabetes-data/',  # Put it in a folder path in the datastore
                            overwrite=True,  # Replace existing files of the same name
                            show_progress=True)

    # Create a tabular dataset from the path on the datastore (this may take a short while)
    tab_data_set = Dataset.Tabular.from_delimited_files(path=(default_ds, 'diabetes-data/*.csv'))

    # Register the tabular dataset
    try:
        tab_data_set = tab_data_set.register(workspace=ws,
                                             name='diabetes dataset',
                                             description='diabetes data',
                                             tags={'format': 'CSV'},
                                             create_new_version=True)
        print('Dataset registered.')
    except Exception as ex:
        # Best effort: report the registration failure and carry on.
        print(ex)
else:
    print('Dataset already registered.')

# Split the dataset into training and validation subsets
diabetes_ds = ws.datasets.get("diabetes dataset")
train_ds, test_ds = diabetes_ds.random_split(percentage=0.7, seed=123)
print("Data ready!")
Prepare compute
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "your-compute-cluster"

try:
    # Check for existing compute target
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        # Best effort: report the provisioning failure instead of stopping.
        print(ex)
Suppose you select: Classification algorithm.
Check what metrics are available:
import azureml.train.automl.utilities as automl_utils

# Print every primary metric available for a classification task.
for metric in automl_utils.get_primary_metrics('classification'):
    print(metric)
result:
AUC_weighted
average_precision_score_weighted
norm_macro_recall
precision_score_weighted
accuracy
Restrict: 4 iterations to reduce the amount of time taken.
from azureml.train.automl import AutoMLConfig

# Describe the AutoML experiment: the task, the data, the metric and the search budget.
automl_config = AutoMLConfig(
    name='Automated ML Experiment',
    task='classification',
    primary_metric='AUC_weighted',
    label_column_name='Diabetic',
    training_data=train_ds,
    validation_data=test_ds,
    featurization='auto',
    iterations=4,  # restricted to 4 iterations to reduce the time taken
    max_concurrent_iterations=2,
    compute_target=training_cluster,
)

print("Ready for Auto ML run.")
Submit the experiment.
from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails

print('Submitting Auto ML experiment...')
automl_experiment = Experiment(ws, 'mslearn-diabetes-automl-sdk')

# Submit exactly once: each submit() call starts a new AutoML run.
automl_run = automl_experiment.submit(automl_config)

# Show the run details widget, then block until the run has finished.
RunDetails(automl_run).show()
automl_run.wait_for_completion(show_output=True)
Once submitted you will find it in Azure ML studio:

You will notice that the cluster is getting busy as well.

# Walk the child runs of the AutoML run and print the metrics logged by each.
for run in automl_run.get_children():
    print('Run ID', run.id)
    for metric in run.get_metrics():
        print('', run.get_metrics(metric))
# Retrieve the best child run together with its fitted model.
best_run, fitted_model = automl_run.get_output()
print(best_run)

print(' Best Model Definition:')
print(fitted_model)

# The named steps of the fitted model show the transformations that were applied.
print(' Best Run Transformations:')
for step in fitted_model.named_steps:
    print(step)

print(' Best Run Metrics:')
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)
Register the model for future use
from azureml.core import Model

# Register model, recording the best run's headline metrics as properties.
best_run.register_model(model_path='outputs/model.pkl', model_name='diabetes_model',
                        tags={'Training context': 'Auto ML'},
                        properties={'AUC': best_run_metrics['AUC_weighted'],
                                    'Accuracy': best_run_metrics['accuracy']})

# List registered models with their tags and properties.
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name in model.tags:
        tag = model.tags[tag_name]
        print('', tag_name, ':', tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print('', prop_name, ':', prop)
    print(' ')
In Azure ML Studio you will get all details about the run, the best model, and explanations.

References
Automate machine learning model selection with Azure Machine Learning - Training | Microsoft Learn