How to run a model using fowt_ml and calculate scores¶
Data preparation (common for any type of model)¶
In [3]:
Copied!
from fowt_ml.datasets import get_data, fix_column_names
from fowt_ml import Config
from sklearn.model_selection import train_test_split
from fowt_ml.datasets import get_data, fix_column_names
from fowt_ml import Config
from sklearn.model_selection import train_test_split
In [4]:
Copied!
# Path to the example YAML configuration shipped with the package
# (relative to the notebook's working directory)
example_config_file = "../../src/example_config.yml"
example_config_file = "../../src/example_config.yml"
In [5]:
Copied!
# Load the config, point the "exp699" dataset entry at the local .mat file,
# then load the data frame.
# NOTE(review): hardcoded absolute local path — readers must change this to
# wherever they downloaded exp699.mat.
config = Config.from_yaml(example_config_file)
config["data"]["exp699"]["path_file"] = "/home/sarah/temp/hybridlabs/data_example/exp699.mat"
df = get_data("exp699", config["data"])
config = Config.from_yaml(example_config_file)
config["data"]["exp699"]["path_file"] = "/home/sarah/temp/hybridlabs/data_example/exp699.mat"
df = get_data("exp699", config["data"])
In [6]:
Copied!
# train/test split
# Predictor and target column labels come from the config file.
predictors_labels = config["ml_setup"]["predictors"]
target_labels = config["ml_setup"]["targets"]
X_data = df.loc[:, predictors_labels]
Y_data = df.loc[:, target_labels]
# rename the column names to exclude [] (bracket characters in the labels)
X_data, Y_data = fix_column_names(X_data), fix_column_names(Y_data)
# Split options (e.g. test size, shuffling) are also taken from the config.
train_test_split_kwargs = config["ml_setup"]["train_test_split_kwargs"]
X_train, X_test, y_train, y_test = train_test_split(X_data, Y_data, **train_test_split_kwargs)
# train/test split
predictors_labels = config["ml_setup"]["predictors"]
target_labels = config["ml_setup"]["targets"]
# rename the column names to exclude []
X_data = df.loc[:, predictors_labels]
Y_data = df.loc[:, target_labels]
X_data, Y_data = fix_column_names(X_data), fix_column_names(Y_data)
train_test_split_kwargs = config["ml_setup"]["train_test_split_kwargs"]
X_train, X_test, y_train, y_test = train_test_split(X_data, Y_data, **train_test_split_kwargs)
In [7]:
Copied!
# Metric names used by calculate_score / cross_validate below
# (see the Out[] cell for the configured list).
metrics = config["ml_setup"]["metric_names"]
metrics
metrics = config["ml_setup"]["metric_names"]
metrics
Out[7]:
['neg_mean_absolute_error', 'neg_root_mean_squared_error', 'r2', 'model_fit_time', 'model_predict_time']
Linear models¶
In [6]:
Copied!
from fowt_ml import LinearModels
LinearModels.ESTIMATOR_NAMES
from fowt_ml import LinearModels
LinearModels.ESTIMATOR_NAMES
Out[6]:
{'LinearRegression': sklearn.linear_model._base.LinearRegression,
'RidgeRegression': sklearn.linear_model._ridge.Ridge,
'LassoRegression': sklearn.linear_model._coordinate_descent.Lasso,
'ElasticNetRegression': sklearn.linear_model._coordinate_descent.ElasticNet,
'LeastAngleRegression': sklearn.linear_model._least_angle.Lars}
In [7]:
Copied!
# calculate metrics
# Fit LeastAngleRegression (with input scaling enabled) on the training set
# and evaluate the configured metrics on the held-out test set.
model_name = "LeastAngleRegression"
model = LinearModels(model_name)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
# calculate metrics
model_name = "LeastAngleRegression"
model = LinearModels(model_name)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
Out[7]:
{'neg_mean_absolute_error': -1.603052611245188,
'neg_root_mean_squared_error': -2.1309153094889486,
'r2': 0.10732388900915783,
'model_fit_time': np.float64(0.096),
'model_predict_time': np.float64(0.001)}
In [8]:
Copied!
# Cross-validate on the training data only; each metric comes back as an
# array with one entry per fold (5 folds, per the output below).
scores = model.cross_validate(X_train, y_train, metrics)
scores
scores = model.cross_validate(X_train, y_train, metrics)
scores
Out[8]:
{'model_fit_time': array([0.109, 0.085, 0.081, 0.08 , 0.082]),
'neg_mean_absolute_error': array([-1.60136019, -1.60960767, -1.6112611 , -1.61425787, -1.59912871]),
'neg_root_mean_squared_error': array([-2.13342478, -2.13724123, -2.14536043, -2.15210295, -2.1258841 ]),
'r2': array([0.10649117, 0.10830421, 0.10762699, 0.10839059, 0.10954135]),
'model_predict_time': array([0.001, 0.001, 0.001, 0.001, 0.001])}
Random Forest¶
In [9]:
Copied!
from fowt_ml import EnsembleModel
EnsembleModel.ESTIMATOR_NAMES
from fowt_ml import EnsembleModel
EnsembleModel.ESTIMATOR_NAMES
Out[9]:
{'ExtraTrees': sklearn.ensemble._forest.ExtraTreesRegressor,
'RandomForest': sklearn.ensemble._forest.RandomForestRegressor}
In [10]:
Copied!
# RandomForest with explicit hyperparameters: 50 trees, depth capped at 9,
# each tree bootstrapped on 10,000 samples to keep the fit time manageable.
model_name = "RandomForest"
model = EnsembleModel(estimator=model_name, max_depth=9, bootstrap=True, max_samples=10_000, n_estimators=50)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
model_name = "RandomForest"
model = EnsembleModel(estimator=model_name, max_depth=9, bootstrap=True, max_samples=10_000, n_estimators=50)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
Out[10]:
{'neg_mean_absolute_error': -1.5959389437050893,
'neg_root_mean_squared_error': -2.1135488771633013,
'r2': 0.12112219032333066,
'model_fit_time': np.float64(5.78),
'model_predict_time': np.float64(0.003)}
Gaussian Process¶
In [12]:
Copied!
from fowt_ml import SparseGaussianModel
SparseGaussianModel.ESTIMATOR_NAMES
from fowt_ml import SparseGaussianModel
SparseGaussianModel.ESTIMATOR_NAMES
Out[12]:
{'SklearnGPRegressor': fowt_ml.gaussian_process.SklearnGPRegressor}
In [13]:
Copied!
# Sparse Gaussian process regression; model hyperparameters are read from
# the config file rather than hardcoded here.
# NOTE(review): the fit is slow (~88 s in the recorded run below).
model_name = "SklearnGPRegressor"
params = config["ml_setup"]["model_names"][model_name]
model = SparseGaussianModel("SklearnGPRegressor", **params)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
model_name = "SklearnGPRegressor"
params = config["ml_setup"]["model_names"][model_name]
model = SparseGaussianModel("SklearnGPRegressor", **params)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
Out[13]:
{'neg_mean_absolute_error': -1.5981088588623225,
'neg_root_mean_squared_error': -2.1218047367573445,
'r2': 0.12320574147031532,
'model_fit_time': np.float64(88.066),
'model_predict_time': np.float64(0.021)}
MLP¶
In [10]:
Copied!
from fowt_ml import NeuralNetwork
NeuralNetwork.ESTIMATOR_NAMES
from fowt_ml import NeuralNetwork
NeuralNetwork.ESTIMATOR_NAMES
Out[10]:
{'MultilayerPerceptron': sklearn.neural_network._multilayer_perceptron.MLPRegressor,
'RNNRegressor': fowt_ml.neural_network.SklearnRNNRegressor,
'LSTMRegressor': fowt_ml.neural_network.SklearnLSTMRegressor,
'GRURegressor': fowt_ml.neural_network.SklearnGRURegressor}
In [11]:
Copied!
# Build a multilayer perceptron; its hyperparameters come from the config
# (the Out[] cell below shows hidden_layer_sizes=10, max_iter=50).
model_name = "MultilayerPerceptron"
params = config["ml_setup"]["model_names"][model_name]
model = NeuralNetwork(model_name, **params)
model_name = "MultilayerPerceptron"
params = config["ml_setup"]["model_names"][model_name]
model = NeuralNetwork(model_name, **params)
In [12]:
Copied!
model.estimator
model.estimator
Out[12]:
MLPRegressor(hidden_layer_sizes=10, max_iter=50)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MLPRegressor(hidden_layer_sizes=10, max_iter=50)
In [ ]:
Copied!
# Enable input scaling, then fit and score the MLP with the same metric set
# used for the other model families.
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
Out[ ]:
{'neg_mean_absolute_error': -1.5981261086400942,
'neg_root_mean_squared_error': -2.1186915415839853,
'r2': 0.12120288013830806,
'model_fit_time': np.float64(11.438),
'model_predict_time': np.float64(0.001)}
XGBoost¶
In [16]:
Copied!
from fowt_ml import XGBoost
XGBoost.ESTIMATOR_NAMES
from fowt_ml import XGBoost
XGBoost.ESTIMATOR_NAMES
Out[16]:
{'XGBoostRegression': xgboost.sklearn.XGBRegressor}
In [17]:
Copied!
# XGBoost regression; hyperparameters are taken from the config file.
model_name = "XGBoostRegression"
params = config["ml_setup"]["model_names"][model_name]
model = XGBoost(model_name, **params)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
model_name = "XGBoostRegression"
params = config["ml_setup"]["model_names"][model_name]
model = XGBoost(model_name, **params)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
Out[17]:
{'neg_mean_absolute_error': -1.581722378730774,
'neg_root_mean_squared_error': -2.0865964889526367,
'r2': 0.15320807695388794,
'model_fit_time': np.float64(2.811),
'model_predict_time': np.float64(0.001)}
RNN models¶
In [8]:
Copied!
from fowt_ml import NeuralNetwork
import numpy as np
NeuralNetwork.ESTIMATOR_NAMES
from fowt_ml import NeuralNetwork
import numpy as np
NeuralNetwork.ESTIMATOR_NAMES
Out[8]:
{'MultilayerPerceptron': sklearn.neural_network._multilayer_perceptron.MLPRegressor,
'RNNRegressor': <function fowt_ml.neural_network.RNNRegressor(**args)>,
'LSTMRegressor': <function fowt_ml.neural_network.LSTMRegressor(**args)>,
'GRURegressor': <function fowt_ml.neural_network.GRURegressor(**args)>}
In [9]:
Copied!
# for torch based models, this is needed
# NOTE(review): this rebinds X_train/X_test/y_train/y_test from DataFrames to
# float32 numpy arrays, so the earlier (non-torch) cells are no longer
# re-runnable as-is after this point; re-run the train/test split cell to
# restore the DataFrames.
X_train = np.asarray(X_train, dtype=np.float32)
X_test = np.asarray(X_test, dtype=np.float32)
y_train = np.asarray(y_train, dtype=np.float32)
y_test = np.asarray(y_test, dtype=np.float32)
# for torch based models, this is needed
X_train = np.asarray(X_train, dtype=np.float32)
X_test = np.asarray(X_test, dtype=np.float32)
y_train = np.asarray(y_train, dtype=np.float32)
y_test = np.asarray(y_test, dtype=np.float32)
In [11]:
Copied!
# RNN hyperparameters: network input/output widths are derived from the
# number of predictor/target columns configured earlier.
model_name = "RNNRegressor"
params = {
"input_size": len(predictors_labels),
"hidden_size": 64,
"output_size": len(target_labels),
"num_layers":2,
}
model_name = "RNNRegressor"
params = {
"input_size": len(predictors_labels),
"hidden_size": 64,
"output_size": len(target_labels),
"num_layers":2,
}
In [12]:
Copied!
# Build the RNN wrapper and enable input scaling.
model = NeuralNetwork(model_name, **params)
model.use_scaled_data()
model = NeuralNetwork(model_name, **params)
model.use_scaled_data()
Out[12]:
<fowt_ml.neural_network.NeuralNetwork at 0x772ec59b3a10>
In [13]:
Copied!
# Fit the underlying estimator directly (the Out[] cell below shows it is a
# scaler + skorch NeuralNetRegressor pipeline wrapped in a
# TransformedTargetRegressor).
model.estimator.fit(X_train, y_train)
model.estimator.fit(X_train, y_train)
Out[13]:
TransformedTargetRegressor(regressor=Pipeline(steps=[('scaler',
StandardScaler()),
('model',
NeuralNetRegressor(_params_to_validate={'module__num_layers', 'module__output_size', 'module__input_size', 'module__hidden_size', 'module__rnn_model'}, batch_size=128, callbacks=None, compile=False, dataset=<class 'skorch.dataset.Dataset'>, device='cpu', iterator_train=<...4, module__input_size=14, module__num_layers=2, module__output_size=6, module__rnn_model=<class 'torch.nn.modules.rnn.RNN'>, optimizer=<class 'torch.optim.sgd.SGD'>, predict_nonlinearity='auto', torch_load_kwargs=None, train_split=<skorch.dataset.ValidSplit object at 0x772ed0f63c10>, use_caching='auto', verbose=0, warm_start=False))]),
transformer=StandardScaler())In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
TransformedTargetRegressor(regressor=Pipeline(steps=[('scaler',
StandardScaler()),
('model',
NeuralNetRegressor(_params_to_validate={'module__num_layers', 'module__output_size', 'module__input_size', 'module__hidden_size', 'module__rnn_model'}, batch_size=128, callbacks=None, compile=False, dataset=<class 'skorch.dataset.Dataset'>, device='cpu', iterator_train=<...4, module__input_size=14, module__num_layers=2, module__output_size=6, module__rnn_model=<class 'torch.nn.modules.rnn.RNN'>, optimizer=<class 'torch.optim.sgd.SGD'>, predict_nonlinearity='auto', torch_load_kwargs=None, train_split=<skorch.dataset.ValidSplit object at 0x772ed0f63c10>, use_caching='auto', verbose=0, warm_start=False))]),
transformer=StandardScaler())Pipeline(steps=[('scaler', StandardScaler()),
('model',
NeuralNetRegressor(_params_to_validate={'module__num_layers', 'module__output_size', 'module__input_size', 'module__hidden_size', 'module__rnn_model'}, batch_size=128, callbacks=None, compile=False, dataset=<class 'skorch.dataset.Dataset'>, device='cpu', iterator_train=<class 'torch.utils.data.dataloader.Dat...Module'>, module__hidden_size=64, module__input_size=14, module__num_layers=2, module__output_size=6, module__rnn_model=<class 'torch.nn.modules.rnn.RNN'>, optimizer=<class 'torch.optim.sgd.SGD'>, predict_nonlinearity='auto', torch_load_kwargs=None, train_split=<skorch.dataset.ValidSplit object at 0x772ed0f63c10>, use_caching='auto', verbose=0, warm_start=False))])StandardScaler()
<class 'skorch.regressor.NeuralNetRegressor'>[uninitialized]( module=<class 'fowt_ml.neural_network.GenericRNNModule'>, module__hidden_size=64, module__input_size=14, module__num_layers=2, module__output_size=6, module__rnn_model=<class 'torch.nn.modules.rnn.RNN'>, )
StandardScaler()
StandardScaler()
In [ ]:
Copied!
# Score the RNN; training progress per epoch is printed by skorch below.
# NOTE(review): this refits the model — the fit in the previous cell is not
# reused here. Total fit time in the recorded run was ~353 s.
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
epoch train_loss valid_loss dur
------- ------------ ------------ -------
1 0.9143 0.8909 24.6896
2 0.8943 0.8866 26.2002
3 0.8915 0.8850 33.9241
4 0.8902 0.8840 36.8488
5 0.8893 0.8833 38.8893
6 0.8886 0.8827 37.3335
7 0.8881 0.8823 40.8689
8 0.8876 0.8819 36.2290
9 0.8872 0.8815 36.1466
10 0.8869 0.8812 40.8171
Out[ ]:
{'neg_mean_absolute_error': -1.6005001068115234,
'neg_root_mean_squared_error': -2.124774694442749,
'r2': 0.11344795674085617,
'model_fit_time': np.float64(352.913),
'model_predict_time': np.float64(0.001)}
In [10]:
Copied!
# LSTM hyperparameters; max_epochs is capped at 5 to keep the runtime short.
model_name = "LSTMRegressor"
params = {
"input_size": len(predictors_labels),
"hidden_size": 64,
"output_size": len(target_labels),
"num_layers":2,
"max_epochs":5,
}
model_name = "LSTMRegressor"
params = {
"input_size": len(predictors_labels),
"hidden_size": 64,
"output_size": len(target_labels),
"num_layers":2,
"max_epochs":5,
}
In [11]:
Copied!
# Build, scale, fit, and score the LSTM in one cell.
model = NeuralNetwork(model_name, **params)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
model = NeuralNetwork(model_name, **params)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
epoch train_loss valid_loss dur
------- ------------ ------------ ------
1 1.0000 0.9892 6.0975
2 0.9963 0.9848 6.2456
3 0.9905 0.9776 6.3866
4 0.9819 0.9680 6.6953
5 0.9717 0.9581 7.1940
Out[11]:
{'neg_mean_absolute_error': -1.6250715255737305,
'neg_root_mean_squared_error': -2.1550610065460205,
'r2': 0.03436806797981262,
'model_fit_time': np.float64(33.554),
'model_predict_time': np.float64(0.001)}
In [19]:
Copied!
# GRU hyperparameters — same layout as the LSTM cell above.
model_name = "GRURegressor"
params = {
"input_size": len(predictors_labels),
"hidden_size": 64,
"output_size": len(target_labels),
"num_layers":2,
"max_epochs": 5,
}
model_name = "GRURegressor"
params = {
"input_size": len(predictors_labels),
"hidden_size": 64,
"output_size": len(target_labels),
"num_layers":2,
"max_epochs": 5,
}
In [20]:
Copied!
# Build, scale, fit, and score the GRU.
# NOTE(review): noticeably slower than the LSTM run (~531 s recorded fit time).
model = NeuralNetwork(model_name, **params)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
model = NeuralNetwork(model_name, **params)
model.use_scaled_data()
scores = model.calculate_score(X_train, X_test, y_train, y_test, metrics)
scores
epoch train_loss valid_loss dur
------- ------------ ------------ -------
1 0.9858 0.9623 81.2824
2 0.9600 0.9410 107.4753
3 0.9390 0.9225 109.0925
4 0.9237 0.9114 115.3645
5 0.9143 0.9039 117.8505
Out[20]:
{'neg_mean_absolute_error': -1.6065346002578735,
'neg_root_mean_squared_error': -2.1341967582702637,
'r2': 0.08973821997642517,
'model_fit_time': np.float64(531.139),
'model_predict_time': np.float64(0.001)}
In [ ]:
Copied!