import logging
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.feature_selection import SelectFromModel
from sklearn.impute import KNNImputer
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix, mean_squared_error, r2_score, roc_curve, auc, roc_auc_score
from sklearn.model_selection import cross_val_score, GridSearchCV, train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import autosklearn.classification
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
import ydata_profiling


import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import os

# Spotify API credentials. Real secrets should not live in source control, so
# they are taken from the environment when set; the original placeholder
# values remain the fallback.
cid = os.environ.get("SPOTIPY_CLIENT_ID", "xx")
secret = os.environ.get("SPOTIPY_CLIENT_SECRET", "xx")

# App-level authentication: build the credentials manager and the shared
# `sp` client that the rest of the script uses.
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


# create a function to retrieve tracks
import timeit
def retrieve_tracks(year, limit, client=None):
    """Collect up to ``limit`` tracks for ``year`` by paging through search results.

    Results are requested in pages of at most 50 items. Paging stops when a
    page comes back shorter than requested or when ``limit`` items have been
    gathered.

    Args:
        year: Value placed in the ``year:`` search filter.
        limit: Maximum number of track items to return. Values <= 0 yield an
            empty list without issuing a request.
        client: Object exposing a spotipy-style ``search(q, type, limit,
            offset)`` method. Defaults to the module-level ``sp`` client.

    Returns:
        A list of at most ``limit`` track items, in the order they were returned.
    """
    if client is None:
        client = sp
    page_size = 50
    results = []
    offset = 0
    while offset < limit:
        # Ask only for what is still missing so the total never overshoots `limit`.
        wanted = min(page_size, limit - offset)
        page = client.search(q=f'year:{year}', type='track', limit=wanted, offset=offset)
        items = page['tracks']['items']
        results.extend(items)
        offset += len(items)
        # A short (or empty) page means there is nothing more to fetch.
        if len(items) < wanted:
            break
    # Guard against a backend that returns more items than requested.
    return results[:limit]

start = timeit.default_timer()

# Fetch the 2023 tracks (at most 1000 requested)
tracks = retrieve_tracks(year=2023, limit=1000)

# Walk the results once, filling one parallel list per field of interest
artist_name, track_name, track_id, popularity = [], [], [], []
for track in tracks:
    artist_name.append(track['artists'][0]['name'])  # first listed artist only
    track_name.append(track['name'])
    track_id.append(track['id'])
    popularity.append(track['popularity'])

stop = timeit.default_timer()
print('Time to run this code (in seconds):', stop - start)

Time to run this code (in seconds): 7.474733628999957


# Sanity check: number of track IDs collected from the search results.
print('number of elements in the track_id list:', len(track_id))

number of elements in the track_id list: 1000


# Assemble the four parallel lists into one table with a row per track.
track_columns = {
    'artist_name': artist_name,
    'track_name': track_name,
    'track_id': track_id,
    'popularity': popularity,
}
df_tracks = pd.DataFrame(track_columns)
print(df_tracks.shape)
df_tracks.head()

(1000, 4)


# again measuring the time
start = timeit.default_timer()

# collected feature dicts, request batch size, and the counter for None results
rows = []
batchsize = 100
None_counter = 0

# Plain list of IDs so each batch is a simple positional slice.
track_ids = df_tracks['track_id'].tolist()
for batch_start in range(0, len(track_ids), batchsize):
    batch = track_ids[batch_start:batch_start + batchsize]
    feature_results = sp.audio_features(batch)
    for features in feature_results:
        # Entries with no audio features available come back as None.
        if features is None:
            None_counter += 1
        else:
            rows.append(features)

print('Number of tracks where no audio features were available:',None_counter)

stop = timeit.default_timer()
print ('Time to run this code (in seconds):',stop - start)

Number of tracks where no audio features were available: 2
Time to run this code (in seconds): 3.023385842000039


# Turn the list of per-track feature dicts gathered in `rows` into a frame
# (one row per dict, one column per key), then report its size.
df_audio_features = pd.DataFrame.from_dict(rows,orient='columns')
print("Shape of the dataset:", df_audio_features.shape)
df_audio_features.head()

Shape of the dataset: (998, 18)


# Discard the link/bookkeeping columns and rename `id` to `track_id` so the
# frame shares its key column name with df_tracks.
columns_to_drop = ['analysis_url','track_href','type','uri']
df_audio_features = (
    df_audio_features
    .drop(columns=columns_to_drop)
    .rename(columns={'id': 'track_id'})
)

df_audio_features.shape

(998, 14)


# merge both dataframes
# the 'inner' method will make sure that we only keep track IDs present in both datasets
# NOTE(review): if a track_id occurs more than once on either side, the merge
# emits one row per matching pair — check for duplicated IDs if the row count
# looks too high.
df = pd.merge(df_tracks, df_audio_features, on='track_id', how='inner')
# Report the shape of the merged frame itself, not of df_audio_features.
print("Shape of the dataset:", df.shape)
df.head()

Shape of the dataset: (998, 14)


# Save the merged DataFrame to a CSV file; index=False leaves the row index
# out of the file. Later cells reload this file with pd.read_csv.
df.to_csv('spotifyfeatures2023v04.csv', index=False)


# Build a profiling report for the merged frame. The bare `report` expression
# is there so a notebook renders it as the cell output.
report = ydata_profiling.ProfileReport(df, title="Pandas Profiling Report")
report

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]


# Summary statistics of the merged frame (notebook display only).
df.describe()


# Configure logging at INFO level and create the module-level logger.
# NOTE(review): the helper functions below report via print(), not this logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)



def impute_missing_values(df):
    """Return a copy of ``df`` with missing values filled by a 5-neighbour KNN imputer.

    The result keeps the column labels of ``df`` but is rebuilt from the
    imputer's array output, so it carries a fresh default row index.
    """
    knn = KNNImputer(n_neighbors=5)
    filled_values = knn.fit_transform(df)
    return pd.DataFrame(filled_values, columns=df.columns)

def remove_duplicates(df):
    """Return ``df`` without duplicate rows, keeping the first occurrence of each.

    The input frame is not modified; a new frame is returned. The number of
    dropped rows is printed.

    Args:
        df: Frame to de-duplicate.

    Returns:
        A new DataFrame containing the unique rows (original index labels kept).
    """
    deduplicated = df.drop_duplicates()
    # Derive the count from the size difference instead of scanning twice.
    num_duplicates = len(df) - len(deduplicated)
    print(f"Removed {num_duplicates} duplicate rows.")
    return deduplicated

def remove_inconsistent_rows(df, stats=None, n_std=3):
    """Drop rows whose checked columns fall outside ``mean ± n_std * std``.

    Bounds are inclusive. The number of dropped rows is printed.

    Args:
        df: Frame containing every column named in ``stats``.
        stats: Optional mapping ``{column: (mean, std)}`` describing the
            accepted band per column. Defaults to fixed values for
            ``popularity``, ``energy`` and ``duration_ms``.
            NOTE(review): the defaults look like statistics copied from an
            earlier run on standardised data — confirm they still match.
        n_std: Half-width of the accepted band, in standard deviations.

    Returns:
        The rows of ``df`` that pass every filter (original index labels kept).
    """
    if stats is None:
        stats = {
            'popularity': (0.195220, 1.015686),
            'energy': (-0.040686, 0.951108),
            'duration_ms': (-0.064580, 0.898885),
        }
    keep = pd.Series(True, index=df.index)
    for column, (mean, std) in stats.items():
        half_width = n_std * std
        keep &= (df[column] >= mean - half_width) & (df[column] <= mean + half_width)
    num_inconsistent = len(df) - int(keep.sum())
    print(f"Removed {num_inconsistent} inconsistent rows.")
    return df.loc[keep]

def remove_invalid_values(df):
    """Drop rows whose ``popularity`` cannot be read as a number.

    Values that ``pd.to_numeric`` cannot parse (and missing values) count as
    invalid. When nothing is invalid the input frame is returned as-is and
    nothing is printed.
    """
    is_invalid = pd.to_numeric(df['popularity'], errors='coerce').isna()
    if not is_invalid.any():
        return df
    invalid_row_count = is_invalid.sum()
    print(f"Removed {invalid_row_count} row(s) with invalid value(s).")
    return df.loc[~is_invalid]

def non_scale_remove_inconsistent_rows(df):
    """Keep only rows whose raw values lie inside fixed valid ranges.

    Accepted (inclusive) ranges: ``popularity`` 0-100, ``energy`` 0-1 and
    ``duration_ms`` 0-600000. The number of dropped rows is printed.
    """
    within_bounds = (
        df['popularity'].between(0, 100)
        & df['energy'].between(0, 1)
        & df['duration_ms'].between(0, 600000)
    )
    consistent = df.loc[within_bounds]
    num_inconsistent = len(df) - len(consistent)
    print(f"Removed {num_inconsistent} inconsistent rows.")
    return consistent

def clean_data(df):
    """Run the cleaning steps for already-scaled data and return the result.

    Steps, in order: KNN imputation, duplicate removal, the
    mean-and-standard-deviation band filter, and removal of rows with a
    non-numeric popularity.
    """
    cleaning_steps = (
        impute_missing_values,
        remove_duplicates,
        remove_inconsistent_rows,
        remove_invalid_values,
    )
    for step in cleaning_steps:
        df = step(df)
    return df

def non_scale_clean_data(df):
    """Run the cleaning steps for raw (unscaled) data and return the result.

    Steps, in order: KNN imputation, duplicate removal, the fixed-range
    filter for raw values, and removal of rows with a non-numeric popularity.
    """
    cleaning_steps = (
        impute_missing_values,
        remove_duplicates,
        non_scale_remove_inconsistent_rows,
        remove_invalid_values,
    )
    for step in cleaning_steps:
        df = step(df)
    return df

def preprocess_data(df):
    """Drop outliers, standardise numeric columns, one-hot encode categoricals, then clean.

    The caller's frame is left untouched: all column writes happen on a copy
    of the inlier rows.

    Args:
        df: Frame holding the numeric audio-feature columns (including
            ``popularity``) plus ``key``, ``mode`` and ``time_signature``.

    Returns:
        The cleaned frame. Numeric columns are standardised, and each
        categorical column is replaced by indicator columns named
        ``<column>_<i>``, where ``i`` is the position of the category in the
        encoder output, not the category value itself.
    """
    numerical_cols = ['popularity', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']
    categorical_cols = ['key', 'mode', 'time_signature']

    # Flag outliers on the numeric columns and keep only rows labelled 1 (inliers).
    detector = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
    inlier_mask = detector.fit_predict(df[numerical_cols]) == 1
    # .copy() so the assignments below never write into the caller's frame.
    df = df.loc[inlier_mask].copy()

    # Standardise the numeric columns (this includes the `popularity` target).
    scaler = StandardScaler()
    df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

    # One-hot encode each categorical column, then drop the originals.
    encoder = OneHotEncoder()
    for col in categorical_cols:
        encoded_col = encoder.fit_transform(df[[col]]).toarray()
        col_names = [f"{col}_{i}" for i in range(encoded_col.shape[1])]
        df[col_names] = encoded_col
    df = df.drop(categorical_cols, axis=1)

    # Impute, de-duplicate and filter inconsistent/invalid rows.
    return clean_data(df)


def select_features(X, y):
    """Encode ``X`` and keep the features a random forest considers important.

    Numeric columns are standardised and object-typed columns are one-hot
    encoded. A 100-tree random forest fitted on ``y`` then drives
    ``SelectFromModel`` with its default threshold.

    Returns:
        A tuple ``(selected_features, y)``; ``y`` is passed through unchanged.
    """
    numeric_columns = X.select_dtypes(include=np.number).columns
    object_columns = X.select_dtypes(include=object).columns
    encoder = ColumnTransformer(transformers=[
        ('num', StandardScaler(), numeric_columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), object_columns),
    ])
    encoded = encoder.fit_transform(X)

    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    selected = SelectFromModel(forest).fit_transform(encoded, y)

    return selected, y


# Regression experiment: predict (standardised) popularity from audio features
# with ridge regression.

# Load the data
spotify_df1 = pd.read_csv('spotifyfeatures2023v04.csv')

# Drop the identifying text columns; only numeric/categorical features remain.
spotify_df1 = spotify_df1.drop(['track_name', 'track_id','artist_name'], axis=1)

# Preprocess the data
# (outlier removal, scaling, one-hot encoding and cleaning; see preprocess_data)
spotify_df1 = preprocess_data(spotify_df1)

# Notebook display only: the value is not stored.
spotify_df1['duration_ms'].describe()

# separate the features and target variable
# NOTE: preprocess_data standardises `popularity` too, so the errors reported
# below are in standardised units rather than on the raw popularity scale.
X = spotify_df1.drop(['popularity'], axis=1)
y = spotify_df1['popularity']


# split the data into training, validation, and testing sets
# 40% is held out for testing; the remaining 60% is halved into train/validation.
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.5, random_state=42)

# create the linear regression model and fit it to the training data
# NOTE(review): `lr`, `X_val` and `y_val` are not used again in this cell.
lr = LinearRegression()
lr.fit(X_train, y_train)

# perform cross-validation to find optimal hyperparameters
# here, we vary the regularization parameter alpha and use 5-fold cross-validation
alphas = [0.001, 0.01, 0.1, 1, 10]
cv_scores = []
for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    # The scorer returns negated MSE; flip the sign to get plain MSE per fold.
    scores = -cross_val_score(ridge, X_trainval, y_trainval, cv=5, scoring='neg_mean_squared_error')
    cv_scores.append(scores.mean())
# Pick the alpha with the lowest mean CV error (the first one on ties).
optimal_alpha = alphas[cv_scores.index(min(cv_scores))]

# retrain the model with optimal hyperparameters using both the training and validation sets
ridge = Ridge(alpha=optimal_alpha)
ridge.fit(X_trainval, y_trainval)

# evaluate the model on the testing set
y_pred = ridge.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print('Mean Squared Error:', mse)
print('R^2:', r2)

Removed 173 duplicate rows.
Removed 17 inconsistent rows.
Mean Squared Error: 0.962113062937113
R^2: 0.0645853847564527


# Classification experiment: logistic regression on forest-selected features
# with SMOTE oversampling.

# Load data
# NOTE(review): `df` is the merged frame built earlier, not the preprocessed
# one. It still contains artist_name, track_name and track_id, and popularity
# has not been standardised here.
data1 = df

# Split into features and target
# NOTE(review): the 1.190157 cutoff is the same literal used elsewhere on
# standardised popularity; applied to the raw values in `df` it may not mark
# the intended share of tracks as positive — confirm.
X = data1.drop(['popularity'], axis=1)
y = data1['popularity'].apply(lambda x: 1 if x > 1.190157 else 0)

# Preprocessing pipeline
# NOTE(review): `X_preprocessed` is not used again in this cell;
# select_features() builds and fits its own transformer on X.
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), X.select_dtypes(include=np.number).columns),
    ('cat', OneHotEncoder(handle_unknown='ignore'), X.select_dtypes(include=object).columns)
])
X_preprocessed = preprocessor.fit_transform(X)


# Feature selection using a Random Forest Classifier
X_selected, y = select_features(X, y)


# Handle data imbalance using SMOTE
# NOTE(review): resampling happens before any train/test split, so the CV folds
# and the "testing set" below are drawn from the resampled data.
oversample = SMOTE()
X_resampled, y_resampled = oversample.fit_resample(X_selected, y)

# Try Logistic Regression as an alternative algorithm
# Grid-search the regularisation strength C with 5-fold CV on accuracy.
model = LogisticRegression(random_state=42, max_iter=1000)
param_grid = {'C': [0.01, 0.1, 1, 10]}
grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, scoring='accuracy')
grid_search.fit(X_resampled, y_resampled)
model = grid_search.best_estimator_

# Evaluate performance using cross-validation
scores = cross_val_score(model, X_resampled, y_resampled, cv=5, scoring='accuracy')
print('Cross-validation scores:', scores)
print('Mean accuracy:', np.mean(scores))

# Evaluate performance on testing set
# 80/20 split of the resampled data; the best estimator is refit on the 80% part.
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on testing set: {:.2f}%".format(accuracy * 100))

Cross-validation scores: [0.78341014 0.79262673 0.70833333 0.66203704 0.8287037 ]
Mean accuracy: 0.7550221880867043
Accuracy on testing set: 86.18%


# Load the data
Scaled_df1 = pd.read_csv('spotifyfeatures2023v04.csv')

# Drop the identifying text columns, then run the full scaled preprocessing
# (outlier removal, standardisation, one-hot encoding, cleaning).
Scaled_df1 = preprocess_data(Scaled_df1.drop(['track_name', 'track_id','artist_name'], axis=1))

# Notebook display only: summary statistics of the scaled frame.
Scaled_df1.describe()

Removed 173 duplicate rows.
Removed 17 inconsistent rows.


# Notebook display only: distribution of the standardised duration column.
Scaled_df1['duration_ms'].describe()

count    758.000000
mean      -0.064580
std        0.898885
min       -2.676101
25%       -0.641892
50%       -0.116462
75%        0.481821
max        2.500583
Name: duration_ms, dtype: float64


# Notebook display only: column names after preprocessing. The numeric suffix
# on the one-hot columns is the category's position in the encoder output,
# not the original category value.
Scaled_df1.columns

Index(['popularity', 'danceability', 'energy', 'loudness', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'duration_ms', 'key_0', 'key_1', 'key_2', 'key_3', 'key_4', 'key_5',
       'key_6', 'key_7', 'key_8', 'key_9', 'key_10', 'key_11', 'mode_0',
       'mode_1', 'time_signature_0', 'time_signature_1', 'time_signature_2'],
      dtype='object')


# Load the data

Non_Scale_data1 = pd.read_csv('spotifyfeatures2023v04.csv')

# Drop the identifying text columns and clean the raw values only: no outlier
# removal, scaling or one-hot encoding is applied on this path.
Non_Scale_data1 = non_scale_clean_data(Non_Scale_data1.drop(['track_name', 'track_id','artist_name'], axis=1))

# Notebook display only: summary statistics of the unscaled frame.
Non_Scale_data1.describe()

Removed 177 duplicate rows.
Removed 0 inconsistent rows.


# Notebook display only: columns of the unscaled frame (categoricals are kept
# as single columns here).
Non_Scale_data1.columns

Index(['popularity', 'danceability', 'energy', 'key', 'loudness', 'mode',
       'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'duration_ms', 'time_signature'],
      dtype='object')


# Auto-sklearn experiment on the scaled data: binary "popular" target.

# Define the features and target variable
#(Ensure that the binary cutoff for binary variable is the 75% cutoff of popularity) 
# The list is every preprocessed column except `popularity`.
features = ['danceability', 'energy', 'loudness', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'duration_ms', 'key_0', 'key_1', 'key_2', 'key_3', 'key_4', 'key_5',
       'key_6', 'key_7', 'key_8', 'key_9', 'key_10', 'key_11', 'mode_0',
       'mode_1', 'time_signature_0', 'time_signature_1', 'time_signature_2']

# 1 when standardised popularity exceeds the cutoff, else 0.
# NOTE(review): the cutoff is a hard-coded literal; presumably the 75th
# percentile from an earlier run — recompute if the data changes.
Scaled_df1['target'] = Scaled_df1['popularity'].apply(lambda x: 1 if x > 1.190157 else 0)

# Split the data into training and testing sets
train, test = train_test_split(Scaled_df1, test_size=0.33, random_state=1)

# Split the training and testing sets into features and target variables
X_train = train[features]
y_train = train['target']
X_test = test[features]
y_test = test['target']

# Create an Auto-sklearn classification model
# Budget: 5 minutes in total, 30 seconds per candidate run, optimising ROC AUC.
scaled_model = autosklearn.classification.AutoSklearnClassifier(time_left_for_this_task=5*60,
                                                        per_run_time_limit=30,
                                                        n_jobs=6,
                                                        metric=autosklearn.metrics.roc_auc,
                                                                   )

# Find cross-validation accuracy scores
# NOTE(review): no `scoring` is passed, so the estimator's default score is
# used, and each of the 5 folds presumably repeats the full search budget.
cv_accuracy_scores = cross_val_score(scaled_model, X_train, y_train, cv=5)

# Print the cross-validation accuracy scores
print("Cross-validation accuracy scores:", cv_accuracy_scores)

# Find the mean and standard deviation of cross-validation accuracy scores
# (computed but not printed in this cell)
mean_cv_accuracy = np.mean(cv_accuracy_scores)
std_cv_accuracy = np.std(cv_accuracy_scores)

# Fit the model to the training data
scaled_model.fit(X_train, y_train)

# Score the model
# Column 1 of the probability matrix is used as the positive-class score below.
predictions = scaled_model.predict_proba(X_test)

# Calculate the model's performance metrics
# ROC AUC uses the raw probabilities; the other metrics use them rounded to 0/1.
roc_auc = roc_auc_score(y_test, predictions[:, 1])
precision = precision_score(y_test, predictions[:, 1].round(), average='binary', zero_division=1)
recall = recall_score(y_test, predictions[:, 1].round(), average='binary')
f1 = f1_score(y_test, predictions[:, 1].round(), average='binary')

# Print the model's performance metrics
print(f"Auto-Sklearn Classifier performance metrics: ROC AUC = {roc_auc:.2f}, Precision = {precision:.2f}, Recall = {recall:.2f}, F1 Score = {f1:.2f}")

Cross-validation accuracy scores: [0.56983806 0.56831984 0.60026316 0.52815789 0.57538462]

	Models besides current dummy model: 0
	Dummy models: 1

	Models besides current dummy model: 0
	Dummy models: 1
Auto-Sklearn Classifier performance metrics: ROC AUC = 0.66, Precision = 0.40, Recall = 0.23, F1 Score = 0.29


%matplotlib inline
# Calculate the fpr and tpr for all thresholds of the model
# (uses the positive-class probabilities from the auto-sklearn model)
fpr, tpr, thresholds = roc_curve(y_test, predictions[:, 1])
# Area under the curve just computed; this overwrites the earlier `roc_auc`.
roc_auc = auc(fpr, tpr)

# Plot the ROC curve
plt.figure()
lw = 2
plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.show()


# Permutation importance of each feature on the held-out test set, scored by
# ROC AUC and repeated 10 times per feature.
r = permutation_importance(
    scaled_model, X_test, y_test,
    n_repeats=10, random_state=0, n_jobs=4,
    scoring='roc_auc',
)

# Horizontal box plot with one box per feature, ordered by mean importance.
fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(111)
sort_idx = r.importances_mean.argsort()
ax.boxplot(r.importances[sort_idx].T, labels=[X_test.columns[i] for i in sort_idx], vert=False)
# Enlarge the tick labels on both axes.
for label in ax.get_xticklabels() + ax.get_yticklabels():
    label.set_fontsize(24)
fig.tight_layout()

# Write the figure to disk.
plt.savefig('FeatureImportance_roc_auc.png')


# Get the members of the fitted ensemble together with their ensemble weights
# (a list of (weight, pipeline) pairs — not per-feature or per-layer weights)
weights = scaled_model.get_models_with_weights()
    
# Print the (weight, pipeline) pairs
print(weights)

[(0.28, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 1.3692488379612932e-08, 'classifier:gradient_boosting:learning_rate': 0.8288685061585398, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 20, 'classifier:gradient_boosting:min_samples_leaf': 28, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 2},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.16, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 2.3302874783091707e-08, 'classifier:gradient_boosting:learning_rate': 0.20076124721016717, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 419, 'classifier:gradient_boosting:min_samples_leaf': 29, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 18},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.1, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 1.2563922658725854e-08, 'classifier:gradient_boosting:learning_rate': 0.09141931987765436, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 23, 'classifier:gradient_boosting:min_samples_leaf': 30, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 2},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.08, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 2.3302874783091707e-08, 'classifier:gradient_boosting:learning_rate': 0.20076124721016717, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 119, 'classifier:gradient_boosting:min_samples_leaf': 29, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 3, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 11},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.08, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 3.153828542860241e-07, 'classifier:gradient_boosting:learning_rate': 0.09114872782470299, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 48, 'classifier:gradient_boosting:min_samples_leaf': 30, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'mean', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 3},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.04, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 2.3005699807887665e-08, 'classifier:gradient_boosting:learning_rate': 0.09592271478287911, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 62, 'classifier:gradient_boosting:min_samples_leaf': 32, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'mean', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'True', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 19},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'extra_trees_preproc_for_classification', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 1.538125833944859e-09, 'classifier:gradient_boosting:learning_rate': 0.46654288946256744, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 7, 'classifier:gradient_boosting:min_samples_leaf': 16, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'median', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:extra_trees_preproc_for_classification:bootstrap': 'True', 'feature_preprocessor:extra_trees_preproc_for_classification:criterion': 'entropy', 'feature_preprocessor:extra_trees_preproc_for_classification:max_depth': 'None', 'feature_preprocessor:extra_trees_preproc_for_classification:max_features': 0.9070521907099777, 'feature_preprocessor:extra_trees_preproc_for_classification:max_leaf_nodes': 'None', 'feature_preprocessor:extra_trees_preproc_for_classification:min_impurity_decrease': 0.0, 'feature_preprocessor:extra_trees_preproc_for_classification:min_samples_leaf': 4, 'feature_preprocessor:extra_trees_preproc_for_classification:min_samples_split': 11, 'feature_preprocessor:extra_trees_preproc_for_classification:min_weight_fraction_leaf': 0.0, 'feature_preprocessor:extra_trees_preproc_for_classification:n_estimators': 100, 'classifier:gradient_boosting:n_iter_no_change': 12},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'feature_agglomeration', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 1.1071840086322682e-07, 'classifier:gradient_boosting:learning_rate': 0.1683884426743291, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 119, 'classifier:gradient_boosting:min_samples_leaf': 8, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'quantile_transformer', 'feature_preprocessor:feature_agglomeration:affinity': 'euclidean', 'feature_preprocessor:feature_agglomeration:linkage': 'average', 'feature_preprocessor:feature_agglomeration:n_clusters': 33, 'feature_preprocessor:feature_agglomeration:pooling_func': 'mean', 'classifier:gradient_boosting:n_iter_no_change': 20, 'data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:n_quantiles': 1895, 'data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:output_distribution': 'uniform'},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'no_preprocessing', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 1.1459986546422021e-10, 'classifier:gradient_boosting:learning_rate': 0.289519534249227, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 42, 'classifier:gradient_boosting:min_samples_leaf': 27, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'median', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'quantile_transformer', 'classifier:gradient_boosting:n_iter_no_change': 16, 'data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:n_quantiles': 913, 'data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:output_distribution': 'uniform'},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 4.184483472956591e-06, 'classifier:gradient_boosting:learning_rate': 0.1600745231405469, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 89, 'classifier:gradient_boosting:min_samples_leaf': 10, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 3, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 14},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 3.6553821298338375e-08, 'classifier:gradient_boosting:learning_rate': 0.7094988520768325, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 328, 'classifier:gradient_boosting:min_samples_leaf': 25, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'quantile_transformer', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'True', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 20, 'data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:n_quantiles': 1735, 'data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:output_distribution': 'uniform'},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'random_forest', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:criterion': 'gini', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:max_features': 0.013104702559186276, 'classifier:random_forest:max_leaf_nodes': 'None', 'classifier:random_forest:min_impurity_decrease': 0.0, 'classifier:random_forest:min_samples_leaf': 4, 'classifier:random_forest:min_samples_split': 12, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'median', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'quantile_transformer', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'True', 'feature_preprocessor:polynomial:interaction_only': 'False', 'data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:n_quantiles': 1185, 'data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:output_distribution': 'normal'},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 3.5862877582731166e-06, 'classifier:gradient_boosting:learning_rate': 0.15709132481447288, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 30, 'classifier:gradient_boosting:min_samples_leaf': 37, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 3, 'feature_preprocessor:polynomial:include_bias': 'True', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 2},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'valid', 'classifier:gradient_boosting:l2_regularization': 2.4098568440222188e-08, 'classifier:gradient_boosting:learning_rate': 0.19135609457038777, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 145, 'classifier:gradient_boosting:min_samples_leaf': 27, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'mean', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 3, 'feature_preprocessor:polynomial:include_bias': 'True', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 2, 'classifier:gradient_boosting:validation_fraction': 0.1},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 4.513669113408288e-07, 'classifier:gradient_boosting:learning_rate': 0.27496084119289055, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 178, 'classifier:gradient_boosting:min_samples_leaf': 42, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 14},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'adaboost', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'select_percentile_classification', 'classifier:adaboost:algorithm': 'SAMME', 'classifier:adaboost:learning_rate': 0.06141308175234524, 'classifier:adaboost:max_depth': 3, 'classifier:adaboost:n_estimators': 384, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'mean', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'none', 'feature_preprocessor:select_percentile_classification:percentile': 63.37846687117217, 'feature_preprocessor:select_percentile_classification:score_func': 'f_classif'},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'weighting', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 2.378931455133502e-08, 'classifier:gradient_boosting:learning_rate': 0.44689457344657213, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 574, 'classifier:gradient_boosting:min_samples_leaf': 100, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'median', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 20},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 1.8975927775258082e-08, 'classifier:gradient_boosting:learning_rate': 0.09649678975771797, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 41, 'classifier:gradient_boosting:min_samples_leaf': 34, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'mean', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 3, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 20},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False})), (0.02, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'gradient_boosting', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:gradient_boosting:early_stop': 'train', 'classifier:gradient_boosting:l2_regularization': 3.4687330484315174e-07, 'classifier:gradient_boosting:learning_rate': 0.056043395851908644, 'classifier:gradient_boosting:loss': 'auto', 'classifier:gradient_boosting:max_bins': 255, 'classifier:gradient_boosting:max_depth': 'None', 'classifier:gradient_boosting:max_leaf_nodes': 139, 'classifier:gradient_boosting:min_samples_leaf': 30, 'classifier:gradient_boosting:scoring': 'loss', 'classifier:gradient_boosting:tol': 1e-07, 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'mean', 'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:polynomial:degree': 2, 'feature_preprocessor:polynomial:include_bias': 'False', 'feature_preprocessor:polynomial:interaction_only': 'True', 'classifier:gradient_boosting:n_iter_no_change': 3},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multiclass': False,
  'target_type': 'classification',
  'signed': False}))]


# Summarize the auto-sklearn search: prints the dataset name, the optimisation
# metric, the best validation score, and how many candidate runs succeeded,
# crashed, or exceeded the time/memory limits.
print(scaled_model.sprint_statistics())

auto-sklearn results:
  Dataset name: 2e8c8efe-d7c5-11ed-803d-00155d73b8cb
  Metric: roc_auc
  Best validation score: 0.702003
  Number of target algorithm runs: 169
  Number of successful target algorithm runs: 152
  Number of crashed target algorithm runs: 13
  Number of target algorithms that exceeded the time limit: 4
  Number of target algorithms that exceeded the memory limit: 0


# Show the leaderboard for the fitted search (presumably a ranked table of the
# evaluated models; its output is not captured here, so confirm against the
# auto-sklearn docs). NOTE(review): this is a bare expression, so the result is
# only displayed when it is the last statement of a notebook cell; run as a
# plain script it shows nothing.
scaled_model.leaderboard()


# Print the ensemble members as a dict keyed by model id. Each entry carries
# the model's rank, cost, and ensemble weight, plus its pipeline components
# (data preprocessor, balancing, feature preprocessor, classifier) and the
# underlying scikit-learn estimator with its hyperparameters.
print(scaled_model.show_models())

{16: {'model_id': 16, 'rank': 1, 'cost': 0.30404383975812554, 'ensemble_weight': 0.16, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3cde445600>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3cff6c99f0>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3cff6c9270>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=2.3302874783091707e-08,
                               learning_rate=0.20076124721016717, loss='auto',
                               max_iter=512, max_leaf_nodes=419,
                               min_samples_leaf=29, n_iter_no_change=18,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 20: {'model_id': 20, 'rank': 2, 'cost': 0.3242630385487528, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d2a286710>, 'balancing': Balancing(random_state=1, strategy='weighting'), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2970eb60>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2970d180>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=1.538125833944859e-09,
                               learning_rate=0.46654288946256744, loss='auto',
                               max_iter=512, max_leaf_nodes=7,
                               min_samples_leaf=16, n_iter_no_change=12,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 65: {'model_id': 65, 'rank': 3, 'cost': 0.32294028722600143, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d2a2047f0>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2970ee90>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2970cc70>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=1.1071840086322682e-07,
                               learning_rate=0.1683884426743291, loss='auto',
                               max_iter=512, max_leaf_nodes=119,
                               min_samples_leaf=8, n_iter_no_change=20,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 66: {'model_id': 66, 'rank': 4, 'cost': 0.3458049886621314, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3cff15cfd0>, 'balancing': Balancing(random_state=1, strategy='weighting'), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2ae72c20>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2ae73970>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=1.1459986546422021e-10,
                               learning_rate=0.289519534249227, loss='auto',
                               max_iter=512, max_leaf_nodes=42,
                               min_samples_leaf=27, n_iter_no_change=16,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 73: {'model_id': 73, 'rank': 5, 'cost': 0.3361678004535148, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d2970d420>, 'balancing': Balancing(random_state=1, strategy='weighting'), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2a370d30>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2a370730>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=4.184483472956591e-06,
                               learning_rate=0.1600745231405469, loss='auto',
                               max_iter=512, max_leaf_nodes=89,
                               min_samples_leaf=10, n_iter_no_change=14,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 74: {'model_id': 74, 'rank': 6, 'cost': 0.33635676492819355, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d2ae723e0>, 'balancing': Balancing(random_state=1, strategy='weighting'), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d20895d80>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d209cd480>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.6553821298338375e-08,
                               learning_rate=0.7094988520768325, loss='auto',
                               max_iter=512, max_leaf_nodes=328,
                               min_samples_leaf=25, n_iter_no_change=20,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 91: {'model_id': 91, 'rank': 7, 'cost': 0.29799697656840507, 'ensemble_weight': 0.08, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3cff3e4ca0>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d298bfa60>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d298bcd60>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=2.3302874783091707e-08,
                               learning_rate=0.20076124721016717, loss='auto',
                               max_iter=512, max_leaf_nodes=119,
                               min_samples_leaf=29, n_iter_no_change=11,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 93: {'model_id': 93, 'rank': 8, 'cost': 0.35015117157974296, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3cd28d8190>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2ab34b20>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2ab35d80>, 'sklearn_classifier': RandomForestClassifier(max_features=1, min_samples_leaf=4, min_samples_split=12,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)}, 119: {'model_id': 119, 'rank': 9, 'cost': 0.3197278911564626, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d20e90f10>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2ab379d0>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2ab35b10>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.5862877582731166e-06,
                               learning_rate=0.15709132481447288, loss='auto',
                               max_iter=512, max_leaf_nodes=30,
                               min_samples_leaf=37, n_iter_no_change=2,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 121: {'model_id': 121, 'rank': 10, 'cost': 0.3595049130763416, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d298bc250>, 'balancing': Balancing(random_state=1, strategy='weighting'), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3cd28cc760>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3cd28ce320>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=2.4098568440222188e-08,
                               learning_rate=0.19135609457038777, loss='auto',
                               max_iter=8, max_leaf_nodes=145,
                               min_samples_leaf=27, n_iter_no_change=2,
                               random_state=1, warm_start=True)}, 124: {'model_id': 124, 'rank': 11, 'cost': 0.32331821617535905, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d2ab36a40>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d29c9cbb0>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d29c9f430>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=4.513669113408288e-07,
                               learning_rate=0.27496084119289055, loss='auto',
                               max_iter=512, max_leaf_nodes=178,
                               min_samples_leaf=42, n_iter_no_change=14,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 125: {'model_id': 125, 'rank': 12, 'cost': 0.326341647770219, 'ensemble_weight': 0.28, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3cd28cda80>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2a107370>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2a107760>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=1.3692488379612932e-08,
                               learning_rate=0.8288685061585398, loss='auto',
                               max_iter=128, max_leaf_nodes=20,
                               min_samples_leaf=28, n_iter_no_change=2,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 136: {'model_id': 136, 'rank': 13, 'cost': 0.3605442176870749, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3cd27303d0>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d29c705e0>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d29c73640>, 'sklearn_classifier': AdaBoostClassifier(algorithm='SAMME',
                   base_estimator=DecisionTreeClassifier(max_depth=3),
                   learning_rate=0.06141308175234524, n_estimators=384,
                   random_state=1)}, 141: {'model_id': 141, 'rank': 14, 'cost': 0.3163265306122449, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d29c9f190>, 'balancing': Balancing(random_state=1, strategy='weighting'), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3cd26b1750>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3cd26b1330>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=2.378931455133502e-08,
                               learning_rate=0.44689457344657213, loss='auto',
                               max_iter=512, max_leaf_nodes=574,
                               min_samples_leaf=100, n_iter_no_change=20,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 142: {'model_id': 142, 'rank': 15, 'cost': 0.31027966742252466, 'ensemble_weight': 0.08, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d2a106a40>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2a84f310>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2a84d8d0>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.153828542860241e-07,
                               learning_rate=0.09114872782470299, loss='auto',
                               max_iter=512, max_leaf_nodes=48,
                               min_samples_leaf=30, n_iter_no_change=3,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 145: {'model_id': 145, 'rank': 16, 'cost': 0.29856386999244133, 'ensemble_weight': 0.1, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d2086c790>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2abfabc0>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2abf9000>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=1.2563922658725854e-08,
                               learning_rate=0.09141931987765436, loss='auto',
                               max_iter=512, max_leaf_nodes=23,
                               min_samples_leaf=30, n_iter_no_change=2,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 148: {'model_id': 148, 'rank': 17, 'cost': 0.30272108843537404, 'ensemble_weight': 0.04, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3cd26b3970>, 'balancing': Balancing(random_state=1, strategy='weighting'), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d2ae70ac0>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d2ae72b30>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=2.3005699807887665e-08,
                               learning_rate=0.09592271478287911, loss='auto',
                               max_iter=512, max_leaf_nodes=62,
                               min_samples_leaf=32, n_iter_no_change=19,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 153: {'model_id': 153, 'rank': 18, 'cost': 0.3303099017384732, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d2a84c940>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3d29e26200>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3d29e25f00>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=1.8975927775258082e-08,
                               learning_rate=0.09649678975771797, loss='auto',
                               max_iter=512, max_leaf_nodes=41,
                               min_samples_leaf=34, n_iter_no_change=20,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}, 159: {'model_id': 159, 'rank': 19, 'cost': 0.3053665910808768, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f3d20ec9450>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f3cd29f9450>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f3cff3a87c0>, 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.4687330484315174e-07,
                               learning_rate=0.056043395851908644, loss='auto',
                               max_iter=512, max_leaf_nodes=139,
                               min_samples_leaf=30, n_iter_no_change=3,
                               random_state=1, validation_fraction=None,
                               warm_start=True)}}


# # For compatibility with scikit-learn we implement `cv_results_`
# print(scaled_model.cv_results_)

	artist_name	track_name	track_id	popularity
0	PinkPantheress	Boy's a liar Pt. 2	6AQbmUe0Qwf5PZnt4HmTXv	97
1	Miley Cyrus	Flowers	0yLdNVWF3Srea0uzk55zFn	100
2	Morgan Wallen	Last Night	59uQI0PADDKeE6UZDTJEe8	89
3	Morgan Wallen	Last Night	7K3BhSpAxZBznislvUMVtn	88
4	Morgan Wallen	Thinkin’ Bout Me	0PAcdVzhPO4gq1Iym9ESnK	86

	danceability	energy	key	loudness	mode	speechiness	acousticness	instrumentalness	liveness	valence	tempo	type	id	uri	track_href	analysis_url	duration_ms	time_signature
0	0.696	0.809	5	-8.254	1	0.0500	0.2520	0.000128	0.2480	0.857	132.962	audio_features	6AQbmUe0Qwf5PZnt4HmTXv	spotify:track:6AQbmUe0Qwf5PZnt4HmTXv	https://api.spotify.com/v1/tracks/6AQbmUe0Qwf5...	https://api.spotify.com/v1/audio-analysis/6AQb...	131013	4
1	0.707	0.681	0	-4.325	1	0.0668	0.0632	0.000005	0.0322	0.646	117.999	audio_features	0yLdNVWF3Srea0uzk55zFn	spotify:track:0yLdNVWF3Srea0uzk55zFn	https://api.spotify.com/v1/tracks/0yLdNVWF3Sre...	https://api.spotify.com/v1/audio-analysis/0yLd...	200455	4
2	0.517	0.675	6	-5.382	1	0.0357	0.4590	0.000000	0.1510	0.518	203.853	audio_features	59uQI0PADDKeE6UZDTJEe8	spotify:track:59uQI0PADDKeE6UZDTJEe8	https://api.spotify.com/v1/tracks/59uQI0PADDKe...	https://api.spotify.com/v1/audio-analysis/59uQ...	163855	4
3	0.492	0.675	6	-5.456	1	0.0389	0.4670	0.000000	0.1420	0.478	203.759	audio_features	7K3BhSpAxZBznislvUMVtn	spotify:track:7K3BhSpAxZBznislvUMVtn	https://api.spotify.com/v1/tracks/7K3BhSpAxZBz...	https://api.spotify.com/v1/audio-analysis/7K3B...	163855	4
4	0.656	0.757	3	-5.775	0	0.0308	0.4920	0.000000	0.1170	0.429	139.971	audio_features	0PAcdVzhPO4gq1Iym9ESnK	spotify:track:0PAcdVzhPO4gq1Iym9ESnK	https://api.spotify.com/v1/tracks/0PAcdVzhPO4g...	https://api.spotify.com/v1/audio-analysis/0PAc...	177388	4

	artist_name	track_name	track_id	popularity	danceability	energy	key	loudness	mode	speechiness	acousticness	instrumentalness	liveness	valence	tempo	duration_ms	time_signature
0	PinkPantheress	Boy's a liar Pt. 2	6AQbmUe0Qwf5PZnt4HmTXv	97	0.696	0.809	5	-8.254	1	0.0500	0.2520	0.000128	0.2480	0.857	132.962	131013	4
1	Miley Cyrus	Flowers	0yLdNVWF3Srea0uzk55zFn	100	0.707	0.681	0	-4.325	1	0.0668	0.0632	0.000005	0.0322	0.646	117.999	200455	4
2	Morgan Wallen	Last Night	59uQI0PADDKeE6UZDTJEe8	89	0.517	0.675	6	-5.382	1	0.0357	0.4590	0.000000	0.1510	0.518	203.853	163855	4
3	Morgan Wallen	Last Night	7K3BhSpAxZBznislvUMVtn	88	0.492	0.675	6	-5.456	1	0.0389	0.4670	0.000000	0.1420	0.478	203.759	163855	4
4	Morgan Wallen	Thinkin’ Bout Me	0PAcdVzhPO4gq1Iym9ESnK	86	0.656	0.757	3	-5.775	0	0.0308	0.4920	0.000000	0.1170	0.429	139.971	177388	4

	popularity	danceability	energy	key	loudness	mode	speechiness	acousticness	instrumentalness	liveness	valence	tempo	duration_ms	time_signature
count	998.000000	998.000000	998.000000	998.000000	998.000000	998.000000	998.000000	998.000000	998.000000	998.000000	998.000000	998.000000	998.000000	998.000000
mean	32.168337	0.638227	0.651112	5.225451	-6.371672	0.529058	0.095876	0.236224	0.015087	0.182434	0.474137	121.593969	206397.752505	3.951904
std	37.144405	0.142282	0.186641	3.612954	2.821479	0.499405	0.094535	0.268231	0.079293	0.129081	0.228448	28.028740	48050.199661	0.289730
min	0.000000	0.206000	0.056200	0.000000	-23.023000	0.000000	0.023200	0.000146	0.000000	0.032200	0.038500	40.319000	28428.000000	3.000000
25%	0.000000	0.546000	0.543250	2.000000	-7.664000	0.000000	0.037500	0.037000	0.000000	0.095525	0.304000	99.994000	177270.250000	4.000000
50%	2.000000	0.655500	0.676500	5.000000	-5.848000	1.000000	0.056000	0.115000	0.000002	0.128000	0.468000	121.961000	201354.500000	4.000000
75%	75.000000	0.737000	0.796000	8.000000	-4.424000	1.000000	0.105500	0.349000	0.000113	0.237000	0.647000	141.013500	229627.000000	4.000000
max	100.000000	0.957000	0.965000	11.000000	-0.484000	1.000000	0.564000	0.985000	0.922000	0.844000	0.961000	205.432000	588139.000000	5.000000

	popularity	danceability	energy	loudness	speechiness	acousticness	instrumentalness	liveness	valence	tempo	...	key_7	key_8	key_9	key_10	key_11	mode_0	mode_1	time_signature_0	time_signature_1	time_signature_2
count	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000	...	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000	758.000000
mean	0.195220	0.040052	-0.040686	-0.025121	0.047757	0.031731	-0.011913	-0.012605	0.021278	-0.003372	...	0.102902	0.076517	0.077836	0.044855	0.097625	0.449868	0.550132	0.071240	0.914248	0.014512
std	1.015686	1.013521	0.951108	0.961295	1.024353	0.984303	1.003060	1.009090	1.000854	1.034064	...	0.304032	0.265999	0.268091	0.207122	0.297003	0.497809	0.497809	0.257395	0.280182	0.119667
min	-0.855759	-3.187398	-2.757992	-3.691287	-0.769914	-0.864562	-0.207790	-1.194396	-1.983555	-2.147700	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	-0.828839	-0.627373	-0.659277	-0.537223	-0.603192	-0.699477	-0.207790	-0.678862	-0.746704	-0.832818	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000	0.000000
50%	0.234499	0.136080	0.060530	0.116481	-0.405958	-0.372045	-0.207749	-0.421905	0.007418	-0.043832	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000	0.000000	1.000000	0.000000
75%	1.190157	0.770164	0.722521	0.646873	0.203177	0.479859	-0.205132	0.356260	0.761541	0.743577	...	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000	1.000000	0.000000	1.000000	0.000000
max	1.836235	2.286136	1.734298	2.406498	4.316751	2.951384	9.958407	5.385962	2.120742	3.019289	...	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000

Business Problem¶

Data Collection¶

Analysis Performed¶

Limitations¶

Import needed Libraries¶

Follow steps in the Data Section for cid and secret¶

LOGISTIC REGRESSION¶

Classification Model¶

Primary Model (Scaled AutoSklearn Model)¶

Conclusion¶

Summary¶

Produces excessive output, so uncomment only when you need information on the models.¶

	popularity	danceability	energy	key	loudness	mode	speechiness	acousticness	instrumentalness	liveness	valence	tempo	duration_ms	time_signature
count	821.000000	821.000000	821.000000	821.000000	821.000000	821.000000	821.000000	821.000000	821.000000	821.000000	821.000000	821.000000	821.000000	821.000000
mean	38.969549	0.639764	0.641998	5.135201	-6.478495	0.548112	0.100601	0.245340	0.015266	0.181883	0.473063	121.526676	206687.836784	3.947625
std	37.533956	0.145194	0.179986	3.638546	2.794170	0.497983	0.097780	0.265665	0.081852	0.130706	0.229622	28.921098	50531.145410	0.297854
min	0.000000	0.206000	0.056200	0.000000	-23.023000	0.000000	0.023200	0.000146	0.000000	0.032200	0.038500	40.319000	28428.000000	3.000000
25%	1.000000	0.539000	0.538000	2.000000	-7.664000	0.000000	0.038500	0.042800	0.000000	0.096700	0.297000	98.194000	177507.000000	4.000000
50%	41.000000	0.657000	0.666000	5.000000	-5.941000	1.000000	0.057700	0.131000	0.000002	0.126000	0.468000	120.020000	200787.000000	4.000000
75%	76.000000	0.743000	0.782000	8.000000	-4.695000	1.000000	0.114000	0.378000	0.000116	0.230000	0.643000	141.889000	229805.000000	4.000000
max	100.000000	0.957000	0.965000	11.000000	-0.484000	1.000000	0.564000	0.985000	0.922000	0.844000	0.961000	205.432000	588139.000000	5.000000

	rank	ensemble_weight	type	cost	duration
model_id
91	1	0.08	gradient_boosting	0.297997	19.332773
145	2	0.10	gradient_boosting	0.298564	3.993547
148	3	0.04	gradient_boosting	0.302721	3.276362
16	4	0.16	gradient_boosting	0.304044	3.787258
159	5	0.02	gradient_boosting	0.305367	4.559798
142	6	0.08	gradient_boosting	0.310280	3.938094
141	7	0.02	gradient_boosting	0.316327	2.423466
119	8	0.02	gradient_boosting	0.319728	18.579422
65	9	0.02	gradient_boosting	0.322940	1.031392
124	10	0.02	gradient_boosting	0.323318	3.257089
20	11	0.02	gradient_boosting	0.324263	1.393689
125	12	0.28	gradient_boosting	0.326342	1.549060
153	13	0.02	gradient_boosting	0.330310	22.883854
73	14	0.02	gradient_boosting	0.336168	28.889164
74	15	0.02	gradient_boosting	0.336357	2.618066
66	16	0.02	gradient_boosting	0.345805	1.070136
93	17	0.02	random_forest	0.350151	2.028976
121	18	0.02	gradient_boosting	0.359505	1.935010
136	19	0.02	adaboost	0.360544	1.258639