Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 2

# -*- coding: utf-8 -*-

"""
Created on Wed Sep 18 08:40:18 2019

@author: kashif
"""

import copy
import itertools

import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import make_blobs

from mpl_toolkits.mplot3d import Axes3D


import matplotlib.pyplot as plt
import seaborn as sns

# Seed NumPy's global random number generator once, at import time.
# NOTE(review): toy_experiment passes random_state=0 to make_blobs by default,
# so data generation there does not depend on this seed -- confirm whether any
# other step actually consumes the global RNG.
np.random.seed(123)

def toy_experiment(data_pars):
    """Grid-search an SVC on synthetic blob data.

    The entries of `data_pars` override the default `make_blobs` keyword
    arguments defined below (300 samples, 3 features, 2 centers,
    random_state 0, cluster_std 10). The generated data is then used to fit
    a `GridSearchCV` over the 'rbf' and 'poly' kernels, two gamma values and
    four C values.

    Returns the fitted `GridSearchCV` object.
    """
    blob_kwargs = {
        'n_samples': 300,
        'n_features': 3,
        'centers': 2,
        'random_state': 0,
        'cluster_std': 10,
    }
    # Caller-supplied settings win over the defaults.
    blob_kwargs.update(data_pars)
    X, y = make_blobs(**blob_kwargs)

    # Both kernels are searched over the same gamma and C candidates.
    param_grid = [
        {'kernel': [kernel], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]}
        for kernel in ('rbf', 'poly')
    ]

    searcher = GridSearchCV(SVC(), param_grid)
    return searcher.fit(X, y)

def search_variance_as_func_of_data_shape(search_pars, verbose=True):
    """Run `toy_experiment` over every combination of the given settings.

    `search_pars` maps an argument name to the candidate values to try for
    it; the Cartesian product of all candidate lists is evaluated. For each
    combination the 'mean_test_score' array is read from the grid search's
    `cv_results_` and summarized by two numbers:

    * 'n_unq' -- the count of distinct values in mean_test_score
    * 'var'   -- the variance of mean_test_score

    When `verbose` is true, each summary row is printed as soon as it is
    computed.

    Returns a DataFrame with one row per combination, holding the argument
    values together with 'n_unq' and 'var'.
    """
    names = list(search_pars.keys())
    rows = []
    for combo in itertools.product(*search_pars.values()):
        row = dict(zip(names, combo))
        # At this point the row holds only the data settings, so it can be
        # passed straight to the experiment before the summaries are added.
        mean_scores = toy_experiment(row).cv_results_['mean_test_score']
        row['n_unq'] = len(np.unique(mean_scores))
        row['var'] = np.var(mean_scores)
        rows.append(row)
        if verbose:
            print(row)
    return pd.DataFrame(rows)

def surface_plot(df, x, y, z, title):
    """Display a 3-D triangulated surface built from three columns of `df`.

    `x`, `y` and `z` are the column names used for the respective axes, and
    `title` is the text shown as the plot title. The figure is shown with
    `plt.show()`; nothing is returned.
    """
    fig = plt.figure()
    # Passing keyword arguments to `Figure.gca` (the original
    # `fig.gca(projection='3d')`) was deprecated in Matplotlib 3.4 and later
    # removed; `add_subplot` creates the 3-D axes on old and new versions.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_trisurf(df[x], df[y], df[z], cmap=plt.cm.viridis, linewidth=0.2)
    ax.set_title(title)
    plt.show()

# Candidate cluster spreads: a fine sweep from 0.1 to 0.9 followed by a coarse
# sweep from 1 to 10.
cluster_std_candidates = np.concatenate([
    np.linspace(0.1, 0.9, 9),
    np.linspace(1, 10, 10),
])
search_pars = {
    'cluster_std': cluster_std_candidates,
    'n_samples': [100, 300, 500, 1000],
}

df = search_variance_as_func_of_data_shape(search_pars)
# On the grid above, `cluster_std < 2` keeps exactly the candidates up to and
# including 1.0, which is the range named in the second plot's title.
df2 = df[df['cluster_std'] < 2]

surface_plot(df, 'n_samples', 'cluster_std', 'n_unq', "Unique CV accuracies")

# The title is kept on a single physical line: a quoted string literal cannot
# span a line break.
surface_plot(df2, 'n_samples', 'cluster_std', 'n_unq',
             "Unique CV accuracies, 0<cluster_std<=1")

You might also like