GitHub


Untitled

Untitled

Untitled

Dataset Information

Bock, R. (2007). MAGIC Gamma Telescope. UCI Machine Learning Repository. https://doi.org/10.24432/C52C8B.

Step 0. Import Libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
# Column names supplied to read_csv via `names=`; the final entry, 'class',
# is the label column that gets re-encoded below.
# NOTE(review): the UCI attribute list appears to name the fifth column
# 'fConc1'; 'fConcl' may be a typo. Confirm before renaming, since other
# cells may reference the current name.
cols = [
    'fLength', 'fWidth', 'fSize', 'fConc', 'fConcl', 'fAsym',
    'fM3Long', 'fM3Trans', 'fAlpha', 'fDist', 'class',
]
df = pd.read_csv('/content/magic04.data', names=cols)
df.head()

# Encode the label as an integer: 1 where the raw value is "g", 0 otherwise.
df['class'] = df['class'].eq("g").astype(int)

df.head(2)
# Overlay the density-normalized histograms of the two classes for every
# feature column (all of `cols` except the trailing 'class') so their
# distributions can be compared visually.
gamma_rows = df[df['class'] == 1]   # rows whose label was "g"
hadron_rows = df[df['class'] == 0]  # every other row
for label in cols[:-1]:
    plt.hist(gamma_rows[label], color='blue', label='gamma', alpha=0.7, density=True)
    # The second series is the non-gamma class; it needs its own legend entry,
    # otherwise both histograms show up as 'gamma'.
    plt.hist(hadron_rows[label], color='red', label='hadron', alpha=0.7, density=True)
    plt.title(label)
    plt.ylabel("Probability")
    plt.xlabel(label)
    plt.legend()
    plt.show()

Train, validation, and test datasets

def scale_dataset(dataframe, oversample=False, scaler=None):
    """Standardize the feature columns of ``dataframe`` and optionally oversample.

    The last column is treated as the target; every other column is a feature.

    Args:
        dataframe: pandas DataFrame whose last column holds the labels.
        oversample: When true, the scaled features and labels are passed
            through ``RandomOverSampler().fit_resample`` before being returned.
        scaler: Optional, already-fitted object exposing ``transform``. When
            given, it is applied as-is instead of fitting a new
            ``StandardScaler`` on this frame, which lets validation/test data
            be scaled with the statistics learned from the training data.
            When ``None`` (the default) a fresh ``StandardScaler`` is fitted
            on ``dataframe``, exactly as before.

    Returns:
        A ``(data, X, y)`` tuple: ``X`` is the scaled (and possibly resampled)
        feature matrix, ``y`` the matching label vector, and ``data`` is ``X``
        with ``y`` appended as a final column.
    """
    # Positional selection keeps "last column is the target" true even if
    # column labels are duplicated.
    y = dataframe.iloc[:, -1].values
    X = dataframe.iloc[:, :-1].values

    if scaler is None:
        X_scaled = StandardScaler().fit_transform(X)
    else:
        X_scaled = scaler.transform(X)

    if oversample:
        ros = RandomOverSampler()
        X_scaled, y = ros.fit_resample(X_scaled, y)

    # y is 1-D; reshape it into a column so it can be stacked next to X.
    data = np.hstack((X_scaled, np.reshape(y, (-1, 1))))

    return data, X_scaled, y
# Shuffle the rows once, then cut them 60% / 20% / 20% into
# train / validation / test. Positional slicing replaces np.split because
# np.split on a DataFrame goes through the deprecated DataFrame.swapaxes.
shuffled = df.sample(frac=1)
train_end, valid_end = int(0.6 * len(df)), int(0.8 * len(df))
train = shuffled.iloc[:train_end]
valid = shuffled.iloc[train_end:valid_end]
test = shuffled.iloc[valid_end:]

# The test split is left un-resampled (oversample=False).
train, X_train, y_train = scale_dataset(train, oversample=True)
valid, X_valid, y_valid = scale_dataset(valid, oversample=True)
test, X_test, y_test = scale_dataset(test, oversample=False)

Untitled

KNN