# GitHub
# Dataset: Bock, R. (2007). MAGIC Gamma Telescope. UCI Machine Learning Repository. https://doi.org/10.24432/C52C8B.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
# Column names applied to the data file. Because `names=` is passed, pandas
# reads the first line as data, not a header. The last column is the label.
# NOTE: the fifth feature is 'fConc1' (digit one), matching the dataset's
# published attribute list; it was previously misspelled 'fConcl'.
cols = [
    'fLength', 'fWidth', 'fSize', 'fConc', 'fConc1', 'fAsym',
    'fM3Long', 'fM3Trans', 'fAlpha', 'fDist', 'class',
]
df = pd.read_csv('/content/magic04.data', names=cols)
df.head()
# Encode the label as an integer: 1 where the class is "g", 0 otherwise.
df['class'] = (df['class'] == "g").astype(int)
df.head(2)
# Overlay, for every feature column (all but the trailing label column), the
# normalized histogram of the class-1 rows against that of the class-0 rows.
for label in cols[:-1]:
    # Class 1 is the rows whose original label was "g" (gamma).
    plt.hist(df[df['class'] == 1][label], color='blue', label='gamma', alpha=0.7, density=True)
    # Class 0 is everything else; it needs its own legend entry, otherwise
    # both histograms appear as 'gamma' and cannot be told apart.
    plt.hist(df[df['class'] == 0][label], color='red', label='hadron', alpha=0.7, density=True)
    plt.title(label)
    plt.ylabel("Probability")
    plt.xlabel(label)
    plt.legend()
    plt.show()
def scale_dataset(dataframe, oversample=False, scaler=None):
    """Scale the feature columns of *dataframe* and optionally oversample it.

    The last column of *dataframe* is taken as the label; every preceding
    column is taken as a feature.

    Args:
        dataframe: DataFrame whose last column holds the labels.
        oversample: When True, the scaled features and labels are resampled
            with ``RandomOverSampler`` before being returned.
        scaler: Optional already-fitted object exposing ``transform(X)``.
            When given, it is applied without refitting, so a scaler fitted
            on the training split can be reused for validation/test data.
            When None (the default), a new ``StandardScaler`` is fitted on
            this dataframe's own features.

    Returns:
        A tuple ``(data, X, y)``: ``X`` is the scaled feature matrix, ``y``
        the label vector, and ``data`` is ``X`` with ``y`` appended as the
        last column.
    """
    X = dataframe[dataframe.columns[:-1]].values
    y = dataframe[dataframe.columns[-1]].values

    if scaler is None:
        X = StandardScaler().fit_transform(X)
    else:
        # Reuse the caller's statistics instead of refitting on this split.
        X = scaler.transform(X)

    if oversample:
        X, y = RandomOverSampler().fit_resample(X, y)

    # y is reshaped to a column so it can be stacked next to the features.
    data = np.hstack((X, np.reshape(y, (-1, 1))))
    return data, X, y
# Shuffle the rows once, then cut the shuffled frame at 60% and 80% to get
# the train / validation / test splits. Plain positional slicing is used
# instead of np.split, which on a DataFrame goes through the deprecated
# DataFrame.swapaxes.
shuffled = df.sample(frac=1)
train_end = int(0.6 * len(df))
valid_end = int(0.8 * len(df))
train = shuffled.iloc[:train_end]
valid = shuffled.iloc[train_end:valid_end]
test = shuffled.iloc[valid_end:]

# Only the training split is oversampled. Validation and test keep their
# natural class balance so that metrics computed on them are not distorted
# by duplicated minority-class rows.
train, X_train, y_train = scale_dataset(train, oversample=True)
valid, X_valid, y_valid = scale_dataset(valid, oversample=False)
test, X_test, y_test = scale_dataset(test, oversample=False)
# KNN (k-nearest neighbors)