29 lines
873 B
Python
29 lines
873 B
Python
|
"""Fit a random forest on a whitespace-separated table and write test-set predictions.

The script reads ``starclusters-global-parameters2.dat``, shuffles the rows
with a fixed seed, uses the first 80% for training and the remaining 20% for
testing, and writes the test rows plus a prediction column to
``prediction.dat``.

Column roles, as used by the code below:
    * column 0 is excluded from the features (presumably an identifier --
      TODO confirm against the data file);
    * columns 1 .. second-to-last are the features;
    * the last column is the target.
"""
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn import utils

# The first line of the file is skipped and no header row is parsed, so the
# columns get integer labels 0..k-1. ``sep=r'\s+'`` is the documented
# replacement for the deprecated ``delim_whitespace=True``.
data = pd.read_csv(
    'starclusters-global-parameters2.dat',
    skiprows=1,
    sep=r'\s+',
    header=None,
)

# Reproducible shuffle followed by a positional 80/20 split.
shuffled_data = data.sample(frac=1, random_state=42)
n = int(0.8 * len(shuffled_data))

data_train = shuffled_data.iloc[:n]
# Explicit copy: a column is added to the test frame further down, and writing
# into a slice of ``shuffled_data`` would raise SettingWithCopyWarning.
data_test = shuffled_data.iloc[n:].copy()

X_train = data_train.iloc[:, 1:-1].values
y_train = data_train.iloc[:, -1].values

# The classifier is trained on integer codes instead of the raw target values.
# NOTE(review): if the target is really continuous, a regressor may be the
# better model -- confirm the intent with the author.
lab = preprocessing.LabelEncoder()
y_transformed = lab.fit_transform(y_train)

X_test = data_test.iloc[:, 1:-1].values

# Seeded like the shuffle above so repeated runs give the same predictions.
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train, y_transformed)
y_pred = classifier.predict(X_test)

# Map the predicted codes back to the original target values; the encoder
# codes on their own are meaningless to a reader of the output file.
# The new column is labelled with the *string* '8', which is distinct from the
# integer labels produced by ``header=None``.
data_test['8'] = lab.inverse_transform(y_pred)
data_test.to_csv('prediction.dat', sep=' ', index=False)