
import numpy as np

from sklearn.metrics import  auc,  precision_recall_curve,f1_score

from horosvm import horo_svm

# Experiment: train a binary horo_svm on a class-balanced resample of the
# training split and report the F1 score on the test split over several runs.
dataset = 'worker'
# NOTE(review): there is no separator between 'wordnet' and the dataset name,
# so this resolves to 'data/wordnetworker/'. Confirm against the on-disk layout.
data_path = 'data/wordnet' + dataset + '/'
dim = 2
# Number of independent resample/train/evaluate repetitions.
n_runs = 3

# NOTE(review): the same classifier instance is refit on every run; this
# presumes fit() starts from a fresh state each time -- confirm in horosvm.
hsvm = horo_svm(C=5, batch_size=10, verbose=0, multiclass=False)

# One F1 score per run.
F1 = np.zeros(n_runs)

# The file name does not depend on the run index, so load it once.  An .npz
# archive is read lazily (every key lookup re-reads the array from disk), so
# materialise each array a single time and close the archive right away.
data_name = dataset + '_' + str(dim) + '_poincare_embedding.npz'
with np.load(data_path + data_name) as data:
    x_train_full = data['x_train']
    y_train_full = data['y_train']
    x_test = data['x_test']
    y_test = data['y_test']

# Class statistics are deterministic, so compute them once outside the loop.
n_positives = len(x_train_full[y_train_full == 1])
negative_idx = np.where(y_train_full == 0)[0]
positive_idx = np.where(y_train_full == 1)[0]
# Both classes are resampled to the same size (equals n_positives when dim == 2).
n_per_class = n_positives * (dim - 1)

for i in range(n_runs):
    print('The data set {} has {} positive samples'.format(data_name, n_positives))

    # Draw with replacement, negatives first and then positives, so the
    # sequence of random draws matches the original script.
    idx0 = np.random.choice(negative_idx, n_per_class, replace=True)
    idx1 = np.random.choice(positive_idx, n_per_class, replace=True)
    idx = np.concatenate((idx0, idx1))
    X_train = x_train_full[idx]
    y_train = y_train_full[idx]
    print('The data set {} has {} negative samples and {} positive samples after sampling'.format(data_name, len(idx0), len(idx1)))

    hsvm.fit(X_train, y_train)
    y_pred = hsvm.predict(x_test)
    print('The accuracy of the model on the test set is {}'.format(hsvm.accuracy(x_test, y_test)))
    f1 = f1_score(y_test, y_pred)
    print('The F1 score of the model on the test set is {}'.format(f1))
    F1[i] = f1


# Summary over all runs (np.std defaults to the population standard deviation).
print('The average F1 score of the model on the test set is {}'.format(np.mean(F1)))
print('The standard deviation of the F1 score of the model on the test set is {}'.format(np.std(F1)))

