1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
| import random import numpy as np import sklearn.svm as svm from sklearn.datasets.samples_generator import make_classification from sklearn.externals import joblib import warnings; warnings.filterwarnings(action='ignore')
class TSVM(object):
    """Binary transductive SVM built on top of ``sklearn.svm.SVC``.

    The model is first fitted on the labeled samples, then repeatedly
    refitted on labeled + pseudo-labeled samples while the weight given to
    the pseudo-labeled samples is doubled from ``Cu`` up to ``Cl``.

    Attributes:
        Cl: penalty ``C`` passed to the underlying ``SVC``; also the upper
            bound of the weight annealing in :meth:`train`.
        Cu: initial sample weight of the unlabeled (pseudo-labeled) samples.
        kernel: kernel name forwarded to ``SVC``.
        clf: the underlying ``SVC`` instance.
    """

    def __init__(self, kernel='linear'):
        """Create an untrained model using the given SVC ``kernel``."""
        self.Cl, self.Cu = 1.5, 0.001
        self.kernel = kernel
        # Use the attribute rather than repeating the literal so the two
        # cannot drift apart.
        self.clf = svm.SVC(C=self.Cl, kernel=self.kernel)

    def train(self, X1, Y1, X2, *, verbose=False, max_flips=None):
        """Fit the classifier on labeled data plus pseudo-labeled data.

        Args:
            X1: labeled samples, shape ``(n_labeled, n_features)``.
            Y1: labels of ``X1``; either 1-D or a column vector. Exactly two
                distinct label values are required.
            X2: unlabeled samples, shape ``(n_unlabeled, n_features)``.
            verbose: when true, print the smallest slack of every inner
                iteration (the value the original code always printed).
            max_flips: optional cap on the number of label-flip/refit rounds
                per weight level. ``None`` (default) means no cap, i.e. the
                inner loop only stops once the smallest slack is <= 0.

        Raises:
            ValueError: if ``Y1`` does not contain exactly two classes.
        """
        X1 = np.asarray(X1)
        X2 = np.asarray(X2)
        # Flatten so both 1-D labels and (n, 1) column vectors are accepted.
        Y1 = np.asarray(Y1).reshape(-1)

        classes = np.unique(Y1)
        if len(classes) != 2:
            raise ValueError(
                'TSVM.train needs exactly two classes in Y1, got %d'
                % len(classes)
            )
        # np.unique sorts, so `positive` is the larger label. The slack
        # computation below relies on decision_function being positive for
        # that label (scikit-learn's documented binary convention).
        negative, positive = classes

        self.clf.fit(X1, Y1)
        if len(X2) == 0:
            # Nothing to pseudo-label: plain supervised fit.
            return

        n_labeled = len(X1)
        # Anneal a local copy: mutating self.Cu would make every later
        # train() call skip the loop below.
        cu = self.Cu
        sample_weight = np.ones(n_labeled + len(X2))
        sample_weight[n_labeled:] = cu

        # np.array makes a private, writable copy of the predictions.
        Y2 = np.array(self.clf.predict(X2)).reshape(-1)
        X = np.vstack([X1, X2])

        while cu < self.Cl:
            self.clf.fit(X, np.concatenate([Y1, Y2]),
                         sample_weight=sample_weight)
            flips = 0
            while True:
                decision = np.asarray(
                    self.clf.decision_function(X2)).reshape(-1)
                # Hinge slack 1 - y * f(x) needs y in {-1, +1}; map the
                # actual label values onto signs instead of using them raw.
                signs = np.where(Y2 == positive, 1.0, -1.0)
                slack = 1.0 - signs * decision
                smallest = slack.min()
                if verbose:
                    print(smallest)
                if smallest <= 0:
                    break
                if max_flips is not None and flips >= max_flips:
                    break
                # Flip every sample tied for the smallest slack to the
                # other class, then refit with the updated pseudo-labels.
                worst = np.flatnonzero(slack == smallest)
                Y2[worst] = np.where(Y2[worst] == positive,
                                     negative, positive)
                self.clf.fit(X, np.concatenate([Y1, Y2]),
                             sample_weight=sample_weight)
                flips += 1
            cu = min(2 * cu, self.Cl)
            sample_weight[n_labeled:] = cu

    def score(self, X, Y):
        """Return ``self.clf.score(X, Y)`` for the given samples/labels."""
        return self.clf.score(X, Y)

    def predict(self, X):
        """Return the underlying classifier's predictions for ``X``."""
        return self.clf.predict(X)

    def save(self, path='./TSVM.model'):
        """Dump the underlying classifier to ``path`` with joblib."""
        joblib.dump(self.clf, path)

    def load(self, model_path='./TSVM.model'):
        """Replace the underlying classifier with the one at ``model_path``."""
        # NOTE: joblib.load unpickles the file; only load trusted files.
        self.clf = joblib.load(model_path)
if __name__ == '__main__':
    # Demo: generate a synthetic two-class problem, treat the first
    # `n_given` samples as labeled and the rest as unlabeled, train the
    # TSVM and print its accuracy on the held-back labels.
    n_given = 70
    features, labels = make_classification(
        n_samples=200,
        n_features=3,
        n_informative=2,
        n_redundant=1,
        n_repeated=0,
        n_clusters_per_class=2,
    )

    # Independent copies so the generated arrays are never aliased.
    X1 = features[:n_given].copy()
    X2 = features[n_given:].copy()

    # Labels are handed over as (n, 1) column vectors.
    Y1 = labels[:n_given].copy().reshape(-1, 1)
    Y2_labeled = labels[n_given:].copy().reshape(-1, 1)

    model = TSVM()
    model.train(X1, Y1, X2)

    accuracy = model.score(X2, Y2_labeled)
    print(accuracy)
|