Skip to content

Commit

Permalink
Cleanup 1
Browse files Browse the repository at this point in the history
  • Loading branch information
JKomorniczak committed Oct 24, 2023
1 parent 11736ca commit 267a983
Show file tree
Hide file tree
Showing 58 changed files with 32 additions and 81 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
9 changes: 2 additions & 7 deletions extract_real.py → E1_extract_real.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Przygotowanie strumieni i meta - rzeczywiste
Experiment 1 -- collect streams and metafeatures -- real-world
"""

import numpy as np
Expand Down Expand Up @@ -38,7 +38,7 @@
fname=(f.split('/')[1]).split('.')[0]

drfs = np.load('real_streams_gt/%s.npy' % fname)

concept=0
out = []

Expand All @@ -48,7 +48,6 @@
# GET CONCEPT
if chunk in drfs:
concept+=1
# print(concept)

# CALCULATE
try:
Expand All @@ -58,17 +57,13 @@
break

if len(np.unique(y))<2:
# print('skip', chunk)
continue
# exit()

mfe = MFE(groups=[measure_key])
mfe.fit(X,y)
ft_labels, ft = mfe.extract()
ft.append(concept)

out.append(ft)
# print(np.array(out).shape)
# exit()

np.save('res/real_%i_%s.npy' % (f_id, measure_key), np.array(out))
2 changes: 1 addition & 1 deletion extract_semi.py → E1_extract_semi.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Przygotowanie strumieni i meta - semi
Experiment 1 -- collect streams and metafeatures -- semi-synthetic
"""

import numpy as np
Expand Down
2 changes: 1 addition & 1 deletion extract_synthetic.py → E1_extract_synthetic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Przygotowanie strumieni i meta - synthetic
Experiment 1 -- collect streams and metafeatures -- synthetic
"""

import numpy as np
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@ def sqspace(start, end, num):
for d_id, drift_type in enumerate(['Sudden', 'Gradual', 'Incremental']):
clf_temp = clf[d_id]
clf_temp_mean = np.mean(clf[d_id], axis=(0,2))

#print('A', clf.shape)
#exit()


for cm_id, cm in enumerate(clf_temp_mean.T):
ax[d_id].plot(n_features, cm, label=base_clfs[cm_id], c=c[cm_id])
ax[d_id].set_title('%s drifts' % drift_type)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file added __pycache__/utils.cpython-310.pyc
Binary file not shown.
Binary file removed bar.png
Binary file not shown.
Binary file removed baz.png
Binary file not shown.
File renamed without changes
4 changes: 4 additions & 0 deletions figure-one.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
'''
Script generating sample figure for manuscript
'''
import matplotlib.pyplot as plt
import numpy as np
from strlearn.streams import StreamGenerator
Expand Down
2 changes: 1 addition & 1 deletion metric_combine.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Sklejenie meta z obiecujących podgrup
Combine metafeatures from promising groups
"""
import numpy as np

Expand Down
Binary file removed olaboga.png
Binary file not shown.
Binary file removed olaboga2.png
Binary file not shown.
24 changes: 11 additions & 13 deletions real_gt.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
"""
Oznaczanie momentów dryfu w rzeczywistych
Script for marking moments of drift for real-world datastreams.
"""
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from tqdm import tqdm
import strlearn as sl
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

from utils import ELMI
real_streams = [
'real_streams/covtypeNorm-1-2vsAll-pruned.arff',
'real_streams/electricity.npy',
Expand All @@ -21,10 +20,8 @@

stream_static = { 'chunk_size': 300 }


pbar = tqdm(total=len(real_streams))


for f_id, f in enumerate(real_streams):
out = []

Expand Down Expand Up @@ -86,21 +83,22 @@
if f_id==5:
drfs=[9,35,60,180,220]

# clf = [GaussianNB(), MLPClassifier()]
# clf = [GaussianNB(), MLPClassifier(), ELMI(probing_rate=1., update_rate=1.)]
clf = [GaussianNB(), MLPClassifier(), DecisionTreeClassifier()]
clf = [GaussianNB(), MLPClassifier()]

evaluator = sl.evaluators.TestThenTrain()
evaluator.process(stream, clf)

fig, ax = plt.subplots(1,1,figsize=(10,5))

for i in range(len(clf)):
plt.plot(evaluator.scores[i,:,1], alpha=0.3, label=['gnb', 'mlp', 'elm9', 'elm1'][i])
plt.vlines(drfs,0.5,1, color='r')
# plt.xticks(np.linspace(0,250,30))
plt.legend()
plt.grid()
ax.plot(evaluator.scores[i,:,1], alpha=0.9, label=['GNB', 'MLP'][i], c=['blue','tomato'][i], lw=1)
plt.xticks(drfs, rotation=90)
plt.legend(frameon=False)
plt.grid(ls=':')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xlabel('chunk')
ax.set_ylabel('BAC')

plt.tight_layout()
plt.savefig('real_streams_gt/%s.png' % fname)
Expand Down
Binary file modified real_streams_gt/INSECTS-abrupt_imbalanced_norm.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified real_streams_gt/INSECTS-gradual_imbalanced_norm.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified real_streams_gt/INSECTS-incremental_imbalanced_norm.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added real_streams_gt/clf_electricity.npy
Binary file not shown.
Binary file added real_streams_gt/clf_poker-lsn-1-2vsAll-pruned.npy
Binary file not shown.
Binary file modified real_streams_gt/covtypeNorm-1-2vsAll-pruned.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified real_streams_gt/electricity.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified real_streams_gt/poker-lsn-1-2vsAll-pruned.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified res/real_0_clustering.npy
Binary file not shown.
Binary file modified res/real_0_complexity.npy
Binary file not shown.
Binary file modified res/real_1_clustering.npy
Binary file not shown.
Binary file modified res/real_1_complexity.npy
Binary file not shown.
Binary file modified res/real_2_clustering.npy
Binary file not shown.
Binary file modified res/real_2_complexity.npy
Binary file not shown.
Binary file modified res/real_3_clustering.npy
Binary file not shown.
Binary file modified res/real_3_complexity.npy
Binary file not shown.
Binary file modified res/real_4_clustering.npy
Binary file not shown.
Binary file modified res/real_4_complexity.npy
Binary file not shown.
Binary file modified res/real_5_clustering.npy
Binary file not shown.
Binary file modified res/real_5_complexity.npy
Binary file not shown.
65 changes: 11 additions & 54 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,57 +51,14 @@ def find_real_drift(chunks, drifts):
selected2_indexes.append(m_id)

selected2_measure_names = measure_labels_selected_flat[selected2_indexes]

from sklearn.base import BaseEstimator, ClassifierMixin

# Incrementally trained classifier with a fixed random hidden layer
# (coefs_, intercepts_) and an output matrix (beta_) that is blended towards
# a pseudo-inverse solution on every partial_fit call.
# Presumably an extreme learning machine, judging by the name -- TODO confirm.
# NOTE(review): leading indentation is missing in this rendering, so the
# nesting of the statements below cannot be read off here; confirm against
# the original utils.py before reusing this code.
class ELMI(BaseEstimator, ClassifierMixin):
# Store the three hyper-parameters on the instance without modification.
def __init__(self, hidden_layer_size=1024,
probing_rate=.1,
update_rate=.1):
self.hidden_layer_size = hidden_layer_size
self.probing_rate = probing_rate
self.update_rate = update_rate

# Update the model with one batch (X, y) and return self.
# When `classes` is not given it is taken from the labels of this batch.
def partial_fit(self, X, y, classes=None):

if classes is None:
classes = np.unique(y)
# `enc` is created only once and holds 0..len(classes)-1, not the class
# values themselves -- assumes labels are already integers in that range;
# TODO confirm against callers.
if not hasattr(self, 'enc'):
self.enc = np.arange(len(classes))
# One-hot encode the labels by comparing every label with `enc`.
_y = (np.array([yi==self.enc for yi in y]).astype(int))

# First call: `beta_` does not exist yet, so the model state is created.
if not hasattr(self, 'beta_'):
# Problem dimensions are read from the encoded labels and from X.
self.n_classes = _y.shape[1]
self.n_features = X.shape[1]

# Random input weights, uniform in [-1, 1), shape (n_features, hidden_layer_size).
self.coefs_ = np.random.uniform(-1, 1, size=(self.n_features,
self.hidden_layer_size))
# Random hidden-layer bias drawn from a standard normal distribution.
self.intercepts_ = np.random.normal(size=(self.hidden_layer_size,))

# Output weights start at zero, shape (hidden_layer_size, n_classes).
self.beta_ = np.zeros((self.hidden_layer_size, self.n_classes))

# Random boolean mask: each sample is kept when its uniform draw is below
# probing_rate, so only a random subset of the batch is used for the update.
pmask = np.random.uniform(size=_y.shape[0]) < self.probing_rate

H = self.activation(X[pmask].dot(self.coefs_) + self.intercepts_) # Propagate
H_pinv = np.linalg.pinv(H) # Inverse by Moore–Penrose

# Solve for the output weights on the selected samples, then mix them into
# the stored beta_ as a weighted average controlled by update_rate.
partial_beta = H_pinv.dot(_y[pmask])
self.beta_ = self.beta_ * (1-self.update_rate) + partial_beta * self.update_rate

return self

# Return the raw output-layer values for X (hidden activations times beta_).
# No normalisation is applied here, so rows are not forced to sum to one.
def predict_proba(self, X):
H = self.activation(X.dot(self.coefs_) + self.intercepts_)
return H.dot(self.beta_)

# Return, for every row of X, the column index of the largest output value.
# The index is returned as-is; it is not mapped back through `enc`.
def predict(self, X):
return np.argmax(self.predict_proba(X), axis=1)

# Logistic sigmoid applied to x.
def activation(self, x):
return 1. / (1. + np.exp(-x))

# Manually marked drift positions for the real-world data streams.
# Keys follow the stream file names without directory and extension; the
# values are presumably chunk indices at which a drift was marked (the same
# numbers are drawn on a 'chunk' axis in real_gt.py) -- TODO confirm.
drift_gt = {
    'covtypeNorm-1-2vsAll-pruned': [
        57, 121, 131, 155, 205, 260, 295, 350,
    ],
    'electricity': [
        20, 38, 55, 115, 145,
    ],
    'poker-lsn-1-2vsAll-pruned': [
        45, 90, 110, 120, 160, 182, 245, 275, 292, 320,
        358, 400, 450, 468, 480, 516, 540, 550, 590, 600,
        640, 710, 790, 831, 850, 880, 900, 920, 965, 1000,
        1010,
    ],
    'INSECTS-abrupt_imbalanced_norm': [
        125,
    ],
    'INSECTS-gradual_imbalanced_norm': [
        9, 60, 90, 125, 190,
    ],
    'INSECTS-incremental_imbalanced_norm': [
        9, 35, 60, 180, 220,
    ],
}

0 comments on commit 267a983

Please sign in to comment.