-
Notifications
You must be signed in to change notification settings - Fork 12
/
lfr.py
111 lines (97 loc) · 3.44 KB
/
lfr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import pickle
import os
import numpy as np
import csv
import scipy.optimize as optim
from helper import *
from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
# k = number of prototypes
k = 10

# Read the numeric CSV into a 2-D float array.
# NOTE(review): the path below is an empty placeholder and must be filled in
# before the script can run. The file is opened in binary mode, which is what
# the Python 2 csv module expects.
with open('', 'rb') as f:
    filk = csv.reader(f)
    # The first row is assumed to be a header and is discarded; the default
    # argument keeps an empty file from raising StopIteration here.
    next(filk, None)
    # csv.reader yields [] for blank lines; skip them so every parsed row has
    # the same width and np.array() builds a proper 2-D matrix.
    dat = [[float(r) for r in row] for row in filk if row]
print('finished reading data')
data = np.array(dat)
# The raw matrix is consumed as [features..., sensitive flag, label]:
# the last column is the label, the one before it the sensitive flag.
y = data[:, -1].flatten()
data = data[:, :-1]

# Keep the unscaled flag so the `== 1` test below still works after scaling.
sensitive = data[:, -1]

# Standardise all remaining columns, then discard the (scaled) flag column so
# only the features stay in `data`.
data = preprocessing.scale(data)[:, :-1]
#data = data[:, :387]

# Row positions of each group; rows whose flag equals 1 form the "sensitive"
# group, every other row the "nonsensitive" one.
sensitive_idx = np.flatnonzero(sensitive == 1)
nonsensitive_idx = np.flatnonzero(sensitive != 1)

# Per-group feature matrices and label vectors.
data_sensitive = data[sensitive_idx]
y_sensitive = y[sensitive_idx]
data_nonsensitive = data[nonsensitive_idx]
y_nonsensitive = y[nonsensitive_idx]
# Load the precomputed train/test row indices for both groups.
# NOTE(review): the path is an empty placeholder and must be filled in.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# point this at an index file you produced yourself.
with open('', 'rb') as f:
    indices = pickle.load(f)

# Entries are consumed positionally:
#   [0] train rows, sensitive group      [1] test rows, sensitive group
#   [2] train rows, nonsensitive group   [3] test rows, nonsensitive group
# Each index set addresses rows *within its own group's* matrix.
# The loaded list is deliberately left unmodified: appending to it before
# reading entries 2 and 3 would let a too-short index file silently reuse the
# sensitive-group indices instead of failing with an IndexError.
train_sensitive_rows = indices[0]
test_sensitive_rows = indices[1]
train_nonsensitive_rows = indices[2]
test_nonsensitive_rows = indices[3]

training_sensitive = data_sensitive[train_sensitive_rows, :]
ytrain_sensitive = y_sensitive[train_sensitive_rows]
test_sensitive = data_sensitive[test_sensitive_rows, :]
ytest_sensitive = y_sensitive[test_sensitive_rows]

training_nonsensitive = data_nonsensitive[train_nonsensitive_rows, :]
ytrain_nonsensitive = y_nonsensitive[train_nonsensitive_rows]
test_nonsensitive = data_nonsensitive[test_nonsensitive_rows, :]
ytest_nonsensitive = y_nonsensitive[test_nonsensitive_rows]
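# Disabled reference code: draws a random train/test row split for each group
# (apparently how a pickled index list like the one loaded above was produced).
#indices = []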
#
#idx=np.array(list(set(np.random.randint(0, data_sensitive.shape[0], 3000))))
#training_sensitive = data_sensitive[idx,:]
#ytrain_sensitive = y_sensitive[idx]
#idx2=np.array([i for i in range(data_sensitive.shape[0]) if i not in idx])
#test_sensitive = data_sensitive[idx2,:]
#ytest_sensitive = y_sensitive[idx2]
#indices.append(idx)
#indices.append(idx2)
#
#idx=np.array(list(set(np.random.randint(0, data_nonsensitive.shape[0], 6000))))
#training_nonsensitive = data_nonsensitive[idx,:]
#ytrain_nonsensitive = y_nonsensitive[idx]
#idx2=np.array([i for i in range(data_nonsensitive.shape[0]) if i not in idx])
#test_nonsensitive = data_nonsensitive[idx2,:]
#ytest_nonsensitive = y_nonsensitive[idx2]
#indices.append(idx)
#indices.append(idx2)
#
##with open('d:/dropbox/crime_lab_ny/fair_algorithms/data/indices_zemel.csv', 'wb') as f:
## pickle.dump(indices, f)
# Stack the two groups back together, sensitive rows first, so that the
# feature matrices and label vectors stay row-aligned.
training = np.concatenate([training_sensitive, training_nonsensitive], axis=0)
ytrain = np.concatenate([ytrain_sensitive, ytrain_nonsensitive], axis=0)

test = np.concatenate([test_sensitive, test_nonsensitive], axis=0)
ytest = np.concatenate([ytest_sensitive, ytest_nonsensitive], axis=0)
# Starting point for the optimizer.
# `src` may name a text file of previously saved parameters (one float per
# line); when it exists the parameters are loaded and the objective value for
# them is printed. Otherwise a random vector is drawn.
# NOTE(review): with the empty placeholder path, os.path.isfile() is always
# False, so the random-initialisation branch is the one that runs.
src = ''
if os.path.isfile(src):
    with open(src, 'r') as f:
        # Split on any whitespace rather than on '\n' and dropping the last
        # piece: that form silently discarded the final parameter whenever
        # the file did not end with a newline.
        rez = np.array([float(r) for r in f.read().split()])
    # Trailing arguments (k, 1e-4, 0.1, 1000, 0) are passed straight through
    # to helper.LFR -- presumably prototype count, three loss weights and a
    # results flag; confirm against helper.LFR.
    print(LFR(rez, training_sensitive, training_nonsensitive, ytrain_sensitive,
              ytrain_nonsensitive, k, 1e-4, 0.1, 1000, 0))
else:
    print('not loading')
    # Vector length: 2 * n_features + k + n_features * k.
    rez = np.random.uniform(size=data.shape[1] * 2 + k + data.shape[1] * k)
# Box constraints, one (low, high) pair per entry of `rez`.
# Only the k entries located right after the first 2 * n_features positions
# are restricted to [0, 1]; everything before and after is left unbounded.
bounded_start = data.shape[1] * 2
bounded_stop = bounded_start + k
bnd = [
    (0, 1) if bounded_start <= pos < bounded_stop else (None, None)
    for pos in range(len(rez))
]
# Extra positional arguments handed to LFR after the parameter vector.
# NOTE(review): the trailing constants (1e-4, 0.1, 1000, 0) mirror the ones
# used when evaluating loaded parameters -- confirm their meaning in helper.LFR.
lfr_args = (
    training_sensitive, training_nonsensitive,
    ytrain_sensitive, ytrain_nonsensitive,
    k, 1e-4, 0.1, 1000, 0,
)

# Minimise LFR with bounded L-BFGS. No analytic gradient is supplied, so
# approx_grad=True makes SciPy estimate it numerically with step `epsilon`.
# `rez` is rebound to whatever fmin_l_bfgs_b returns (per the SciPy docs a
# tuple of best parameters, objective value and an info dict).
rez = optim.fmin_l_bfgs_b(
    LFR,
    x0=rez,
    args=lfr_args,
    approx_grad=True,
    bounds=bnd,
    epsilon=1e-5,
    maxfun=150000,
    maxiter=150000,
)