-
Notifications
You must be signed in to change notification settings - Fork 1
/
split_datasets.py
158 lines (117 loc) · 4.53 KB
/
split_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import glob
import random
from package.config import config
def enumerate_train_test(filenames, num_train):
    """Cut ``filenames`` into a leading train part and a trailing test part.

    The incoming order is kept as-is: following DSAC, names are not
    shuffled, the sequence is simply cut at position ``num_train``.

    Args:
        filenames: Iterable of example names, already in the desired order.
        num_train: Number of leading entries that go to the train split.

    Returns:
        A ``(train, test)`` tuple of lists.
    """
    indexed = list(enumerate(filenames))
    train_part = [name for position, name in indexed if position < num_train]
    test_part = [name for position, name in indexed if not position < num_train]
    return train_part, test_part
def enumerate_train_val_test(filenames, num_img):
    """Randomly split ``filenames`` into train, val and test lists.

    The names are shuffled with the module-level ``random`` generator, then
    the first ``int(num_img * 0.6)`` go to train, the next
    ``int(num_img * 0.2)`` go to val, and everything left goes to test.

    Args:
        filenames: Sequence of example names. It is not modified.
        num_img: Count the 60% / 20% cut points are computed from; callers
            normally pass ``len(filenames)``.

    Returns:
        A ``(train, val, test)`` tuple of lists in shuffled order.
    """
    # Shuffle a private copy: the caller's sequence stays untouched (and may
    # be a tuple), while the permutation drawn from ``random`` is the same
    # one an in-place shuffle would have produced.
    shuffled = list(filenames)
    random.shuffle(shuffled)

    # Clamp at zero so a non-positive ``num_img`` sends everything to test
    # instead of turning into a negative slice index.
    num_train = max(int(num_img * 0.6), 0)
    num_val = max(int(num_img * 0.2), 0)
    val_end = num_train + num_val

    all_train_examples = shuffled[:num_train]
    all_val_examples = shuffled[num_train:val_end]
    all_test_examples = shuffled[val_end:]
    return all_train_examples, all_val_examples, all_test_examples
def enumerate_train_val(filenames, num_img):
    """Randomly split ``filenames`` into sorted train and val lists.

    The names are shuffled with the module-level ``random`` generator; the
    first ``int(num_img * 0.8)`` go to train and every remaining name goes
    to val. Both lists are sorted before being returned.

    Args:
        filenames: Sequence of example names. It is not modified.
        num_img: Count the 80% cut point is computed from; callers normally
            pass ``len(filenames)``.

    Returns:
        A ``(train, val)`` tuple of sorted lists.
    """
    # Shuffle a private copy so the caller's sequence is left untouched; the
    # permutation drawn from ``random`` matches an in-place shuffle.
    shuffled = list(filenames)
    random.shuffle(shuffled)

    # Clamp at zero so a non-positive ``num_img`` puts everything in val
    # instead of turning into a negative slice index.
    num_train = max(int(num_img * 0.8), 0)

    train_examples = sorted(shuffled[:num_train])
    val_examples = sorted(shuffled[num_train:])
    return train_examples, val_examples
# split vaihingen
def split_vaihingen(num_train=100):
    """Write the Vaihingen train/test and pretrain train/val list files.

    Image paths matching ``building_[0-9]*.tif`` under
    ``config['vaihingen']['data_path']`` are sorted; the first ``num_train``
    become the train list and the rest the test list. The train list is then
    randomly split 80/20 into the pretrain train and val lists. Each list is
    written one path per line to the file named in the config
    (``config['vaihingen']`` for train/test, ``config['pretrain_vaihingen']``
    for the pretrain lists).

    Args:
        num_train: Number of leading (sorted) images assigned to train.
    """
    cfg = config['vaihingen']
    pattern = os.path.join(cfg['data_path'], 'building_[0-9]*.tif')
    image_paths = sorted(glob.glob(pattern))

    train_examples, test_examples = enumerate_train_test(image_paths, num_train)
    # Size the pretrain split from the images actually found: when fewer than
    # ``num_train`` images exist, using ``num_train`` here would skew the
    # 80/20 cut and could leave the val list empty.
    pretrain_examples, preval_examples = enumerate_train_val(
        sorted(train_examples), len(train_examples))

    # Resolve every output path before writing anything, so a missing config
    # key fails before any file is touched.
    outputs = [
        (cfg['train'], train_examples),
        (cfg['test'], test_examples),
        (config['pretrain_vaihingen']['train'], pretrain_examples),
        (config['pretrain_vaihingen']['test'], preval_examples),
    ]
    for list_path, names in outputs:
        with open(list_path, 'w') as handle:
            handle.writelines('{}\n'.format(name) for name in names)
# split bing
def split_bing(num_train=335):
    """Write the Bing train/test and pretrain train/val list files.

    Image paths matching ``building_[0-9]*.png`` under
    ``config['bing']['data_path']`` are sorted; the first ``num_train``
    become the train list and the rest the test list. The train list is then
    randomly split 80/20 into the pretrain train and val lists. Each list is
    written one path per line to the file named in the config
    (``config['bing']`` for train/test, ``config['pretrain_bing']`` for the
    pretrain lists).

    Args:
        num_train: Number of leading (sorted) images assigned to train.
    """
    cfg = config['bing']
    pattern = os.path.join(cfg['data_path'], 'building_[0-9]*.png')
    image_paths = sorted(glob.glob(pattern))

    train_examples, test_examples = enumerate_train_test(image_paths, num_train)
    # Size the pretrain split from the images actually found: when fewer than
    # ``num_train`` images exist, using ``num_train`` here would skew the
    # 80/20 cut and could leave the val list empty.
    pretrain_examples, preval_examples = enumerate_train_val(
        sorted(train_examples), len(train_examples))

    # Resolve every output path before writing anything, so a missing config
    # key fails before any file is touched.
    outputs = [
        (cfg['train'], train_examples),
        (cfg['test'], test_examples),
        (config['pretrain_bing']['train'], pretrain_examples),
        (config['pretrain_bing']['test'], preval_examples),
    ]
    for list_path, names in outputs:
        with open(list_path, 'w') as handle:
            handle.writelines('{}\n'.format(name) for name in names)
def split_inria():
    """Write the Inria train/val/test list files.

    Every ``*.tif`` under ``config['inria']['img_path']`` is collected and
    reduced to its base file name. The names are then randomly split by
    ``enumerate_train_val_test`` and each part is written, one name per line,
    to the file the config names under ``'train'``, ``'val'`` and ``'test'``.
    """
    cfg = config['inria']
    pattern = os.path.join(cfg['img_path'], '*.tif')
    basenames = [os.path.basename(found) for found in glob.glob(pattern)]

    train_part, val_part, test_part = enumerate_train_val_test(
        basenames, len(basenames))

    # Look up all three destinations first, then write them in the same
    # train -> val -> test order.
    destinations = (
        (cfg['train'], train_part),
        (cfg['val'], val_part),
        (cfg['test'], test_part),
    )
    for list_path, names in destinations:
        with open(list_path, 'w') as handle:
            handle.writelines('{}\n'.format(entry) for entry in names)
if __name__ == "__main__":
    # Regenerate the split list files for each dataset, in this order.
    for build_split in (split_vaihingen, split_bing, split_inria):
        build_split()