-
Notifications
You must be signed in to change notification settings - Fork 0
/
file_utils.py
120 lines (102 loc) · 3.85 KB
/
file_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import argparse
import random
import os
import time
import shutil
def gen_files(path, num_samples=None, postfix=None):
    ''' Get the paths of the image files.
    Walks ``path`` recursively and collects every file whose name ends
    with one of the wanted suffixes. When ``num_samples`` is given, the
    files of each visited folder are shuffled and at most ``num_samples``
    matching files are kept per folder.
    Params:
        path (str): path of the root folder
        num_samples (int): number of samples per class (per visited folder);
            None or 0 keeps every matching file
        postfix (str or tuple of str): file suffix(es) to keep. When omitted,
            the script-level ``args.postfix`` is used if it exists,
            otherwise ('.jpg', '.jpeg').
    Returns:
        list: list of file paths
    Author:
        Natasha (documentation)
    Date:
        2021/05/19
    '''
    if postfix is None:
        # Backward compatibility: the function used to read the global
        # ``args`` created in the ``__main__`` section. Look it up lazily so
        # the function also works when the module is imported.
        cli_args = globals().get('args')
        postfix = getattr(cli_args, 'postfix', ('.jpg', '.jpeg'))
    if not isinstance(postfix, str):
        # str.endswith only accepts a str or a tuple of str.
        postfix = tuple(postfix)

    files_list = []
    for roots, _, files in os.walk(path):
        sample_count = 0
        if num_samples:
            # Shuffle so the per-folder cap picks a random subset.
            random.shuffle(files)
        for file in files:
            if not file.endswith(postfix):
                continue
            files_list.append(os.path.join(roots, file))
            if num_samples:
                sample_count += 1
                if sample_count >= num_samples:
                    break
    print("数据总数量: ", len(files_list))
    return files_list
def get_class_names(args):
    ''' Get the folder names of each class.
    Walks ``args.data_path`` and records the last path component of every
    folder visited by os.walk. Note that this includes the root folder
    itself and any intermediate folders, not only leaf folders.
    Params:
        args (namespace): argparse arguments; only ``data_path`` is read
    Returns:
        list: list of class names
    Author:
        Natasha (documentation)
    Date:
        2021/05/19
    '''
    root = args.data_path
    folder_list = []
    for roots, _, _ in os.walk(root):
        class_name = os.path.split(roots)[-1]
        folder_list.append(class_name)
    print("类别总数量: ", len(folder_list))
    print(folder_list)
    # The docstring always promised a list; previously nothing was returned.
    return folder_list
def paths2txt(args):
    ''' Get the paths of all the samples in train, val
    and optionally write them to a file.
    For each phase the folder ``{args.data_path}/{phase}/`` is scanned
    with gen_files, the resulting paths are shuffled and a trailing
    newline is appended to each one. When ``args.write`` is truthy the
    lines are written to ``{args.txt_path}/CFN-208_{phase}.txt``.
    Params:
        args (namespace): argparse arguments; reads ``data_path``,
            ``num_samples``, ``write`` and ``txt_path``
    Returns:
        dict:
            'train': train paths (list of str, each ending in a newline)
            'val': val paths (list of str, each ending in a newline)
    Author:
        Natasha
    Date:
        2021/05/20
    '''
    phases = ['train', 'val']
    sample_dict = {}
    for phase in phases:
        folder = f"{args.data_path}/{phase}/"
        samples = gen_files(folder, args.num_samples)
        random.shuffle(samples)
        sample_dict[phase] = [sample + "\n" for sample in samples]

    if args.write:
        # makedirs creates missing parent folders too and tolerates an
        # already existing folder, so one call before the loop is enough.
        os.makedirs(args.txt_path, exist_ok=True)
        for phase in phases:
            # remember to edit the file name prefix to whichever project you want
            with open(args.txt_path + f'/CFN-208_{phase}.txt', 'w') as f:
                f.writelines(sample_dict[phase])
    return sample_dict
def new_subset(args):
    ''' Copy the sampled images and their .txt companions to a new folder.
    Takes the sample lists produced by paths2txt and copies every image,
    together with the ``.txt`` file of the same name next to it, to
    ``args.subset_path``, mirroring the folder layout found under
    ``{args.data_path}/{phase}/`` (i.e. ``subset_path/phase/label/file``
    for the usual phase/label/file layout).
    Params:
        args (namespace): argparse arguments; reads ``data_path`` and
            ``subset_path`` here, the rest is consumed by paths2txt
    Raises:
        FileNotFoundError: raised by shutil.copy when an image has no
            ``.txt`` file with the same base name beside it
    '''
    sample_dict = paths2txt(args)
    for phase in ['train', 'val']:
        phase_root = os.path.join(args.data_path, phase)
        for sample in sample_dict[phase]:
            # Every entry carries the trailing newline added by paths2txt.
            src_image = sample.rstrip("\n")
            src_dir, filename = os.path.split(src_image)
            # splitext copes with extensions of any length (.jpg, .jpeg, ...),
            # unlike slicing off a fixed number of characters.
            stem, ext = os.path.splitext(filename)

            # Rebuild the label sub-path relative to the phase folder instead
            # of assuming a fixed number of '/' separated components.
            rel_dir = os.path.relpath(src_dir, phase_root)
            copy_dir = os.path.join(args.subset_path, phase, rel_dir)
            os.makedirs(copy_dir, exist_ok=True)

            for suffix in (ext, '.txt'):
                shutil.copy(os.path.join(src_dir, stem + suffix),
                            os.path.join(copy_dir, stem + suffix))
if __name__ == "__main__":
    # Script entry point: parse the command line, build the subset and
    # report how long it took.

    def _str2bool(value):
        # argparse's type=bool turns every non-empty string (even "False")
        # into True, so parse the usual spellings explicitly.
        text = str(value).strip().lower()
        if text in ('1', 'true', 't', 'yes', 'y'):
            return True
        if text in ('0', 'false', 'f', 'no', 'n'):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(description='文件处理')
    # NOTE: data_path must be given without a trailing '/'
    parser.add_argument('--data_path', type=str, help='文件路径', default=r'../data/ChineseFoodNet-208')
    parser.add_argument('--postfix', type=str, help='要操作的文件后缀名', default=('.jpg','.jpeg'))
    parser.add_argument('--txt_path', type=str, help='path to save the files to', default=r'../data/ChineseFoodNet-208/')
    parser.add_argument('--subset_path', type=str, help='path to save the subset to', default=r'../data/CFN-208_subset400')
    # The help text used to be a copy of the --postfix help.
    parser.add_argument('--num_samples', type=int, help='number of samples per class', default=400)
    parser.add_argument('--write', type=_str2bool, help='write to file', default=False)
    # ``args`` stays a module-level name on purpose: other functions in this
    # file may read it as a global.
    args = parser.parse_args()

    start = time.time()
    new_subset(args)
    print("Finished!")
    print(f"Operation completed in {time.time() - start}")