-
Notifications
You must be signed in to change notification settings - Fork 1
/
filter_common_animals.py
62 lines (46 loc) · 2.06 KB
/
filter_common_animals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import click
import os
@click.command()
@click.option("--csv_path", help="path to the csv with species classification results")
@click.option(
"--common_to_filter", help="common name to filter (can use more than once)", multiple=True
)
@click.option("--threshold", help="threshold to filter common", type=float)
def main(csv_path, common_to_filter, threshold):
df = pd.read_csv(csv_path, names=["path", "folder", "query", "sciname", "common", "likelihood"])
paths_to_remove = filter_by_likelihood(df, "common_likelihood", common_to_filter, threshold)
p_len = len(paths_to_remove)
commons = " ".join(common_to_filter)
if p_len > 0:
print(f"Removing {p_len} paths with {commons}")
for path in paths_to_remove:
path = os.path.join(os.getcwd(), paths_to_remove[0][1:])
os.remove(path)
def sum_common_likelihood(df_group, common_names):
"""gets the likelihood count of a list of detected common names in an image.
Args:
df_group ([type]): detection rows for a single image. a dataframe subset
common_names ([type]): list of common names ["hare", "rabbit", "jackrabbit"] for example. or ["sheep"]
Returns:
[type]: [description]
"""
is_common = [False] * len(df_group)
for name in common_names:
is_common_r = df_group.loc[:, "common"].str.contains(name)
is_common = [a or b for a, b in zip(is_common, is_common_r)]
return df_group.likelihood[is_common].sum()
def filter_by_likelihood(df, cname, common_names, threshold):
"""Filters imgs by likelihood of common_names appearing.
Args:
cname ([type]): Name of the likelihood column
common_names ([type]): common names
threshold ([type]): likelihood threshold
Returns:
[type]: df with images without classes in common names.
"""
likelihood = df.groupby("path").apply(lambda x: sum_common_likelihood(x, common_names))
df = df.set_index("path")
return likelihood[likelihood > threshold].index.tolist()
if __name__ == "__main__":
main()