-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
62 lines (53 loc) · 1.93 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import pandas as pd
class DataPrep:
    """
    Load name-variant lookup tables from text files.

    On construction, ``alias_dict`` is built from ``variant.txt`` and
    ``nickname_dict`` from ``nickname.txt`` (both resolved relative to the
    current working directory). Each table maps a lower-cased name to the
    list of other names that appeared on the same line(s) of its file.
    """

    def __init__(self):
        self.alias_dict = self.prepare_alias_dict()
        self.nickname_dict = self.prepare_nickname_dict()

    @staticmethod
    def _load_name_groups(path):
        """
        Build a ``name -> [other names in the same group]`` dict from a file.

        The file is parsed by pandas with a single space as the column
        separator and no header row; only the first column is used, and that
        field is lower-cased and split on tab characters to obtain one group
        of names per row.
        NOTE(review): this presumably expects tab-separated names that contain
        no spaces -- anything after the first space on a line is ignored.
        Confirm against the real data files.

        :param path: path of the text file to read
        :return: dict mapping each name to a list of the names it shared a
                 row with; groups from several rows are concatenated in file
                 order and repeated entries are kept
        :raises: whatever ``pandas.read_csv`` raises (missing file, empty
                 file, rows with inconsistent field counts, ...)
        """
        # dtype=str / keep_default_na=False keep every cell a plain string, so
        # tokens such as "NA" or all-digit names do not turn into NaN/ints and
        # break the .lower() call below.
        frame = pd.read_csv(
            path, sep=" ", header=None, dtype=str, keep_default_na=False
        )
        groups = {}
        # Walk the rows of the first column. The row count must not be taken
        # from DataFrame.size: that is rows * columns and over-counts (leading
        # to an IndexError) as soon as a line contains a space.
        for raw in frame[0]:
            if not raw:
                # The line started with a space, so its first field is empty.
                continue
            names = raw.lower().split('\t')
            for name in names:
                others = list(names)
                others.remove(name)  # drops only the first occurrence
                groups.setdefault(name, []).extend(others)
        return groups

    def prepare_alias_dict(self, path='variant.txt'):
        """
        Prepare the alias name dict.

        :param path: file to read; defaults to ``variant.txt``
        :return: dict mapping each lower-cased name to the list of the other
                 names found on the same row(s)
        """
        return self._load_name_groups(path)

    def prepare_nickname_dict(self, path='nickname.txt'):
        """
        Prepare the nickname dict.

        :param path: file to read; defaults to ``nickname.txt``
        :return: dict mapping each lower-cased name to the list of the other
                 names found on the same row(s)
        """
        return self._load_name_groups(path)