Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Debias #23

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion recstudio/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from recstudio.data.dataset import TripletDataset, SeqDataset, UserDataset, FullSeqDataset
from recstudio.data.dataset import TripletDataset, SeqDataset, UserDataset, FullSeqDataset, DICEDataset, UBPRDataset
from recstudio.data.advance_dataset import ALSDataset

import os
Expand Down
5 changes: 5 additions & 0 deletions recstudio/data/config/all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,9 @@ mapped_feat_field: [*u, *i]
network_feat_field: [[[source_id:token, target_id:token]], [[head_id:token, tail_id:token, relation_id:token], [*i, entity_id:token]]]
network_feat_header: [0, 0]

# interactions that are missing completely at random
mcar_feat_name: ~
mcar_feat_field: [*u, *i, *r, *t]
mcar_feat_header: ~

save_cache: False # whether to save processed dataset to cache.
2 changes: 1 addition & 1 deletion recstudio/data/config/amazon-beauty.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ field_separator: ","
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_threshold: 3.0
low_rating_thres: 3.0
max_seq_len: 50

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
Expand Down
2 changes: 1 addition & 1 deletion recstudio/data/config/amazon-books.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ field_separator: ","
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_threshold: ~
low_rating_thres: ~
max_seq_len: 20

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
Expand Down
2 changes: 1 addition & 1 deletion recstudio/data/config/amazon-electronics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ field_separator: ","
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_threshold: 3
low_rating_thres: 3
max_seq_len: 20

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
Expand Down
47 changes: 47 additions & 0 deletions recstudio/data/config/coat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
url: https://rec.ustc.edu.cn/share/b097d230-c7e1-11ed-997a-bba57c4b600a
user_id_field: &u user_id:token # `&u` defines a YAML anchor for this value; `*u` below is an alias that reuses it
item_id_field: &i item_id:token
rating_field: &r rating:float
time_field: ~
time_format: ~


inter_feat_name: coat.mnar
inter_feat_field: [*u, *i, *r]
inter_feat_header: 0

user_feat_name: [coat.user]
user_feat_field: [[*u, ugender(men):token, ugender(women):token, age(20-30):token, age(30-40):token, age(40-50):token, age(50-60):token, age(over 60):token, age(under 20):token, location(rural):token, location(suburban):token, location(urban):token, fashioninterest(moderately):token, fashioninterest(not at all):token, fashioninterest(very):token]]
user_feat_header: 0


item_feat_name: [coat.item]
item_feat_field: [[*i, igender(men):token, igender(women):token, jackettype(bomber):token, jackettype(cropped):token, jackettype(field):token, jackettype(fleece):token, jackettype(insulated):token, jackettype(motorcycle):token, jackettype(other):token, jackettype(packable):token, jackettype(parkas):token, jackettype(pea):token, jackettype(rain):token, jackettype(shells):token, jackettype(track):token, jackettype(trench):token, jackettype(vests):token, jackettype(waterproof):token, color(beige):token, color(black):token, color(blue):token, color(brown):token, color(gray):token, color(green):token, color(multi):token, color(navy):token, color(olive):token, color(other):token, color(pink):token, color(purple):token, color(red):token, onfrontpage(yes):token, onfrontpage(no):token]]
item_feat_header: 0


field_separator: "\t"
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_thres: 3.0
max_seq_len: 20

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
network_feat_name: ~
mapped_feat_field: ~
network_feat_field: ~
network_feat_header: ~

# interactions that are missing completely at random
mcar_feat_name: coat.mcar
mcar_feat_field: [*u, *i, *r]
mcar_feat_header: 0

# propensities of each (u, i) pair
# propensity_feat_name: coat.propensities
# propensity_feat_field: [*u, *i, propensity:float]
# propensity_feat_header: 0


save_cache: True # whether to save processed dataset to cache.
2 changes: 1 addition & 1 deletion recstudio/data/config/gowalla.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ seq_separator: " "
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_threshold: ~
low_rating_thres: ~
max_seq_len: 20

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
Expand Down
4 changes: 2 additions & 2 deletions recstudio/data/config/ml-100k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ item_feat_header: 0


field_separator: "\t"
min_user_inter: 0
min_item_inter: 0
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_thres: 3.0
max_seq_len: 20
Expand Down
2 changes: 1 addition & 1 deletion recstudio/data/config/ml-10m.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ field_separator: "::"
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_threshold: 3.0
low_rating_thres: 3.0
max_seq_len: 20

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
Expand Down
2 changes: 1 addition & 1 deletion recstudio/data/config/ml-20m.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ field_separator: ","
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_threshold: ~
low_rating_thres: ~
max_seq_len: 20

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
Expand Down
2 changes: 1 addition & 1 deletion recstudio/data/config/tmall.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ field_separator: "::"
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_threshold: ~
low_rating_thres: ~
max_seq_len: 50

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
Expand Down
42 changes: 42 additions & 0 deletions recstudio/data/config/yahoor3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
url: https://rec.ustc.edu.cn/share/e8e98ae0-c700-11ed-b3cf-cb390cf64bc2
user_id_field: &u user_id:token # `&u` defines a YAML anchor for this value; `*u` below is an alias that reuses it
item_id_field: &i item_id:token
rating_field: &r rating:float
time_field: ~
time_format: ~


inter_feat_name: yahooR3.inter
inter_feat_field: [*u, *i, *r]
inter_feat_header: 0

user_feat_name: ~
user_feat_field: ~
user_feat_header: ~


item_feat_name: ~
item_feat_field: ~
item_feat_header: ~


field_separator: ","
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_thres: 3.0
max_seq_len: 20

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
network_feat_name: ~
mapped_feat_field: ~
network_feat_field: ~
network_feat_header: ~

# interactions that are missing completely at random
mcar_feat_name: yahooR3.mcar
mcar_feat_field: [*u, *i, *r]
mcar_feat_header: 0


save_cache: True # whether to save processed dataset to cache.
2 changes: 1 addition & 1 deletion recstudio/data/config/yelp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ field_separator: ","
min_user_inter: 5
min_item_inter: 5
field_max_len: ~
low_rating_threshold: 3
low_rating_thres: 3
max_seq_len: 20

# network features, including social networks and knowledge graphs; the first two fields are remapped to the corresponding features
Expand Down
Loading