Skip to content

Commit

Permalink
revert group change
Browse files: browse the repository at this point in the history
  • Loading branch information
guipenedo committed Jul 15, 2024
1 parent 1687ba3 commit 71f94cf
Showing 1 changed file with 0 additions and 2 deletions.
2 changes: 0 additions & 2 deletions src/datatrove/pipeline/stats/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,6 @@ def run(self, data: DocumentsPipeline, rank: int = 0, world_size: int = 1) -> Do

with self.output_folder.open(f"{folder}/{STATS_MERGED_NAME}", "wt") as f:
group_name = Path(folder).parent.name
- if "__" in group_name:
- group_name = group_name.split("__")[0]
if group_name in self.top_k_config.top_k_groups:
top_k_keys = heapq.nlargest(self.top_k_config.top_k, stat, key=lambda x: stat.get(x).n)
stat = MetricStatsDict(init={s: stat.get(s) for s in top_k_keys})
Expand Down

0 comments on commit 71f94cf

Please sign in to comment.