Skip to content

Commit

Permalink
Merge pull request #337 from MannLabs/fix-incorrect-mobility-outputac…
Browse files Browse the repository at this point in the history
…cumulator

Fix incorrect mobility outputaccumulator
  • Loading branch information
GeorgWa authored Sep 11, 2024
2 parents 8a62937 + a8597e4 commit 3867112
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 12 deletions.
7 changes: 3 additions & 4 deletions alphadia/outputaccumulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,14 @@ def parse_output_folder(
psm_df["raw_name"] = foldername

# remove decoy precursors
psm_df = psm_df[psm_df["decoy"] == 0]
# assert that decoy is int
psm_df["decoy"] = psm_df["decoy"].astype(int)
psm_df = psm_df[psm_df["decoy"] == 0].reset_index(drop=True)

self._precursor_df = pd.DataFrame()
for col in psm_df.columns:
self._precursor_df[col] = psm_df[col]

self._precursor_df["decoy"] = self._precursor_df["decoy"].astype(int)
self._precursor_df = psm_df[psm_df["decoy"] == 0].reset_index(drop=True)

# self._precursor_df.set_index('precursor_idx', inplace=True)
# Change the data type of the mods column to string
self._precursor_df["mods"] = self._precursor_df["mods"].astype(str)
Expand Down
19 changes: 11 additions & 8 deletions tests/unit_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def mock_precursor_df(
A mock precursor dataframe
"""

precursor_idx = np.arange(n_precursor)
precursor_mz = np.random.rand(n_precursor) * 2000 + 500
precursor_charge = np.random.choice([2, 3], size=n_precursor)

Expand All @@ -61,18 +60,18 @@ def mock_precursor_df(
for __ in range(6):
sequence += chr(np.random.randint(65, 91))
sequences.append(sequence)
return pd.DataFrame(

df = pd.DataFrame(
{
"precursor_idx": precursor_idx,
"decoy": decoy,
"mz_library": precursor_mz,
"rt_library": random_rt,
"mobility_library": random_mobility,
"mz_observed": precursor_mz,
"rt_observed": random_rt,
"mobility_observed": random_mobility,
"mz_calibrated": precursor_mz,
"rt_calibrated": random_rt,
"mz_observed": precursor_mz + np.random.rand(n_precursor) * 0.1,
"rt_observed": random_rt + np.random.rand(n_precursor) * 0.1,
"mobility_observed": random_mobility + np.random.rand(n_precursor) * 0.1,
"mz_calibrated": precursor_mz + np.random.rand(n_precursor) * 0.1,
"rt_calibrated": random_rt + np.random.rand(n_precursor) * 0.1,
"charge": precursor_charge,
"proteins": proteins,
"genes": genes,
Expand All @@ -85,6 +84,10 @@ def mock_precursor_df(
}
)

df = df.sample(frac=1).reset_index(drop=True)
df["precursor_idx"] = np.arange(len(df))
return df


def mock_fragment_df(n_fragments: int = 10, n_precursor: int = 20):
"""Create a mock fragment dataframe as it's found as the individual search outputs
Expand Down
36 changes: 36 additions & 0 deletions tests/unit_tests/test_outputaccumulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,39 @@ def test_keep_top_constraint():
), f"{len(built_lib.precursor_df[built_lib.precursor_df['precursor_idx'] == precursor_idx])} != {keep_top}"

shutil.rmtree(temp_folder)


def test_default_column_assignment():
    """
    Test that the rt, mobility and mz columns of the transfer library are
    assigned from the highest-priority source column that is present:

        col = col_observed    if col_observed is in columns
        col = col_calibrated  if col_observed is not in columns
        col = col_library     if neither col_observed nor col_calibrated is in columns
    """
    # Given:
    config, temp_folder, raw_folders, psm_dfs, fragment_dfs = prepare_input_data()

    # Everything after the input data exists runs under try/finally so the
    # temporary folder is removed even when an assertion fails.
    try:
        keep_top = 2
        config["transfer_library"]["top_k_samples"] = keep_top

        # When:
        output = outputtransform.SearchPlanOutput(config, temp_folder)
        _ = output.build_transfer_library(raw_folders, save=True)
        built_lib = SpecLibBase()
        built_lib.load_hdf(
            os.path.join(temp_folder, f"{output.TRANSFER_OUTPUT}.hdf"),
            load_mod_seq=True,
        )

        # Then: The columns rt, mobility, mz should be correctly assigned
        precursor_df = built_lib.precursor_df
        for col in ["rt", "mobility", "mz"]:
            # Pick the expected source column in priority order; fall back to
            # the library column when neither preferred column is present.
            if f"{col}_observed" in precursor_df.columns:
                source_col = f"{col}_observed"
            elif f"{col}_calibrated" in precursor_df.columns:
                source_col = f"{col}_calibrated"
            else:
                source_col = f"{col}_library"

            assert precursor_df[col].equals(
                precursor_df[source_col]
            ), f"{col} != {source_col}"
    finally:
        # ignore_errors keeps a cleanup problem from masking a test failure.
        shutil.rmtree(temp_folder, ignore_errors=True)

0 comments on commit 3867112

Please sign in to comment.