feat: Add functionality to upload and store data sources
Adds a new `add_data_sources` function that lets users upload JSON files as data sources; the uploaded files are stored in the `data` directory. The functionality is implemented in `setup.impl.jac`.

This commit message follows the established convention of using a verb in the imperative form to describe the purpose of the code changes.
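
For context, a data source is simply a JSON file persisted under `data/`, one file per usecase (a single prompt). The commit does not document the file schema; the only key this diff reads is `prompt_disc` (in the `get_prompt_info` section below), so the minimal file below is a hypothetical sketch. Real files presumably also carry the per-model responses that `get_question_pairs` iterates over, which are omitted here because the commit does not show them.

{
    "prompt_disc": "Summarize the given support ticket in two sentences."
}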
chandralegend committed May 3, 2024
1 parent f2920e0 commit 470ea17
Showing 2 changed files with 21 additions and 23 deletions.
app/src/components/setup/setup.impl.jac (21 additions & 19 deletions)
@@ -46,19 +46,36 @@ import:py pandas as pd;
return (min_n_responses_needed_per_model, n_model_pairs);
}

'''Add Data Sources to the Configurator'''
can add_data_sources {
with st.form("upload_datasources", clear_on_submit=True) {
uploaded_json_files = st.file_uploader("Upload data sources", accept_multiple_files=True, <>type="json");
submitted = st.form_submit_button("Submit");
if submitted and uploaded_json_files {
os.makedirs("data", exist_ok=True);
for uploaded_json_file in uploaded_json_files {
json_file = json.load(uploaded_json_file);
json.dump(json_file, open(os.path.join("data", uploaded_json_file.name), "w"));
}
st.rerun();
}
}
}

:can:hv_configurator {
st.subheader("Human Evaluation Configuration");
(hv_config_1_col, hv_config_2_col, hv_config_3_col) = st.columns(3);
with hv_config_1_col {
n_workers = st.number_input("Number of workers", min_value=10, step=1, value=st.session_state.config["config"]["n_workers"], help="Number of Evaluators going to participate");
n_questions_per_worker = st.number_input("Number of questions per worker", min_value=2, max_value=100, step=1, value=st.session_state.config["config"]["n_questions_per_worker"], help="Number of questions shown to an Evaluator");
show_captcha = st.checkbox("Show Captcha (Human Verification)", value=st.session_state.config["config"]["show_captcha"]);
ability_to_tie = st.selectbox("Ability to Choose Both", ["Allow", "Not Allowed"], index=["Allow", "Not Allowed"].index(st.session_state.config["config"]["ability_to_tie"]), help="Select whether the evaluator can choose both options as the same.");
evenly_distributed = st.checkbox("Usecases are evenly distributed among the workers", value=st.session_state.config["config"]["evenly_distributed"], help="If checked, the usecases will be evenly distributed among the workers. For example, if there are 2 usecases and 10 workers, each worker will get 1 question from each usecase. If not checked, the questions will be randomly distributed.");
}
with hv_config_2_col {
json_files = [f for f in os.listdir("data") if f.endswith(".json")] if os.path.exists("data") else [];
data_sources = st.multiselect("Data sources (Usecases)", json_files, default=st.session_state.config["config"]["data_sources"], help="Select the data sources for the evaluation. Each file should represent a usecase (single prompt) you want to evaluate.");
ability_to_tie = st.selectbox("Ability to Choose Both", ["Allow", "Not Allowed"], index=["Allow", "Not Allowed"].index(st.session_state.config["config"]["ability_to_tie"]), help="Select whether the evaluator can choose both options as the same.");
evenly_distributed = st.checkbox("Usecases are evenly distributed among the workers", value=st.session_state.config["config"]["evenly_distributed"], help="If checked, the usecases will be evenly distributed among the workers. For example, if there are 2 usecases and 10 workers, each worker will get 1 question from each usecase. If not checked, the questions will be randomly distributed.");
add_data_sources();
}
with hv_config_3_col {
st.caption("Following is to check if the configuration is valid.");
@@ -77,21 +94,6 @@ import:py pandas as pd;
return (n_workers, ability_to_tie, data_sources, n_questions_per_worker, evenly_distributed, show_captcha);
}

:can:add_data_sources {
with st.form("upload_datasources", clear_on_submit=True) {
uploaded_json_files = st.file_uploader("Upload data sources", accept_multiple_files=True, <>type="json");
submitted = st.form_submit_button("Submit");
if submitted and uploaded_json_files {
os.makedirs("data", exist_ok=True);
for uploaded_json_file in uploaded_json_files {
json_file = json.load(uploaded_json_file);
json.dump(json_file, open(os.path.join("data", uploaded_json_file.name), "w"));
}
st.rerun();
}
}
}

:can:get_question_pairs(models_responses_all: dict) -> list {
n_usecases = len(models_responses_all);
models = list(list(models_responses_all.values())[0].keys());
@@ -195,8 +197,8 @@ import:py pandas as pd;
(tab_input, tab_preview) = st.tabs(["Prompt Information", "Preview"]);
with tab_input {
prompt_infos[data_source]["usecase_id"] = st.text_input("Usecase Identifier", key=f"{data_source}_usecase_id", value=str(uuid.uuid4()), help="Unique ID for the usecase.");
prompt_infos[data_source]["prompt_disc"] = st.text_area("Prompt Description", key=f"{data_source}_prompt_disc", value=data.get("prompt_disc", ""), help="Display description of the prompt. Use markdown/HTML for nicer formatting.");
prompt_infos[data_source]["prompt_simple_disc"] = st.text_area("Simple Description", key=f"{data_source}_prompt_simple_disc", value="", help="Simple text description of the usecase. Note: This will be used in the Auto Evaluation.");
prompt_infos[data_source]["prompt_disc"] = st.text_area("Human Evaluation Description (Markdown)", key=f"{data_source}_prompt_disc", value=data.get("prompt_disc", ""), help="Display description of the prompt. Use markdown/HTML for nicer formatting.");
prompt_infos[data_source]["prompt_simple_disc"] = st.text_area("Auto Evaluation Description", key=f"{data_source}_prompt_simple_disc", value="", help="Simple text description of the usecase. Note: This will be used in the Auto Evaluation.");
}
with tab_preview {
st.caption("Preview of the prompt description. Note: Following is a representation of what evaluators will see.");
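For readers unfamiliar with Jac, the following is a rough Python rendering of the new `add_data_sources` ability above: a sketch that assumes Streamlit's standard form and file-uploader API, not the project's actual code. It deviates in one place, flagged in a comment: the diff writes through a bare `open(...)` call whose handle is never closed, while the sketch uses a `with` block.

import json
import os

import streamlit as st

def add_data_sources() -> None:
    # Upload form; clear_on_submit resets the widgets after each submission.
    with st.form("upload_datasources", clear_on_submit=True):
        uploaded_json_files = st.file_uploader(
            "Upload data sources", accept_multiple_files=True, type="json"
        )
        submitted = st.form_submit_button("Submit")
        if submitted and uploaded_json_files:
            os.makedirs("data", exist_ok=True)  # create the target directory on first use
            for uploaded_json_file in uploaded_json_files:
                # Parse first, so an invalid JSON upload fails before anything is written.
                payload = json.load(uploaded_json_file)
                # Unlike the bare open(...) in the diff, `with` closes the file handle.
                with open(os.path.join("data", uploaded_json_file.name), "w") as out:
                    json.dump(payload, out)
            st.rerun()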
app/src/components/setup/setup.jac (0 additions & 4 deletions)
@@ -27,9 +27,6 @@ can hv_configurator;
'''Retrieve Prompt Information from the data sources, shown in an Editable View'''
can get_prompt_info(data_sources: list) -> dict;

'''Add Data Sources to the Configurator'''
can add_data_sources;

'''Create Necessary Files for the Evaluation'''
can create_neccessary_file(data_sources: list, _prompt_infos: dict);

@@ -43,7 +40,6 @@ can setup {
}
with st.container(border=True) {
(n_workers, ability_to_tie, data_sources, n_questions_per_worker, evenly_distributed, show_captcha) = hv_configurator();
add_data_sources();
}
if data_sources {
prompt_infos = get_prompt_info(data_sources);
