Spaces:

ludvigolsen
/

plot_confusion_matrix

Running

App Files Community

Ludvig committed on Jun 5, 2023

Commit

c38363b

1 Parent(s): e29363a

Adds sub col option

Browse files

Files changed (6) hide show

README.md +1 -3
app.py +33 -3
data.py +7 -1
plot.R +14 -0
text_sections.py +13 -3
utils.py +29 -0

README.md CHANGED Viewed

@@ -20,6 +20,4 @@ Streamlit application for plotting a confusion matrix.
 - Add option to change zero-tile background (e.g. to black for black backgrounds)
 - Add option to format total-count tile in sum tiles
 - Selectable templates (for 2,3,4,5 classes - one selects num classes and pick a color scheme and other common defaults)
-- Add extra column in `Upload counts` that replaces whichever value is the bottom value (normally counts). Requires changes to cvms.
-- Allow handling tick text - e.g. for long class names or many classes.
-- Enable class order reversal after cvms arrow bug is fixed

 - Add option to change zero-tile background (e.g. to black for black backgrounds)
 - Add option to format total-count tile in sum tiles
 - Selectable templates (for 2,3,4,5 classes - one selects num classes and pick a color scheme and other common defaults)
+- Allow handling tick text - e.g. for long class names or many classes.

app.py CHANGED Viewed

@@ -151,6 +151,11 @@ elif input_choice == "Upload counts":
             n_col = st.selectbox(
                 "Counts column", options=list(st.session_state["count_data"].columns)
             )
             if st.form_submit_button(label="Set columns"):
                 st.session_state["step"] = 2
@@ -240,6 +245,7 @@ elif input_choice == "Enter counts":
             st.session_state["count_data"] = pd.DataFrame(
                 all_pairs, columns=["Target", "Prediction"]
             )
             st.session_state["count_data"]["N"] = 0
             st.session_state["step"] = 1
@@ -247,7 +253,17 @@ elif input_choice == "Enter counts":
     if st.session_state["step"] >= 1:
         with st.form(key="enter_counts_form"):
             st.write(
-                "Fill in the counts by pressing each cell in the `N` column and inputting the counts."
             )
             new_counts = st.data_editor(
@@ -256,6 +272,12 @@ elif input_choice == "Enter counts":
                 column_config={
                     "Target": st.column_config.TextColumn(disabled=True),
                     "Prediction": st.column_config.TextColumn(disabled=True),
                     "N": st.column_config.NumberColumn(
                         disabled=False, min_value=0, step=1
                     ),
@@ -280,6 +302,8 @@ elif input_choice == "Enter counts":
         target_col = "Target"
         prediction_col = "Prediction"
         n_col = "N"
 if st.session_state["step"] >= 2:
     data_is_ready = False
@@ -302,7 +326,7 @@ if st.session_state["step"] >= 2:
             df[prediction_col] = clean_str_column(df[prediction_col])
             # Save to tmp directory to allow reading in R script
-            df.to_csv(data_store_path)
             # Extract unique classes
             st.session_state["classes"] = sorted(
@@ -316,7 +340,10 @@ if st.session_state["step"] >= 2:
                 st.write(f"{df.shape} (Showing first 5 rows)")
     else:
-        st.session_state["count_data"].to_csv(data_store_path)
         data_is_ready = True
     if data_is_ready:
@@ -365,6 +392,9 @@ if st.session_state["step"] >= 2:
                 f"{selected_classes_string}",
             ]
             if st.session_state["input_type"] == "counts":
                 # The input data are counts
                 plotting_args += ["--n_col", f"{n_col}", "--data_are_counts"]

             n_col = st.selectbox(
                 "Counts column", options=list(st.session_state["count_data"].columns)
             )
+            sub_col = st.selectbox(
+                "Sub column",
+                options=["--"] + list(st.session_state["count_data"].columns),
+                help="Optional! This column will replace the bottom text in the middle of the tiles.",
+            )
             if st.form_submit_button(label="Set columns"):
                 st.session_state["step"] = 2
             st.session_state["count_data"] = pd.DataFrame(
                 all_pairs, columns=["Target", "Prediction"]
             )
+            st.session_state["count_data"]["Sub"] = ""
             st.session_state["count_data"]["N"] = 0
             st.session_state["step"] = 1
     if st.session_state["step"] >= 1:
         with st.form(key="enter_counts_form"):
             st.write(
+                "Fill in the counts by pressing each cell in the `N` column and inputting the counts. "
+            )
+            st.markdown(
+                "(**Optional**) If you wish to specify the bottom text in the middle of the tiles, "
+                "you can fill in the `Sub` column.",
+                help="The `sub` column text replaces the bottom text (counts by default). "
+                "The design settings for the replaced element (e.g. counts) are used for this text instead.",
+            )
+            st.info(
+                "Note: Please click outside the cell before "
+                "pressing `Generate data` to register your change."
             )
             new_counts = st.data_editor(
                 column_config={
                     "Target": st.column_config.TextColumn(disabled=True),
                     "Prediction": st.column_config.TextColumn(disabled=True),
+                    "Sub": st.column_config.TextColumn(
+                        help="This text replaces the bottom text (in the middle of the tiles). "
+                        "By default, the counts are replaced. "
+                        "Note that the settings for this text are named "
+                        "by the text element it replaces (e.g. **Fonts**>>*Counts*)."
+                    ),
                     "N": st.column_config.NumberColumn(
                         disabled=False, min_value=0, step=1
                     ),
         target_col = "Target"
         prediction_col = "Prediction"
         n_col = "N"
+        sub_col = "Sub" if any(st.session_state["count_data"]["Sub"]) else None
 if st.session_state["step"] >= 2:
     data_is_ready = False
             df[prediction_col] = clean_str_column(df[prediction_col])
             # Save to tmp directory to allow reading in R script
+            df.to_csv(data_store_path, index=False)
             # Extract unique classes
             st.session_state["classes"] = sorted(
                 st.write(f"{df.shape} (Showing first 5 rows)")
     else:
+        count_data_clean = st.session_state["count_data"].copy()
+        if not any(count_data_clean["Sub"]):
+            del count_data_clean["Sub"]
+        count_data_clean.to_csv(data_store_path, index=False)
         data_is_ready = True
     if data_is_ready:
                 f"{selected_classes_string}",
             ]
+            if "sub_col" in locals() and sub_col is not None and sub_col != "--":
+                plotting_args += ["--sub_col", f"{sub_col}"]
             if st.session_state["input_type"] == "counts":
                 # The input data are counts
                 plotting_args += ["--n_col", f"{n_col}", "--data_are_counts"]

data.py CHANGED Viewed

@@ -57,7 +57,13 @@ class DownloadHeader:
     @staticmethod
     def header_and_data_download(
-        header, data, file_name, col_sizes=[9, 2], key=None, label="Download", help="Download data"
     ):
         col1, col2 = st.columns(col_sizes)
         with col1:

     @staticmethod
     def header_and_data_download(
+        header,
+        data,
+        file_name,
+        col_sizes=[9, 2],
+        key=None,
+        label="Download",
+        help="Download data",
     ):
         col1, col2 = st.columns(col_sizes)
         with col1:

plot.R CHANGED Viewed

@@ -36,6 +36,10 @@ option_list <- list(
         type = "character",
         help = "Count column (when `--data_are_counts`)."
     ),
     make_option(c("--classes"),
         type = "character",
         help = paste0(
@@ -82,6 +86,15 @@ if (!is.null(opt$n_col)) {
     n_col <- stringr::str_replace_all(n_col, " ", ".")
 }
 # Read and prepare data frame
 df <- tryCatch(
     {
@@ -282,6 +295,7 @@ confusion_matrix_plot <- tryCatch(
     {
         cvms::plot_confusion_matrix(
             confusion_matrix,
             class_order = classes,
             add_sums = design_settings$show_sums,
             add_counts = design_settings$show_counts,

         type = "character",
         help = "Count column (when `--data_are_counts`)."
     ),
+    make_option(c("--sub_col"),
+        type = "character",
+        help = "Sub column (when `--data_are_counts`)."
+    ),
     make_option(c("--classes"),
         type = "character",
         help = paste0(
     n_col <- stringr::str_replace_all(n_col, " ", ".")
 }
+sub_col <- NULL
+if (!is.null(opt$sub_col)) {
+    if (!data_are_counts) {
+        stop("`sub_col` can only be specified when data are counts.")
+    }
+    sub_col <- stringr::str_squish(opt$sub_col)
+    sub_col <- stringr::str_replace_all(sub_col, " ", ".")
+}
 # Read and prepare data frame
 df <- tryCatch(
     {
     {
         cvms::plot_confusion_matrix(
             confusion_matrix,
+            sub_col = sub_col,
             class_order = classes,
             add_sums = design_settings$show_sums,
             add_counts = design_settings$show_counts,

text_sections.py CHANGED Viewed

@@ -45,6 +45,12 @@ def get_example_counts():
         {
             "Target": ["cl1", "cl2", "cl1", "cl2"],
             "Prediction": ["cl1", "cl2", "cl2", "cl1"],
             "N": [12, 10, 3, 5],
         }
     )
@@ -149,12 +155,14 @@ def upload_counts_text():
             "2) A `predicted classes` column. \n\n"
             "3) A `combination count` column for the "
             "combination frequency of 1 and 2. \n\n"
             "Other columns are currently ignored. "
             "In the next step, you will be asked to select the names of these two columns. "
         )
     with col2:
         st.write("Example of such a file:")
-        st.write(get_example_counts())
 def upload_predictions_text():
@@ -171,7 +179,7 @@ def upload_predictions_text():
         )
     with col2:
         st.write("Example of such a file:")
-        st.write(get_example_data())
 def columns_text():
@@ -184,7 +192,9 @@ def columns_text():
 def design_text():
     st.subheader("Design your plot")
     st.write("This is where you customize the design of your confusion matrix plot.")
-    st.markdown("We suggest you go directly to `Generate plot` to see the starting point. Then go back and tweak to your liking!")
     st.markdown(
         "The *width* and *height* settings are usually necessary to adjust as they "
         "change the relative size of the elements. Try adjusting 100px at a "

         {
             "Target": ["cl1", "cl2", "cl1", "cl2"],
             "Prediction": ["cl1", "cl2", "cl2", "cl1"],
+            "Sub*": [
+                "(57/60)",
+                "(46/50)",
+                "(12/15)",
+                "(23/25)",
+            ],
             "N": [12, 10, 3, 5],
         }
     )
             "2) A `predicted classes` column. \n\n"
             "3) A `combination count` column for the "
             "combination frequency of 1 and 2. \n\n"
+            "4) (\\***Optionally**) a `sub` column with text "
+            "that replaces the bottom text in the middle of tiles. \n\n"
             "Other columns are currently ignored. "
             "In the next step, you will be asked to select the names of these two columns. "
         )
     with col2:
         st.write("Example of such a file:")
+        st.dataframe(get_example_counts(), hide_index=True)
 def upload_predictions_text():
         )
     with col2:
         st.write("Example of such a file:")
+        st.dataframe(get_example_data(), hide_index=True)
 def columns_text():
 def design_text():
     st.subheader("Design your plot")
     st.write("This is where you customize the design of your confusion matrix plot.")
+    st.markdown(
+        "We suggest you go directly to `Generate plot` to see the starting point. Then go back and tweak to your liking!"
+    )
     st.markdown(
         "The *width* and *height* settings are usually necessary to adjust as they "
         "change the relative size of the elements. Try adjusting 100px at a "

utils.py CHANGED Viewed

@@ -1,5 +1,13 @@
 import subprocess
 import re
 def call_subprocess(call_, message, return_output=False, encoding="UTF-8"):
@@ -8,6 +16,27 @@ def call_subprocess(call_, message, return_output=False, encoding="UTF-8"):
         try:
             out = subprocess.check_output(call_, shell=True, encoding=encoding)
         except subprocess.CalledProcessError as e:
             print(f"{message}: {call_}")
             raise e
         return out

 import subprocess
 import re
+import streamlit as st
+import json
+def show_error(msg, action):
+    st.error(
+        f"Failed to {action}:\n\n...{msg}\n\nPlease [report](https://github.com/LudvigOlsen/plot_confusion_matrix/issues) this issue."
+    )
 def call_subprocess(call_, message, return_output=False, encoding="UTF-8"):
         try:
             out = subprocess.check_output(call_, shell=True, encoding=encoding)
         except subprocess.CalledProcessError as e:
+            if "Failed to create plot from confusion matrix." in e.output:
+                msg = e.output.split("Failed to create plot from confusion matrix.")[-1]
+                show_error(msg=msg, action="plot confusion matrix")
+            elif "Failed to read design settings as a json file" in e.output:
+                msg = e.output.split("Failed to read design settings as a json file")[
+                    -1
+                ]
+                show_error(msg=msg, action="read design settings")
+            elif "Failed to read data from" in e.output:
+                msg = e.output.split("Failed to read data from")[-1]
+                show_error(msg=msg, action="read data")
+            elif "Failed to ggsave plot to:" in e.output:
+                msg = e.output.split("Failed to ggsave plot to:")[-1]
+                show_error(msg=msg, action="save plot")
+            else:
+                msg = e.output.split("\n\n")[-1]
+                st.error(
+                    f"Unknown type of error: {msg}.\n\n"
+                    "Please [report](https://github.com/LudvigOlsen/plot_confusion_matrix/issues) this issue."
+                )
+            print(e.output)
             print(f"{message}: {call_}")
             raise e
         return out