.gitattributes CHANGED
@@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- *.whl filter=lfs diff=lfs merge=lfs -text
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
Dockerfile CHANGED
@@ -1,10 +1,7 @@
1
- # 使用 Python 3.11 slim 镜像作为基础镜像
2
- FROM python:3.11-slim
3
 
4
- # 设置工作目录
5
  WORKDIR /app
6
 
7
- # 安装系统依赖
8
  RUN apt-get update && apt-get install -y \
9
  build-essential \
10
  curl \
@@ -12,27 +9,13 @@ RUN apt-get update && apt-get install -y \
12
  git \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
- # 复制依赖文件
16
  COPY requirements.txt ./
 
17
 
18
- # 安装 Python 依赖
19
- RUN pip install --no-cache-dir -r requirements.txt
20
 
21
- # 复制应用代码
22
- COPY app/ ./app/
23
- COPY utils/ ./utils/
24
- COPY app.py ./
25
-
26
- # 暴露 Streamlit 默认端口
27
  EXPOSE 8501
28
 
29
- # 健康检查
30
- HEALTHCHECK CMD curl --fail http://localhost:8501 || exit 1
31
-
32
- # 设置环境变量
33
- ENV PYTHONPATH=/app
34
- ENV STREAMLIT_SERVER_PORT=8501
35
- ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
36
 
37
- # 启动命令
38
- ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
+ FROM python:3.9-slim
 
2
 
 
3
  WORKDIR /app
4
 
 
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
 
9
  git \
10
  && rm -rf /var/lib/apt/lists/*
11
 
 
12
  COPY requirements.txt ./
13
+ COPY src/ ./src/
14
 
15
+ RUN pip3 install -r requirements.txt
 
16
 
 
 
 
 
 
 
17
  EXPOSE 8501
18
 
19
+ HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
 
 
 
 
 
20
 
21
+ ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
README.md CHANGED
@@ -1,12 +1,19 @@
1
  ---
2
- title: RTEB
3
- emoji: 📊
4
- colorFrom: yellow
5
- colorTo: gray
6
- sdk: streamlit
7
- sdk_version: 1.44.1
8
- app_file: app.py
 
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
1
  ---
2
+ title: Streamlit Rteb
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: docker
7
+ app_port: 8501
8
+ tags:
9
+ - streamlit
10
  pinned: false
11
+ short_description: Streamlit template space
12
  ---
13
 
14
+ # Welcome to Streamlit!
15
+
16
+ Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
+
18
+ If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
+ forums](https://discuss.streamlit.io).
app.py DELETED
@@ -1,46 +0,0 @@
1
- import os
2
-
3
- import streamlit as st
4
-
5
- from st_pages import get_nav_from_toml, add_page_title
6
-
7
- from app.backend.app_init_func import LI_CSS, init_leaderboard, init_pages
8
- from app.backend.data_engine import DataEngine
9
-
10
- # init global data engine
11
- data_engine = DataEngine()
12
-
13
- st.session_state["data_engine"] = data_engine
14
- st.set_page_config(layout="wide")
15
-
16
- # init leaderboard and pages
17
- leaderboard_change, page_change = init_leaderboard()
18
-
19
- init_pages(leaderboard_change, page_change)
20
-
21
- # load page tree
22
- nav = get_nav_from_toml(
23
- "app/ui/pages_sections.toml"
24
- )
25
-
26
- # Add custom CSS
27
- css = "\n".join(LI_CSS)
28
- st.markdown(f"""
29
- <style>
30
-
31
- div[data-testid="stToolbar"] {{visibility: hidden; height: 0px;}}
32
-
33
- footer {{visibility: hidden;}}
34
- </style>
35
-
36
- <style>
37
- {css}
38
- </style>
39
- """
40
- , unsafe_allow_html=True)
41
-
42
- pg = st.navigation(nav)
43
-
44
- # add_page_title(pg)
45
-
46
- pg.run()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/__init__.py DELETED
@@ -1 +0,0 @@
1
-
 
 
app/backend/__init__.py DELETED
@@ -1 +0,0 @@
1
-
 
 
app/backend/app_init_func.py DELETED
@@ -1,84 +0,0 @@
1
- import streamlit as st
2
- import os
3
-
4
- from app.backend.constant import LEADERBOARD_ICON_MAP
5
-
6
- LEADERBOARD_MAP = {}
7
- LI_CSS = []
8
- PAGE_SECTIONS = []
9
-
10
-
11
- def init_leaderboard():
12
- data_engine = st.session_state["data_engine"]
13
- leaderboard_map = {}
14
- page_sections = []
15
- li_css = []
16
- sort_id = 0
17
- leaderboard_change = False
18
- page_change = False
19
-
20
- for dataset in data_engine.datasets:
21
- sort_id += 1
22
- leaderboard = dataset["leaderboard"]
23
- name = dataset["name"]
24
-
25
- leaderboard_section = f"{leaderboard.capitalize()} Leaderboard"
26
- if leaderboard_section not in leaderboard_map:
27
- leaderboard_map[leaderboard_section] = []
28
- if name.lower() == leaderboard.lower():
29
- leaderboard_map[leaderboard_section].append((name, 0))
30
- else:
31
- leaderboard_map[leaderboard_section].append((name, sort_id))
32
- li_css.append(f"""
33
- ul[data-testid="stSidebarNavItems"] li:nth-child({sort_id}) {{
34
- text-indent: 2rem;
35
- }}
36
- """)
37
- page_name = leaderboard_section if name.lower() == leaderboard.lower() else name.capitalize()
38
- page_sections.append(f"""
39
- [[pages]]
40
- path = "app/ui/pages/{name}.py"
41
- name = "{page_name}"
42
- icon = "{LEADERBOARD_ICON_MAP.get(page_name, "")}"
43
- """)
44
-
45
- # ensure leaderboard is first
46
- for k, v in leaderboard_map.items():
47
- v.sort(key=lambda x: x[1])
48
-
49
- if leaderboard_map != LEADERBOARD_MAP:
50
- LEADERBOARD_MAP.update(leaderboard_map)
51
- leaderboard_change = True
52
- if page_sections != PAGE_SECTIONS:
53
- PAGE_SECTIONS.clear()
54
- PAGE_SECTIONS.extend(page_sections)
55
- page_change = True
56
- if li_css != LI_CSS:
57
- LI_CSS.clear()
58
- LI_CSS.extend(li_css)
59
-
60
- return leaderboard_change, page_change
61
-
62
-
63
- def init_pages(leaderboard_change, page_change):
64
- # init pages
65
- if leaderboard_change:
66
- with open("app/ui/pages/data_page.py", "r", encoding="utf-8") as f:
67
- data_page = f.read()
68
- for leaderboard, group_names in LEADERBOARD_MAP.items():
69
-
70
- for group_name in group_names:
71
- path = os.path.join("app/ui/pages", f"{group_name[0]}.py")
72
- with open(path, "w", encoding="utf-8") as f:
73
- f.write(data_page.replace("$group_name$", group_name[0])
74
- )
75
- if page_change:
76
- with open("app/ui/pages_sections.toml", "w", encoding="utf-8") as f:
77
- f.write("\n".join(PAGE_SECTIONS))
78
-
79
-
80
- if __name__ == '__main__':
81
- init_leaderboard()
82
- init_pages()
83
- print("\n".join(PAGE_SECTIONS))
84
- print("\n".join(LI_CSS))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/backend/constant.py DELETED
@@ -1,90 +0,0 @@
1
- from enum import Enum
2
-
3
-
4
- class Navigation(Enum):
5
- TEXT_LEADERBOARD = "Text Leaderboard"
6
- MULTIMODAL_LEADERBOARD = "Multimodal Leaderboard"
7
-
8
-
9
- class TaskCategory(Enum):
10
- LAW = "Law"
11
- CODE = "Code"
12
- CONVERSATIONAL = "Conversational"
13
- TECH = "Tech"
14
- LONG_CONTEXT = "Long-context"
15
- MULTILINGUAL = "Multilingual"
16
-
17
-
18
- class ModelProvider(Enum):
19
- OPENAI = "OpenAI"
20
- VOYAGEAI = "VoyageAI"
21
- COHERE = "Cohere"
22
- OTHERS = "Others"
23
-
24
-
25
- class EvaluationMetric(Enum):
26
- NDCG_1 = "NDCG@1"
27
- NDCG_3 = "NDCG@3"
28
- NDCG_5 = "NDCG@5"
29
- NDCG_10 = "NDCG@10"
30
- NDCG_20 = "NDCG@20"
31
- NDCG_50 = "NDCG@50"
32
- NDCG_100 = "NDCG@100"
33
- RECALL_1 = "RECALL@1"
34
- RECALL_3 = "RECALL@3"
35
- RECALL_5 = "RECALL@5"
36
- RECALL_10 = "RECALL@10"
37
- RECALL_20 = "RECALL@20"
38
- RECALL_50 = "RECALL@50"
39
- RECALL_100 = "RECALL@100"
40
- PRECISION_1 = "PRECISION@1"
41
- PRECISION_3 = "PRECISION@3"
42
- PRECISION_5 = "PRECISION@5"
43
- PRECISION_10 = "PRECISION@10"
44
- PRECISION_20 = "PRECISION@20"
45
- PRECISION_50 = "PRECISION@50"
46
- PRECISION_100 = "PRECISION@100"
47
-
48
-
49
- class EmbdDtype(Enum):
50
- ALL = "all"
51
- FLOAT_32 = "float32"
52
- INT_8 = "int8"
53
- BINARY = "binary"
54
-
55
-
56
- class EmbdDim(Enum):
57
- OP1 = "<=1k"
58
- OP2 = "1k-2k"
59
- OP3 = "2k-5k"
60
- OP4 = ">=5k"
61
-
62
-
63
- class Similarity(Enum):
64
- ALL = "all"
65
- COSINE = "cosine"
66
- DOT = "dot"
67
- EUCLIDEAN = "euclidean"
68
-
69
-
70
- LEADERBOARD_ICON_MAP = {
71
- "Text Leaderboard": "📚",
72
- "Law": "⚖️",
73
- "Multilingual": "🌎",
74
- "German": "🇩🇪",
75
- "Code": "💻",
76
- "Tech": "🛠️",
77
- "Legal": "📜",
78
- "English": "🇬🇧",
79
- "Healthcare": "🏥",
80
- "Finance": "💰",
81
- "French": "🇫🇷",
82
- "Japanese": "🇯🇵",
83
-
84
- }
85
-
86
- USERNAME = "embedding-benchmark"
87
- SPACENAME = "RTEB"
88
-
89
- # https://{UserName}-{SpaceName}.hf.space/
90
- BASE_URL = f"https://{USERNAME}-{SPACENAME}.hf.space/"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/backend/data_engine.py DELETED
@@ -1,179 +0,0 @@
1
- """
2
- Data service provider
3
- """
4
- import json
5
- from typing import List
6
-
7
- import pandas as pd
8
-
9
- from utils.cache_decorator import cache_df_with_custom_key, cache_dict_with_custom_key
10
- from utils.http_utils import get
11
-
12
- COLUMNS = ['model_name',
13
- 'embd_dtype', 'embd_dim', 'num_params', 'max_tokens', 'similarity',
14
- 'query_instruct', 'corpus_instruct',
15
-
16
- ]
17
- COLUMNS_TYPES = ["markdown",
18
- 'str', 'str', 'number', 'number', 'str',
19
- 'str', 'str',
20
-
21
- ]
22
-
23
- BRANCH = 'main'
24
- GIT_URL = f"https://raw.githubusercontent.com/embedding-benchmark/rteb/refs/heads/{BRANCH}/results/"
25
- DATASET_URL = f"{GIT_URL}datasets.json"
26
- MODEL_URL = f"{GIT_URL}models.json"
27
- RESULT_URL = f"{GIT_URL}results.json"
28
-
29
-
30
- class DataEngine:
31
-
32
- def __init__(self):
33
- self.df = self.init_dataframe()
34
-
35
- @property
36
- @cache_dict_with_custom_key("models")
37
- def models(self):
38
- """
39
- Get models data
40
- """
41
- res = get(MODEL_URL)
42
- if res.status_code == 200:
43
- return res.json()
44
- return {}
45
-
46
- @property
47
- @cache_dict_with_custom_key("datasets")
48
- def datasets(self):
49
- """
50
- Get tasks data
51
- """
52
- res = get(DATASET_URL)
53
- if res.status_code == 200:
54
- return res.json()
55
- return {}
56
-
57
- @property
58
- @cache_dict_with_custom_key("results")
59
- def results(self):
60
- """
61
- Get results data
62
- """
63
- res = get(RESULT_URL)
64
- if res.status_code == 200:
65
- return res.json()
66
- return {}
67
-
68
- def init_dataframe(self):
69
- """
70
- Initialize DataFrame
71
- """
72
- d = {"hello": [123], "world": [456]}
73
- return pd.DataFrame(d)
74
-
75
- @cache_df_with_custom_key("json_result")
76
- def jsons_to_df(self):
77
-
78
- results_list = self.results
79
- df_results_list = []
80
- for result_dict in results_list:
81
- dataset_name = result_dict["dataset_name"]
82
- df_result_row = pd.DataFrame(result_dict["results"])
83
- df_result_row["dataset_name"] = dataset_name
84
- df_results_list.append(df_result_row)
85
- df_result = pd.concat(df_results_list)
86
-
87
- df_result = df_result[["model_name", "dataset_name", "ndcg_at_10", "embd_dim", "embd_dtype"]]
88
-
89
- df_result["ndcg_at_10"] = (df_result["ndcg_at_10"] * 100).round(2)
90
-
91
- df_datasets_list = []
92
- for item in self.datasets:
93
- dataset_names = item["datasets"]
94
- df_dataset_row = pd.DataFrame(
95
- {
96
- "group_name": [item["name"] for _ in range(len(dataset_names))],
97
- "dataset_name": dataset_names,
98
- "leaderboard": [item["leaderboard"] for _ in range(len(dataset_names))]
99
- }
100
- )
101
- df_datasets_list.append(df_dataset_row)
102
- df_dataset = pd.concat(df_datasets_list).drop_duplicates()
103
-
104
- models_list = self.models
105
-
106
- df_model = pd.DataFrame(models_list)
107
-
108
- # Replace None values in num_params with "Unknown"
109
- if 'num_params' in df_model.columns:
110
- df_model['num_params'] = df_model['num_params'].fillna("Unknown")
111
-
112
- # Replace blank/None values in vendor with "Open source"
113
- if 'vendor' in df_model.columns:
114
- df_model['vendor'] = df_model['vendor'].fillna("Open source")
115
- df_model['vendor'] = df_model['vendor'].replace('', "Open source")
116
- # Also handle whitespace-only strings
117
- df_model['vendor'] = df_model['vendor'].apply(lambda x: "Open source" if isinstance(x, str) and x.strip() == '' else x)
118
-
119
- # Create mapping for model names/aliases
120
- if 'alias' in df_model.columns:
121
- # Create a lookup table for alias to model_name mapping
122
- alias_mapping = df_model[df_model['alias'].notna()].set_index('alias')['model_name'].to_dict()
123
-
124
- # Add rows for aliases to enable joining
125
- alias_rows = []
126
- for _, row in df_model[df_model['alias'].notna()].iterrows():
127
- alias_row = row.copy()
128
- alias_row['model_name'] = row['alias']
129
- alias_rows.append(alias_row)
130
-
131
- if alias_rows:
132
- df_model_extended = pd.concat([df_model, pd.DataFrame(alias_rows)], ignore_index=True)
133
- else:
134
- df_model_extended = df_model
135
- else:
136
- df_model_extended = df_model
137
-
138
- df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
139
-
140
- # set dataset default value to 0
141
- df = df.pivot(index=["model_name", "embd_dim", "embd_dtype", "group_name"], columns="dataset_name",
142
- values=["ndcg_at_10"]).fillna(0).stack(level=1).reset_index()
143
- df = pd.merge(df, df_dataset, on=["group_name","dataset_name"], how="inner")
144
-
145
- # dataset_num_map = {}
146
- # grouped_dataset_count = df.groupby(["group_name"]).agg({
147
- # "dataset_name": "nunique"
148
- # }).reset_index()
149
- #
150
- # for _, row in grouped_dataset_count.iterrows():
151
- # dataset_num_map[row["group_name"]] = row["dataset_name"]
152
-
153
- grouped_model = df.groupby(["model_name", "group_name", "embd_dim", "embd_dtype"]).agg({
154
- "ndcg_at_10": "mean",
155
- }).reset_index()
156
-
157
- pivot = grouped_model.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="group_name",
158
- values=["ndcg_at_10"]).round(2).fillna(0)
159
-
160
- # Rename columns
161
- pivot.columns = list(
162
- map(lambda x: f"{x[1].capitalize()} Average" if x[1] != 'text' else f"Average", pivot.columns))
163
-
164
- pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10").fillna(0)
165
-
166
- df = pd.merge(df_model_extended, pivot, on=["model_name", "embd_dim", "embd_dtype"])
167
- df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
168
-
169
- if df.empty:
170
- return pd.DataFrame(columns=COLUMNS + ["reference"])
171
- return df
172
-
173
- def filter_df(self, group_name: str):
174
- """
175
- filter_by_providers
176
- """
177
- df = self.jsons_to_df()
178
-
179
- return df[df["group_name"] == group_name][COLUMNS][:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/backend/data_page.py DELETED
@@ -1,442 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # @Date : 2025/2/5 16:26
3
- # @Author : q275343119
4
- # @File : data_page.py
5
- # @Description:
6
- import io
7
-
8
- from st_aggrid import AgGrid, JsCode, ColumnsAutoSizeMode
9
-
10
- import streamlit as st
11
-
12
- from app.backend.data_engine import DataEngine
13
- from app.backend.multi_header_util import get_header_options
14
- from utils.st_copy_to_clipboard import st_copy_to_clipboard
15
- from streamlit_theme import st_theme
16
-
17
- from app.backend.app_init_func import LEADERBOARD_MAP
18
- from app.backend.constant import LEADERBOARD_ICON_MAP, BASE_URL
19
- from app.backend.json_util import compress_msgpack, decompress_msgpack
20
-
21
- COLUMNS = ['model_name', 'vendor',
22
- 'embd_dtype', 'embd_dim', 'num_params', 'max_tokens', 'similarity',
23
- 'query_instruct', 'corpus_instruct', 'reference'
24
-
25
- ]
26
- HEADER_STYLE = {'fontSize': '18px'}
27
- CELL_STYLE = {'fontSize': '18px'}
28
-
29
-
30
- def is_section(group_name):
31
- for k, v in LEADERBOARD_MAP.items():
32
- leaderboard_name = v[0][0]
33
-
34
- if group_name == leaderboard_name:
35
- return True
36
- return False
37
-
38
-
39
- def get_closed_dataset():
40
- data_engine = st.session_state.get("data_engine", DataEngine())
41
- closed_list = []
42
- results = data_engine.results
43
- for result in results:
44
- if result.get("is_closed"):
45
- closed_list.append(result.get("dataset_name"))
46
- return closed_list
47
-
48
-
49
- def convert_df_to_csv(df):
50
- output = io.StringIO()
51
- df.to_csv(output, index=False)
52
- return output.getvalue()
53
-
54
-
55
- def get_column_state():
56
- """
57
- get column state from url
58
- """
59
- query_params = st.query_params.get("grid_state", None)
60
- sider_bar_hidden = st.query_params.get("sider_bar_hidden", "False")
61
- table_only = st.query_params.get("table_only", "False")
62
- if query_params:
63
- grid_state = decompress_msgpack(query_params)
64
- st.session_state.grid_state = grid_state
65
- if sider_bar_hidden.upper() == 'FALSE':
66
- st.session_state.sider_bar_hidden = False
67
- if table_only.upper() == 'FALSE':
68
- st.session_state.table_only = False
69
- return None
70
-
71
-
72
- def sidebar_css():
73
- """
74
-
75
- :return:
76
- """
77
- if st.session_state.get("sider_bar_hidden"):
78
- st.markdown("""
79
- <style>
80
- [data-testid="stSidebar"] {
81
- display: none !important;
82
- }
83
- [data-testid="stSidebarNav"] {
84
- display: none !important;
85
- }
86
-
87
- [data-testid="stBaseButton-headerNoPadding"] {
88
- display: none !important;
89
- }
90
-
91
- h1#retrieval-embedding-benchmark-rteb {
92
- text-align: center;
93
- }
94
-
95
- </style>
96
- """, unsafe_allow_html=True)
97
-
98
-
99
- def table_only_css():
100
- if st.session_state.get("table_only"):
101
- st.markdown("""
102
- <style>
103
-
104
- [data-testid="stMainBlockContainer"] {
105
- padding-top: 0px;
106
- padding-left: 0px;
107
- padding-bottom: 0px;
108
- padding-right: 0px;
109
- }
110
-
111
- [data-testid="stHeader"] {
112
- height: 0px;
113
- }
114
-
115
- [data-testid="stApp"] {
116
- height: 456px;
117
- }
118
-
119
- .st-emotion-cache-1dp5vir {
120
-
121
- height: 0px;
122
-
123
- }
124
- """, unsafe_allow_html=True)
125
-
126
-
127
- def table_area(group_name, grid_state, data_engine=None, df=None):
128
- """
129
- table_area
130
- :param group_name:
131
- :param grid_state:
132
- :param data_engine:
133
- :param df:
134
- :return:
135
- """
136
- table_only_css()
137
-
138
- if data_engine is None:
139
- data_engine = st.session_state.get("data_engine", DataEngine())
140
- if df is None:
141
- df = data_engine.jsons_to_df().copy()
142
-
143
- # get columns
144
- column_list = []
145
- avg_column = None
146
- if is_section(group_name):
147
- avg_columns = []
148
- for column in df.columns:
149
-
150
- if column.startswith("Average"):
151
- avg_columns.insert(0, column)
152
- continue
153
- if "Average" in column:
154
- avg_columns.append(column)
155
- continue
156
- avg_column = avg_columns[0]
157
- column_list.extend(avg_columns)
158
- else:
159
- for column in df.columns:
160
-
161
- if column.startswith(group_name.capitalize() + " "):
162
- avg_column = column
163
-
164
- column_list.append(avg_column)
165
-
166
- dataset_list = []
167
-
168
- for dataset_dict in data_engine.datasets:
169
- if dataset_dict["name"] == group_name:
170
- dataset_list = dataset_dict["datasets"]
171
- if not is_section(group_name):
172
- column_list.extend(dataset_list)
173
- closed_list = get_closed_dataset()
174
- close_avg_list = list(set(dataset_list) & set(closed_list))
175
- df["Closed average"] = df[close_avg_list].mean(axis=1).round(2)
176
- column_list.append("Closed average")
177
-
178
- open_avg_list = list(set(dataset_list) - set(closed_list))
179
- df["Open average"] = df[open_avg_list].mean(axis=1).round(2)
180
- column_list.append("Open average")
181
-
182
- df = df[COLUMNS + column_list].sort_values(by=avg_column, ascending=False)
183
-
184
- # rename avg column name
185
- if not is_section(group_name):
186
- new_column = avg_column.replace(group_name.capitalize(), "").strip()
187
- df.rename(columns={avg_column: new_column}, inplace=True)
188
- column_list.remove(avg_column)
189
- avg_column = new_column
190
-
191
- # setting column config - 优化缓存机制,减少不必要的session_state更新
192
- grid_options = st.session_state.get(f"{group_name}_grid_options")
193
- if grid_options is None:
194
- grid_options = get_header_options(column_list, avg_column, is_section(group_name))
195
- st.session_state[f"{group_name}_grid_options"] = grid_options
196
-
197
- grid_options["initialState"] = grid_state
198
-
199
- custom_css = {
200
- # Model Name Cell
201
- ".a-cell": {
202
- "display": "inline-block",
203
- # "white-space": "nowrap",
204
- # "overflow": "hidden",
205
- # "text-overflow": "ellipsis",
206
- "width": "100%",
207
- "min-width": "0"
208
- },
209
- # Header
210
- ".ag-header": {
211
- "overflow": "visible !important",
212
- },
213
- ".multi-line-header": {
214
- "text-overflow": "clip",
215
- "overflow": "visible",
216
- "white-space": "nowrap",
217
- "height": "auto",
218
- "font-family": 'Arial',
219
- "font-size": "14px",
220
- "font-weight": "bold",
221
- "padding": "10px",
222
- "text-align": "left",
223
- },
224
- # Custom header and cell styles to replace headerStyle and cellStyle
225
- ".custom-header-style": {
226
- "text-overflow": "clip",
227
- "overflow": "visible",
228
- "white-space": "normal",
229
- "height": "auto",
230
- "font-family": 'Arial',
231
- "font-size": "14px",
232
- "font-weight": "bold",
233
- "padding": "10px",
234
- "text-align": "left",
235
- # "width": "100%"
236
- },
237
- ".custom-cell-style": {
238
- "font-size": "14px",
239
- "color": "inherit",
240
- },
241
- # Filter Options and Input
242
- ".ag-theme-streamlit .ag-popup": {
243
- "font-family": 'Arial',
244
- "font-size": "14px",
245
-
246
- }
247
- , ".ag-picker-field-display": {
248
- "font-family": 'Arial',
249
- "font-size": "14px",
250
-
251
- },
252
- ".ag-input-field-input .ag-text-field-input": {
253
- "font-family": 'Arial',
254
- "font-size": "14px",
255
-
256
- }
257
- # , ".link-header-component": {
258
- # "overflow": "visible !important",
259
- # "min-width": "auto !important",
260
- # }
261
- #
262
- # , ".link-header-component > span ": {
263
- # "overflow": "visible !important",
264
- # "white-space": "nowrap !important",
265
- # "text-overflow": "unset !important",
266
- # "flex": "unset !important",
267
- # },
268
- # ,".custom-link-header-style": {
269
- #
270
- # "display": "inline-block",
271
- # }
272
- , ".ag-header-cell-text": {
273
- "overflow": "visible !important",
274
- "width": "fit-content"
275
-
276
- }
277
- , ".ag-header-cell-label": {
278
- "overflow": "visible !important",
279
- "width": "fit-content"
280
-
281
- }
282
- , "div.link-header-component > span": {
283
- "white-space": "pre-wrap !important"
284
- }
285
- }
286
-
287
- grid = AgGrid(
288
- df,
289
- enable_enterprise_modules=False,
290
- gridOptions=grid_options,
291
- allow_unsafe_jscode=True,
292
- columns_auto_size_mode=ColumnsAutoSizeMode.FIT_ALL_COLUMNS_TO_VIEW,
293
- theme="streamlit",
294
- custom_css=custom_css,
295
- update_on=["selectionChanged", "filterChanged"], # 减少WebSocket触发频率,只在重要变化时触发
296
- )
297
-
298
- return grid
299
-
300
-
301
- def main_page(group_name, grid_state):
302
- """
303
- main_page
304
- :param group_name:
305
- :param grid_state:
306
- :return:
307
- """
308
-
309
- # Add theme color and grid styles
310
- st.title("Retrieval Embedding Benchmark (RTEB)")
311
- st.markdown("""
312
- <style>
313
- :root {
314
- --theme-color: rgb(129, 150, 64);
315
- --theme-color-light: rgba(129, 150, 64, 0.2);
316
- }
317
-
318
- /* AG Grid specific overrides */
319
- .ag-theme-alpine {
320
- --ag-selected-row-background-color: var(--theme-color-light) !important;
321
- --ag-row-hover-color: var(--theme-color-light) !important;
322
- --ag-selected-tab-color: var(--theme-color) !important;
323
- --ag-range-selection-border-color: var(--theme-color) !important;
324
- --ag-range-selection-background-color: var(--theme-color-light) !important;
325
- }
326
-
327
- .ag-row-hover {
328
- background-color: var(--theme-color-light) !important;
329
- }
330
-
331
- .ag-row-selected {
332
- background-color: var(--theme-color-light) !important;
333
- }
334
-
335
- .ag-row-focus {
336
- background-color: var(--theme-color-light) !important;
337
- }
338
-
339
- .ag-cell-focus {
340
- border-color: var(--theme-color) !important;
341
- }
342
-
343
- /* Keep existing styles */
344
- .center-text {
345
- text-align: center;
346
- color: var(--theme-color);
347
- }
348
- .center-image {
349
- display: block;
350
- margin-left: auto;
351
- margin-right: auto;
352
- }
353
- h2 {
354
- color: var(--theme-color) !important;
355
- }
356
- .ag-header-cell {
357
- background-color: var(--theme-color) !important;
358
- color: white !important;
359
- }
360
- a {
361
- color: var(--theme-color) !important;
362
- }
363
- a:hover {
364
- color: rgba(129, 150, 64, 0.8) !important;
365
- }
366
- /* Download Button */
367
- button[data-testid="stBaseButton-secondary"] {
368
- float: right;
369
-
370
- }
371
- /* Toast On The Top*/
372
- div[data-testid="stToastContainer"] {
373
- position: fixed !important;
374
- z-index: 2147483647 !important;
375
- }
376
-
377
- </style>
378
-
379
- """, unsafe_allow_html=True)
380
-
381
- # logo
382
- # st.markdown('<img src="https://www.voyageai.com/logo.svg" class="center-image" width="200">', unsafe_allow_html=True)
383
- title = f'<h2 class="center-text">{LEADERBOARD_ICON_MAP.get(group_name.capitalize(), "")} {group_name.capitalize()}</h2>'
384
- if is_section(group_name):
385
- title = f'<h2 class="center-text">{LEADERBOARD_ICON_MAP.get(group_name.capitalize() + " Leaderboard", "")} {group_name.capitalize() + " Leaderboard"}</h2>'
386
- # title
387
- st.markdown(title, unsafe_allow_html=True)
388
-
389
- data_engine = st.session_state.get("data_engine", DataEngine())
390
-
391
- df = data_engine.jsons_to_df().copy()
392
-
393
- csv = convert_df_to_csv(df)
394
- file_name = f"{group_name.capitalize()} Leaderboard" if is_section(group_name) else group_name.capitalize()
395
-
396
- grid = table_area(group_name, grid_state, data_engine, df)
397
-
398
- @st.dialog("URL")
399
- def share_url():
400
- state = grid.grid_state
401
- if state:
402
- share_link = f'{BASE_URL.replace("_", "-")}{group_name}/?grid_state={compress_msgpack(state)}' if not is_section(
403
- group_name) else f'{BASE_URL.replace("_", "-")}?grid_state={compress_msgpack(state)}'
404
- else:
405
- share_link = f'{BASE_URL.replace("_", "-")}{group_name}'
406
- st.write(share_link)
407
- theme = st_theme()
408
- if theme:
409
- theme = theme.get("base")
410
- else:
411
- theme = "light"
412
- st_copy_to_clipboard(share_link, before_copy_label='📋Push to copy', after_copy_label='✅Text copied!',
413
- theme=theme)
414
-
415
- col1, col2 = st.columns([1, 1])
416
- with col1:
417
- st.download_button(
418
- label="Download CSV",
419
- data=csv,
420
- file_name=f"{file_name}.csv",
421
- mime="text/csv",
422
- icon=":material/download:",
423
- )
424
- with col2:
425
- share_btn = st.button("Share this page", icon=":material/share:")
426
-
427
- if share_btn:
428
- share_url()
429
-
430
-
431
- def render_page(group_name):
432
- grid_state = st.session_state.get("grid_state", {})
433
- st.session_state.sider_bar_hidden = True
434
- st.session_state.table_only = True
435
- get_column_state()
436
-
437
- sidebar_css()
438
-
439
- if st.session_state.get("table_only"):
440
- table_area(group_name, grid_state)
441
- else:
442
- main_page(group_name, grid_state)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/backend/json_util.py DELETED
@@ -1,15 +0,0 @@
1
- import msgpack
2
- import gzip
3
- import base64
4
-
5
-
6
- def compress_msgpack(data):
7
- packed = msgpack.packb(data)
8
- compressed = gzip.compress(packed)
9
- return base64.urlsafe_b64encode(compressed).decode('utf-8')
10
-
11
-
12
- def decompress_msgpack(compressed_str):
13
- compressed = base64.urlsafe_b64decode(compressed_str)
14
- unpacked = gzip.decompress(compressed)
15
- return msgpack.unpackb(unpacked, raw=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/backend/multi_header_util.py DELETED
@@ -1,286 +0,0 @@
1
- """
2
- handle multi_header options
3
- """
4
- from st_aggrid import JsCode
5
- from streamlit_theme import st_theme
6
-
7
- HEADER_STYLE = {'fontSize': '18px'}
8
- CELL_STYLE = {'fontSize': '18px'}
9
- LINK = ' https://huggingface.co/datasets/embedding-benchmark/'
10
-
11
-
12
- def get_dataset_url_name(field_name):
13
- """Convert field name to proper URL format for closed datasets"""
14
- # Handle field names like "ClosedDataset 2 (German Legal Sentences)"
15
- if field_name.startswith("ClosedDataset "):
16
- # Extract the number and format it as ClosedDataset_X
17
- if "(" in field_name:
18
- # Extract number from "ClosedDataset 2 (description)" -> "2"
19
- number_part = field_name.split("ClosedDataset ")[1].split(" ")[0]
20
- return f"ClosedDataset_{number_part}"
21
- else:
22
- # Handle cases where it might already be in the right format or no parentheses
23
- return field_name.replace(" ", "_")
24
-
25
- # Return original field_name for open datasets
26
- return field_name
27
-
28
-
29
- def get_closed_dataset_column_name(field_name: str):
30
- """Convert field name to tow-line for closed datasets"""
31
- if field_name.startswith("ClosedDataset ") and "(" in field_name:
32
- idx = field_name.find("(")
33
-
34
- return field_name[:idx] + '\r\n' + field_name[idx:]
35
-
36
- return field_name
37
-
38
-
39
- def mutil_header_options(column_list: list, avg_column: str, is_section: bool):
40
- """
41
- get mutil_header_options - 优化版本,减少组件实例化
42
- :param column_list:
43
- :param avg_column:
44
- :param is_section:
45
- :return:
46
- """
47
- if is_section:
48
- column_def_list = [
49
- {'headerName': column if "Average" not in column else column.replace("Average", "").strip().capitalize(),
50
- 'field': column,
51
- 'headerClass': 'custom-header-style',
52
- 'cellClass': 'custom-cell-style',
53
- 'headerTooltip': column if "Average" not in column else column.replace("Average",
54
- "").strip().capitalize()
55
- # 'suppressSizeToFit': True
56
- } for column in column_list if
57
- column not in (avg_column, "Closed average", "Open average")]
58
-
59
- return column_def_list
60
-
61
- mutil_column_list = [column for column in column_list if
62
- column not in (avg_column, "Closed average", "Open average")]
63
- close_group_list = list(filter(lambda x: x.startswith('_') or x.startswith("ClosedDataset "), mutil_column_list))
64
- open_group_list = list(
65
- filter(lambda x: not x.startswith('_') and not x.startswith("ClosedDataset "), mutil_column_list))
66
-
67
- close_group_def = {
68
- 'headerName': 'CLOSED DATASETS',
69
- 'width': 'fit-content',
70
- 'headerClass': 'custom-header-style',
71
- 'cellClass': 'custom-cell-style',
72
- 'autoHeaderHeight': True,
73
- 'children': [
74
- {'headerName': get_closed_dataset_column_name(column.replace('_', '')),
75
- 'field': column,
76
-
77
- "headerComponentParams": {
78
- "innerHeaderComponent": "linkHeaderComponent",
79
- "url": LINK + get_dataset_url_name(column),
80
- "headerName": get_closed_dataset_column_name(column.replace('_', ''))
81
- },
82
-
83
- 'headerClass': 'custom-header-style',
84
- 'cellClass': 'custom-cell-style',
85
- 'sortable': True,
86
- # 'width': 150,
87
- "suppressColumnVirtualisation": True,
88
-
89
- } for column in close_group_list
90
- ],
91
-
92
- }
93
- open_group_def = {
94
- 'headerName': 'OPEN DATASETS',
95
- 'width': 'fit-content',
96
- 'headerClass': 'custom-header-style',
97
- 'cellClass': 'custom-cell-style',
98
- 'autoHeaderHeight': True,
99
- 'children': [
100
- {'headerName': column,
101
- 'field': column,
102
- "headerComponentParams": {
103
- "innerHeaderComponent": "linkHeaderComponent",
104
- "url": LINK + get_dataset_url_name(column),
105
- "headerName": column
106
- },
107
- 'headerClass': 'custom-header-style',
108
- 'cellClass': 'custom-cell-style',
109
- 'sortable': True,
110
- # 'width': 150,
111
- "suppressColumnVirtualisation": True,
112
-
113
- } for column in open_group_list
114
- ],
115
-
116
- }
117
- return [close_group_def, open_group_def]
118
-
119
-
120
- def get_header_options(column_list: list, avg_column: str, is_section: bool):
121
- grid_options = {
122
- 'columnDefs': [
123
- {
124
- 'headerName': 'Model Name',
125
- 'field': 'model_name',
126
- 'pinned': 'left',
127
- 'sortable': False,
128
- 'headerClass': 'custom-header-style',
129
- 'cellClass': 'custom-cell-style',
130
- 'autoHeaderHeight': True,
131
- "tooltipValueGetter": JsCode(
132
- """
133
- function(p) {
134
- const value = p.value || "";
135
- const link = p.data && p.data.reference ? p.data.reference : "";
136
- return link ? `${value} (${link})` : value;
137
- }
138
- """
139
- ),
140
- "width": 250,
141
- 'cellRenderer': JsCode("""class CustomHTML {
142
- init(params) {
143
- const link = params.data.reference;
144
- this.eGui = document.createElement('div');
145
- this.eGui.innerHTML = link ?
146
- `<a href="${link}" class="a-cell" target="_blank">${params.value} </a>` :
147
- params.value;
148
- }
149
-
150
- getGui() {
151
- return this.eGui;
152
- }
153
- }"""),
154
- 'suppressSizeToFit': True,
155
- 'headerTooltip': 'Model Name',
156
-
157
- },
158
- {'headerName': "Vendor",
159
- 'field': 'vendor',
160
- 'headerClass': 'custom-header-style',
161
- 'cellClass': 'custom-cell-style',
162
- # 'suppressSizeToFit': True,
163
- 'headerTooltip': 'Vendor',
164
- },
165
- {'headerName': "Overall Score",
166
- 'field': avg_column,
167
- 'headerClass': 'custom-header-style',
168
- 'cellClass': 'custom-cell-style',
169
- # 'suppressSizeToFit': True,
170
- 'headerTooltip': 'Overall Score',
171
-
172
- },
173
-
174
- # Add Open average column definition
175
- {'headerName': 'Open Average',
176
- 'field': 'Open average',
177
- 'headerClass': 'custom-header-style',
178
- 'cellClass': 'custom-cell-style',
179
- # 'suppressSizeToFit': True,
180
- 'headerTooltip': 'Open Average',
181
-
182
- },
183
-
184
- {'headerName': 'Closed Average',
185
- 'field': 'Closed average',
186
- 'headerClass': 'custom-header-style',
187
- 'cellClass': 'custom-cell-style',
188
- # 'suppressSizeToFit': True,
189
- 'headerTooltip': 'Closed Average',
190
-
191
- },
192
-
193
- {
194
- 'headerName': 'Embd Dtype',
195
- 'field': 'embd_dtype',
196
- 'headerClass': 'custom-header-style',
197
- 'cellClass': 'custom-cell-style',
198
- # 'suppressSizeToFit': True,
199
- 'headerTooltip': 'Embd Dtype',
200
-
201
- },
202
- {
203
- 'headerName': 'Embd Dim',
204
- 'field': 'embd_dim',
205
- 'headerClass': 'custom-header-style',
206
- 'cellClass': 'custom-cell-style',
207
- # 'suppressSizeToFit': True,
208
- 'valueFormatter': JsCode(
209
- """
210
- function(params) {
211
- const num = params.value;
212
- if (typeof num !== "number") return num;
213
- if (Math.abs(num) >= 10000) {
214
- return num.toLocaleString("en-US");
215
- }
216
- return num;
217
- }
218
- """
219
- ),
220
- 'headerTooltip': 'Embd Dim',
221
-
222
- },
223
- {
224
- 'headerName': 'Number of Parameters',
225
- 'field': 'num_params',
226
- 'cellDataType': 'number',
227
- "colId": "num_params",
228
- 'headerClass': 'custom-header-style',
229
- 'cellClass': 'custom-cell-style',
230
- 'valueFormatter': JsCode(
231
- """function(params) {
232
- const num = params.value;
233
- if (num >= 1e9) return (num / 1e9).toFixed(2) + "B";
234
- if (num >= 1e6) return (num / 1e6).toFixed(2) + "M";
235
- if (num >= 1e3) return (num / 1e3).toFixed(2) + "K";
236
- return num;
237
- }"""
238
- ),
239
- "width": 120,
240
- # 'suppressSizeToFit': True,
241
- 'headerTooltip': 'Number of Parameters',
242
-
243
- },
244
- {
245
- 'headerName': 'Context Length',
246
- 'field': 'max_tokens',
247
- 'headerClass': 'custom-header-style',
248
- 'cellClass': 'custom-cell-style',
249
- # 'suppressSizeToFit': True,
250
- 'valueFormatter': JsCode(
251
- """
252
- function(params) {
253
- const num = params.value;
254
- if (typeof num !== "number") return num;
255
- if (Math.abs(num) >= 10000) {
256
- return num.toLocaleString("en-US");
257
- }
258
- return num;
259
- }
260
- """
261
- ),
262
- 'headerTooltip': 'Context Length',
263
-
264
- },
265
-
266
- *mutil_header_options(column_list, avg_column, is_section)
267
- ],
268
- 'defaultColDef': {
269
- 'filter': True,
270
- 'sortable': True,
271
- 'resizable': True,
272
- 'headerClass': "multi-line-header",
273
- 'autoHeaderHeight': True,
274
- 'width': 105
275
- },
276
-
277
- "autoSizeStrategy": {
278
- "type": 'fitCellContents',
279
- "colIds": [column for column in column_list if column not in (avg_column, "Closed average", "Open average")]
280
- },
281
- "tooltipShowDelay": 500,
282
- "suppressColumnVirtualisation": True
283
-
284
- }
285
-
286
- return grid_options
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/ui/__init__.py DELETED
File without changes
app/ui/pages/__init__.py DELETED
File without changes
app/ui/pages/data_page.py DELETED
@@ -1,3 +0,0 @@
1
- from app.backend.data_page import render_page
2
-
3
- render_page("$group_name$")
 
 
 
 
app/ui/pages_sections.toml DELETED
@@ -1,54 +0,0 @@
1
-
2
- [[pages]]
3
- path = "app/ui/pages/text.py"
4
- name = "Text Leaderboard"
5
- icon = "📚"
6
-
7
-
8
- [[pages]]
9
- path = "app/ui/pages/legal.py"
10
- name = "Legal"
11
- icon = "📜"
12
-
13
-
14
- [[pages]]
15
- path = "app/ui/pages/english.py"
16
- name = "English"
17
- icon = "🇬🇧"
18
-
19
-
20
- [[pages]]
21
- path = "app/ui/pages/code.py"
22
- name = "Code"
23
- icon = "💻"
24
-
25
-
26
- [[pages]]
27
- path = "app/ui/pages/healthcare.py"
28
- name = "Healthcare"
29
- icon = "🏥"
30
-
31
-
32
- [[pages]]
33
- path = "app/ui/pages/french.py"
34
- name = "French"
35
- icon = "🇫🇷"
36
-
37
-
38
- [[pages]]
39
- path = "app/ui/pages/finance.py"
40
- name = "Finance"
41
- icon = "💰"
42
-
43
-
44
- [[pages]]
45
- path = "app/ui/pages/german.py"
46
- name = "German"
47
- icon = "🇩🇪"
48
-
49
-
50
- [[pages]]
51
- path = "app/ui/pages/japanese.py"
52
- name = "Japanese"
53
- icon = "🇯🇵"
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,6 +1,3 @@
1
- streamlit==1.47.1
2
- st-pages==1.0.1
3
- msgpack==1.1.0
4
- zstandard==0.23.0
5
- st-theme==1.2.3
6
- https://github.com/embedding-benchmark/streamlit-aggrid/releases/download/v0.01/streamlit_aggrid-1.1.7-py3-none-any.whl
 
1
+ altair
2
+ pandas
3
+ streamlit
 
 
 
src/streamlit_app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import altair as alt
2
+ import numpy as np
3
+ import pandas as pd
4
+ import streamlit as st
5
+
6
+ """
7
+ # Welcome to Streamlit!
8
+
9
+ Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
+ If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
+ forums](https://discuss.streamlit.io).
12
+
13
+ In the meantime, below is an example of what you can do with just a few lines of code:
14
+ """
15
+
16
+ num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
+ num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
+
19
+ indices = np.linspace(0, 1, num_points)
20
+ theta = 2 * np.pi * num_turns * indices
21
+ radius = indices
22
+
23
+ x = radius * np.cos(theta)
24
+ y = radius * np.sin(theta)
25
+
26
+ df = pd.DataFrame({
27
+ "x": x,
28
+ "y": y,
29
+ "idx": indices,
30
+ "rand": np.random.randn(num_points),
31
+ })
32
+
33
+ st.altair_chart(alt.Chart(df, height=700, width=700)
34
+ .mark_point(filled=True)
35
+ .encode(
36
+ x=alt.X("x", axis=None),
37
+ y=alt.Y("y", axis=None),
38
+ color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
+ size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
+ ))
utils/__init__.py DELETED
File without changes
utils/cache_decorator.py DELETED
@@ -1,54 +0,0 @@
1
- import time
2
- from functools import wraps
3
- import pandas as pd
4
-
5
- CACHE = {}
6
- TTL = 3600
7
-
8
-
9
- def cache_df_with_custom_key(cache_key: str):
10
- def decorator(func):
11
- @wraps(func)
12
- def wrapper(*args, **kwargs):
13
- if cache_key in CACHE and CACHE[cache_key].get("expiry") - time.time() < TTL:
14
- return CACHE[cache_key]["data"]
15
-
16
- result: pd.DataFrame = func(*args, **kwargs)
17
- if result is not None and not result.empty:
18
- d = {"expiry": time.time(), "data": result}
19
- CACHE[cache_key] = d
20
- return result
21
-
22
- CACHE[cache_key]["expiry"] += TTL
23
- return CACHE[cache_key]["data"]
24
-
25
- return wrapper
26
-
27
- return decorator
28
-
29
-
30
- def cache_dict_with_custom_key(cache_key: str):
31
- def decorator(func):
32
- @wraps(func)
33
- def wrapper(*args, **kwargs):
34
- if cache_key in CACHE and time.time() - CACHE[cache_key].get("expiry") < TTL:
35
- return CACHE[cache_key]["data"]
36
-
37
- result: dict = func(*args, **kwargs)
38
- if result:
39
- d = {"expiry": time.time(), "data": result}
40
- CACHE[cache_key] = d
41
- return result
42
-
43
- CACHE[cache_key]["expiry"] += TTL
44
- return CACHE[cache_key]["data"]
45
-
46
- return wrapper
47
-
48
- return decorator
49
-
50
-
51
- if __name__ == '__main__':
52
- a = time.time()
53
- time.sleep(5)
54
- print(time.time() - a)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/http_utils.py DELETED
@@ -1,7 +0,0 @@
1
- import requests
2
-
3
-
4
- def get(url: str, params: str = None, verify: bool = False):
5
- return requests.get(url, params, verify=verify)
6
-
7
-
 
 
 
 
 
 
 
 
utils/st_copy_to_clipboard/__init__.py DELETED
@@ -1,66 +0,0 @@
1
- from pathlib import Path
2
- from typing import Optional
3
-
4
- import streamlit as st
5
- import streamlit.components.v1 as components
6
-
7
- # Tell streamlit that there is a component called streamlit_copy_to_clipboard,
8
- # and that the code to display that component is in the "frontend" folder
9
- frontend_dir = (Path(__file__).parent / "frontend").absolute()
10
- _component_func = components.declare_component(
11
- "streamlit_copy_to_clipboard", path=str(frontend_dir)
12
- )
13
-
14
-
15
- def st_copy_to_clipboard(
16
- text: str,
17
- before_copy_label: str = "📋",
18
- after_copy_label: str = "✅",
19
- show_text: bool = False,
20
- key: Optional[str] = None,
21
- theme: str = 'light', # default theme is 'light'
22
-
23
- ):
24
- """
25
- Streamlit component to copy text to clipboard.
26
-
27
- Parameters
28
- ----------
29
- text : str
30
- The text to be copied to the clipboard.
31
- before_copy_label : str
32
- Label of the button before text is copied.
33
- after_copy_label : str
34
- Label of the button after text is copied.
35
- show_text: bool
36
- If True, show text right before the button and make it clickable as well
37
- key : str or None
38
- An optional key that uniquely identifies the component.
39
- theme: str
40
- Set the current theme for the button.
41
- """
42
- component_value = _component_func(
43
- key=key,
44
- text=text,
45
- before_copy_label=before_copy_label,
46
- after_copy_label=after_copy_label,
47
- show_text=show_text,
48
- theme=theme,
49
- )
50
-
51
- return component_value
52
-
53
-
54
- def main():
55
- st.write("## Example")
56
- text = st.text_input("Enter text to copy to clipboard", value="Hello World")
57
- st_copy_to_clipboard(text)
58
- st_copy_to_clipboard(text, before_copy_label='📋Push to copy', after_copy_label='✅Text copied!')
59
- st_copy_to_clipboard(text, before_copy_label='Push to copy', after_copy_label='Text copied!', show_text=True)
60
- st_copy_to_clipboard(text, before_copy_label='Push to copy', after_copy_label='copied!', show_text=True, theme="dark")
61
-
62
-
63
-
64
-
65
- if __name__ == "__main__":
66
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/st_copy_to_clipboard/frontend/index.html DELETED
@@ -1,19 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <meta http-equiv="X-UA-Compatible" content="IE=edge" />
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
- <title>st-copy-to-clipboard</title>
8
- <script src="./streamlit-component-lib.js"></script>
9
- <script src="./main.js"></script>
10
- <link rel="stylesheet" href="./style.css" />
11
- </head>
12
-
13
- <body>
14
- <div id="root">
15
- <button id="text-element" class="st-copy-to-clipboard-btn"></button>
16
- <button id="copy-button" class="st-copy-to-clipboard-btn">📋</button>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/st_copy_to_clipboard/frontend/main.js DELETED
@@ -1,61 +0,0 @@
1
- // The `Streamlit` object exists because our html file includes
2
- // `streamlit-component-lib.js`.
3
- // If you get an error about "Streamlit" not being defined, that
4
- // means you're missing that file.
5
-
6
- function sendValue(value) {
7
- Streamlit.setComponentValue(value);
8
- }
9
-
10
- /**
11
- * The component's render function. This will be called immediately after
12
- * the component is initially loaded, and then again every time the
13
- * component gets new data from Python.
14
- */
15
- function onRender(event) {
16
- // Only run the render code the first time the component is loaded.
17
- if (!window.rendered) {
18
- const { text, before_copy_label, after_copy_label, show_text, theme } = event.detail.args;
19
-
20
- const container = document.querySelector('#container');
21
- const button = document.querySelector('#copy-button');
22
- const textElement = document.querySelector('#text-element');
23
-
24
- if (theme == 'dark') {
25
- button.style.border = '1px solid rgba(250, 250, 250, 0.2)';
26
- button.style.color = 'white';
27
- }
28
-
29
- button.textContent = before_copy_label; // Set initial label
30
-
31
- // Show text if show_text is true
32
- if (show_text) {
33
- textElement.textContent = text;
34
- textElement.style.display = 'inline';
35
- } else {
36
- textElement.style.display = 'none';
37
- }
38
-
39
- function copyToClipboard() {
40
- navigator.clipboard.writeText(text);
41
-
42
- button.textContent = after_copy_label; // Change label after copying
43
-
44
- setTimeout(() => {
45
- if (!button) return;
46
- button.textContent = before_copy_label; // Revert to original label after 1 second
47
- }, 1000);
48
- }
49
- button.addEventListener('click', copyToClipboard);
50
- textElement.addEventListener('click', copyToClipboard);
51
-
52
- window.rendered = true;
53
- }
54
- }
55
-
56
- // Render the component whenever python send a "render event"
57
- Streamlit.events.addEventListener(Streamlit.RENDER_EVENT, onRender);
58
- // Tell Streamlit that the component is ready to receive events
59
- Streamlit.setComponentReady();
60
- // Render with the correct height, if this is a fixed-height component
61
- Streamlit.setFrameHeight(100);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/st_copy_to_clipboard/frontend/streamlit-component-lib.js DELETED
@@ -1,36 +0,0 @@
1
- // Borrowed minimalistic Streamlit API from Thiago
2
- // https://discuss.streamlit.io/t/code-snippet-create-components-without-any-frontend-tooling-no-react-babel-webpack-etc/13064
3
- function sendMessageToStreamlitClient(type, data) {
4
- console.log(type, data);
5
- const outData = Object.assign(
6
- {
7
- isStreamlitMessage: true,
8
- type: type,
9
- },
10
- data
11
- );
12
- window.parent.postMessage(outData, '*');
13
- }
14
-
15
- const Streamlit = {
16
- setComponentReady: function () {
17
- sendMessageToStreamlitClient('streamlit:componentReady', { apiVersion: 1 });
18
- },
19
- setFrameHeight: function (height) {
20
- sendMessageToStreamlitClient('streamlit:setFrameHeight', { height: height });
21
- },
22
- setComponentValue: function (value) {
23
- sendMessageToStreamlitClient('streamlit:setComponentValue', { value: value });
24
- },
25
- RENDER_EVENT: 'streamlit:render',
26
- events: {
27
- addEventListener: function (type, callback) {
28
- window.addEventListener('message', function (event) {
29
- if (event.data.type === type) {
30
- event.detail = event.data;
31
- callback(event);
32
- }
33
- });
34
- },
35
- },
36
- };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/st_copy_to_clipboard/frontend/style.css DELETED
@@ -1,31 +0,0 @@
1
- .st-copy-to-clipboard-btn {
2
- display: inline-flex;
3
- -moz-box-align: center;
4
- align-items: center;
5
- -moz-box-pack: center;
6
- justify-content: center;
7
- font-weight: 400;
8
- padding: 0.25rem 0.75rem;
9
- border-radius: 0.5rem;
10
- min-height: 38.4px;
11
- margin: 0px;
12
- line-height: 1.6;
13
- color: inherit;
14
- width: auto;
15
- user-select: none;
16
- background-color: transparent; /* set bgcolor to transparent to adjust to any theme */
17
- border: 1px solid rgba(49, 51, 63, 0.2);
18
- cursor: pointer;
19
- float: right;
20
- }
21
-
22
- .st-copy-to-clipboard-btn:hover {
23
- border-color: rgb(255, 75, 75);
24
- color: rgb(255, 75, 75);
25
- }
26
-
27
- .st-copy-to-clipboard-btn:active {
28
- border-color: rgb(255, 75, 75);
29
- background-color: rgb(255, 75, 75);
30
- color: rgb(255, 255, 255);
31
- }