import streamlit as st
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import folium
from streamlit_folium import st_folium
from shapely.geometry import Point, box
import geopandas as gpd
import duckdb
import rasterio
from huggingface_hub import HfFileSystem
import shapely.wkb
from dotenv import load_dotenv
import os
from pathlib import Path

# Import extracted utilities (shared with app_local.py)
from utils import (
    FLAIR_COLORS,
    CLASS_LABELS,
    process_mosaic
)

# --- Cloud Configuration ---
load_dotenv()
# A second .env next to the sibling "DL_cologne_green" directory is loaded too, if present.
env_path = Path(__file__).parent.parent / "DL_cologne_green" / ".env"
if env_path.exists():
    load_dotenv(env_path)

HF_TOKEN = os.getenv("HF_TOKEN")
DATASET_ID = os.getenv("DATASET_ID", "Rahul-fix/cologne-green-data")
BASE_URL = f"hf://datasets/{DATASET_ID}"
# Passed to pandas/geopandas readers; None when no token is configured.
STORAGE_OPTS = {"token": HF_TOKEN} if HF_TOKEN else None

# Per-class area columns (area_0 .. area_18) selected from the stats table.
_AREA_COLUMNS_SQL = ", ".join(f"s.area_{i}" for i in range(19))


# --- DuckDB Connection ---
@st.cache_resource
def get_db_connection():
    """Create the shared in-memory DuckDB connection.

    Installs and loads the ``spatial`` and ``httpfs`` extensions and, when
    ``HF_TOKEN`` is set, registers an authenticated ``HfFileSystem`` on the
    connection.
    """
    con = duckdb.connect(database=":memory:")
    con.execute("INSTALL spatial; LOAD spatial;")
    con.execute("INSTALL httpfs; LOAD httpfs;")
    if HF_TOKEN:
        fs = HfFileSystem(token=HF_TOKEN)
        con.register_filesystem(fs)
    return con


con = get_db_connection()


# --- Cloud Data Loading Functions (mirror utils.py structure) ---
def safe_load_wkb(x):
    """Decode a WKB value into a shapely geometry; return None if it cannot be decoded."""
    try:
        return shapely.wkb.loads(bytes(x))
    except Exception:
        # Best effort: undecodable rows are dropped later by the callers.
        return None


def _geodataframe_in_wgs84(df):
    """Decode the WKB ``geometry`` column of *df* and return a GeoDataFrame in EPSG:4326.

    Rows whose geometry cannot be decoded are dropped.
    """
    df['geometry'] = df['geometry'].apply(safe_load_wkb)
    df = df.dropna(subset=['geometry'])
    gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")

    # CRS check (remote data may be in EPSG:25832): a minimum x above 180
    # cannot be a longitude, so relabel the frame and reproject it.
    if not gdf.empty and gdf.total_bounds[0] > 180:
        gdf = gdf.set_crs("EPSG:25832", allow_override=True).to_crs("EPSG:4326")
    return gdf


@st.cache_data(ttl=3600)
def load_quarters_with_stats():
    """Load Veedel boundaries with stats - Cloud version of utils.load_quarters_with_stats.

    Joins ``Stadtviertel.parquet`` with ``extended_stats.parquet`` on ``name``
    through DuckDB and adds a ``green_pct`` column.

    Returns:
        A GeoDataFrame in EPSG:4326. On any failure an error is shown with
        ``st.error`` and an empty GeoDataFrame is returned.
    """
    try:
        query = f"""
            SELECT
                v.name,
                ST_AsWKB(v.geometry) as geometry,
                COALESCE(s.green_area_m2, 0) as green_area_m2,
                COALESCE(s.ndvi_mean, 0) as ndvi_mean,
                v.Shape_Area,
                {_AREA_COLUMNS_SQL}
            FROM '{BASE_URL}/data/boundaries/Stadtviertel.parquet' v
            LEFT JOIN '{BASE_URL}/data/stats/extended_stats.parquet' s
                ON v.name = s.name
        """
        gdf = _geodataframe_in_wgs84(con.execute(query).fetchdf())

        # Calculate green_pct
        if 'green_area_m2' in gdf.columns and 'Shape_Area' in gdf.columns:
            gdf['green_pct'] = (gdf['green_area_m2'] / gdf['Shape_Area']) * 100
        else:
            gdf['green_pct'] = 0.0
        return gdf
    except Exception as e:
        st.error(f"Error loading quarters: {e}")
        return gpd.GeoDataFrame()


@st.cache_data(ttl=3600)
def load_boroughs():
    """Load borough boundaries - Cloud version of utils.load_boroughs.

    Returns:
        A GeoDataFrame in EPSG:4326 with ``name`` (from ``STB_NAME``) and
        ``geometry``. On any failure an error is shown with ``st.error`` and
        an empty GeoDataFrame is returned.
    """
    try:
        query = f"SELECT STB_NAME as name, ST_AsWKB(geometry) as geometry FROM '{BASE_URL}/data/boundaries/Stadtbezirke.parquet'"
        return _geodataframe_in_wgs84(con.execute(query).fetchdf())
    except Exception as e:
        st.error(f"Error loading boroughs: {e}")
        return gpd.GeoDataFrame()


@st.cache_data(ttl=3600)
def get_tile_to_veedel_mapping():
    """Get tile-to-Veedel mapping - Cloud version of utils.get_tile_to_veedel_mapping.

    Each row of ``cologne_tiles.csv`` becomes a 1000 x 1000 box anchored at its
    east/north origin (EPSG:25832) and is spatially joined against the quarter
    boundaries.

    Returns:
        Dict mapping quarter ``name`` to the list of intersecting ``Kachelname``
        values; an empty dict (plus a warning) if anything fails.
    """
    try:
        tiles_df = pd.read_csv(f"{BASE_URL}/data/metadata/cologne_tiles.csv", storage_options=STORAGE_OPTS)
        geometries = [
            box(east, north, east + 1000, north + 1000)
            for east, north in zip(
                tiles_df['Koordinatenursprung_East'],
                tiles_df['Koordinatenursprung_North'],
            )
        ]
        tiles_gdf = gpd.GeoDataFrame(tiles_df, geometry=geometries, crs="EPSG:25832")

        q_gdf = gpd.read_parquet(f"{BASE_URL}/data/boundaries/Stadtviertel.parquet", storage_options=STORAGE_OPTS)
        if q_gdf.crs != "EPSG:25832":
            q_gdf = q_gdf.to_crs("EPSG:25832")

        joined = gpd.sjoin(tiles_gdf, q_gdf, how="inner", predicate="intersects")
        return joined.groupby('name')['Kachelname'].apply(list).to_dict()
    except Exception as e:
        # Keep the best-effort contract (empty mapping) but do not hide the cause.
        st.warning(f"Could not build tile-to-Veedel mapping: {e}")
        return {}


@st.cache_data(ttl=3600)
def list_available_tiles():
    """List available tiles from cloud (processed, web_optimized, or raw).

    Returns:
        List of tile names (file stems with any ``_mask`` suffix removed);
        an empty list (plus a warning) if the listing fails.
    """
    # (glob pattern, suffix to strip from the file stem)
    patterns = [
        (f"datasets/{DATASET_ID}/data/processed/*_mask.tif", "_mask"),
        (f"datasets/{DATASET_ID}/data/web_optimized/*_mask.tif", "_mask"),
        # Also include raw tiles (for satellite view)
        (f"datasets/{DATASET_ID}/data/raw/*.jp2", ""),
    ]
    try:
        fs = HfFileSystem(token=HF_TOKEN)
        tiles = set()
        for pattern, strip_suffix in patterns:
            for f in fs.glob(pattern):
                stem = Path(f).stem
                tiles.add(stem.replace(strip_suffix, "") if strip_suffix else stem)
        return list(tiles)
    except Exception as e:
        st.warning(f"Could not list available tiles: {e}")
        return []


def get_mosaic_data_remote(tile_names, layer_type):
    """Load and mosaic tiles - Cloud version of utils.get_mosaic_data_local.

    For every tile the first readable file among web_optimized, processed and
    (for the "Satellite" layer only) raw is downloaded into memory and opened
    with rasterio; the opened datasets are handed to ``process_mosaic``.

    Args:
        tile_names: Iterable of tile names.
        layer_type: Layer label; selects the ``_mask`` / ``_ndvi`` / no suffix.

    Returns:
        Whatever ``process_mosaic`` returns, or ``(None, None)`` when no tile
        could be read or any step fails.
    """
    fs = HfFileSystem(token=HF_TOKEN)
    sources = []
    memfiles = []
    # The suffix depends only on the layer, so compute it once.
    suffix = "_mask" if "Land Cover" in layer_type else "_ndvi" if "NDVI" in layer_type else ""

    try:
        try:
            for tile in tile_names:
                paths = [
                    f"datasets/{DATASET_ID}/data/web_optimized/{tile}{suffix}.tif",
                    f"datasets/{DATASET_ID}/data/processed/{tile}{suffix}.tif",
                ]
                if layer_type == "Satellite":
                    paths.append(f"datasets/{DATASET_ID}/data/raw/{tile}.jp2")

                found_bytes = None
                for p in paths:
                    try:
                        with fs.open(p, "rb") as f:
                            found_bytes = f.read()
                        break
                    except Exception:
                        # Missing/unreadable candidate: try the next location.
                        continue

                if found_bytes:
                    m = rasterio.MemoryFile(found_bytes)
                    memfiles.append(m)
                    sources.append(m.open())

            if not sources:
                return None, None

            # Use shared processing logic
            return process_mosaic(sources, layer_type)
        finally:
            # Cleanup runs on success AND failure so datasets are never leaked.
            for s in sources:
                s.close()
            for m in memfiles:
                m.close()
    except Exception:
        return None, None
# 1. Page Configuration
# Sets the browser tab title and switches the page to the wide layout.
st.set_page_config(page_title="GreenCologne (Cloud)", layout="wide")
st.title("🌿 GreenCologne (Cloud Dashboard)")

# --- Sidebar: Dataset Info ---
# Three collapsed expanders with static Markdown: data sources, methodology, acknowledgments.
# NOTE(review): each Markdown literal below sits on a single line in this copy of the file.
# If line breaks were lost, headings and list items render as one paragraph — confirm
# against the original source.
with st.sidebar:
    st.header("ℹ️ About This Dataset")

    with st.expander("📡 Data Sources", expanded=False):
        st.markdown(""" **Satellite Imagery** [OpenNRW DOP10](https://www.bezreg-koeln.nrw.de/geobasis-nrw/produkte-und-dienste/luftbild-und-satellitenbildinformationen/aktuelle-luftbild-und-0) – 10cm resolution aerial photos (2022-2025) **Administrative Boundaries** [Offene Daten Köln](https://www.offenedaten-koeln.de/) – Stadtviertel & Stadtbezirke **Coverage** 840 tiles covering Cologne's 86 Veedels """)

    with st.expander("🤖 Model & Methodology", expanded=False):
        st.markdown(""" **Land Cover Classification** [FLAIR-Hub](https://huggingface.co/IGNF/FLAIR-HUB_LC-A_IR_swinbase-upernet) – Deep learning semantic segmentation trained on French aerial imagery, adapted for German urban landscapes. **19 Land Cover Classes** Buildings, deciduous trees, herbaceous vegetation, water, impervious surfaces, agricultural land, and more. **NDVI Calculation** Normalized Difference Vegetation Index computed from NIR and Red bands: `NDVI = (NIR - Red) / (NIR + Red)` **Green Area Detection** Classes 4 (Deciduous), 5 (Coniferous), 17 (Herbaceous), and 18 (Agricultural) are classified as green areas. """)

    with st.expander("🙏 Acknowledgments", expanded=False):
        st.markdown(""" - [CorrelAid](https://correlaid.org/) – Data-for-good community - [OpenNRW](https://www.opengeodata.nrw.de/) – Open geospatial data - [IGNF/FLAIR-Hub](https://huggingface.co/IGNF/FLAIR-HUB_LC-A_IR_swinbase-upernet) – Segmentation model - [Stadt Köln](https://www.stadt-koeln.de/) – Open administrative data """)

    # Warn when no Hugging Face token is configured.
    # NOTE(review): original indentation was lost; this warning is assumed to
    # belong inside the sidebar — confirm.
    if not HF_TOKEN:
        st.warning("⚠️ HF_TOKEN missing. Set in .env or Streamlit Secrets.")
# 2. Data Loading
gdf_quarters = load_quarters_with_stats()
gdf_boroughs = load_boroughs()
tile_mapping = get_tile_to_veedel_mapping()
available_tiles = list_available_tiles()

# 3. State Management
if 'selected_veedel' not in st.session_state:
    st.session_state['selected_veedel'] = "All"
if 'map_center' not in st.session_state:
    st.session_state['map_center'] = [50.9375, 6.9603]  # Cologne
if 'map_zoom' not in st.session_state:
    st.session_state['map_zoom'] = 11
if 'map_click_counter' not in st.session_state:
    st.session_state['map_click_counter'] = 0


# --- Helper Functions ---
def update_zoom_for_veedel(veedel_name):
    """Store the map centre and zoom for *veedel_name* in the session state.

    "All" resets to the Cologne overview; a known quarter centres the map on
    its centroid at zoom 14. Unknown names leave the state untouched.
    """
    if veedel_name == "All":
        st.session_state['map_center'] = [50.9375, 6.9603]
        st.session_state['map_zoom'] = 10
    elif gdf_quarters is not None and not gdf_quarters.empty:
        match = gdf_quarters[gdf_quarters['name'] == veedel_name]
        if not match.empty:
            centroid = match.geometry.centroid.iloc[0]
            st.session_state['map_center'] = [centroid.y, centroid.x]
            st.session_state['map_zoom'] = 14


def on_veedel_change():
    """Selectbox callback: copy the widget value into the app state and re-centre the map."""
    sel = st.session_state['selected_veedel_widget']
    st.session_state['selected_veedel'] = sel
    update_zoom_for_veedel(sel)


def _legend_scale_html(labels, gradient_colors=None, note=None):
    """Return HTML for a horizontal legend scale.

    Args:
        labels: Texts spread evenly from left to right.
        gradient_colors: Optional CSS colours for a gradient bar above the labels.
        note: Optional footnote rendered below the labels.

    The markup is emitted without leading whitespace because Markdown treats
    indented lines as a code block.
    """
    parts = []
    if gradient_colors:
        parts.append(
            "<div style='height:10px;border-radius:3px;"
            f"background:linear-gradient(to right, {', '.join(gradient_colors)});'></div>"
        )
    spans = "".join(f"<span>{text}</span>" for text in labels)
    parts.append(
        "<div style='display:flex;justify-content:space-between;font-size:0.8em;'>"
        f"{spans}</div>"
    )
    if note:
        parts.append(f"<div style='font-size:0.75em;'>{note}</div>")
    return "".join(parts)


# --- Layout ---
col_map, col_details = st.columns([0.65, 0.35], gap="medium")

with col_details:
    st.markdown("### GreenCologne Analysis")
    tab_opts, tab_stats = st.tabs(["🛠️ Options", "📊 Statistics"])

    veedel_list = ["All"] + sorted(gdf_quarters['name'].unique().tolist()) if gdf_quarters is not None and not gdf_quarters.empty else ["All"]

    # --- Tab 1: Options ---
    with tab_opts:
        # Sync Widget: a map click changes 'selected_veedel'; mirror it into the
        # widget key before the selectbox is instantiated.
        if 'selected_veedel_widget' in st.session_state and st.session_state['selected_veedel'] != st.session_state['selected_veedel_widget']:
            st.session_state['selected_veedel_widget'] = st.session_state['selected_veedel']

        selected_veedel = st.selectbox(
            "Select Quarter (Veedel/Stadtviertel):",
            veedel_list,
            key='selected_veedel_widget',
            on_change=on_veedel_change,
            index=veedel_list.index(st.session_state['selected_veedel']) if st.session_state['selected_veedel'] in veedel_list else 0
        )

        # Tile Logic - Only load tiles when a specific veedel is selected
        tiles_to_display = []
        if selected_veedel != "All":
            veedel_tiles = set(tile_mapping.get(selected_veedel, []))
            # Set intersection instead of repeated list membership tests.
            tiles_to_display = sorted(veedel_tiles & set(available_tiles))

        # Layer Selection
        layer_selection = st.radio(
            "Select Layer:",
            ["Satellite", "Land Cover", "NDVI"],
            index=2,
            horizontal=True
        )

        # Legends
        # NOTE(review): the legend markup was missing in the reviewed copy (labels
        # were fused and the class colour unused); the HTML below is a
        # reconstruction — compare with the original styling.
        st.markdown("#### Legends")
        st.markdown("**Veedel Health (Mean NDVI)**")
        # Same colormap the map uses to fill the quarters.
        veedel_ramp = [mcolors.to_hex(plt.get_cmap('RdYlGn')(v)) for v in (0.0, 0.5, 1.0)]
        st.markdown(
            _legend_scale_html(
                ["0.0 (Low)", "0.3", "0.6+ (High)"],
                gradient_colors=veedel_ramp,
                note="*Average vegetation index per district.",
            ),
            unsafe_allow_html=True,
        )

        if layer_selection == "Land Cover":
            st.markdown("**Land Cover Classes**")
            legend_items = []
            for cls_id, label in CLASS_LABELS.items():
                # Class 13 is left out of the legend (reason not visible here).
                if cls_id == 13:
                    continue
                c = FLAIR_COLORS[cls_id]
                legend_items.append(
                    "<div style='display:flex;align-items:center;margin-bottom:2px;'>"
                    "<span style='display:inline-block;width:12px;height:12px;margin-right:6px;"
                    f"background-color:rgb({c[0]},{c[1]},{c[2]});'></span>{label}</div>"
                )
            legend_html = "<div style='font-size:0.85em;'>" + "".join(legend_items)
            st.markdown(legend_html + "</div>", unsafe_allow_html=True)
        elif layer_selection == "NDVI":
            st.markdown("**Pixel Vegetation Index (NDVI)**")
            # NOTE(review): the colour ramp used by process_mosaic for NDVI pixels
            # is not visible here, so only the scale labels are rendered.
            st.markdown(
                _legend_scale_html(["-0.4 (Water)", "0.3", "1.0 (Dense)"]),
                unsafe_allow_html=True,
            )

    # --- Tab 2: Statistics ---
    with tab_stats:
        if gdf_quarters is not None and not gdf_quarters.empty:
            title = "Cologne (All Veedels) Stats"
            area_m2 = 0
            total_area_m2 = 1
            class_data_source = None

            if selected_veedel != "All":
                row = gdf_quarters[gdf_quarters['name'] == selected_veedel]
                if not row.empty:
                    title = f"{selected_veedel} Stats"
                    area_m2 = row['green_area_m2'].values[0] if 'green_area_m2' in row else 0
                    total_area_m2 = row['Shape_Area'].values[0] if 'Shape_Area' in row else 1
                    class_data_source = row
                else:
                    st.warning(f"No stats for {selected_veedel}")
            else:
                area_m2 = gdf_quarters['green_area_m2'].sum() if 'green_area_m2' in gdf_quarters else 0
                total_area_m2 = gdf_quarters['Shape_Area'].sum() if 'Shape_Area' in gdf_quarters else 1
                class_cols = [c for c in gdf_quarters.columns if str(c).startswith('area_')]
                if class_cols:
                    class_data_source = pd.DataFrame([{c: gdf_quarters[c].sum() for c in class_cols}])

            st.markdown(f"#### {title}")
            # Guard against a zero total area so the metric never shows inf/nan.
            coverage_pct = (area_m2 / total_area_m2) * 100 if total_area_m2 else 0.0
            c1, c2 = st.columns(2)
            c1.metric("Green Area", f"{(area_m2/10000):.2f} ha")
            c2.metric("Green Coverage", f"{coverage_pct:.1f}%")
            st.divider()

            class_cols = []
            if class_data_source is not None and not class_data_source.empty:
                class_cols = [c for c in class_data_source.columns if str(c).startswith('area_')]

            if class_cols:
                # One row per class: column name -> class id -> label and colour.
                class_data = class_data_source[class_cols].T.reset_index()
                class_data.columns = ['class_col', 'area_m2']
                class_data['class_id'] = pd.to_numeric(class_data['class_col'].str.replace('area_', '', regex=False), errors='coerce').fillna(0).astype(int)
                class_data['class_name'] = class_data['class_id'].map(CLASS_LABELS)
                class_data['color'] = class_data['class_id'].map(lambda x: f"rgba({FLAIR_COLORS[x][0]},{FLAIR_COLORS[x][1]},{FLAIR_COLORS[x][2]}, 1)")
                class_data = class_data.sort_values(by='area_m2', ascending=False)

                fig_pie = px.pie(
                    class_data,
                    names='class_name',
                    values='area_m2',
                    title="Land Cover Distribution",
                    color='class_name',
                    color_discrete_map=dict(zip(class_data['class_name'], class_data['color'])),
                    labels={'class_name': 'Land Cover', 'area_m2': 'Area'}
                )
                fig_pie.update_traces(
                    textposition='inside',
                    textinfo='percent',
                    # Plotly hover text needs <br> for a line break.
                    hovertemplate='%{label}<br>Area: %{value:,.0f} m² (%{percent})'
                )
                st.plotly_chart(fig_pie, use_container_width=True)
            else:
                st.info("No detailed land cover data.")

# --- Map View ---
with col_map:
    # 1. Base Map
    m = folium.Map(location=st.session_state['map_center'], zoom_start=st.session_state['map_zoom'], tiles="CartoDB positron", crs='EPSG3857')

    # 2. Results: Districts
    if gdf_boroughs is not None and not gdf_boroughs.empty:
        folium.GeoJson(
            gdf_boroughs,
            name="Districts",
            style_function=lambda x: {'fillColor': 'none', 'color': '#333333', 'weight': 2, 'dashArray': '5, 5', 'fillOpacity': 0.0},
            tooltip=folium.GeoJsonTooltip(fields=['name'], aliases=['Bezirk:'])
        ).add_to(m)

    # 3. Quarters (Veedel)
    if gdf_quarters is not None and not gdf_quarters.empty:
        min_ndvi = gdf_quarters['ndvi_mean'].min() if 'ndvi_mean' in gdf_quarters else 0
        max_ndvi = gdf_quarters['ndvi_mean'].max() if 'ndvi_mean' in gdf_quarters else 0.6
        if pd.isna(min_ndvi):
            min_ndvi = 0
        if pd.isna(max_ndvi):
            max_ndvi = 0.6

        def get_style(feature):
            """Style one quarter: highlight the selection, otherwise colour by mean NDVI."""
            name = feature['properties']['name']
            is_sel = (selected_veedel != "All" and name == selected_veedel)
            val = feature['properties'].get('ndvi_mean')
            fill_color = 'gray'
            if is_sel:
                fill_color = '#ffff00'
            elif val is not None and not pd.isna(val):
                # Min-max normalise into [0, 1]; the epsilon avoids 0/0 when all values match.
                norm = max(0, min(1, (val - min_ndvi) / (max_ndvi - min_ndvi + 1e-9)))
                fill_color = mcolors.to_hex(plt.get_cmap('RdYlGn')(norm))
            return {
                'fillColor': fill_color,
                'color': 'black' if is_sel else '#666666',
                'weight': 3 if is_sel else 1,
                # The selected quarter is left unfilled so the mosaic stays visible.
                'fillOpacity': 0.0 if is_sel else 0.6
            }

        folium.GeoJson(
            gdf_quarters,
            name="Veedel (NDVI)",
            style_function=get_style,
            tooltip=folium.GeoJsonTooltip(
                fields=['name', 'green_area_m2', 'green_pct', 'ndvi_mean'],
                aliases=['Veedel:', 'Green Area (m²):', 'Green Coverage (%):', 'Mean NDVI:'],
                localize=True,
                fmt='.2f'
            )
        ).add_to(m)

    # 4. Tiles Grid (Mosaic)
    if tiles_to_display:
        with st.spinner(f"Loading {len(tiles_to_display)} tiles..."):
            mosaic_img, mosaic_bounds = get_mosaic_data_remote(tiles_to_display, layer_selection)
        if mosaic_img is not None and mosaic_bounds:
            folium.raster_layers.ImageOverlay(
                image=mosaic_img,
                bounds=mosaic_bounds,
                opacity=0.8 if "Land Cover" in layer_selection else 1.0,
                name=f"Mosaic - {layer_selection}",
                control=False
            ).add_to(m)

    folium.LayerControl().add_to(m)

    # 5. Click Logic (Hybrid)
    # The key changes with selection, zoom and click counter, so the map
    # component is rebuilt whenever one of them changes.
    map_key = f"map_{st.session_state['selected_veedel']}_{st.session_state['map_zoom']}_{st.session_state['map_click_counter']}"
    map_output = st_folium(m, width=None, height=700, key=map_key, use_container_width=True, returned_objects=["last_object_clicked", "last_clicked"])

    clicked_name_final = None
    if map_output:
        # A: Check Object Property
        if map_output.get('last_object_clicked'):
            props = map_output['last_object_clicked'].get('properties', {})
            if props and 'name' in props:
                clicked_name_final = props['name']

        # B: Spatial Query Fallback
        if not clicked_name_final and map_output.get('last_clicked'):
            lat = map_output['last_clicked']['lat']
            lng = map_output['last_clicked']['lng']
            # An empty frame (load failure) has no geometry column to query.
            if lat and lng and gdf_quarters is not None and not gdf_quarters.empty:
                p = Point(lng, lat)
                matches = gdf_quarters[gdf_quarters.geometry.contains(p)]
                if not matches.empty:
                    clicked_name_final = matches['name'].iloc[0]

    if clicked_name_final and clicked_name_final in veedel_list and clicked_name_final != st.session_state['selected_veedel']:
        st.session_state['selected_veedel'] = clicked_name_final
        update_zoom_for_veedel(clicked_name_final)
        st.session_state['map_click_counter'] += 1
        st.rerun()