import os
from pathlib import Path

import streamlit as st
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import folium
from streamlit_folium import st_folium
from shapely.geometry import Point, box
import geopandas as gpd
import duckdb
import rasterio
from huggingface_hub import HfFileSystem
import shapely.wkb
from dotenv import load_dotenv

# Import extracted utilities (shared with app_local.py)
from utils import (
    FLAIR_COLORS,
    CLASS_LABELS,
    process_mosaic
)

# --- Cloud Configuration ---
load_dotenv()
env_path = Path(__file__).parent.parent / "DL_cologne_green" / ".env"
if env_path.exists():
    load_dotenv(env_path)

HF_TOKEN = os.getenv("HF_TOKEN")
DATASET_ID = os.getenv("DATASET_ID", "Rahul-fix/cologne-green-data")
BASE_URL = f"hf://datasets/{DATASET_ID}"
STORAGE_OPTS = {"token": HF_TOKEN} if HF_TOKEN else None
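# A minimal .env sketch for running this app (placeholder values, not real credentials);
# on Streamlit Cloud the same keys can be supplied via Secrets instead:
#
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
#   DATASET_ID=Rahul-fix/cologne-green-data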

# --- DuckDB Connection ---
@st.cache_resource
def get_db_connection():
    con = duckdb.connect(database=":memory:")
    con.execute("INSTALL spatial; LOAD spatial;")
    con.execute("INSTALL httpfs; LOAD httpfs;")
    if HF_TOKEN:
        fs = HfFileSystem(token=HF_TOKEN)
        con.register_filesystem(fs)
    return con


con = get_db_connection()


# --- Cloud Data Loading Functions (mirror utils.py structure) ---
def safe_load_wkb(x):
    try:
        return shapely.wkb.loads(bytes(x))
    except Exception:
        return None


@st.cache_data(ttl=3600)
def load_quarters_with_stats():
    """Load Veedel boundaries with stats - Cloud version of utils.load_quarters_with_stats"""
    try:
        query = f"""
            SELECT
                v.name,
                ST_AsWKB(v.geometry) as geometry,
                COALESCE(s.green_area_m2, 0) as green_area_m2,
                COALESCE(s.ndvi_mean, 0) as ndvi_mean,
                v.Shape_Area,
                s.area_0, s.area_1, s.area_2, s.area_3, s.area_4,
                s.area_5, s.area_6, s.area_7, s.area_8, s.area_9,
                s.area_10, s.area_11, s.area_12, s.area_13, s.area_14,
                s.area_15, s.area_16, s.area_17, s.area_18
            FROM '{BASE_URL}/data/boundaries/Stadtviertel.parquet' v
            LEFT JOIN '{BASE_URL}/data/stats/extended_stats.parquet' s
                ON v.name = s.name
        """
        df = con.execute(query).fetchdf()
        df['geometry'] = df['geometry'].apply(safe_load_wkb)
        df = df.dropna(subset=['geometry'])
        gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")

        # CRS check: remote data may be in EPSG:25832 (UTM metres); x-coordinates
        # above 180 cannot be longitudes, so reproject to WGS84 in that case.
        if not gdf.empty and gdf.total_bounds[0] > 180:
            gdf.crs = "EPSG:25832"
            gdf = gdf.to_crs("EPSG:4326")

        # Calculate green_pct
        if 'green_area_m2' in gdf.columns and 'Shape_Area' in gdf.columns:
            gdf['green_pct'] = (gdf['green_area_m2'] / gdf['Shape_Area']) * 100
        else:
            gdf['green_pct'] = 0.0

        return gdf
    except Exception as e:
        st.error(f"Error loading quarters: {e}")
        return gpd.GeoDataFrame()


@st.cache_data(ttl=3600)
def load_boroughs():
    """Load borough boundaries - Cloud version of utils.load_boroughs"""
    try:
        query = (
            f"SELECT STB_NAME as name, ST_AsWKB(geometry) as geometry "
            f"FROM '{BASE_URL}/data/boundaries/Stadtbezirke.parquet'"
        )
        df = con.execute(query).fetchdf()
        df['geometry'] = df['geometry'].apply(safe_load_wkb)
        df = df.dropna(subset=['geometry'])
        gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")
        if not gdf.empty and gdf.total_bounds[0] > 180:
            gdf.crs = "EPSG:25832"
            gdf = gdf.to_crs("EPSG:4326")
        return gdf
    except Exception as e:
        st.error(f"Error loading boroughs: {e}")
        return gpd.GeoDataFrame()


@st.cache_data(ttl=3600)
def get_tile_to_veedel_mapping():
    """Get tile-to-Veedel mapping - Cloud version of utils.get_tile_to_veedel_mapping"""
    try:
        tiles_df = pd.read_csv(
            f"{BASE_URL}/data/metadata/cologne_tiles.csv",
            storage_options=STORAGE_OPTS
        )
        # Each tile is a 1 km x 1 km square; Koordinatenursprung_East/North is its
        # lower-left corner in EPSG:25832, so the footprint is a 1000 m box.
        geometries = [
            box(
                r['Koordinatenursprung_East'],
                r['Koordinatenursprung_North'],
                r['Koordinatenursprung_East'] + 1000,
                r['Koordinatenursprung_North'] + 1000
            )
            for _, r in tiles_df.iterrows()
        ]
        tiles_gdf = gpd.GeoDataFrame(tiles_df, geometry=geometries, crs="EPSG:25832")

        q_gdf = gpd.read_parquet(
            f"{BASE_URL}/data/boundaries/Stadtviertel.parquet",
            storage_options=STORAGE_OPTS
        )
        if q_gdf.crs != "EPSG:25832":
            q_gdf = q_gdf.to_crs("EPSG:25832")

        joined = gpd.sjoin(tiles_gdf, q_gdf, how="inner", predicate="intersects")
        return joined.groupby('name')['Kachelname'].apply(list).to_dict()
    except Exception:
        return {}


@st.cache_data(ttl=3600)
def list_available_tiles():
    """List available tiles from cloud (processed, web_optimized, or raw)"""
    try:
        fs = HfFileSystem(token=HF_TOKEN)
        tiles = set()

        # Check processed masks
        processed_files = fs.glob(f"datasets/{DATASET_ID}/data/processed/*_mask.tif")
        for f in processed_files:
            tiles.add(Path(f).stem.replace("_mask", ""))

        # Check web_optimized masks
        web_opt_files = fs.glob(f"datasets/{DATASET_ID}/data/web_optimized/*_mask.tif")
        for f in web_opt_files:
            tiles.add(Path(f).stem.replace("_mask", ""))

        # Also include raw tiles (for satellite view)
        raw_files = fs.glob(f"datasets/{DATASET_ID}/data/raw/*.jp2")
        for f in raw_files:
            tiles.add(Path(f).stem)

        return list(tiles)
    except Exception:
        return []


def get_mosaic_data_remote(tile_names, layer_type):
    """Load and mosaic tiles - Cloud version of utils.get_mosaic_data_local"""
    fs = HfFileSystem(token=HF_TOKEN)
    sources = []
    memfiles = []
    try:
        for tile in tile_names:
            suffix = "_mask" if "Land Cover" in layer_type else "_ndvi" if "NDVI" in layer_type else ""
            paths = [
                f"datasets/{DATASET_ID}/data/web_optimized/{tile}{suffix}.tif",
                f"datasets/{DATASET_ID}/data/processed/{tile}{suffix}.tif",
            ]
            if layer_type == "Satellite":
                paths.append(f"datasets/{DATASET_ID}/data/raw/{tile}.jp2")

            found_bytes = None
            for p in paths:
                try:
                    with fs.open(p, "rb") as f:
                        found_bytes = f.read()
                    break
                except Exception:
                    continue

            if found_bytes:
                m = rasterio.MemoryFile(found_bytes)
                memfiles.append(m)
                sources.append(m.open())

        # Use shared processing logic
        result = process_mosaic(sources, layer_type)

        # Cleanup
        for s in sources:
            s.close()
        for m in memfiles:
            m.close()

        return result
    except Exception:
        return None, None
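# Note on the lookup order in get_mosaic_data_remote above: for each tile the loader
# tries data/web_optimized/ first, then data/processed/, and (for the Satellite layer
# only) data/raw/<tile>.jp2; the first path that opens is used, and tiles with no
# matching raster are skipped silently rather than raising.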

# 1. Page Configuration
st.set_page_config(page_title="GreenCologne (Cloud)", layout="wide")
st.title("🌿 GreenCologne (Cloud Dashboard)")

# --- Sidebar: Dataset Info ---
with st.sidebar:
    st.header("ℹ️ About This Dataset")

    with st.expander("📡 Data Sources", expanded=False):
        st.markdown("""
        **Satellite Imagery**  
        [OpenNRW DOP10](https://www.bezreg-koeln.nrw.de/geobasis-nrw/produkte-und-dienste/luftbild-und-satellitenbildinformationen/aktuelle-luftbild-und-0) – 10cm resolution aerial photos (2022-2025)

        **Administrative Boundaries**  
        [Offene Daten Köln](https://www.offenedaten-koeln.de/) – Stadtviertel & Stadtbezirke

        **Coverage**  
        840 tiles covering Cologne's 86 Veedels
        """)

    with st.expander("🤖 Model & Methodology", expanded=False):
        st.markdown("""
        **Land Cover Classification**  
        [FLAIR-Hub](https://huggingface.co/IGNF/FLAIR-HUB_LC-A_IR_swinbase-upernet) – Deep learning semantic segmentation trained on French aerial imagery, adapted for German urban landscapes.

        **19 Land Cover Classes**  
        Buildings, deciduous trees, herbaceous vegetation, water, impervious surfaces, agricultural land, and more.

        **NDVI Calculation**  
        Normalized Difference Vegetation Index computed from NIR and Red bands:
        `NDVI = (NIR - Red) / (NIR + Red)`

        **Green Area Detection**  
        Classes 4 (Deciduous), 5 (Coniferous), 17 (Herbaceous), and 18 (Agricultural) are classified as green areas.
        """)

    with st.expander("🙏 Acknowledgments", expanded=False):
        st.markdown("""
        - [CorrelAid](https://correlaid.org/) – Data-for-good community
        - [OpenNRW](https://www.opengeodata.nrw.de/) – Open geospatial data
        - [IGNF/FLAIR-Hub](https://huggingface.co/IGNF/FLAIR-HUB_LC-A_IR_swinbase-upernet) – Segmentation model
        - [Stadt Köln](https://www.stadt-koeln.de/) – Open administrative data
        """)

    if not HF_TOKEN:
        st.warning("⚠️ HF_TOKEN missing. Set in .env or Streamlit Secrets.")

# 2. Data Loading
gdf_quarters = load_quarters_with_stats()
gdf_boroughs = load_boroughs()
tile_mapping = get_tile_to_veedel_mapping()
available_tiles = list_available_tiles()

# 3. State Management
if 'selected_veedel' not in st.session_state:
    st.session_state['selected_veedel'] = "All"
if 'map_center' not in st.session_state:
    st.session_state['map_center'] = [50.9375, 6.9603]  # Cologne
if 'map_zoom' not in st.session_state:
    st.session_state['map_zoom'] = 11
if 'map_click_counter' not in st.session_state:
    st.session_state['map_click_counter'] = 0


# --- Helper Functions ---
def update_zoom_for_veedel(veedel_name):
    if veedel_name == "All":
        st.session_state['map_center'] = [50.9375, 6.9603]
        st.session_state['map_zoom'] = 10
    elif gdf_quarters is not None and not gdf_quarters.empty:
        match = gdf_quarters[gdf_quarters['name'] == veedel_name]
        if not match.empty:
            centroid = match.geometry.centroid.iloc[0]
            st.session_state['map_center'] = [centroid.y, centroid.x]
            st.session_state['map_zoom'] = 14


def on_veedel_change():
    sel = st.session_state['selected_veedel_widget']
    st.session_state['selected_veedel'] = sel
    update_zoom_for_veedel(sel)


# --- Layout ---
col_map, col_details = st.columns([0.65, 0.35], gap="medium")

with col_details:
    st.markdown("### GreenCologne Analysis")
    tab_opts, tab_stats = st.tabs(["🛠️ Options", "📊 Statistics"])

    veedel_list = (
        ["All"] + sorted(gdf_quarters['name'].unique().tolist())
        if gdf_quarters is not None and not gdf_quarters.empty
        else ["All"]
    )

    # --- Tab 1: Options ---
    with tab_opts:
        # Sync Widget
        if 'selected_veedel_widget' in st.session_state and st.session_state['selected_veedel'] != st.session_state['selected_veedel_widget']:
            st.session_state['selected_veedel_widget'] = st.session_state['selected_veedel']

        selected_veedel = st.selectbox(
            "Select Quarter (Veedel/Stadtviertel):",
            veedel_list,
            key='selected_veedel_widget',
            on_change=on_veedel_change,
            index=veedel_list.index(st.session_state['selected_veedel']) if st.session_state['selected_veedel'] in veedel_list else 0
        )

        # Tile Logic - Only load tiles when a specific veedel is selected
        tiles_to_display = []
        if selected_veedel != "All":
            veedel_tiles = set(tile_mapping.get(selected_veedel, []))
            filtered_tiles = [t for t in veedel_tiles if t in available_tiles]
            tiles_to_display = sorted(filtered_tiles)

        # Layer Selection
        layer_selection = st.radio(
            "Select Layer:",
            ["Satellite", "Land Cover", "NDVI"],
            index=2,
            horizontal=True
        )

        # Legends
        st.markdown("#### Legends")
        st.markdown("**Veedel Health (Mean NDVI)**")
        st.markdown("""