File size: 2,892 Bytes
2936872 3a742d6 d02218d 3a742d6 2936872 3a742d6 d02218d 3d448e7 edd4f90 d02218d 3a742d6 3d448e7 d02218d 3a742d6 d02218d 3a742d6 2ee7fcd 2936872 3a742d6 2ee7fcd 3a742d6 d02218d 2ee7fcd 3a742d6 2ee7fcd 2936872 2ee7fcd 2936872 2ee7fcd 2936872 3a742d6 d02218d 3a742d6 2936872 2ee7fcd 3d448e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import os
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
from google import generativeai as genai
# The Gemini API key is read from the GOOGLE_API_KEY environment variable.
# Startup aborts with a ValueError when it is missing or empty; otherwise
# the google.generativeai client is configured with it.
API_KEY = os.getenv("GOOGLE_API_KEY")
if not API_KEY:
    raise ValueError("API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. Hugging Face Spaces์ Repository secrets์ 'GOOGLE_API_KEY'๋ฅผ ์ค์ ํด์ฃผ์ธ์.")
genai.configure(api_key=API_KEY)
print("API ํค๊ฐ ์ฑ๊ณต์ ์ผ๋ก ์ค์ ๋์์ต๋๋ค.")
# Load the dataset CSV from the kairess/mental-health-chatbot GitHub repository.
df = pd.read_csv('https://raw.githubusercontent.com/kairess/mental-health-chatbot/master/wellness_dataset_original.csv')
# Drop the 'Unnamed: 3' column if it exists; errors='ignore' makes this a no-op otherwise.
df = df.drop(columns=['Unnamed: 3'], errors='ignore')
# Discard rows where either of the two text columns used below is missing.
df = df.dropna(subset=['์ ์ ', '์ฑ๋ด'])
# Sentence-embedding model, instantiated once at module import.
model = SentenceTransformer('jhgan/ko-sbert-nli')
# NOTE(review): the string literal in the next print() is split across three
# physical lines in this view, which looks like an encoding/extraction
# artifact — confirm against the original file before running.
print("๋ฐ์ดํฐ์
์๋ฒ ๋ฉ์ ๋ฏธ๋ฆฌ ๊ณ์ฐ ์ค์
๋๋ค. ์ด ๊ณผ์ ์ ์๊ฐ์ด ์์๋ฉ๋๋ค...")
# Encode every value of the first text column (one model.encode call per row)
# and keep the resulting vector in a new 'embedding' column for later lookups.
df['embedding'] = df['์ ์ '].apply(lambda x: model.encode(x))
print("์๋ฒ ๋ฉ ๊ณ์ฐ์ด ์๋ฃ๋์์ต๋๋ค! ์ด์ ์ฑ๋ด ์๋ต์ด ํจ์ฌ ๋นจ๋ผ์ง๋๋ค.")
def call_gemini_api(question):
    """Ask the Gemini model to answer ``question`` and return its text.

    A ``gemini-2.0-flash`` model object is created for the call. Any
    ``Exception`` raised while creating the model, generating content or
    reading the response text is printed and turned into an apology
    string that embeds the error, so nothing is re-raised to the caller.

    Args:
        question: Prompt passed unchanged to ``generate_content``.

    Returns:
        The generated text, or the apology string on failure.
    """
    try:
        # Reading ``.text`` stays inside the try block so that a failure
        # there is reported the same way as a failed request.
        return (
            genai.GenerativeModel('gemini-2.0-flash')
            .generate_content(question)
            .text
        )
    except Exception as e:
        print(f"API ํธ์ถ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        return f"์ฃ์กํฉ๋๋ค. API ํธ์ถ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}"
# Minimum cosine similarity at which a dataset row is answered directly;
# lower scores are delegated to call_gemini_api.
COSINE_SIMILARITY_THRESHOLD = 0.7


def chatbot(user_question):
    """Answer ``user_question`` from the dataset, falling back to Gemini.

    The question is embedded with the module-level ``model`` and compared
    against every vector in ``df['embedding']``. When the best cosine
    similarity is at least ``COSINE_SIMILARITY_THRESHOLD`` the answer
    column of the best-matching row is returned; otherwise the question
    is forwarded to ``call_gemini_api``.

    Args:
        user_question: Text entered by the user.

    Returns:
        The reply string. If any ``Exception`` is raised along the way,
        it is printed and an apology string embedding the error is
        returned instead of propagating it.
    """
    try:
        user_embedding = model.encode(user_question)
        # Score all stored embeddings with one vectorised call instead of
        # invoking cosine_similarity once per row through Series.apply.
        embedding_matrix = np.stack(df['embedding'].to_list())
        similarities = cosine_similarity([user_embedding], embedding_matrix)[0]
        # argmax is positional (first maximum wins, like idxmax), so the
        # row is fetched with iloc: df's index labels need not be 0..n-1.
        best_position = int(similarities.argmax())
        best_score = similarities[best_position]
        best_match_row = df.iloc[best_position]
        if best_score >= COSINE_SIMILARITY_THRESHOLD:
            answer = best_match_row['์ฑ๋ด']
            print(f"์ ์ฌ๋ ๊ธฐ๋ฐ ๋ต๋ณ. ์ ์: {best_score}")
            return answer
        else:
            print(f"์ ์ฌ๋ ์๊ณ๊ฐ({COSINE_SIMILARITY_THRESHOLD}) ๋ฏธ๋ง. Gemini ๋ชจ๋ธ์ ํธ์ถํฉ๋๋ค. ์ ์: {best_score}")
            return call_gemini_api(user_question)
    except Exception as e:
        print(f"์ฑ๋ด ์คํ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        return f"์ฃ์กํฉ๋๋ค. ์ฑ๋ด ์คํ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}"
# Build the Gradio UI: a two-line text box as input, a five-line text box as
# output, with chatbot() as the handler that maps one to the other.
demo = gr.Interface(
fn=chatbot,
# NOTE(review): the placeholder string literal below is split across two
# physical lines in this view, which looks like an encoding/extraction
# artifact — confirm against the original file before running.
inputs=gr.Textbox(lines=2, placeholder="์ง๋ฌธ์ ์
๋ ฅํด ์ฃผ์ธ์...", label="์ง๋ฌธ", elem_id="user_question_input"),
outputs=gr.Textbox(lines=5, label="์ฑ๋ด ๋ต๋ณ"),
title="๋๋ ์๋ด ์ฑ๋ด",
description="5๋ถ ๋์ ๋ํํ์ฌ ์ฃผ์๊ณ ๋ค์์ ๋งํฌ๋ฅผ ํด๋ฆญํ์ฌ ๊ผญ ์ค๋ฌธ์กฐ์ฌ์ ์ฐธ์ฌํด์ฃผ์ธ์! https://forms.gle/eWtyejQaQntKbbxG8"
)
# Start the server bound to 0.0.0.0 on port 7860, with share=False.
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
|