# 2025-11-13 - 20251113014 (code view)
# NOTE: the lines above/below this header were scraped page chrome
# ("SKS 홈으로" navigation link) — commented out so the file parses.
# analysis & visualization
import pandas         as pd
import numpy          as np
import streamlit      as st
import plotly.express as px 

# llm
import re, os, json, openai, random, time
from   openai import OpenAI
from   dotenv import load_dotenv

# langchain 
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models       import ChatOpenAI 
from langchain.prompts           import ChatPromptTemplate
from langchain.vectorstores      import FAISS
from langchain.text_splitter     import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chains            import RetrievalQA, LLMChain, ConversationalRetrievalChain


load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# 캐싱을 이용해서 데이터를 효율적으로 관리할 수 있다.
@st.cache_data
def load_data() :
    frm = pd.read_csv('../data/attack_stream_logs.csv')
    return frm

frm = load_data()

###########################
# 상태정보 초기화
if 'logs' not in st.session_state :
    st.session_state['logs'] = pd.DataFrame(columns=frm.columns)

# 세션 상태를 초기화
if "messages" not in st.session_state :
    st.session_state["messages"] = [
        {'role' : 'assistant', 'content' : "👋 보안팀 섭섭이입니다. 무엇을 도와드릴까요?"}
    ]
    
# langchain memory
if "chat_history" not in st.session_state :
    st.session_state["chat_history"] = []
###########################


# VectorDB(리소스)
# 캐싱의 리소스를 이용해서 데이터를 효율적으로 관리할 수 있다.
@st.cache_resource
def create_vector_store(frm) :
    database = []
    for idx, row in frm.iterrows() :
        txt = f"time: {row['time']}, ip: {row['ip']}, country: {row['country']}, attack: {row['attack_type']}, description: {row['description']}"
        database.append(txt)

    splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
    docs = splitter.create_documents(database)

    embeddings = OpenAIEmbeddings()
    vectorDB = FAISS.from_documents(docs, embeddings)

    return vectorDB

vectorDB = create_vector_store(frm)

#model
@st.cache_resource
def model() :
    retriever = vectorDB.as_retriever(search_kwargs={'k' : 10})
    llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.9)
    qaChain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)

    return qaChain


def view():
    realtime, chatbot = st.columns(2)
    with realtime :
        st.subheader('실시간 모니터링')
        logPrt = st.empty()
        warningPrt = st.empty()
        st.markdown('---')
        if st.button('스트리밍 시작'):
                for idx, row in frm.iterrows() :
                    newLog = row.to_dict()

                    # 로그 누적을 위한 세션관리
                    st.session_state['logs'] = pd.concat([pd.DataFrame([newLog]), st.session_state['logs']])

                    if newLog['risk_score'] >= 85 :
                        warningPrt.warning(f'고위험 공격 감지!! 집중')
                    else:
                        warningPrt.info(f'시스템 정상 작동 중!! 집중하지마')

                    #갱신
                    logPrt.dataframe(st.session_state['logs'])
                    
                    time.sleep(5)


    with chatbot :
        st.subheader('챗봇 모니터링')
        for msg in st.session_state['messages'] :
            with st.chat_message(msg['role']) :
                st.markdown(msg['content'])

        # 사용자 입력
        prompt = st.chat_input('궁금한 것을 물어보세요 : ')
        if prompt :
            st.session_state['messages'].append({"role" : "user", "content" : prompt })
            with st.chat_message("user") :
                st.markdown(prompt)

            # model response
            with st.chat_message("assistant") :
                with st.spinner('분석중입니다...'):
                    chain = model()
                    response = chain({"question" : prompt, "chat_history" : st.session_state["chat_history"]})
                    answer = response['answer']
                    st.markdown(answer)

            # 대화 이력 업데이트
            st.session_state['chat_history'].append((prompt, answer))
            st.session_state['messages'].append({"role" : "assistant", "content" : answer })





if __name__=='__main__' :
    view()