# analysis & visualization
import pandas as pd
import numpy as np
import streamlit as st
import plotly.express as px
# llm
import re, os, json, openai, random, time
from openai import OpenAI
from dotenv import load_dotenv
# langchain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA, LLMChain, ConversationalRetrievalChain
# Load environment variables (OPENAI_API_KEY, ...) from a local .env file.
load_dotenv()
# NOTE(review): api_key is read but never passed on explicitly — the OpenAI /
# LangChain clients below appear to pick the key up from the environment; confirm.
api_key = os.getenv('OPENAI_API_KEY')
# st.cache_data keeps the parsed CSV in memory across Streamlit reruns.
@st.cache_data
def load_data(path='../data/attack_stream_logs.csv'):
    """Load the attack-stream log CSV into a DataFrame.

    Parameters
    ----------
    path : str
        CSV file location. Defaults to the original hard-coded sample file,
        so existing callers are unaffected.

    Returns
    -------
    pandas.DataFrame
        Raw log records (columns include at least time, ip, country,
        attack_type, description and risk_score, per their use below).
    """
    return pd.read_csv(path)
frm = load_data()
###########################
# Session-state initialization (Streamlit reruns the whole script on every
# interaction, so mutable state must live in st.session_state).
# 'logs': accumulates streamed log rows so the table persists across reruns.
if 'logs' not in st.session_state :
    st.session_state['logs'] = pd.DataFrame(columns=frm.columns)
# 'messages': chat transcript rendered in the chatbot column.
if "messages" not in st.session_state :
    st.session_state["messages"] = [
        {'role' : 'assistant', 'content' : "👋 보안팀 섭섭이입니다. 무엇을 도와드릴까요?"}
    ]
# 'chat_history': (question, answer) tuples passed to the LangChain
# ConversationalRetrievalChain as conversational context.
if "chat_history" not in st.session_state :
    st.session_state["chat_history"] = []
###########################
# VectorDB (resource)
# st.cache_resource keeps heavyweight objects (the FAISS index) alive
# across reruns instead of rebuilding them on every interaction.
@st.cache_resource
def create_vector_store(frm) :
    """Build a FAISS vector store with one flat text record per log row."""
    corpus = [
        f"time: {row['time']}, ip: {row['ip']}, country: {row['country']}, "
        f"attack: {row['attack_type']}, description: {row['description']}"
        for _, row in frm.iterrows()
    ]
    chunks = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20).create_documents(corpus)
    return FAISS.from_documents(chunks, OpenAIEmbeddings())
# Build (or fetch the cached) vector store over the loaded logs.
vectorDB = create_vector_store(frm)
# Chat model / retrieval chain (constructed once per process via the cache).
@st.cache_resource
def model() :
    """Return the conversational RAG chain used by the chatbot column.

    Pairs a gpt-4o-mini chat model (temperature 0.9) with a top-10
    similarity retriever over the module-level ``vectorDB``.
    """
    chat_llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.9)
    log_retriever = vectorDB.as_retriever(search_kwargs={'k' : 10})
    return ConversationalRetrievalChain.from_llm(llm=chat_llm, retriever=log_retriever)
def view():
    """Render the dashboard: live log stream (left) and RAG chatbot (right)."""
    realtime, chatbot = st.columns(2)
    with realtime :
        _render_realtime()
    with chatbot :
        _render_chatbot()

def _render_realtime():
    """Left column: replay the CSV rows as a live-updating log stream."""
    st.subheader('실시간 모니터링')
    logPrt = st.empty()
    warningPrt = st.empty()
    st.markdown('---')
    if st.button('스트리밍 시작'):
        for _, row in frm.iterrows() :
            newLog = row.to_dict()
            # Prepend the newest row so the freshest event is shown on top;
            # kept in session state so the table survives Streamlit reruns.
            st.session_state['logs'] = pd.concat([pd.DataFrame([newLog]), st.session_state['logs']])
            if newLog['risk_score'] >= 85 :
                warningPrt.warning(f'고위험 공격 감지!! 집중')
            else:
                warningPrt.info(f'시스템 정상 작동 중!! 집중하지마')
            # Refresh the table placeholder with the accumulated log.
            logPrt.dataframe(st.session_state['logs'])
            # NOTE(review): this blocks the script for 5s per row, making the
            # chatbot column unresponsive while streaming — consider a
            # rerun/fragment-based tick instead.
            time.sleep(5)

def _render_chatbot():
    """Right column: chat UI backed by the cached retrieval chain."""
    st.subheader('챗봇 모니터링')
    # Replay the stored transcript so history survives reruns.
    for msg in st.session_state['messages'] :
        with st.chat_message(msg['role']) :
            st.markdown(msg['content'])
    # User input.
    prompt = st.chat_input('궁금한 것을 물어보세요 : ')
    if prompt :
        st.session_state['messages'].append({"role" : "user", "content" : prompt })
        with st.chat_message("user") :
            st.markdown(prompt)
        # Model response.
        with st.chat_message("assistant") :
            with st.spinner('분석중입니다...'):
                chain = model()
                response = chain({"question" : prompt, "chat_history" : st.session_state["chat_history"]})
                answer = response['answer']
                st.markdown(answer)
        # Persist the turn both for LangChain context and for UI replay.
        st.session_state['chat_history'].append((prompt, answer))
        st.session_state['messages'].append({"role" : "assistant", "content" : answer })
# Script entry point (run via `streamlit run <file>`).
if __name__=='__main__' :
    view()