# analysis & visualization
import pandas as pd
import numpy as np
import streamlit as st
import plotly.express as px
# llm
import re, os, json, openai, random, time
from openai import OpenAI
from dotenv import load_dotenv
# langchain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA, LLMChain
# Load variables from a local .env file (python-dotenv) into the process
# environment before the key is read on the next line.
load_dotenv()
# OpenAI key taken from the environment; os.getenv returns None when unset.
# NOTE(review): `api_key` is not referenced by any function visible in this
# file -- presumably the OpenAI/LangChain clients read OPENAI_API_KEY from the
# environment themselves; confirm before removing.
api_key = os.getenv('OPENAI_API_KEY')
# Attack-log table, read once at import time. The relative path is resolved
# against the current working directory, not this file's location.
# Both loadData() and view() read this frame.
frm = pd.read_csv('../data/attack_logs_ko.csv')
@st.cache_resource(show_spinner=False)
def langchainModel():
    """Build the retrieval QA chain over the attack-log rows.

    The rows returned by ``loadData()`` are split into chunks, embedded with
    ``OpenAIEmbeddings`` and indexed in a FAISS vector store. A ``RetrievalQA``
    chain ("stuff" type) is then assembled on top of a retriever that returns
    the 3 best-matching chunks.

    The result is cached with ``st.cache_resource``: Streamlit re-runs the
    script on every interaction, and without the cache every submitted
    question would re-embed the whole log and rebuild the index. The caller
    already shows its own spinner, hence ``show_spinner=False``.

    Returns:
        tuple: ``(qaChain, retriever)`` -- the QA chain and the retriever it
        uses, so callers can also display the retrieved chunks.
    """
    db = loadData()

    # Small chunks with a little overlap; each input string becomes one or
    # more documents.
    splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
    docs = splitter.create_documents(db)

    embeddings = OpenAIEmbeddings()
    vectorDB = FAISS.from_documents(docs, embeddings)
    retriever = vectorDB.as_retriever(search_kwargs={'k': 3})

    llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.3)
    qaChain = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)
    return qaChain, retriever
def loadData(frame=None):
    """Render each attack-log row as one descriptive text line.

    Args:
        frame: Optional DataFrame to convert. When omitted (the original
            call style), the module-level ``frm`` table is used.

    Returns:
        list of str: one ``"time: ..., ip: ..., country: ..., attack: ...,
        description: ..."`` string per row, in row order. Empty list for an
        empty frame.

    Raises:
        KeyError: if the frame lacks one of the columns ``time``, ``ip``,
            ``country``, ``attack_type`` or ``description``.
    """
    source = frm if frame is None else frame
    return [
        f"time: {row['time']}, ip: {row['ip']}, country: {row['country']}, "
        f"attack: {row['attack_type']}, description: {row['description']}"
        for _, row in source.iterrows()
    ]
def view():
    """Render the Streamlit page: question box, RAG answer, chunks, chart.

    Configures the page and title, reads a question from a text input and,
    when the question is non-empty, obtains the QA chain and retriever from
    ``langchainModel()``, runs the question through the chain and writes the
    response. The documents the retriever returns for the same question are
    then listed as code blocks. A bar chart of ``frm['attack_type']`` value
    counts is drawn at the end.

    NOTE(review): the nesting below was reconstructed from a copy of the file
    that had lost its indentation. Confirm which statements belong inside the
    spinner and inside the ``if query:`` branch (in particular whether the
    chart is meant to show before any question is entered).
    """
    st.set_page_config(page_title='AI 보안 탐정')
    st.title('해킹 로그분석 RAG 시스템')
    st.markdown('LangChain + FAISS(RAG) + Streamlit 활용한 로그분석 보드')
    # User input
    query = st.text_input('분석을 위한 질문을 입력하세요 : ')
    if query:
        with st.spinner('탐정팀장님이 분석 중입니다.....'):
            model, retriever = langchainModel()
            response = model.run(query)
        st.success('분석완료')
        st.write(response)
        # Show the retrieved chunks. This queries the retriever again with
        # the same question, separately from the chain call above.
        st.subheader("관련 로그 청크 미리보기")
        docs = retriever.get_relevant_documents(query)
        for i, doc in enumerate(docs, 1):  # number chunks from 1 for display
            st.markdown(f"**[청크 {i}]**")
            st.code(doc.page_content)
    # Attack-type statistics chart
    st.subheader('공격 통계 시각화')
    st.bar_chart(frm['attack_type'].value_counts())
# Render the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__=='__main__' :
    view()