2025-11-13 - 20251113014 (코드 보기)

SKS 홈으로
# analysis & visualization
import pandas         as pd
import numpy          as np
import streamlit      as st
import plotly.express as px 
# llm
import re, os, json, openai, random, time
from   openai import OpenAI
from   dotenv import load_dotenv

# langchain 
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models       import ChatOpenAI 
from langchain.prompts           import ChatPromptTemplate
from langchain.vectorstores      import FAISS
from langchain.text_splitter     import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chains            import RetrievalQA, LLMChain

# Load variables from a local .env file into the process environment.
load_dotenv()
# NOTE(review): api_key is not referenced again in the visible code; presumably the
# OpenAI / LangChain clients read OPENAI_API_KEY from the environment themselves — confirm.
api_key = os.getenv('OPENAI_API_KEY')

# Attack-log table read once at import time and shared by loadData() and view().
# The path is relative, so it resolves against the working directory at launch.
frm = pd.read_csv('../data/attack_logs_ko.csv')

def langchainModel():
    """Build the retrieval-QA chain over the attack-log rows.

    The text lines produced by ``loadData()`` are wrapped into documents,
    embedded with OpenAI embeddings, and indexed in an in-memory FAISS store.
    A ``gpt-4o-mini`` chat model then answers questions using the three most
    similar chunks ("stuff" chain type).

    Returns:
        A ``(qa_chain, retriever)`` tuple: the ``RetrievalQA`` chain and the
        retriever it uses, so callers can also inspect the retrieved chunks.
    """
    log_lines = loadData()

    # Each log line becomes its own document; the splitter is configured with
    # a chunk size of 200 and an overlap of 20.
    chunker = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
    chunks = chunker.create_documents(log_lines)

    index = FAISS.from_documents(chunks, OpenAIEmbeddings())
    top_k_retriever = index.as_retriever(search_kwargs={'k': 3})

    chat_model = ChatOpenAI(model='gpt-4o-mini', temperature=0.3)
    qa_chain = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type='stuff',
        retriever=top_k_retriever,
    )
    return qa_chain, top_k_retriever


def loadData():
    """Render every row of the module-level ``frm`` table as one text line.

    Reads the ``time``, ``ip``, ``country``, ``attack_type`` and
    ``description`` columns of each row.

    Returns:
        A list of strings, one per row, in row order.
    """
    def _as_line(row):
        # Single-line, human-readable form of one log record.
        return f"time: {row['time']}, ip: {row['ip']}, country: {row['country']}, attack: {row['attack_type']}, description: {row['description']}"

    return [_as_line(record) for _, record in frm.iterrows()]

def view():
    """Render the Streamlit page for the log-analysis RAG demo.

    The page shows a question box; when a non-blank question is entered, the
    retrieval-QA chain answers it and the retrieved log chunks are listed
    underneath. A bar chart of attack-type counts from ``frm`` is always
    shown at the bottom.

    The chain is built through ``st.cache_resource`` because Streamlit
    re-executes the script on every interaction; without the cache each
    question would re-embed the whole log table and rebuild the FAISS index.
    The cached chain lives for the lifetime of the server process, so a
    changed CSV is only picked up after a restart or a cache clear.
    """
    st.set_page_config(page_title='AI 보안 탐정')
    st.title('해킹 로그분석 RAG 시스템')
    st.markdown('LangChain + FAISS(RAG) + Streamlit 활용한 로그분석 보드')

    # User input; whitespace-only input is treated as "no question" so it
    # does not trigger an embedding / LLM call.
    query = st.text_input('분석을 위한 질문을 입력하세요 : ').strip()
    if query:
        with st.spinner('탐정팀장님이 분석 중입니다.....'):
            # Build the chain once and reuse it across reruns; the cache's own
            # spinner is disabled because one is already displayed here.
            build_model = st.cache_resource(show_spinner=False)(langchainModel)
            model, retriever = build_model()
            response = model.run(query)
        st.success('분석완료')
        st.write(response)

        # Preview of the chunks the retriever returns for this question.
        st.subheader("관련 로그 청크 미리보기")
        docs = retriever.get_relevant_documents(query)
        for i, doc in enumerate(docs, 1):
            st.markdown(f"**[청크 {i}]**")
            st.code(doc.page_content)

    # Attack-type frequency chart.
    st.subheader('공격 통계 시각화')
    st.bar_chart(frm['attack_type'].value_counts())


# Render the page only when this file is executed as the main script.
if __name__=='__main__' :
    view()