예상지 AI PDF 프로그램 - SAMPLE

by aisports Likes 0
?

단축키

Prev: 이전 문서

Next: 다음 문서

ESC: 닫기

크게 작게 위로 아래로 댓글로 가기 인쇄
import os
import pandas as pd
import numpy as np
import PyPDF2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

# Extract the text of every PDF in a folder
def extract_text_from_pdfs(pdf_folder):
    """Return the extracted text of each PDF file found directly in ``pdf_folder``.

    Args:
        pdf_folder: Path of the directory to scan (sub-directories are not
            visited).

    Returns:
        A list with one string per PDF, in sorted file-name order. Each
        string is the text of that PDF's pages joined with a single space;
        pages that yield no text are skipped.

    Raises:
        OSError: If ``pdf_folder`` cannot be listed or a file cannot be opened.
    """
    data = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # result (and everything derived from it) reproducible between runs.
    for file in sorted(os.listdir(pdf_folder)):
        # Compare case-insensitively so "REPORT.PDF" is picked up as well.
        if not file.lower().endswith(".pdf"):
            continue
        with open(os.path.join(pdf_folder, file), "rb") as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # Extract each page exactly once: extraction is the expensive
            # step, and filtering on a second call doubled the work.
            page_texts = (page.extract_text() for page in reader.pages)
            # NOTE(review): pages are joined with a space, so the last line
            # of one page and the first line of the next end up on the same
            # line for callers that split on "\n" — confirm this is intended.
            text = " ".join(t for t in page_texts if t)
            data.append(text)
    return data

# Turn raw text into a table of whitespace-separated tokens
def preprocess_data(text_data):
    """Build a DataFrame holding one row of tokens per usable text line.

    Every string in ``text_data`` is cut at newline characters and each
    resulting line is split on whitespace. Lines that produce fewer than two
    tokens are discarded; the remaining token lists become the DataFrame rows.
    """
    token_rows = [
        tokens
        for document in text_data
        for tokens in map(str.split, document.split("\n"))
        if len(tokens) >= 2
    ]
    return pd.DataFrame(token_rows)

# Build the LSTM model
def create_lstm_model(input_shape):
    """Assemble and compile a stacked LSTM network with a single output unit.

    The layer stack is: LSTM(50, relu, returning sequences) -> Dropout(0.2)
    -> LSTM(50, relu) -> Dropout(0.2) -> Dense(1). The model is compiled
    with the Adam optimizer and mean-squared-error loss.

    Args:
        input_shape: Shape handed to the first LSTM layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential()
    # First recurrent layer emits the full sequence so a second LSTM can follow.
    network.add(
        LSTM(50, activation='relu', return_sequences=True, input_shape=input_shape)
    )
    network.add(Dropout(0.2))
    network.add(LSTM(50, activation='relu'))
    network.add(Dropout(0.2))
    network.add(Dense(1))
    network.compile(optimizer='adam', loss='mse')
    return network

# Train on a series and predict its next value
def train_and_predict_lstm(data, lookback=5, *, epochs=20, verbose=1):
    """Fit an LSTM on a 1-D series and predict the value following its end.

    The series is converted to a float32 column vector, sliced into
    ``lookback``-long windows (each paired with the value right after it),
    and used to train a fresh model from ``create_lstm_model``. The final
    ``lookback`` values are then fed to the trained model.

    Args:
        data: 1-D sequence of numeric values (list, array, pandas Series).
        lookback: Number of consecutive past values in each input window.
        epochs: Number of training epochs passed to ``model.fit``.
        verbose: Verbosity level passed to ``model.fit``.

    Returns:
        The output of ``model.predict`` for the single window made of the
        last ``lookback`` values.

    Raises:
        ValueError: If ``lookback`` is smaller than 1, or if ``data`` has
            ``lookback`` or fewer values (no training window can be formed).
    """
    if lookback < 1:
        raise ValueError(f"lookback must be at least 1, got {lookback}")

    data = np.asarray(data, dtype=np.float32).reshape(-1, 1)

    # Each training sample needs `lookback` inputs plus one target value, so
    # fail here with a clear message instead of deep inside Keras.
    if len(data) <= lookback:
        raise ValueError(
            f"need more than {lookback} data points to train with "
            f"lookback={lookback}, got {len(data)}"
        )

    generator = TimeseriesGenerator(data, data, length=lookback, batch_size=1)
    model = create_lstm_model((lookback, 1))
    model.fit(generator, epochs=epochs, verbose=verbose)
    # Shape the trailing window as (batch=1, timesteps=lookback, features=1).
    predictions = model.predict(data[-lookback:].reshape(1, lookback, 1))
    return predictions

# Example run
LOOKBACK = 5  # window length handed to train_and_predict_lstm
pdf_folder = "./pdf_data"  # folder that holds the input PDF files
pdf_texts = extract_text_from_pdfs(pdf_folder)
df = preprocess_data(pdf_texts)

df.to_csv("processed_data.csv", index=False)

# Prepare the data and predict (uses the column at position 1 as an example)
if df.shape[1] > 1:
    # Tokens taken from PDF text are strings and are often non-numeric
    # (labels, headers). astype(float) alone raises on the first such token,
    # so coerce unparsable tokens to NaN and drop them before converting.
    target_column = (
        pd.to_numeric(df.iloc[:, 1], errors="coerce").dropna().astype(float)
    )
    # Training needs at least one full window plus one target value.
    if len(target_column) > LOOKBACK:
        prediction = train_and_predict_lstm(target_column, lookback=LOOKBACK)
        print("예측 결과:", prediction)
    else:
        # Message: "Prediction skipped: not enough numeric data."
        print("예측 생략: 숫자 데이터가 부족합니다.")