로그인
Sign in
한국어
?

단축키

Prev이전 문서

Next다음 문서

크게 작게 위로 아래로 댓글로 가기 인쇄
?

단축키

Prev이전 문서

Next다음 문서

크게 작게 위로 아래로 댓글로 가기 인쇄
import os
import pandas as pd
import numpy as np
import PyPDF2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

# Extract the text of every PDF file in a folder.
def extract_text_from_pdfs(pdf_folder):
    """Return the extracted text of each PDF file found in *pdf_folder*.

    Args:
        pdf_folder: Path of the directory to scan (top level only).

    Returns:
        A list with one string per PDF file, in sorted file-name order.
        Each string is the text of the file's pages joined with single
        spaces; pages that yield no text are skipped.
    """
    data = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # result reproducible across runs and platforms.
    for file in sorted(os.listdir(pdf_folder)):
        # Compare the extension case-insensitively so "REPORT.PDF" is kept.
        if not file.lower().endswith(".pdf"):
            continue
        with open(os.path.join(pdf_folder, file), "rb") as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # Extract each page exactly once; the join below consumes the
            # generator while the file is still open.
            page_texts = (page.extract_text() for page in reader.pages)
            data.append(" ".join(text for text in page_texts if text))
    return data

# Data preprocessing: turn raw text into a table of tokens.
def preprocess_data(text_data):
    """Tokenize every line of every text and collect the rows in a DataFrame.

    Args:
        text_data: Iterable of strings, each split into lines on newline
            characters.

    Returns:
        A pandas DataFrame with one row per line that contains at least two
        whitespace-separated tokens; each token occupies one column. Lines
        with fewer than two tokens are dropped.
    """
    def _token_rows():
        # Yield the token list of each line that has two or more tokens.
        for document in text_data:
            for raw_line in document.split("\n"):
                tokens = raw_line.split()
                if len(tokens) >= 2:
                    yield tokens

    return pd.DataFrame(list(_token_rows()))

# Build the LSTM model.
def create_lstm_model(input_shape):
    """Build and compile a stacked two-layer LSTM model.

    Args:
        input_shape: Shape of a single input sample; forwarded to the first
            LSTM layer as its ``input_shape``.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss) ending in
        a single-unit ``Dense`` layer.
    """
    network = Sequential()
    # The first LSTM returns the full sequence so the second LSTM receives
    # one vector per time step.
    network.add(
        LSTM(50, activation='relu', return_sequences=True, input_shape=input_shape)
    )
    network.add(Dropout(0.2))
    network.add(LSTM(50, activation='relu'))
    network.add(Dropout(0.2))
    network.add(Dense(1))
    network.compile(optimizer='adam', loss='mse')
    return network

# Train an LSTM on a series and predict its next value.
def train_and_predict_lstm(data, lookback=5, epochs=20):
    """Fit an LSTM on a one-dimensional series and predict the next value.

    Args:
        data: Numeric values convertible by ``np.array(..., dtype=np.float32)``;
            the array is flattened into a single column.
        lookback: Number of consecutive values in each training window.
        epochs: Number of training epochs.

    Returns:
        The output of ``model.predict`` for one window made of the last
        ``lookback`` values of the series.

    Raises:
        ValueError: If ``lookback`` is less than 1, or if the series does not
            contain more than ``lookback`` values (no training window could
            be formed).
    """
    if lookback < 1:
        raise ValueError(f"lookback must be at least 1, got {lookback}")
    data = np.array(data, dtype=np.float32)
    data = data.reshape(-1, 1)
    # Fail early with a clear message instead of an opaque error from inside
    # the generator or the final reshape.
    if len(data) <= lookback:
        raise ValueError(
            f"need more than {lookback} data points to train, got {len(data)}"
        )
    # NOTE(review): TimeseriesGenerator appears to be deprecated in recent
    # TensorFlow/Keras releases - confirm against the installed version.
    generator = TimeseriesGenerator(data, data, length=lookback, batch_size=1)
    model = create_lstm_model((lookback, 1))
    model.fit(generator, epochs=epochs, verbose=1)
    predictions = model.predict(data[-lookback:].reshape(1, lookback, 1))
    return predictions

# Example run
pdf_folder = "./pdf_data"  # folder that holds the PDF files
pdf_texts = extract_text_from_pdfs(pdf_folder)
df = preprocess_data(pdf_texts)

df.to_csv("processed_data.csv", index=False)

# Prepare the data and predict (uses the column at index 1 as an example)
lookback = 5
if df.shape[1] > 1:
    # The tokens come from free-form PDF text, so non-numeric entries are
    # expected: coerce them to NaN and drop them instead of letting
    # astype(float) raise on the first one.
    target_column = pd.to_numeric(df.iloc[:, 1], errors="coerce").dropna()
    # Training needs more values than one lookback window.
    if len(target_column) > lookback:
        prediction = train_and_predict_lstm(target_column, lookback=lookback)
        print("예측 결과:", prediction)
    else:
        # User-facing message kept in Korean to match the existing output.
        print("예측에 필요한 숫자 데이터가 부족합니다.")

?

  1. 29Apr
    by
    Views 797 

    Horse racing betting strategies and tactics

  2. No Image 29Apr
    by
    Views 803 

    Horse racing proverb

  3. No Image 27Aug
    by
    Views 3296 

    Definition of Leisure Sports According to AIsports.kr

  4. No Image 14Mar
    by aisports
    Views 4307 

    AI Predicted Exam PDF Program - SAMPLE

  5. No Image 03Dec
    by
    Views 8578 

    [Recruitment] Seeking 'Founding Members' and 'Co-Development Partners' for aisports.kr MVP Development

  6. AISPORTS.KR Patent Technology Portfolio

Board Pagination Prev 1 Next
/ 1