Skip to content

RE:- 4090 Cuda 12.8 Not Using GPU #1392

Open
@TonyDewhurst10

Description

@TonyDewhurst10

Image

Any ideas why it refuses to use the GPU?

`import os
import easyocr
import torch
import logging
from pdf2image import convert_from_path
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# --- Configuration ---

INPUT_FOLDER = r"C:\files"  # Folder scanned for input PDFs
OUTPUT_FOLDER = r"C:\files\output"  # Receives the extracted .txt files and the log
NUM_THREADS = 4  # Adjust based on your CPU cores
DPI = 150  # DPI for PDF to image conversion
LOG_FILE = os.path.join(OUTPUT_FOLDER, "ocr_log.txt")

# --- Ensure Output Folder Exists ---

# This must happen before logging is configured: basicConfig(filename=...)
# opens LOG_FILE right away and fails if its parent directory is missing.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# --- Setup Logging ---

logging.basicConfig(
    filename=LOG_FILE,
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# --- Force GPU Usage ---

# Only touch the CUDA API when CUDA is actually usable. Calling
# torch.cuda.set_device() without a working CUDA runtime raises, which
# would abort the script instead of falling back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
    torch.cuda.set_device(0)  # Force CUDA device 0
else:
    device = "cpu"
print(f"Using device: {device}")
logging.info(f"Using device: {device}")

# --- Test PyTorch GPU ---

# Smoke test: try to place a one-element tensor on the GPU. A failure is
# reported and logged but deliberately does not stop the script.
try:
    test_tensor = torch.rand(1).cuda()
    print(f"✅ PyTorch Test Tensor allocated on: {test_tensor.device}")
except Exception as e:
    print(f"❌ PyTorch GPU Error: {e}")
    logging.error(f"PyTorch GPU Error: {e}")

# --- Initialize EasyOCR (Force GPU) ---

# One shared English reader, created once at import time and reused by
# process_pdf(). gpu=True asks EasyOCR for GPU inference.
# NOTE(review): EasyOCR appears to fall back to the CPU when torch reports
# no CUDA device, so check the device printed below — confirm against docs.
reader = easyocr.Reader(["en"], gpu=True, quantize=False)

# Debug: Check if EasyOCR is using GPU

print(f"✅ EasyOCR running on: {reader.device}")
logging.info(f"EasyOCR running on: {reader.device}")

def process_pdf(pdf_file):
    """Extract text from a single PDF using EasyOCR and save it as a .txt file.

    The PDF is rendered to one image per page, each page is OCR'd with the
    module-level ``reader``, and the combined text is written to
    ``OUTPUT_FOLDER/<pdf stem>.txt``. A PDF whose output file already exists
    is skipped.

    Args:
        pdf_file: File name (not a full path) of a PDF inside INPUT_FOLDER.

    Returns:
        A status string: "Skipping ...", "Processed: ..." or an "Error ..."
        message. Failures are logged and reported through the return value
        rather than raised.
    """
    # Local import: the file does not import numpy at module level.
    import numpy as np

    stem = os.path.splitext(pdf_file)[0]
    text_path = os.path.join(OUTPUT_FOLDER, f"{stem}.txt")

    if os.path.exists(text_path):
        return f"Skipping {pdf_file}, already processed."

    pdf_path = os.path.join(INPUT_FOLDER, pdf_file)

    try:
        images = convert_from_path(pdf_path, dpi=DPI)
        if not images:
            logging.error(f"No images generated for {pdf_file}.")
            return f"Error: No images generated for {pdf_file}."

        page_texts = []
        page_count = len(images)
        for page_no, img in enumerate(images, start=1):
            logging.debug(f"Processing image {page_no} of {page_count}")
            # pdf2image yields PIL images, while EasyOCR documents file
            # paths, bytes and numpy arrays as inputs, so convert first.
            result = reader.readtext(np.array(img), detail=0)
            logging.debug(f"OCR result for image {page_no}: {result}")
            page_texts.append("\n".join(result) + "\n")
        extracted_text = "".join(page_texts)

        # Every page contributes at least a newline, so test the stripped
        # text; a plain emptiness check would never trigger this warning.
        if not extracted_text.strip():
            logging.warning(f"No text extracted for {pdf_file}.")

        with open(text_path, "w", encoding="utf-8") as text_file:
            text_file.write(extracted_text)

        return f"Processed: {pdf_file}"

    except FileNotFoundError:
        logging.error(f"File not found: {pdf_path}")
        return f"Error: File not found: {pdf_file}"
    except Exception as e:
        # Per-file boundary: record the traceback and keep the batch going.
        logging.exception(f"Error processing {pdf_file}: {e}")
        return f"Error processing {pdf_file}: {e}"

# --- Main Execution ---

# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    # Collect every PDF (case-insensitive extension) in the input folder.
    pdf_files = [f for f in os.listdir(INPUT_FOLDER) if f.lower().endswith(".pdf")]
    logging.info(f"Found {len(pdf_files)} PDF files to process.")

    # Fan the files out over a thread pool; tqdm shows progress as results
    # arrive and list() drains the iterator before the pool shuts down.
    with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
        results = list(
            tqdm(
                executor.map(process_pdf, pdf_files),
                total=len(pdf_files),
                desc="Processing PDFs",
            )
        )

    for result in results:
        print(result)

    logging.info("OCR process completed.")
    print("OCR process completed! Check the log file for details.")

`

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions