from doclayout_yolo import YOLOv10
from huggingface_hub import hf_hub_download
# Hugging Face Hub coordinates of the DocLayout-YOLO checkpoint to load.
WEIGHTS_REPO_ID = "juliozhao/DocLayout-YOLO-DocStructBench"
WEIGHTS_FILENAME = "doclayout_yolo_docstructbench_imgsz1024.pt"

# Fetch the checkpoint from the Hub, then build the detector from the
# path returned by hf_hub_download.
filepath = hf_hub_download(repo_id=WEIGHTS_REPO_ID, filename=WEIGHTS_FILENAME)
model = YOLOv10(filepath)
import torch
# Pick the inference device by priority: CUDA first, then Apple MPS,
# and finally the CPU. The MPS check only runs when CUDA is unavailable.
if torch.cuda.is_available():
  device = torch.device("cuda")
elif torch.backends.mps.is_available():
  device = torch.device("mps")
else:
  device = torch.device("cpu")
# Run prediction on a single input image with the selected device.
# `det_res` is indexed below, so the first entry is the result for this image.
det_res = model.predict(
  "input/2003-D30-000-0013.jpg",
  imgsz=1024,
  device=device,
)
import cv2
# Render the first result's detections and save the visualization to disk.
# NOTE(review): cv2.imwrite needs an array-like image; confirm that
# plot(pil=True) returns one in the installed doclayout_yolo version.
annotated_frame = det_res[0].plot(
  pil=True,
  line_width=5,
  font_size=20,
)
cv2.imwrite("result.jpg", annotated_frame)
# Work with the first (only) prediction result from here on.
result = det_res[0]

# Look up the numeric class id whose label is "table" in the result's
# id -> name mapping. Stays None when the model has no such class, in which
# case no detection can match it further down.
# A generator is used instead of a module-level `for id, name in ...` loop so
# the builtin `id` is not rebound for the remainder of the script.
table_class_id = next(
  (
    class_id
    for class_id, class_name in result.names.items()
    if class_name == "table"
  ),
  None,
)
# Bare expression: no effect when run as a script; echoes the value when the
# code is executed in a REPL / notebook cell.
table_class_id
# Convert the per-detection tensors to plain Python lists once, instead of
# calling .item() / .cpu().numpy() per detection inside the loop (each of
# those is a separate device-to-host conversion).
class_ids = result.boxes.cls.tolist()
corner_boxes = result.boxes.xyxy.tolist()
confidences = result.boxes.conf.tolist()

# Keep only the detections whose class is "table". Each entry holds the box
# coordinates and the detection confidence as plain Python floats.
# NOTE(review): the "xywh" key has always carried the values of `boxes.xyxy`
# (presumably corner format x1, y1, x2, y2 -- confirm against the Boxes API),
# not centre/width/height. It is kept unchanged so existing consumers do not
# break; new code should read the correctly named "xyxy" key.
table_boxes = [
  {
    "xywh": corners,
    "xyxy": list(corners),  # separate list so the two keys do not alias
    "confidence": confidence,
  }
  for class_id, corners, confidence in zip(class_ids, corner_boxes, confidences)
  if int(class_id) == table_class_id
]
# Bare expression: no effect when run as a script; echoes the value when the
# code is executed in a REPL / notebook cell.
table_boxes