from doclayout_yolo import YOLOv10
from huggingface_hub import hf_hub_download
# Download the DocStructBench checkpoint from the Hugging Face Hub and build
# the YOLOv10 layout detector from the downloaded file.
filepath = hf_hub_download(
    filename="doclayout_yolo_docstructbench_imgsz1024.pt",
    repo_id="juliozhao/DocLayout-YOLO-DocStructBench",
)
model = YOLOv10(filepath)
import torch
# Choose the inference device in order of preference: CUDA, then MPS, and
# finally the CPU when neither accelerator backend reports availability.
if torch.cuda.is_available():
    _device_name = "cuda"
elif torch.backends.mps.is_available():
    _device_name = "mps"
else:
    _device_name = "cpu"
device = torch.device(_device_name)
# Run layout detection on the sample page image using the selected device.
det_res = model.predict("input/2003-D30-000-0013.jpg", imgsz=1024, device=device)
import cv2

# Render the detections of the first result onto the image and save it.
annotated_frame = det_res[0].plot(pil=True, line_width=5, font_size=20)
# cv2.imwrite can report a failed write by returning False instead of raising,
# so check the result rather than silently losing the output file.
if not cv2.imwrite("result.jpg", annotated_frame):
    raise OSError("cv2.imwrite failed to write 'result.jpg'")
# Work with the first entry of the prediction results.
result = det_res[0]

# Find the class id whose label is "table" in the result's id -> name mapping;
# the value stays None when no class carries that label.  Using next() over a
# generator avoids shadowing the built-in id() and leaves no loop variables
# behind at module scope.
table_class_id = next(
    (class_id for class_id, label in result.names.items() if label == "table"),
    None,
)
# Bare expression kept from the original (presumably a notebook cell display);
# it has no effect when the file runs as a plain script.
table_class_id
# Collect every detection classified as a table, with its box and confidence.
detections = result.boxes
# Convert each tensor to plain Python lists once, instead of issuing a
# separate .item()/.cpu() transfer for every detection.
class_ids = detections.cls.tolist()
corner_boxes = detections.xyxy.tolist()
scores = detections.conf.tolist()

table_boxes = [
    {
        # Coordinates are read from boxes.xyxy, so expose them under the
        # matching name.
        "xyxy": corners,
        # NOTE: legacy key kept for backward compatibility.  Despite its name
        # it has always held the xyxy values, not x/y/width/height.
        "xywh": list(corners),
        "confidence": score,
    }
    for class_id, corners, score in zip(class_ids, corner_boxes, scores)
    if int(class_id) == table_class_id
]
# Bare expression kept from the original (presumably a notebook cell display);
# it has no effect when the file runs as a plain script.
table_boxes