Source code for pdfbeaver.state_tracker

# src/pdfbeaver/state_tracker.py
"""Tracks the PDF Text State parameters as defined in the PDF 1.7 Reference.

Attributes:
    char_spacing (float): Character spacing ($Tc$). Defaults to 0.0.
    word_spacing (float): Word spacing ($Tw$). Defaults to 0.0.
    horiz_scaling (float): Horizontal scaling ($Tz$). Defaults to 100.0.
    leading (float): Text leading ($Tl$). Defaults to 0.0.
    font_name (Optional[str]): The PostScript name of the current font
        (e.g. 'Helvetica-Bold'), if available. This is *not* the resource
        name (e.g. '/F1').
    fontsize (float): The current font size ($Tfs$).
    render_mode (int): The text rendering mode ($Tr$).
    rise (float): Text rise ($Ts$). Defaults to 0.0.
    knockout (bool): Text knockout flag. Defaults to True.
    matrix (List[float]): The Text Matrix ($Tm$), stored as a 6-element list
        ``[a, b, c, d, e, f]``.
    line_matrix (List[float]): The Text Line Matrix ($Tlm$), stored as a
        6-element list.
"""
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import numpy as np

from .utils.pdf_conversion import miner_matrix_to_np

logger = logging.getLogger(__name__)


@dataclass
class TextState:  # pylint: disable=too-many-instance-attributes
    """Container for the PDF text-state parameters tracked by pdfbeaver.

    Attributes:
        char_spacing (float): Character spacing ($Tc$). Defaults to 0.0.
        word_spacing (float): Word spacing ($Tw$). Defaults to 0.0.
        horiz_scaling (float): Horizontal scaling ($Tz$). Defaults to 100.0.
        leading (float): Text leading ($Tl$). Defaults to 0.0.
        font_name (Optional[str]): PostScript name of the current font
            (e.g., 'Helvetica-Bold') when one is available, otherwise ``None``.
            This is *not* the Resource Name (e.g., '/F1').
        fontsize (float): Current font size ($Tfs$). Defaults to 0.0.
        render_mode (int): Text rendering mode ($Tr$). Defaults to 0.
        rise (float): Text rise ($Ts$). Defaults to 0.0.
        knockout (bool): Text knockout flag. Defaults to True.
        matrix (List[float]): The Text Matrix ($Tm$) as a 6-element list
            ``[a, b, c, d, e, f]``. Defaults to the identity.
        line_matrix (List[float]): The Text Line Matrix ($Tlm$) as a 6-element
            list. Defaults to the identity.
    """

    char_spacing: float = 0.0
    word_spacing: float = 0.0
    horiz_scaling: float = 100.0
    leading: float = 0.0
    font_name: Optional[str] = None
    fontsize: float = 0.0
    render_mode: int = 0
    rise: float = 0.0
    knockout: bool = True

    # Each instance gets its own identity matrix [a, b, c, d, e, f]; a factory
    # is required because a list default would be shared between instances.
    matrix: List[float] = field(
        default_factory=lambda: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    )
    line_matrix: List[float] = field(
        default_factory=lambda: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    )

    def copy(self) -> "TextState":
        """Return a new ``TextState`` with the same values.

        The two matrix attributes are duplicated as fresh lists so that editing
        the copy's matrices never affects this instance.
        """
        params = dict(vars(self))
        params["matrix"] = list(self.matrix)
        params["line_matrix"] = list(self.line_matrix)
        return TextState(**params)


@dataclass
class GraphicsState:
    """Tracks the PDF Graphics State parameters.

    Attributes:
        ctm (List[float]): The Current Transformation Matrix ($CTM$), stored as
            a 6-element list ``[a, b, c, d, e, f]``. Defaults to the identity
            ``[1.0, 0.0, 0.0, 1.0, 0.0, 0.0]``.
    """

    # Annotated as a plain list (not np.ndarray): the default factory and
    # copy() both produce lists.
    ctm: List[float] = field(
        default_factory=lambda: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    )

    def copy(self) -> "GraphicsState":
        """Return a copy of this state whose ``ctm`` is an independent list."""
        new_obj = GraphicsState(**self.__dict__)
        # Re-wrap in list() so the copy does not share the matrix with self.
        new_obj.ctm = list(self.ctm)
        return new_obj



[docs]
class StateTracker:
    """
    State tracker. Tracks the CTM (Graphics) and Text Matrices.

    This tracker acts as a bridge between the underlying ``pdfminer`` state machine
    and the ``pdfbeaver`` context. It ingests snapshots of the state provided by the
    iterator and makes them accessible in a clean, pythonic format.

    Attributes:
        gstate (GraphicsState): The current graphics state (holds the CTM).
        gstack (List[GraphicsState]): List of graphics states, created empty;
            none of the methods of this class modify it.
        textstate (TextState): The current text state.
        text_obj_active (bool): Flag initialised to ``False``; none of the
            methods of this class modify it.
    """

    def __init__(self) -> None:
        self.gstate = GraphicsState()
        self.gstack: List[GraphicsState] = []
        self.textstate = TextState()
        self.text_obj_active = False

    def get_matrices(self) -> Tuple[np.ndarray, np.ndarray]:
        """
        Calculates the effective transformation matrices.

        Both stored 6-element matrices are converted with ``miner_matrix_to_np``;
        the text matrix is then multiplied on the left of the CTM, the same
        order that ``get_current_user_pos`` applies to a point.

        Returns:
            Tuple[np.ndarray, np.ndarray]: A tuple containing:
                1. The CTM (3x3 numpy array).
                2. The Text Render Matrix (Tm x CTM) (3x3 numpy array).
        """
        ctm = miner_matrix_to_np(self.gstate.ctm)
        tm = miner_matrix_to_np(self.textstate.matrix)
        trm = tm @ ctm
        return ctm, trm

    def set_state(self, state: Dict[str, Any]) -> None:
        """
        Updates the internal state to match the snapshot provided by the iterator.
        This is the 'Passive Tracking' model: we trust the engine (pdfminer).

        Args:
            state: Mapping that may contain ``"ctm"`` (an iterable of matrix
                components) and/or ``"tstate"`` (an object exposing
                ``charspace``, ``wordspace``, ``scaling``, ``leading``,
                ``font``, ``fontsize``, ``render``, ``rise``, ``matrix`` and
                ``linematrix``). A falsy ``state`` is ignored; keys that are
                absent leave the corresponding state untouched.
        """
        if not state:
            return

        # Sync CTM (pdfminer provides tuple/list of 6 floats)
        if "ctm" in state:
            self.gstate.ctm = list(state["ctm"])

        # Sync Text State
        # Note: pdfminer returns a PDFTextState object in 'tstate'
        # We map specific fields we care about to our internal structure
        # (``knockout`` is not part of this mapping and keeps its value).
        if "tstate" in state:
            src = state["tstate"]
            dst = self.textstate

            dst.char_spacing = src.charspace
            dst.word_spacing = src.wordspace
            dst.horiz_scaling = src.scaling
            dst.leading = src.leading
            dst.font_name = getattr(src.font, "basefont", None) if src.font else None
            dst.fontsize = src.fontsize
            dst.render_mode = src.render
            dst.rise = src.rise
            # Copy into fresh lists, as done for the CTM above, so the tracker
            # never aliases the engine's objects and the values match the
            # ``List[float]`` type declared on ``TextState``.
            dst.matrix = list(src.matrix)
            # NOTE(review): the length of pdfminer's ``linematrix`` is not
            # checked here - confirm it really is a 6-element matrix.
            dst.line_matrix = list(src.linematrix)

    def get_snapshot(self) -> Dict[str, Any]:
        """Returns a snapshot of the current state.

        Returns:
            Dict[str, Any]: A dictionary with the keys ``"ctm"`` (a copy of the
            CTM list), ``"tstate"`` (a copy of the text state), ``"gstate"``
            (a copy of the graphics state) and ``"font_name"``.
        """
        return {
            # Copied so that mutating the snapshot cannot alter the live CTM.
            "ctm": list(self.gstate.ctm),
            "tstate": self.textstate.copy(),
            "gstate": self.gstate.copy(),
            "font_name": self.textstate.font_name,
        }

    def get_current_user_pos(self) -> np.ndarray:
        """
        Returns the (x, y) position of the cursor in User Space.

        Calculated as: Origin(0,0) x Tm x CTM.

        Returns:
            np.ndarray: A 3-element vector [x, y, 1] representing the cursor position.
        """
        # Start of text space (0, 0), as a homogeneous row vector
        p = np.array([0.0, 0.0, 1.0])

        # Apply Text Matrix (laid out for row-vector multiplication)
        a, b, c, d, e, f = self.textstate.matrix
        tm = np.array([[a, b, 0], [c, d, 0], [e, f, 1]])

        # Apply CTM
        ctm = miner_matrix_to_np(self.gstate.ctm)

        return p @ tm @ ctm