Source code for opengnc.optimal_control.finite_horizon_lqr

"""
Finite-Horizon Linear Quadratic Regulator (LQR).
"""

from collections.abc import Callable
from typing import cast

import numpy as np
from scipy.integrate import solve_ivp



[docs]
class FiniteHorizonLQR:
    r"""
    Finite-Horizon Linear Quadratic Regulator (LQR).

    Computes a time-varying optimal control law $u(t) = -K(t)x(t)$ for systems
    over a fixed time interval $[0, T]$.

    Objective: $\min J = x(T)^T P_f x(T) + \int_{0}^{T} (x^T Q(t) x + u^T R(t) u) dt$

    Parameters
    ----------
    A_fn : Callable[[float], np.ndarray]
        Time-varying state matrix $A(t)$ (nx x nx).
    B_fn : Callable[[float], np.ndarray]
        Time-varying input matrix $B(t)$ (nx x nu).
    Q_fn : Callable[[float], np.ndarray]
        Time-varying state cost matrix $Q(t)$ (nx x nx).
    R_fn : Callable[[float], np.ndarray]
        Time-varying input cost matrix $R(t)$ (nu x nu).
    Pf : np.ndarray
        Terminal state cost matrix at $t=T$ (nx x nx).
    T : float
        The fixed final time of the optimization horizon.
    """

    def __init__(
        self,
        A_fn: Callable[[float], np.ndarray],
        B_fn: Callable[[float], np.ndarray],
        Q_fn: Callable[[float], np.ndarray],
        R_fn: Callable[[float], np.ndarray],
        Pf: np.ndarray,
        T: float,
    ) -> None:
        """Initialize the Finite-Horizon LQR problem."""
        self.A_fn = A_fn
        self.B_fn = B_fn
        self.Q_fn = Q_fn
        self.R_fn = R_fn
        self.Pf = np.asarray(Pf)
        self.T = float(T)
        self.P_trajectory: np.ndarray | None = None
        self.t_span: np.ndarray | None = None


[docs]
    def solve(self, num_points: int = 100) -> tuple[np.ndarray, np.ndarray]:
        r"""
        Solve the Differential Riccati Equation (DRE) backwards in time.

        $-\dot{P} = P A + A^T P - P B R^{-1} B^T P + Q$, with $P(T) = P_f$

        Parameters
        ----------
        num_points : int, optional
            Number of points for the solution trajectory. Default is 100.

        Returns
        -------
        Tuple[np.ndarray, np.ndarray]
            A tuple of (t_span, P_trajectory) where P_trajectory is (num_points, nx, nx).
        """
        nx = self.Pf.shape[0]

        def dre(t: float, p_flat: np.ndarray) -> np.ndarray:
            p_mat = p_flat.reshape((nx, nx))
            a_cur = self.A_fn(t)
            b_cur = self.B_fn(t)
            q_cur = self.Q_fn(t)
            r_cur = self.R_fn(t)

            # Solve for R^-1 * B.T @ P numerically
            k_term = np.linalg.solve(r_cur, b_cur.T @ p_mat)
            p_dot = -(p_mat @ a_cur + a_cur.T @ p_mat - p_mat @ b_cur @ k_term + q_cur)
            return cast(np.ndarray, p_dot.flatten())

        # Backward integration from T to 0
        t_eval = np.linspace(self.T, 0, num_points)
        sol = solve_ivp(dre, [self.T, 0], self.Pf.flatten(), t_eval=t_eval, method="RK45")

        # Reverse results to be monotonic from 0 to T
        self.t_span = cast(np.ndarray, sol.t[::-1])
        self.P_trajectory = cast(np.ndarray, sol.y.T[::-1].reshape((-1, nx, nx)))

        return self.t_span, self.P_trajectory



[docs]
    def get_gain(self, t: float) -> np.ndarray:
        """
        Compute/interpolate the optimal feedback gain K at time t.

        Parameters
        ----------
        t : float
            Current time (s).

        Returns
        -------
        np.ndarray
            Feedback gain matrix K (nu x nx).
        """
        if self.P_trajectory is None or self.t_span is None:
            self.solve()
        if self.P_trajectory is None or self.t_span is None:
            raise RuntimeError("Riccati trajectory not available.")

        # Linearly interpolate each element of the P matrix
        nx = self.Pf.shape[0]
        p_t = np.zeros((nx, nx))
        for i in range(nx):
            for j in range(nx):
                p_t[i, j] = np.interp(t, self.t_span, self.P_trajectory[:, i, j])

        b_cur = self.B_fn(t)
        r_cur = self.R_fn(t)
        return cast(np.ndarray, np.linalg.solve(r_cur, b_cur.T @ p_t))



[docs]
    def compute_control(self, x: np.ndarray, t: float) -> np.ndarray:
        """
        Compute optimal control input $u = -K(t)x$.

        Parameters
        ----------
        x : np.ndarray
            Current state vector (nx,).
        t : float
            Current time (s).

        Returns
        -------
        np.ndarray
            Optimal control input u (nu,).
        """
        k_t = self.get_gain(t)
        return cast(np.ndarray, -k_t @ np.asarray(x))