Source code for upsp.cam_cal_utils.photogrammetry

import cv2
import numpy as np
import copy
from scipy.spatial.transform import Rotation as scipy_Rotation

np.set_printoptions(linewidth=180, precision=4, threshold=np.inf)

#---------------------------------------------------------------------------------------
# General Photogrammetry Utility Functions


[docs]def rot(angle, axis):
    """Rotation matrix for angle-axis transformation

    An identity matrix is rotated about the given axis by the given angle (in degrees)

    Parameters
    ----------
    angle : float
        angle of rotation (in degrees) about the given axis
    axis : {'x', 'y', 'z'}
        axis to rotate about

    Returns
    ----------
    np.ndarray, shape (3, 3), float
        Rotation matrix of an identity matrix rotated about the given axis by the given
        amount
    """

    assert_error_msg = "Error in photogrammetry.rot. Axis must be 'x', 'y', or 'z'."
    assert axis in ['x', 'y', 'z'], assert_error_msg

    angle_rad = np.deg2rad(angle)

    if axis == 'x':
        return np.array([[1,                 0,                  0],
                         [0, np.cos(angle_rad), -np.sin(angle_rad)],
                         [0, np.sin(angle_rad),  np.cos(angle_rad)]])

    if axis == 'y':
        return np.array([[np.cos(angle_rad), 0,   np.sin(angle_rad)],
                        [0,                  1,                   0],
                        [-np.sin(angle_rad), 0,   np.cos(angle_rad)]])

    if axis == 'z':
        return np.array([[np.cos(angle_rad), -np.sin(angle_rad), 0],
                         [np.sin(angle_rad),  np.cos(angle_rad), 0],
                         [0,                  0,                 1]])


[docs]def invTransform(R, t):
    """Returns the inverse transformation of the given `R` and `t`

    Parameters
    ----------
    R : np.ndarray, shape (3, 3), float
        Rotation Matrix
    t : np.ndarray (3, 1), float
        Translation Vector

    Returns
    ----------
    rmat_i : np.ndarray, shape (3, 3), float
        Inverse rotation matrix
    tvec_i : np.ndarray, shape (3, 1), float
        Inverse translation vector
    """

    R_transpose = R.transpose()
    return (R_transpose, -np.matmul(R_transpose, t))


[docs]def isRotationMatrix(R):
    """Checks if a matrix is a valid rotation matrix

    Parameters
    ----------
    R : np.ndarray, shape (3, 3), float
        Rotation Matrix

    Returns
    ----------
    bool
        True if R is a valid rotation matrix, and False if it is not
    """

    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    n = np.linalg.norm(np.identity(3, dtype=R.dtype) - shouldBeIdentity)
    return n < 1e-6


[docs]def isRotationMatrixRightHanded(R):
    """ Returns True if matrix is right handed, and False it is not

    Parameters
    ----------
    R : np.ndarray (3, 3), float
        Rotation Matrix

    Returns
    ----------
    boolean
        True if R is a right handed rotation matrix, and False if it is not
    """
    return np.dot(np.cross(R[:,0], R[:,1]), R[:,2]) > 0.


[docs]def rotationMatrixToTunnelAngles(R):
    """Converts rotation matrix to tunnel angles

    Parameters
    ----------
    R : np.ndarray, shape (3, 3), float
        Rotation Matrix

    Returns
    ----------
    np.ndarray (3, 1), float
        Array of tunnel angle (alpha, beta, phi)

    See Also
    ----------
    isRotationMatrix : Checks if a matrix is a valid rotation matrix
    """
    # Check that the given matrix is a valid roation matrix
    assert(isRotationMatrix(R))

    # Use scipy to convert to yzx euler angles
    r = scipy_Rotation.from_matrix(R)
    alpha, beta, neg_phi = np.array(r.as_euler('yzx', degrees=True))

    # Return the tunnel angles
    return np.expand_dims([alpha, beta, -neg_phi], 1)


[docs]def transform_3d_point(rmat, tvec, obj_pts):
    """Transform 3D points from the object frame to the camera frame.

    `rmat` and `tvec` are the transformation from the camera to the object's frame.
    ``obj_pts[i]`` is the position of point ``i`` relative to the object frame. The
    function returns the points relative to the camera.

    Parameters
    ----------
    rmat : np.ndarray, shape (3, 3), float
        Rotation matrix from camera to object
    tvec : np.ndarray (3, 1), float
        Translation vector from camera to object
    obj_pts : np.ndarray (n, 3), float
        List-like of 3D positions. The positions are relative to the object frame
        defined by rmat and tvec

    Returns
    ----------
    np.ndarray (n, 3) of floats
        Array of transformed points. return[i] is the transformed obj_pts[i]
    """
    obj_pts_tf = np.squeeze(np.array([np.matmul(rmat, np.expand_dims(pt, 1)) + tvec for pt in obj_pts]), axis=2)
    return obj_pts_tf


[docs]def project_3d_point(rmat, tvec, cameraMatrix, distCoeffs, obj_pts, ret_jac=False, ret_full_jac=False):
    """Projects targets into an image.

    Parameters
    ----------
    rmat : np.ndarray, shape (3, 3), float
        Rotation matrix from camera to object
    tvec : np.ndarray, shape (3, 1), float
        Translation vector from camera to object
    cameraMatrix : np.ndarray, shape (3, 3), float
        The (openCV formatted) camera matrix for the camera
    distCoeffs : np.ndarray (1, 5), float
        The (openCV formatted) distortion coefficients for the camera
    obj_pts : np.ndarray, shape (n, 3), float
        The 3D position of the points on the object (relative to the object frame) to
        be projected
    ret_jac : bool, optional, default=False
        If True, returns the Jacobian. If False only the projection is returned
    ret_full_jac : bool, optional, default=False
        If True, returns full 15 term Jacobian (delta rvec, delta tvec, delta focal
        length, delta principal point, delta distortion). If False, returns the 6 term
        Jacobian (delta rvec, delta tvec)

    Returns
    ----------
    projs, np.ndarray, shape (n, 2), float
        Projected pixel positions
    jacs : np.ndarray, shape (n, 2, 6) or (n, 2, 15), float
        Jacobian of the projected pixel location. Only returned if `ret_jac` is True.
        ``jacs[i]`` is associated with ``projs[i]``. ``jacs[:, 0, :]`` is the x axis,
        ``jacs[:, 1, :]`` is the y axis.  Axis 2 of `jacs` is the partial derivative
        related to the inputs (delta rvec, delta tvec, etc).
    """
    if not len(obj_pts):
        return np.array([])

    # Convert the rmat into an rvec
    rvec, _ = cv2.Rodrigues(rmat)

    # Project the points onto the image
    obj_pts = np.array(obj_pts).astype(np.float64)
    projs, jacs = cv2.projectPoints(obj_pts,
                                    rvec=rvec, tvec=tvec,
                                    cameraMatrix=cameraMatrix, distCoeffs=distCoeffs)

    # Squeeze the extra dimension out
    projs = projs.squeeze(axis=1)

    # Add the jacobian if desired
    if ret_jac:
        if not ret_full_jac:
            # Crop just the delta-rotation-vector and delta-translation-vector terms
            jacs = jacs[:, :6]

        # Reshape jac so it has a dimension for pixel (u, v)
        jacs = jacs.reshape(jacs.shape[0]//2, 2, jacs.shape[1])

        return projs, jacs

    # If the jacobian is not required, return just he projected pixel locations
    else:
        return projs


#---------------------------------------------------------------------------------------
# Target Functions

[docs]def transform_targets(rmat, tvec, tgts):
    """Transform the targets by the given transformation values.

    `rmat` and `tvec` are the transformation from the camera to the object's frame.
    ``tgts[i]['tvec']`` is the position of target ``i`` relative to the object frame.
    ``tgts[i]['norm']`` is the normal of target ``i`` relative to the object frame. The
    function returns the targets such that 'tvec' and 'norm' are relative to the camera.

    Parameters
    ----------
    rmat : np.ndarray, shape (3, 3), float
        Rotation matrix from camera to object
    tvec : np.ndarray, shape (3, 1), float
        Translation vector from camera to object
    tgts : list of dict
        Each target is a dict with (at a minimum) a 'tvec', and 'norm' attribute. 'tvec'
        is the position of the target relative to the object frame. 'norm' is he normal
        of the target relative to the object frame. All other attributes will be copied
        to the transformed targets.

    Returns
    ----------
    tgts_tf : list of dict
        List of targets. Each target has the attributes of the input targets, except
        the transformed targets are transformed to be relative to the camera
    """

    tgts_tf = []
    for tgt in tgts:
        # Transform the target tvec and norm to be relative to the camera
        tgt_tf_tvec = transform_3d_point(rmat, tvec, tgt['tvec'].T).T
        tgt_tf_norm = np.matmul(rmat, tgt['norm'])

        tgt_copy = copy.deepcopy(tgt)
        tgt_copy['tvec'] = tgt_tf_tvec
        tgt_copy['norm'] = tgt_tf_norm

        tgts_tf.append(tgt_copy)

    return tgts_tf


[docs]def project_targets(rmat, tvec, cameraMatrix, distCoeffs, tgts, dims=None):
    """Projects targets into an image.

    Parameters
    ----------
    rmat : np.ndarray, shape (3, 3), float
        Rotation matrix from camera to object
    tvec : np.ndarray (3, 1), float
        Translation vector from camera to object
    cameraMatrix : np.ndarray, shape (3, 3), float
        The (openCV formatted) camera matrix for the camera
    distCoeffs : np.ndarray, shape (5, 1) or (5,), float
        The (openCV formatted) distortion coefficients for the camera
    tgts : list of dict
        Each target is a dict with (at a minimum) 'tvec' and 'target_type' attributes.
        The 'tvec' attribute gives the target's location and 'target_type' is a string
        denoting the type of target (most commonly 'dot' or 'kulite')
    dims : array_like, length 2, optional
        Dimensions of image in (width, height). If not None, any targets that get
        projected outside the image (x < 0, x > dims[1], y < 0, y > dims[0]) will be
        None rather than having a value

    Returns
    ----------
    list of dict
        List of target projections. Each target projections is a dict with the
        attributes 'target_type' and 'proj'. If `dims` is not None, some target
        projections may be None instead
    """
    if (len(tgts) == 0):
        return []

    # Package the 3D point translation vectors
    obj_pts = np.array([tgt['tvec'] for tgt in tgts])

    # Project the points onto the image
    projs = project_3d_point(rmat, tvec, cameraMatrix, distCoeffs, obj_pts)

    # Repackage the projected points with their target type
    tgt_projs = []
    for proj, tgt, in zip(projs, tgts):
        tgt_projs.append({'target_type': tgt['target_type'], 'proj': proj})

    # If dims is given, filter projections outside the image
    if dims is not None:
        tgt_projs_temp = []
        for tgt_proj in tgt_projs:
            pt = tgt_proj['proj']
            # If the point is within the image bounds given, add it to the temp list
            if (0.0 <= pt[0] <= dims[1] - 1) and (0.0 <= pt[1] <= dims[0] - 1):
                tgt_projs_temp.append(tgt_proj)
            else:
                tgt_projs_temp.append(None)
        tgt_projs = tgt_projs_temp

    return tgt_projs


[docs]def get_occlusions_targets(rmat, tvec, tgts, vis_checker):
    """Wrapper around :mod:`~upsp.cam_cal_utils.visibility` methods

    Parameters
    ----------
    rmat : np.ndarray, shape (3, 3), float
        Rotation matrix from camera to object
    tvec : np.ndarray (3, 1), float
        Translation vector from camera to object
    tgts : list of dict
        Each target is a dict with (at a minimum) a 'tvec', and 'norm' attribute. The
        'tvec' attribute gives the target's location and the 'norm' attribute gives the
        target's normal vector
    vis_checker : ~upsp.cam_cal_utils.visibility.VisibilityChecker
        Visibility checker object with the relevant BVH and oblique viewing angle

    Returns
    ----------
    occlusions : list of tuple
        The first element of each tuple is a boolean. This boolean denotes if there was
        an occlusion or not. The second element of each tuple is the point that the
        occlusion happens. If there was no occlusion (the boolean from the first element
        would be False), the value [0, 0, 0] is returned.
    """
    # Get the position and orientation of the camera in the tgts frame
    rmat_model2camera, tvec_model2camera = invTransform(rmat, tvec)

    # Package the tvecs and normals of the targets
    tvecs = []
    norms = []
    for tgt in tgts:
        tvecs.append(tgt['tvec'])
        norms.append(tgt['norm'])

    occlusions = []
    for i, node_data in enumerate(zip(tvecs, norms)):
        # Unpackage the data
        node, normal = node_data

        # Check for occlusion

        # Get the direction of the ray
        direction = tvec_model2camera - node
        direction /= np.linalg.norm(direction)

        # Get the origin at a point differentiably close the the vertex
        origin = node + vis_checker.epsilon * normal

        # If it is occluded, mark it occluded and move on to the next node
        occlusions.append(vis_checker.does_intersect(origin, direction, return_pos=True))

    return occlusions


[docs]def get_visible_targets(rmat, tvec, tgts, vis_checker):
    """Wrapper around :meth:`~upsp.cam_cal_utils.visibility.VisibilityChecker.is_visible`

    Parameters
    ----------
    rmat : np.ndarray, shape (3, 3), float
        Rotation matrix from camera to object
    tvec : np.ndarray (3, 1), float
        Translation vector from camera to object
    tgts : list of dict
        Each target is a dict with (at a minimum) a 'tvec', and 'norm' attribute.  The
        'tvec' attribute gives the target's location and the 'norm' attribute gives the
        target's normal vector. Both are np.ndarrays (n, 3) of floats. Returned targets
        will have the same additionalk attributes as the input targets
    vis_checker : ~upsp.cam_cal_utils.visibility.VisibilityChecker
        Visibility checker object with the relevant BVH and oblique viewing angle

    Returns
    ----------
    list of dict
        List of visible targets. Returned targets are references to input targets.
        Returned list is the subset of the input targets that are visible to the camera
    """

    # Get the position and orientation of the camera in the tgts frame
    rmat_model2camera, tvec_model2camera = invTransform(rmat, tvec)

    # Package the tvecs and normals of the targets
    tvecs = []
    norms = []
    for tgt in tgts:
        tvecs.append(np.array(tgt['tvec']))
        norms.append(np.array(tgt['norm']))
    tvecs = np.squeeze(np.array(tvecs), 2)
    norms = np.squeeze(np.array(norms), 2)

    # Get the visible targets and return
    visible_indices = vis_checker.is_visible(tvec_model2camera, tvecs, norms)
    tgts_visibles = [tgts[x] for x in visible_indices]
    return tgts_visibles


[docs]def reprojection_error(rmat, tvec, cameraMatrix, distCoeffs, tgts, img_targets):
    """Calculates RMS reprojection error

    Parameters
    ----------
    rmat : np.ndarray, shape (3, 3), float
        Rotation matrix from camera to object
    tvec : np.ndarray, shape (3, 1), float
        Translation vector from camera to object
    cameraMatrix : np.ndarray, shape (3, 3), float
        The (openCV formatted) camera matrix for the camera
    distCoeffs : np.ndarray, shape (1, 5), float
        The (openCV formatted) distortion coefficients for the camera
    tgts : list of dict
        Each target is a dict with (at a minimum) 'tvec', 'norm', and 'target_type'
        attributes. The 'tvec' attribute gives the target's location and the 'norm'
        attribute gives the target's normal vector. 'target_type' is a string denoting
        the type of target (most commonly 'dot' or 'kulite')
    img_targets : list of dict
        Each ``img_target`` is a dict with (at a minimum) a 'center' attribute. The
        'center' attribute gives the pixel position of the target's center in the image.
        ``img_targets[i]`` is associated with ``tgts[i]``

    Returns
    ----------
    rms, max_dist : float
        RMS distance and maximum distance
    """

    if len(tgts) == 0:
        return np.inf, np.inf

    # Project inlier targets into image
    tgt_projs = project_targets(
        rmat, tvec, cameraMatrix, distCoeffs, tgts)

    # Calculate the reprojection error
    rms = 0
    max_dist = -np.inf
    for proj, img_pt in zip(tgt_projs, img_targets):
        # Get the Euclidean distance between the projection and detection img target
        dist = np.linalg.norm([proj['proj'][0] - img_pt['center'][0],
                               proj['proj'][1] - img_pt['center'][1]])

        # Check if this is the largest distance thus far
        max_dist = max(dist, max_dist)

        # Add the square of the distance to the sum
        rms += dist**2

    # Divide by the number of targets to get the mean of the squares
    rms /= len(tgts)

    # Take the square root to get the room mean square error
    rms = np.sqrt(rms)

    return rms, max_dist
uPSP

Navigation

Related Topics

Source code for upsp.cam_cal_utils.photogrammetry