/*
    This class implements a simple camera model as often used in computer vision.

    Copyright (C) 2010 - 2019 Christoph Hänisch

    SurgiTAIX AG
    Kaiserstr. 100, TPH 1, 2. Et.
    52134 Herzogenrath
    Germany
*/

#ifndef PINHOLE_CAMERA_MODEL_HPP_4231789408
#define PINHOLE_CAMERA_MODEL_HPP_4231789408


#include <cassert>
#include <cmath>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>

#include <Eigen/Dense>
#include <Eigen/Eigenvalues>
#include <Eigen/Geometry>
#include <Eigen/SVD>

#ifdef CPPOPTLIB_FOUND
#include <cppoptlib/meta.h>
#include <cppoptlib/problem.h>
#include <cppoptlib/solver/bfgssolver.h>
#endif // CPPOPTLIB_FOUND

#include "ErrorObj.hpp"
#include "Coordinate.hpp"
#include "Range.hpp"


namespace TRTK
{

/**************************************************************
 *                      Helper function                       *
 **************************************************************/

template <class T>
std::tuple<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>, Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
computeRQDecomposition(const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> & A)
{
    /*
    This RQ decomposition scheme is based on the math.stackexchange article
    https://math.stackexchange.com/questions/1640695/rq-decomposition/1640762.

    Algorithm
    =========

    Given the square matrix A:

    1. Define P as the exchange matrix, i.e., an anti-diagonal matrix with ones along the counter-diagonal.
    2. Compute A_tilda = P * A.
    3. Compute the QR decomposition A_tilda^T = Q_tilda * R_tilda.
    4. Set Q := P * Q_tilda^T.
    5. Set R := P * R_tilda^T * P.

    Then it holds that

        R * Q = (P * R_tilda^T * P)(P * Q_tilda^T) = P * R_tilda^T * Q_tilda^T
              = P * (Q_tilda * R_tilda)^T = P * (A_tilda^T)^T
              = P * A_tilda = P * P * A = A

    since P = P^T and P * P = I.

    Uniqueness
    ==========

    Eigen's Householder QR decomposition is not unique. This can easily be rectified
    by changing QR to QDDR = (QD)(DR) where D = diag(sign(r11), ..., sign(rnn)). That
    is, a diagonal matrix D is formed whose entries are the signs of the entries of
    the main diagonal of R. Then the rows of DR are altered in such a way that the
    new diagonal entries are all positive. Since DD^T = DD = I it holds that
    (QD)(DR) = QR. Also, D is orthogonal and so is QD. Thus, the result is still a
    valid QR decomposition.

    This circumstance is used below when computing the QR decomposition of A_tilda^T.
    */

    using namespace std;
    using namespace Eigen;

    using Matrix = Eigen::Matrix<T, Dynamic, Dynamic>;

    const bool INPUT_MATRIX_SQUARE = A.cols() == A.rows();
    assert(INPUT_MATRIX_SQUARE);

    const int n = (int) A.cols();

    Matrix P = Matrix::Zero(n, n); // exchange matrix
    for (int i = 0; i < n; ++i) P(n-1-i, i) = 1;

    Matrix A_tilda = P * A; // reverse the rows of A
    HouseholderQR<Matrix> qr(A_tilda.transpose());
    Matrix Q_tilda = qr.householderQ();
    Matrix R_tilda = qr.matrixQR().triangularView<Upper>();
    DiagonalMatrix<T, Dynamic, Dynamic> D(n);
    for (int i = 0; i < n; ++i) D.diagonal()(i) = R_tilda(i, i) >= 0 ? 1 : -1;
    Q_tilda = Q_tilda * D;
    R_tilda = D * R_tilda; // make R unique by making its diagonal entries positive
    Matrix Q = P * Q_tilda.transpose();
    Matrix R = P * R_tilda.transpose() * P; // reverse the rows and columns of R_tilda

    const bool Q_IS_ORTHOGONAL = (Q * Q.transpose() - Matrix::Identity(n, n)).norm() < 1e-7;
    assert(Q_IS_ORTHOGONAL);

    return make_tuple(R, Q);
}
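
/*
    Example (illustrative sketch, not part of the original interface): the
    decomposition can be checked by verifying that R * Q reproduces the input,
    that R is upper triangular with a non-negative diagonal, and that Q is
    orthogonal. The function name below is hypothetical.

        #include <iostream>

        void rqDecompositionExample()
        {
            using Matrix = Eigen::MatrixXd;
            Matrix A = Matrix::Random(3, 3);
            auto [R, Q] = TRTK::computeRQDecomposition<double>(A);
            std::cout << (R * Q - A).norm() << std::endl;                                  // close to zero
            std::cout << (Q * Q.transpose() - Matrix::Identity(3, 3)).norm() << std::endl; // close to zero
        }
*/
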

/**************************************************************
 *                     PinholeCameraModel                     *
 **************************************************************/

template <class T>
class PinholeCameraModel
{
public:
    enum Error {
        NOT_ENOUGH_INPUT_DATA,
        UNEQUAL_CARDINALITY_OF_INPUT_SETS,
        UNKNOWN_ESTIMATION_METHOD,
        UNKNOWN_ERROR
    };

    enum Constraints {
        NO_SKEW,
        SAME_FOCAL_LENGTHS,
        SAME_FOCAL_LENGTHS_AND_NO_SKEW
    };

    using value_type = T;
    using Vector2T = Eigen::Matrix<T, 2, 1>;
    using Vector3T = Eigen::Matrix<T, 3, 1>;
    using Vector4T = Eigen::Matrix<T, 4, 1>;
    using VectorXT = Eigen::Matrix<T, Eigen::Dynamic, 1>;
    using Matrix3T = Eigen::Matrix<T, 3, 3>;
    using Matrix34T = Eigen::Matrix<T, 3, 4>;
    using Matrix4T = Eigen::Matrix<T, 4, 4>;
    using MatrixXT = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
    using Point = Coordinate<T>;

    PinholeCameraModel();
    virtual ~PinholeCameraModel();

    // Getters

    Vector3T getCameraPosition() const;
    Matrix3T getCameraOrientation() const;

    Matrix4T getExtrinsicParameters() const;
    Matrix34T getIntrinsicParameters() const;
    Matrix34T getCameraParameters() const;

    std::tuple<T, T> getFocalLengths() const;
    std::tuple<T, T> getImageCenter() const;
    T getSkew() const;

    // Setters

    void setCameraPosition(const Vector3T & position);
    void setCameraOrientation(const Matrix3T & orientation);

    void setExtrinsicParameters(const Matrix4T & parameters);
    void setIntrinsicParameters(const Matrix34T & parameters);

    void setFocalLengths(T f_x, T f_y);
    void setImageCenter(T c_x, T c_y);
    void setSkew(T skew);

    // Other methods

    Vector2T operator*(const Vector3T & point) const;
    Vector2T transform(const Vector3T & point) const;

    T estimate(const Range<Vector3T> & points_world, const Range<Vector2T> & points_display);

#ifdef CPPOPTLIB_FOUND
    T estimateWithConstraints(const Range<Vector3T> & points_world,
                              const Range<Vector2T> & points_display,
                              Constraints constraints = SAME_FOCAL_LENGTHS_AND_NO_SKEW,
                              bool initialize = true,
                              bool constrained_decomposition = true);
#endif // CPPOPTLIB_FOUND

private:
    Matrix4T T_pose = Matrix4T::Identity();
    Matrix34T T_proj = Matrix34T::Identity();
};


template <class T>
PinholeCameraModel<T>::PinholeCameraModel()
{
}


template <class T>
PinholeCameraModel<T>::~PinholeCameraModel()
{
}
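
/*
    Example (illustrative sketch): setting up a camera by hand and projecting a
    world point. All parameter values are arbitrary and only illustrate the call
    sequence; the orientation is left at its default (identity). The function
    name is hypothetical.

        void pinholeCameraExample()
        {
            TRTK::PinholeCameraModel<double> model;
            model.setFocalLengths(800, 800);                   // fx, fy
            model.setImageCenter(320, 240);                    // cx, cy
            model.setSkew(0);
            model.setCameraPosition(Eigen::Vector3d(0, 0, 0));
            Eigen::Vector2d pixel = model.transform(Eigen::Vector3d(10, 20, 100));
            // pixel is (fx * x / z + cx, fy * y / z + cy) = (400, 400)
        }
*/
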

template <class T>
T PinholeCameraModel<T>::estimate(const Range<Vector3T> & points_world, const Range<Vector2T> & points_display)
{
    /*

    Three different implementations were tested. Variant 1, as described in [4],
    worked fine for noise-free data but was very unstable for noisy data, especially
    with respect to the decomposition. Variants 2 and 3 are both described in [5] and
    are much more stable in the presence of noise. Also, the RMSE is much lower and
    the decomposition yields reasonable values. Variant 3 is possibly slightly less
    stable numerically than variant 2, but it is much faster and thus the final choice.

    All three implementations are kept in this header file for documentation purposes.


    References
    ==========

    [1] Hartley and Zisserman, "Multiple View Geometry in Computer Vision", 2003

    [2] Szeliski, "Computer Vision - Algorithms and Applications", 2011, Springer

    [3] Sutherland, "Three-Dimensional Data Input by Tablet", 1974, IEEE

    [4] Tuceryan et al., "Single point active alignment method (SPAAM) for optical
        see-through HMD calibration for AR", 2000, IEEE

    [5] Carl Olsson, Computer Vision, "Lecture 3: Camera Calibration, DLT, SVD", 2014,
        http://www.maths.lth.se/matematiklth/personal/calle/datorseende14/notes/forelas3.pdf

    */

    using namespace std;
    using namespace Eigen;

    // Check the preconditions.

    int number_of_points = (int) points_world.size();

    if (points_world.size() != points_display.size())
    {
        ErrorObj error;
        error.setClassName("PinholeCameraModel<T>");
        error.setFunctionName("estimate");
        error.setErrorMessage("Set sizes do not match.");
        error.setErrorCode(UNEQUAL_CARDINALITY_OF_INPUT_SETS);
        throw error;
    }

    if (number_of_points < 6)
    {
        ErrorObj error;
        error.setClassName("PinholeCameraModel<T>");
        error.setFunctionName("estimate");
        error.setErrorMessage("Not enough points (at least 6 required) to estimate the camera parameters.");
        error.setErrorCode(NOT_ENOUGH_INPUT_DATA);
        throw error;
    }

    enum Variants
    {
        VARIANT1, // Tuceryan (or Sutherland, but formulated as a null-space problem)
        VARIANT2, // Olsson (estimation of scaling factors)
        VARIANT3  // Olsson (cross product)
    } variant = VARIANT3;

    Matrix34T A; // camera matrix; estimated by one of the variants below and then decomposed

    switch (variant)
    {
        case VARIANT1:
        {
            /*

            This implementation is based on the work of Tuceryan et al. [4]. The QR decomposition
            is not mentioned in the paper, but solving the equations manually as described in
            [5] yields the exact same results (this was thoroughly tested). Note that the
            parameter and variable names may vary slightly from this article. The algorithm is
            essentially the same as the direct linear transformation algorithm described in [3].


            Algorithm
            =========

            Given: N corresponding point pairs (x'_i, y'_i) and (x_i, y_i, z_i)

            The projection can be described with the following relation

                p' ~ T_proj * T_pose * p = A * p        (where A = [a_ij])

            or written out

                | x' |   | u |   | fx  tau  cx  0 |   | r11  r12  r13  t1 |   | x |
                | y' | ~ | v | = | 0   fy   cy  0 | * | r21  r22  r23  t2 | * | y |        (1)
                | 1  |   | w |   | 0   0    1   0 |   | r31  r32  r33  t3 |   | z |
                                                      | 0    0    0    1  |   | 1 |

            The resulting vector (x', y', 1) equals the right side of the relation up
            to a scaling factor. Due to the normalization properties of homogeneous
            coordinates it holds that

                x' = u / w                                                                 (2)
                y' = v / w

            Rearranging equation (2) and inserting equation (1) into it yields

                u = x' * w   <==>   a11 * x + a12 * y + a13 * z + a14 = x' * (a31 * x + a32 * y + a33 * z + a34)        (3)
                v = y' * w   <==>   a21 * x + a22 * y + a23 * z + a24 = y' * (a31 * x + a32 * y + a33 * z + a34)

            By defining a vector c := (a11, a12, ..., a34)^T with the sought coefficients,
            equation (3) can again be rewritten to

                B * c = 0

            where

                B = | x  y  z  1  0  0  0  0  -x'x  -x'y  -x'z  -x' |                      (4)
                    | 0  0  0  0  x  y  z  1  -y'x  -y'y  -y'z  -y' |

            Actually, B must be formed from all measurements, B = [B_1^T, B_2^T, ..., B_N^T]^T.
            Here, the indices were dropped for ease of notation.

            Now, the sought parameters can be found as the null space of B. For this, a
            Lagrange function L := || B * c ||^2 - lambda (|| c ||^2 - 1) is defined and
            minimized. This can be justified by the fact that projection matrices (and
            thus also the vector c) may be arbitrarily scaled without changing their
            properties. Hence, c may be constrained to have a magnitude of 1 to avoid the
            trivial solution. This leads to an eigenvalue problem which can be solved in a
            numerically more stable way using a singular value decomposition (SVD). Then c
            is the column of V associated with the smallest singular value, where
            B = U * S * V^T.

            Finally, the matrix A must be decomposed into the projection matrix T_proj and
            the pose matrix T_pose. Using block matrices, equation (1) can also be written as

                | U  0 | * | R    t | = | U*R  U*t | = A                                   (5)
                           | 0^T  1 |

            where U is the left part of T_proj (which is an upper triangular 3-by-3 matrix)
            and where R is the upper left 3-by-3 sub-matrix of T_pose, which is orthogonal;
            t is the 3d translation vector of T_pose. That is, U * R is the left 3-by-3
            matrix of A and a product of an upper triangular matrix and an orthogonal
            matrix. This can be factorized with the RQ algorithm (similar to the QR
            algorithm). The remaining variables can then be easily computed.

            Note that not every implementation of the QR / RQ algorithm returns a unique
            upper triangular matrix. The above implementation does this by returning a
            triangular matrix whose diagonal entries are always positive. This is in
            accordance with the projection matrix of the pinhole camera model where both
            focal lengths are assumed to be positive.

            Note that U may be arbitrarily scaled and should be normalized in such a way
            that the coefficient u33 is equal to 1 (as is the case in the above definition
            of the projection matrix).

            */

            // Form the matrix B.

            MatrixXT B;
            B.resize(2 * number_of_points, 12);
            int i = 0;

            for (auto const & p_W : points_world)
            {
                auto & x = p_W.x();
                auto & y = p_W.y();
                auto & z = p_W.z();

                auto const & p_D = points_display.currentItem();
                auto & x_dash = p_D.x();
                auto & y_dash = p_D.y();
                points_display.next();

                B(i, 0) = x;
                B(i, 1) = y;
                B(i, 2) = z;
                B(i, 3) = 1;
                B(i, 4) = 0;
                B(i, 5) = 0;
                B(i, 6) = 0;
                B(i, 7) = 0;
                B(i, 8) = -x_dash * x;
                B(i, 9) = -x_dash * y;
                B(i, 10) = -x_dash * z;
                B(i, 11) = -x_dash;

                B(i+1, 0) = 0;
                B(i+1, 1) = 0;
                B(i+1, 2) = 0;
                B(i+1, 3) = 0;
                B(i+1, 4) = x;
                B(i+1, 5) = y;
                B(i+1, 6) = z;
                B(i+1, 7) = 1;
                B(i+1, 8) = -y_dash * x;
                B(i+1, 9) = -y_dash * y;
                B(i+1, 10) = -y_dash * z;
                B(i+1, 11) = -y_dash;

                i = i + 2;
            }

            // Compute the parameter vector c and rearrange c into A.

            JacobiSVD<MatrixXT> svd(B, ComputeThinV);
            VectorXT c = svd.matrixV().col(svd.matrixV().cols() - 1);
            A = Map<Matrix<T, 3, 4, RowMajor>>(c.data());

            break;
        }

        case VARIANT2: // estimate the projection matrix and the scale factors
        {
            /*

            This implementation is based on the computer vision lecture notes of Carl Olsson [5].
            The RQ decomposition differs from the description found in the lecture notes, but
            solving the equations manually as described in [5] yields the exact same results
            (this was thoroughly tested). Note that the parameter and variable names may vary
            from the reference.


            Algorithm
            =========

            Given: N corresponding point pairs (x'_i, y'_i) and (x_i, y_i, z_i)

            The projection can be described with the following relation

                p'_i ~ T_proj * T_pose * p_i = A * p_i        (where A = [a_kl])

            or

                lambda_i * p'_i = A * p_i                                                  (1)

            using an unknown scaling factor lambda_i. With

                    | A1^T |              | x'_i |
                A = | A2^T |   and p'_i = | y'_i |
                    | A3^T |              | z'_i |

            equation (1) can be rewritten as

                p_i^T * A1 - lambda_i * x'_i = 0
                p_i^T * A2 - lambda_i * y'_i = 0
                p_i^T * A3 - lambda_i * z'_i = 0

            or in matrix notation (z'_i = 1 due to the normalization)

                | p_i^T   0       0       -x'_i |   | A1       |   | 0 |
                | 0       p_i^T   0       -y'_i | * | A2       | = | 0 |
                | 0       0       p_i^T   -1    |   | A3       |   | 0 |
                                                    | lambda_i |

            For multiple entries we then have

                | p_1^T   0       0       -x'_1   0       0       ... |
                | 0       p_1^T   0       -y'_1   0       0       ... |   | A1       |
                | 0       0       p_1^T   -1      0       0       ... |   | A2       |
                | p_2^T   0       0       0       -x'_2   0       ... |   | A3       |
                | 0       p_2^T   0       0       -y'_2   0       ... | * | lambda_1 | = 0
                | 0       0       p_2^T   0       -1      0       ... |   | lambda_2 |
                | p_3^T   0       0       0       0       -x'_3   ... |   | lambda_3 |
                | 0       p_3^T   0       0       0       -y'_3   ... |   | ...      |
                | 0       0       p_3^T   0       0       -1      ... |
                | ...     ...     ...     ...     ...     ...     ... |

            or

                M * c = 0.

            Now, the sought parameters can be found as the null space of M. For this, a
            Lagrange function L := || M * c ||^2 - lambda (|| c ||^2 - 1) is defined and
            minimized. This can be justified by the fact that projection matrices (and
            thus also the vector c) may be arbitrarily scaled without changing their
            properties. Hence, c may be constrained to have a magnitude of 1 to avoid the
            trivial solution.
 This leads to an eigenvalue problem which can be solved in a
            numerically more stable way using a singular value decomposition (SVD). Then c
            is the column of V associated with the smallest singular value, where
            M = U * S * V^T.

            Finally, the matrix A must be decomposed into the projection matrix T_proj and
            the pose matrix T_pose. Using block matrices, equation (1) can also be written as

                | U  0 | * | R    t | = | U*R  U*t | = A                                   (2)
                           | 0^T  1 |

            where U is the left part of T_proj (which is an upper triangular 3-by-3 matrix)
            and where R is the upper left 3-by-3 sub-matrix of T_pose, which is orthogonal;
            t is the 3d translation vector of T_pose. That is, U * R is the left 3-by-3
            matrix of A and a product of an upper triangular matrix and an orthogonal
            matrix. This can be factorized with the RQ algorithm (similar to the QR
            algorithm). The remaining variables can then be easily computed.

            Note that not every implementation of the QR / RQ algorithm returns a unique
            upper triangular matrix. The above implementation does this by returning a
            triangular matrix whose diagonal entries are always positive. This is in
            accordance with the projection matrix of the pinhole camera model where both
            focal lengths are assumed to be positive.

            Note that U may be arbitrarily scaled and should be normalized in such a way
            that the coefficient u33 is equal to 1 (as is the case in the above definition
            of the projection matrix).

            */

            // Form the matrix M.

            MatrixXT M = MatrixXT::Zero(3 * number_of_points, 12 + number_of_points);
            int i = 0;

            for (auto const & p_W : points_world)
            {
                auto const & p_D = points_display.currentItem();
                auto & x = p_D.x();
                auto & y = p_D.y();
                points_display.next();

                M.block<1, 4>(3 * i + 0, 0) = p_W.homogeneous().transpose();
                M.block<1, 4>(3 * i + 1, 4) = p_W.homogeneous().transpose();
                M.block<1, 4>(3 * i + 2, 8) = p_W.homogeneous().transpose();

                M(3 * i + 0, 12 + i) = -x;
                M(3 * i + 1, 12 + i) = -y;
                M(3 * i + 2, 12 + i) = -1;

                ++i;
            }

            // Compute the parameter vector c and rearrange c into A.

            BDCSVD<MatrixXT> svd(M, ComputeThinV);
            VectorXT c = svd.matrixV().col(svd.matrixV().cols() - 1);
            A = Map<Matrix<T, 3, 4, RowMajor>>(c.data());

            break;
        }

        case VARIANT3: // cross product
        {
            /*

            This implementation is based on the computer vision lecture notes of Carl Olsson [5].
            The RQ decomposition differs from the description found in the lecture notes, but
            solving the equations manually as described in [5] yields the exact same results
            (this was thoroughly tested). Note that the parameter and variable names may vary
            from the reference.


            Algorithm
            =========

            Given: N corresponding point pairs (x'_i, y'_i) and (x_i, y_i, z_i)

            The projection can be described with the following relation

                p'_i ~ T_proj * T_pose * p_i = A * p_i        (where A = [a_kl])

            or

                lambda_i * p'_i = A * p_i                                                  (1)

            using an unknown scaling factor lambda_i. Stated in other words, the vectors
            p'_i and A * p_i are parallel to each other. This can be stated more formally
            using the cross product

                p'_i x (A * p_i) = 0.                                                      (2)

            By setting

                    | A1^T |              | x'_i |
                A = | A2^T |   and p'_i = | y'_i |
                    | A3^T |              | z'_i |

            equation (2) can be rewritten as

                | y'_i * A3^T * p_i - z'_i * A2^T * p_i |
                | z'_i * A1^T * p_i - x'_i * A3^T * p_i | = 0.
                | x'_i * A2^T * p_i - y'_i * A1^T * p_i |

            With z'_i = 1 it can again be rewritten as

                |  0           -1 * p_i    y' * p_i |   | A1 |
                |  1 * p_i      0         -x' * p_i | * | A2 | = 0
                | -y' * p_i    x' * p_i    0        |   | A3 |

            or

                M_i * c = 0.

            For multiple entries we then have

                    | M_1 |
                M = | M_2 |
                    | M_3 |
                    | ... |

            Now, the sought parameters can be found as the null space of M. For this, a
            Lagrange function L := || M * c ||^2 - lambda (|| c ||^2 - 1) is defined and
            minimized. This can be justified by the fact that projection matrices (and
            thus also the vector c) may be arbitrarily scaled without changing their
            properties. Hence, c may be constrained to have a magnitude of 1 to avoid the
            trivial solution. This leads to an eigenvalue problem which can be solved in a
            numerically more stable way using a singular value decomposition (SVD). Then c
            is the column of V associated with the smallest singular value, where
            M = U * S * V^T.

            Finally, the matrix A must be decomposed into the projection matrix T_proj and
            the pose matrix T_pose. Using block matrices, equation (1) can also be written as

                | U  0 | * | R    t | = | U*R  U*t | = A                                   (3)
                           | 0^T  1 |

            where U is the left part of T_proj (which is an upper triangular 3-by-3 matrix)
            and where R is the upper left 3-by-3 sub-matrix of T_pose, which is orthogonal;
            t is the 3d translation vector of T_pose. That is, U * R is the left 3-by-3
            matrix of A and a product of an upper triangular matrix and an orthogonal
            matrix. This can be factorized with the RQ algorithm (similar to the QR
            algorithm). The remaining variables can then be easily computed.

            Note that not every implementation of the QR / RQ algorithm returns a unique
            upper triangular matrix. The above implementation does this by returning a
            triangular matrix whose diagonal entries are always positive. This is in
            accordance with the projection matrix of the pinhole camera model where both
            focal lengths are assumed to be positive.

            Note that U may be arbitrarily scaled and should be normalized in such a way
            that the coefficient u33 is equal to 1 (as is the case in the above definition
            of the projection matrix).

            */

            // Form the matrix M.

            MatrixXT M = MatrixXT(3 * number_of_points, 12);
            int i = 0;

            for (auto const & p_W : points_world)
            {
                auto const & p_D = points_display.currentItem();
                auto & x = p_D.x();
                auto & y = p_D.y();
                points_display.next();

                M.block<1, 4>(3 * i + 0, 0).fill(0);
                M.block<1, 4>(3 * i + 0, 4) = -1 * p_W.homogeneous().transpose();
                M.block<1, 4>(3 * i + 0, 8) = y * p_W.homogeneous().transpose();

                M.block<1, 4>(3 * i + 1, 0) = 1 * p_W.homogeneous().transpose();
                M.block<1, 4>(3 * i + 1, 4).fill(0);
                M.block<1, 4>(3 * i + 1, 8) = -x * p_W.homogeneous().transpose();

                M.block<1, 4>(3 * i + 2, 0) = -y * p_W.homogeneous().transpose();
                M.block<1, 4>(3 * i + 2, 4) = x * p_W.homogeneous().transpose();
                M.block<1, 4>(3 * i + 2, 8).fill(0);

                ++i;
            }

            // Compute the parameter vector c and rearrange c into A.

            JacobiSVD<MatrixXT> svd(M, ComputeThinV);
            VectorXT c = svd.matrixV().col(svd.matrixV().cols() - 1);
            A = Map<Matrix<T, 3, 4, RowMajor>>(c.data());

            break;
        }

        default:
        {
            ErrorObj error;
            error.setClassName("PinholeCameraModel<T>");
            error.setFunctionName("estimate");
            error.setErrorMessage("Unknown estimation method.");
            error.setErrorCode(UNKNOWN_ESTIMATION_METHOD);
            throw error;
        }

    } // switch (variant)

    // Decompose A and form T_proj and T_pose from U and R according to the block
    // factorization described in the comments above.

    auto [U, R] = computeRQDecomposition<T>(A.block<3, 3>(0, 0));

    // T_proj
    T_proj.block<3, 3>(0, 0) = U / U(2, 2); // normalize such that u33 == 1
    T_proj(0, 3) = 0;
    T_proj(1, 3) = 0;
    T_proj(2, 3) = 0;

    // T_pose
    MatrixXT Ut = A.block<3, 1>(0, 3); // Ut = U * t
    MatrixXT t = U.inverse() * Ut;
    T_pose.block<3, 3>(0, 0) = R;
    T_pose.block<3, 1>(0, 3) = t;

    // Compute the root mean square error.

    T sum_of_squared_error = 0;

    points_world.first();
    points_display.first();

    while (!points_world.isDone())
    {
        auto const & p_W = points_world.currentItem();
        auto const & p_D = points_display.currentItem();

        sum_of_squared_error += (this->transform(p_W) - p_D).squaredNorm();

        points_world.next();
        points_display.next();
    }

    using std::sqrt;
    return sqrt(sum_of_squared_error / number_of_points);
}
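
/*
    Example (illustrative sketch): estimating a camera from point correspondences.
    How the Range instances are created depends on Range.hpp; the construction from
    std::vector shown below is an assumption made for this example.

        std::vector<Eigen::Vector3d> points_world;   // fill with at least 6 world points
        std::vector<Eigen::Vector2d> points_display; // fill with the corresponding image points

        TRTK::PinholeCameraModel<double> model;
        double rmse = model.estimate(TRTK::Range<Eigen::Vector3d>(points_world),
                                     TRTK::Range<Eigen::Vector2d>(points_display));

        auto [fx, fy] = model.getFocalLengths(); // intrinsic parameters after decomposition
        auto [cx, cy] = model.getImageCenter();
*/
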

#ifdef CPPOPTLIB_FOUND

template <class T>
T PinholeCameraModel<T>::estimateWithConstraints(const Range<Vector3T> & points_world,
                                                 const Range<Vector2T> & points_display,
                                                 Constraints constraints,
                                                 bool initialize,
                                                 bool constrained_decomposition)
{
    using namespace std;
    using namespace Eigen;
    using namespace TRTK::Tools;

    T rmse = std::numeric_limits<T>::max();

    if (initialize)
    {
        // Estimate the camera parameters; the result is refined below.
        rmse = estimate(points_world, points_display);
    }

    // Store the input point pairs in two matrices where each column represents one point.
    // This makes the computation of the objective function much easier and faster (see below).

    int n = (int) points_world.size();
    MatrixXT P_world(4, n);
    MatrixXT P_display(2, n);
    auto iter_world = points_world.begin();
    auto iter_display = points_display.begin();
    for (int i = 0; i < n; ++i)
    {
        P_world.col(i) = (*(iter_world++)).homogeneous();
        P_display.col(i) = *(iter_display++);
    }

    switch (constraints)
    {
        case NO_SKEW:
        {
            // First decompose the camera matrix into the projection matrix and the pose matrix.

            VectorXT optimization_parameters;
            optimization_parameters.resize(10);

            if (constrained_decomposition)
            {
                /*

                Decompose T_camera into T_proj and T_pose. While doing so, assume that tau = 0.
                Since this assumption does not hold exactly (due to noise, etc.), the equations
                below only provide rough estimates which are then used as initial values for
                the non-linear optimization.


                    | T11  T12  T13  T14 |   | fx  tau  cx  0 |   | R  t |
                    | T21  T22  T23  T24 | = | 0   fy   cy  0 | * | 0  1 |
                    | T31  T32  T33  T34 |   | 0   0    1   0 |

                        | T11  T12  T13 |   | T1^T |   | fx  0   cx |   | R1^T |
                    ==> | T21  T22  T23 | = | T2^T | = | 0   fy  cy | * | R2^T |        (*)
                        | T31  T32  T33 |   | T3^T |   | 0   0   1  |   | R3^T |

                        fx * R1 + cx * R3 = T1
                    ==> fy * R2 + cy * R3 = T2
                                       R3 = T3

                Normalize T with respect to its last row: T = T / || T3 ||.

                With some easy algebraic manipulations we have (R1, R2, and R3 are orthogonal
                to each other)

                    cx = T3^T * T1
                    cy = T3^T * T2

                        fx * R1 = T1 - cx * T3
                    ==> fx = || T1 - cx * T3 ||
                    ==> R1 = (T1 - cx * T3) / fx

                        fy * R2 = T2 - cy * T3
                    ==> fy = || T2 - cy * T3 ||
                    ==> R2 = (T2 - cy * T3) / fy

                R = [R1, R2, R3]^T is not necessarily orthogonal, thus a non-linear
                optimization has to follow.

                */

                auto T_camera = getCameraParameters();

                T_camera /= T_camera.block<1, 3>(2, 0).norm(); // the last row vector shall be a unit vector
                Vector3T T1 = T_camera.block<1, 3>(0, 0).transpose();
                Vector3T T2 = T_camera.block<1, 3>(1, 0).transpose();
                Vector3T T3 = T_camera.block<1, 3>(2, 0).transpose();
                Vector3T R3 = T3;
                T cx = T3.transpose() * T1;
                T cy = T3.transpose() * T2;

                T fx = (T1 - cx * T3).norm();
                Vector3T R1 = (T1 - cx * T3) / fx;

                T fy = (T2 - cy * T3).norm();
                Vector3T R2 = (T2 - cy * T3) / fy;

                Matrix3T R;
                R.block<1, 3>(0, 0) = R1.transpose();
                R.block<1, 3>(1, 0) = R2.transpose();
                R.block<1, 3>(2, 0) = R3.transpose();

                // Orthogonalize R (and guarantee right-handedness).

                auto svd = JacobiSVD<Matrix3T>(R, ComputeFullU | ComputeFullV);
                R = svd.matrixU() * svd.matrixV().transpose();
                if (R.determinant() < 0) // left-handed?
                {
                    MatrixXT D = MatrixXT::Identity(3, 3);
                    D(2, 2) = -1;
                    R = svd.matrixU() * D * svd.matrixV().transpose();
                }

                // Now, do the non-linear optimization. The RMSE is computed over the function (*)
                // where the rotation matrix is constructed from an axis-angle representation; the
                // unit axis is represented in spherical coordinates with only two angles/parameters.
                // Constructing the rotation matrix this way has the advantage that the orthogonality
                // constraint is always fulfilled.

                auto [axis, rotation_angle] = axisAngleFromRotationMatrix(R);
                auto [radius, axis_theta, axis_phi] = cartesian2Spherical(axis.x(), axis.y(), axis.z());

                optimization_parameters(0) = fx;
                optimization_parameters(1) = fy;
                optimization_parameters(2) = cx;
                optimization_parameters(3) = cy;
                optimization_parameters(4) = axis_theta;
                optimization_parameters(5) = axis_phi;
                optimization_parameters(6) = rotation_angle;
                optimization_parameters(7) = this->getCameraPosition().x();
                optimization_parameters(8) = this->getCameraPosition().y();
                optimization_parameters(9) = this->getCameraPosition().z();
            }
            else
            {
                Matrix3T R = T_pose.block<3, 3>(0, 0);
                auto [axis, rotation_angle] = axisAngleFromRotationMatrix(R);
                auto [radius, axis_theta, axis_phi] = cartesian2Spherical(axis.x(), axis.y(), axis.z());

                optimization_parameters(0) = T_proj(0, 0); // fx
                optimization_parameters(1) = T_proj(1, 1); // fy
                optimization_parameters(2) = T_proj(0, 2); // cx
                optimization_parameters(3) = T_proj(1, 2); // cy
                optimization_parameters(4) = axis_theta;
                optimization_parameters(5) = axis_phi;
                optimization_parameters(6) = rotation_angle;
                optimization_parameters(7) = T_pose(0, 3); // tx
                optimization_parameters(8) = T_pose(1, 3); // ty
                optimization_parameters(9) = T_pose(2, 3); // tz
            }

            // Now run the constrained optimization.

            class ObjectiveFunction : public cppoptlib::Problem<double>
            {
            public:
                using typename cppoptlib::Problem<double>::Scalar;
                using typename cppoptlib::Problem<double>::TVector;

                ObjectiveFunction(const MatrixXT & P_world, const MatrixXT & P_display)
                {
                    this->P_world = P_world;
                    this->P_display = P_display;
                }

                double value(const TVector & x)
                {
                    T_proj = Matrix34T::Identity();
                    T_proj(0, 0) = x(0); // fx
                    T_proj(1, 1) = x(1); // fy
                    T_proj(0, 2) = x(2); // cx
                    T_proj(1, 2) = x(3); // cy

                    T_pose = Matrix4T::Identity();
                    auto axis = spherical2Cartesian(Vector3T(1, x(4), x(5)));
                    T_pose.block<3, 3>(0, 0) = rotationMatrix(axis, x(6));
                    T_pose(0, 3) = x(7); // tx
                    T_pose(1, 3) = x(8); // ty
                    T_pose(2, 3) = x(9); // tz

                    using std::sqrt;
                    MatrixXT T_camera = T_proj * T_pose;
                    MatrixXT P_projected = (T_camera * P_world).colwise().hnormalized();
                    T rmse = sqrt((P_display - P_projected).colwise().squaredNorm().mean());
                    return rmse;
                }

            public:
                MatrixXT P_world;
                MatrixXT P_display;
                Matrix34T T_proj;
                Matrix4T T_pose;
            };

            auto func = ObjectiveFunction(P_world, P_display);
            cppoptlib::BfgsSolver<ObjectiveFunction> solver;
            solver.minimize(func, optimization_parameters);
            T_proj = func.T_proj;
            T_pose = func.T_pose;
            rmse = func.value(optimization_parameters);

            break;
        }
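
        // The rotation parameterization used in all three cases can be illustrated
        // as follows (sketch; it relies on the TRTK::Tools helpers exactly as they
        // are used above):
        //
        //     Matrix3T R = ...;                                  // orthogonal, right-handed
        //     auto [axis, angle] = axisAngleFromRotationMatrix(R);
        //     auto [radius, theta, phi] = cartesian2Spherical(axis.x(), axis.y(), axis.z());
        //     Matrix3T R2 = rotationMatrix(spherical2Cartesian(Vector3T(1, theta, phi)), angle);
        //
        // R2 reproduces R; (theta, phi, angle) are the three free parameters that
        // encode the orientation during the optimization.
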

        case SAME_FOCAL_LENGTHS:
        {
            // First decompose the camera matrix into the projection matrix and the pose matrix.

            VectorXT optimization_parameters;
            optimization_parameters.resize(10);

            if (constrained_decomposition)
            {
                /*

                Decompose T_camera into T_proj and T_pose. While doing so, assume that f := f_x = f_y.
                Since this assumption does not hold exactly (due to noise, etc.), the equations below
                only provide rough estimates which are then used as initial values for the non-linear
                optimization.


                    | T11  T12  T13  T14 |   | fx  tau  cx  0 |   | R  t |
                    | T21  T22  T23  T24 | = | 0   fy   cy  0 | * | 0  1 |
                    | T31  T32  T33  T34 |   | 0   0    1   0 |

                        | T11  T12  T13 |   | T1^T |   | f  tau  cx |   | R1^T |
                    ==> | T21  T22  T23 | = | T2^T | = | 0  f    cy | * | R2^T |        (***)
                        | T31  T32  T33 |   | T3^T |   | 0  0    1  |   | R3^T |

                        f * R1 + tau * R2 + cx * R3 = T1
                    ==>           f * R2 + cy * R3 = T2
                                                R3 = T3

                Normalize T with respect to its last row: T = T / || T3 ||.

                With some easy algebraic manipulations we have (R1, R2, and R3 are orthogonal
                to each other)

                    cx = T3^T * T1
                    cy = T3^T * T2

                        f * R2 = T2 - cy * T3
                    ==> f = || T2 - cy * T3 ||
                    ==> R2 = (T2 - cy * T3) / f

                        tau * R2 = T1 - f * R1 - cx * T3
                    ==> tau = R2^T * (T1 - f * R1 - cx * T3) = R2^T * T1
                    ==> R1 = T1 - tau * R2 - cx * T3        (R1 must be normalized afterwards)

                R = [R1, R2, R3]^T is not necessarily orthogonal, thus a non-linear
                optimization is still necessary.

                */

                auto T_camera = getCameraParameters();

                T_camera /= T_camera.block<1, 3>(2, 0).norm(); // the last row vector shall be a unit vector
                Vector3T T1 = T_camera.block<1, 3>(0, 0).transpose();
                Vector3T T2 = T_camera.block<1, 3>(1, 0).transpose();
                Vector3T T3 = T_camera.block<1, 3>(2, 0).transpose();
                Vector3T R3 = T3;
                T cx = T3.transpose() * T1;
                T cy = T3.transpose() * T2;

                T f = (T2 - cy * T3).norm();
                Vector3T R2 = (T2 - cy * T3) / f;

                T tau = R2.transpose() * T1;
                Vector3T R1 = (T1 - tau * R2 - cx * T3).normalized();

                Matrix3T R;
                R.block<1, 3>(0, 0) = R1.transpose();
                R.block<1, 3>(1, 0) = R2.transpose();
                R.block<1, 3>(2, 0) = R3.transpose();

                // Orthogonalize R (and guarantee right-handedness).

                auto svd = JacobiSVD<Matrix3T>(R, ComputeFullU | ComputeFullV);
                R = svd.matrixU() * svd.matrixV().transpose();
                if (R.determinant() < 0) // left-handed?
                {
                    MatrixXT D = MatrixXT::Identity(3, 3);
                    D(2, 2) = -1;
                    R = svd.matrixU() * D * svd.matrixV().transpose();
                }

                // Now, do the non-linear optimization. The RMSE is computed over the function (***)
                // where the rotation matrix is constructed from an axis-angle representation; the
                // unit axis is represented in spherical coordinates with only two angles/parameters.
                // Constructing the rotation matrix this way has the advantage that the orthogonality
                // constraint is always fulfilled.

                auto [axis, rotation_angle] = axisAngleFromRotationMatrix(R);
                auto [radius, axis_theta, axis_phi] = cartesian2Spherical(axis.x(), axis.y(), axis.z());

                optimization_parameters(0) = f;
                optimization_parameters(1) = tau;
                optimization_parameters(2) = cx;
                optimization_parameters(3) = cy;
                optimization_parameters(4) = axis_theta;
                optimization_parameters(5) = axis_phi;
                optimization_parameters(6) = rotation_angle;
                optimization_parameters(7) = this->getCameraPosition().x();
                optimization_parameters(8) = this->getCameraPosition().y();
                optimization_parameters(9) = this->getCameraPosition().z();
            }
            else
            {
                Matrix3T R = T_pose.block<3, 3>(0, 0);
                auto [axis, rotation_angle] = axisAngleFromRotationMatrix(R);
                auto [radius, axis_theta, axis_phi] = cartesian2Spherical(axis.x(), axis.y(), axis.z());

                optimization_parameters(0) = 0.5 * (T_proj(0, 0) + T_proj(1, 1)); // f = (fx + fy) / 2
                optimization_parameters(1) = T_proj(0, 1); // tau
                optimization_parameters(2) = T_proj(0, 2); // cx
                optimization_parameters(3) = T_proj(1, 2); // cy
                optimization_parameters(4) = axis_theta;
                optimization_parameters(5) = axis_phi;
                optimization_parameters(6) = rotation_angle;
                optimization_parameters(7) = T_pose(0, 3); // tx
                optimization_parameters(8) = T_pose(1, 3); // ty
                optimization_parameters(9) = T_pose(2, 3); // tz
            }

            // Now run the constrained optimization.

            class ObjectiveFunction : public cppoptlib::Problem<double>
            {
            public:
                using typename cppoptlib::Problem<double>::Scalar;
                using typename cppoptlib::Problem<double>::TVector;

                ObjectiveFunction(const MatrixXT & P_world, const MatrixXT & P_display)
                {
                    this->P_world = P_world;
                    this->P_display = P_display;
                }

                double value(const TVector & x)
                {
                    T_proj = Matrix34T::Identity();
                    T_proj(0, 0) = x(0); // f
                    T_proj(0, 1) = x(1); // tau
                    T_proj(0, 2) = x(2); // cx
                    T_proj(1, 1) = x(0); // f
                    T_proj(1, 2) = x(3); // cy

                    T_pose = Matrix4T::Identity();
                    auto axis = spherical2Cartesian(Vector3T(1, x(4), x(5)));
                    T_pose.block<3, 3>(0, 0) = rotationMatrix(axis, x(6));
                    T_pose(0, 3) = x(7); // tx
                    T_pose(1, 3) = x(8); // ty
                    T_pose(2, 3) = x(9); // tz

                    using std::sqrt;
                    MatrixXT T_camera = T_proj * T_pose;
                    MatrixXT P_projected = (T_camera * P_world).colwise().hnormalized();
                    T rmse = sqrt((P_display - P_projected).colwise().squaredNorm().mean());
                    return rmse;
                }

            public:
                MatrixXT P_world;
                MatrixXT P_display;
                Matrix34T T_proj;
                Matrix4T T_pose;
            };

            auto func = ObjectiveFunction(P_world, P_display);
            cppoptlib::BfgsSolver<ObjectiveFunction> solver;
            solver.minimize(func, optimization_parameters);
            T_proj = func.T_proj;
            T_pose = func.T_pose;
            rmse = func.value(optimization_parameters);

            break;
        }

        case SAME_FOCAL_LENGTHS_AND_NO_SKEW:
        {
            // First decompose the camera matrix into the projection matrix and the pose matrix.

            VectorXT optimization_parameters;
            optimization_parameters.resize(9);

            if (constrained_decomposition)
            {
                /*

                Decompose T_camera into T_proj and T_pose. While doing so, assume that f := f_x = f_y
                and tau = 0.
 Since these assumptions do not hold exactly (due to noise, etc.), the
                equations below only provide rough estimates which are then used as initial
                values for the non-linear optimization.


                    | T11  T12  T13  T14 |   | fx  tau  cx  0 |   | R  t |
                    | T21  T22  T23  T24 | = | 0   fy   cy  0 | * | 0  1 |
                    | T31  T32  T33  T34 |   | 0   0    1   0 |

                        | T11  T12  T13 |   | T1^T |   | f  0  cx |   | R1^T |
                    ==> | T21  T22  T23 | = | T2^T | = | 0  f  cy | * | R2^T |        (***)
                        | T31  T32  T33 |   | T3^T |   | 0  0  1  |   | R3^T |

                        f * R1 + cx * R3 = T1
                    ==> f * R2 + cy * R3 = T2
                                      R3 = T3

                Normalize T with respect to its last row: T = T / || T3 ||.

                With some easy algebraic manipulations we have (R1, R2, and R3 are orthogonal
                to each other)

                    cx = T3^T * T1
                    cy = T3^T * T2

                        f * R1 = T1 - cx * T3
                    ==> f = || T1 - cx * T3 ||
                    ==> R1 = (T1 - cx * T3) / f

                        f * R2 = T2 - cy * T3
                    ==> f = || T2 - cy * T3 ||
                    ==> R2 = (T2 - cy * T3) / f

                The two former computations of f may conflict with each other, so we simply
                average both values. Also, R = [R1, R2, R3]^T is not necessarily orthogonal.

                */

                auto T_camera = getCameraParameters();

                T_camera /= T_camera.block<1, 3>(2, 0).norm(); // the last row vector shall be a unit vector
                Vector3T T1 = T_camera.block<1, 3>(0, 0).transpose();
                Vector3T T2 = T_camera.block<1, 3>(1, 0).transpose();
                Vector3T T3 = T_camera.block<1, 3>(2, 0).transpose();
                Vector3T R3 = T3;
                T cx = T3.transpose() * T1;
                T cy = T3.transpose() * T2;

                T fx = (T1 - cx * T3).norm();
                Vector3T R1 = (T1 - cx * T3) / fx;

                T fy = (T2 - cy * T3).norm();
                Vector3T R2 = (T2 - cy * T3) / fy;

                T f = (fx + fy) / 2;

                Matrix3T R;
                R.block<1, 3>(0, 0) = R1.transpose();
                R.block<1, 3>(1, 0) = R2.transpose();
                R.block<1, 3>(2, 0) = R3.transpose();

                // Orthogonalize R (and guarantee right-handedness).

                auto svd = JacobiSVD<Matrix3T>(R, ComputeFullU | ComputeFullV);
                R = svd.matrixU() * svd.matrixV().transpose();
                if (R.determinant() < 0) // left-handed?
                {
                    MatrixXT D = MatrixXT::Identity(3, 3);
                    D(2, 2) = -1;
                    R = svd.matrixU() * D * svd.matrixV().transpose();
                }

                // Now, do the non-linear optimization. The RMSE is computed over the function (***)
                // where the rotation matrix is constructed from an axis-angle representation; the
                // unit axis is represented in spherical coordinates with only two angles/parameters.
                // Constructing the rotation matrix this way has the advantage that the orthogonality
                // constraint is always fulfilled.

                auto [axis, rotation_angle] = axisAngleFromRotationMatrix(R);
                auto [radius, axis_theta, axis_phi] = cartesian2Spherical(axis.x(), axis.y(), axis.z());

                optimization_parameters(0) = f;
                optimization_parameters(1) = cx;
                optimization_parameters(2) = cy;
                optimization_parameters(3) = axis_theta;
                optimization_parameters(4) = axis_phi;
                optimization_parameters(5) = rotation_angle;
                optimization_parameters(6) = this->getCameraPosition().x();
                optimization_parameters(7) = this->getCameraPosition().y();
                optimization_parameters(8) = this->getCameraPosition().z();
            }
            else
            {
                Matrix3T R = T_pose.block<3, 3>(0, 0);
                auto [axis, rotation_angle] = axisAngleFromRotationMatrix(R);
                auto [radius, axis_theta, axis_phi] = cartesian2Spherical(axis.x(), axis.y(), axis.z());

                optimization_parameters(0) = 0.5 * (T_proj(0, 0) + T_proj(1, 1)); // f = (fx + fy) / 2
                optimization_parameters(1) = T_proj(0, 2); // cx
                optimization_parameters(2) = T_proj(1, 2); // cy
                optimization_parameters(3) = axis_theta;
                optimization_parameters(4) = axis_phi;
                optimization_parameters(5) = rotation_angle;
                optimization_parameters(6) = T_pose(0, 3); // tx
                optimization_parameters(7) = T_pose(1, 3); // ty
                optimization_parameters(8) = T_pose(2, 3); // tz
            }

            // Now run the constrained optimization.

            class ObjectiveFunction : public cppoptlib::Problem<double>
            {
            public:
                using typename cppoptlib::Problem<double>::Scalar;
                using typename cppoptlib::Problem<double>::TVector;

                ObjectiveFunction(const MatrixXT & P_world, const MatrixXT & P_display)
                {
                    this->P_world = P_world;
                    this->P_display = P_display;
                }

                double value(const TVector & x)
                {
                    T_proj = Matrix34T::Identity();
                    T_proj(0, 0) = x(0); // f
                    T_proj(1, 1) = x(0); // f
                    T_proj(0, 2) = x(1); // cx
                    T_proj(1, 2) = x(2); // cy

                    T_pose = Matrix4T::Identity();
                    auto axis = spherical2Cartesian(Vector3T(1, x(3), x(4)));
                    T_pose.block<3, 3>(0, 0) = rotationMatrix(axis, x(5));
                    T_pose(0, 3) = x(6); // tx
                    T_pose(1, 3) = x(7); // ty
                    T_pose(2, 3) = x(8); // tz

                    using std::sqrt;
                    MatrixXT T_camera = T_proj * T_pose;
                    MatrixXT P_projected = (T_camera * P_world).colwise().hnormalized();
                    T rmse = sqrt((P_display - P_projected).colwise().squaredNorm().mean());
                    return rmse;
                }

            public:
                MatrixXT P_world;
                MatrixXT P_display;
                Matrix34T T_proj;
                Matrix4T T_pose;
            };

            auto func = ObjectiveFunction(P_world, P_display);
            cppoptlib::BfgsSolver<ObjectiveFunction> solver;
            solver.minimize(func, optimization_parameters);
            T_proj = func.T_proj;
            T_pose = func.T_pose;
            rmse = func.value(optimization_parameters);

            break;
        }

        default:
        {
            ErrorObj error;
            error.setClassName("PinholeCameraModel<T>");
            error.setFunctionName("estimateWithConstraints");
            error.setErrorMessage("Unknown estimation method.");
            error.setErrorCode(UNKNOWN_ESTIMATION_METHOD);
            throw error;
        }

    } // switch (constraints)

    return rmse;
}

#endif // CPPOPTLIB_FOUND
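
/*
    Example (illustrative sketch): refining an estimate under constraints. This
    requires the CppOptimizationLibrary (CPPOPTLIB_FOUND). The Range construction
    is assumed to work as in the estimate() example above.

        TRTK::PinholeCameraModel<double> model;
        double rmse = model.estimateWithConstraints(
            TRTK::Range<Eigen::Vector3d>(points_world),
            TRTK::Range<Eigen::Vector2d>(points_display),
            TRTK::PinholeCameraModel<double>::SAME_FOCAL_LENGTHS_AND_NO_SKEW);
*/
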

template <class T>
typename PinholeCameraModel<T>::Matrix34T PinholeCameraModel<T>::getCameraParameters() const
{
    return T_proj * T_pose;
}


template <class T>
typename PinholeCameraModel<T>::Matrix3T PinholeCameraModel<T>::getCameraOrientation() const
{
    return T_pose.block<3, 3>(0, 0);
}


template <class T>
typename PinholeCameraModel<T>::Vector3T PinholeCameraModel<T>::getCameraPosition() const
{
    return T_pose.block<3, 1>(0, 3);
}


template <class T>
typename PinholeCameraModel<T>::Matrix4T PinholeCameraModel<T>::getExtrinsicParameters() const
{
    return T_pose;
}


template <class T>
std::tuple<T, T> PinholeCameraModel<T>::getFocalLengths() const
{
    return std::make_tuple(T_proj(0, 0), T_proj(1, 1));
}


template <class T>
std::tuple<T, T> PinholeCameraModel<T>::getImageCenter() const
{
    return std::make_tuple(T_proj(0, 2), T_proj(1, 2));
}


template <class T>
typename PinholeCameraModel<T>::Matrix34T PinholeCameraModel<T>::getIntrinsicParameters() const
{
    return T_proj;
}


template <class T>
T PinholeCameraModel<T>::getSkew() const
{
    return T_proj(0, 1);
}


template <class T>
typename PinholeCameraModel<T>::Vector2T PinholeCameraModel<T>::operator*(const Vector3T & point) const
{
    return transform(point);
}


template <class T>
typename PinholeCameraModel<T>::Vector2T PinholeCameraModel<T>::transform(const Vector3T & point) const
{
    return (T_proj * T_pose * point.homogeneous()).hnormalized();
}


template <class T>
void PinholeCameraModel<T>::setCameraOrientation(const Matrix3T & orientation)
{
    assert((orientation * orientation.transpose() - Matrix3T::Identity()).norm() < 1e-7); // orthogonal?
    // assert(abs(orientation.determinant() - 1) < 1e-7); // right-handed without scaling? <-- no, we should not unnecessarily restrict the mapping

    T_pose.block<3, 3>(0, 0) = orientation;
}


template <class T>
void PinholeCameraModel<T>::setCameraPosition(const Vector3T & position)
{
    T_pose.block<3, 1>(0, 3) = position;
}


template <class T>
void PinholeCameraModel<T>::setExtrinsicParameters(const Matrix4T & parameters)
{
    auto & R = parameters.block<3, 3>(0, 0);
    const bool ORIENTATION_MATRIX_IS_ORTHOGONAL = ((R * R.transpose()) - Matrix3T::Identity()).norm() < 1e-7;
    assert(ORIENTATION_MATRIX_IS_ORTHOGONAL);

    T_pose = parameters;
}


template <class T>
void PinholeCameraModel<T>::setFocalLengths(T f_x, T f_y)
{
    T_proj(0, 0) = f_x;
    T_proj(1, 1) = f_y;
}


template <class T>
void PinholeCameraModel<T>::setImageCenter(T c_x, T c_y)
{
    T_proj(0, 2) = c_x;
    T_proj(1, 2) = c_y;
}


template <class T>
void PinholeCameraModel<T>::setIntrinsicParameters(const Matrix34T & parameters)
{
    T_proj = parameters;
}


template <class T>
void PinholeCameraModel<T>::setSkew(T skew)
{
    T_proj(0, 1) = skew;
}


} // namespace TRTK


#endif // PINHOLE_CAMERA_MODEL_HPP_4231789408