38 #ifndef GMM_LAPACK_INTERFACE_H
39 #define GMM_LAPACK_INTERFACE_H
46 #if defined(GMM_USES_LAPACK)
88 void sgetrf_(...);
void dgetrf_(...);
void cgetrf_(...);
void zgetrf_(...);
89 void sgetrs_(...);
void dgetrs_(...);
void cgetrs_(...);
void zgetrs_(...);
90 void sgetri_(...);
void dgetri_(...);
void cgetri_(...);
void zgetri_(...);
91 void sgeqrf_(...);
void dgeqrf_(...);
void cgeqrf_(...);
void zgeqrf_(...);
92 void sorgqr_(...);
void dorgqr_(...);
void cungqr_(...);
void zungqr_(...);
93 void sormqr_(...);
void dormqr_(...);
void cunmqr_(...);
void zunmqr_(...);
94 void sgees_ (...);
void dgees_ (...);
void cgees_ (...);
void zgees_ (...);
95 void sgeev_ (...);
void dgeev_ (...);
void cgeev_ (...);
void zgeev_ (...);
96 void sgeesx_(...);
void dgeesx_(...);
void cgeesx_(...);
void zgeesx_(...);
97 void sgesvd_(...);
void dgesvd_(...);
void cgesvd_(...);
void zgesvd_(...);
104 # define getrf_interface(lapack_name, base_type) inline \
105 size_type lu_factor(dense_matrix<base_type > &A, lapack_ipvt &ipvt){ \
106 GMMLAPACK_TRACE("getrf_interface"); \
107 BLAS_INT m = BLAS_INT(mat_nrows(A)), n = BLAS_INT(mat_ncols(A)), lda(m); \
109 if (m && n) lapack_name(&m, &n, &A(0,0), &lda, ipvt.pfirst(), &info); \
110 if ((sizeof(BLAS_INT) == 4) || \
111 ((info & 0xFFFFFFFF00000000L) && !(info & 0x00000000FFFFFFFFL))) \
113 ipvt.set_to_int32(); \
114 return size_type(int(info & 0x00000000FFFFFFFFL)); \
117 getrf_interface(sgetrf_, BLAS_S)
118 getrf_interface(dgetrf_, BLAS_D)
119 getrf_interface(cgetrf_, BLAS_C)
120 getrf_interface(zgetrf_, BLAS_Z)
126 # define getrs_interface(f_name, trans1, lapack_name, base_type) inline \
127 void f_name(const dense_matrix<base_type > &A, \
128 const lapack_ipvt &ipvt, std::vector<base_type > &x, \
129 const std::vector<base_type > &b) { \
130 GMMLAPACK_TRACE("getrs_interface"); \
131 BLAS_INT n = BLAS_INT(mat_nrows(A)), info(0), nrhs(1); \
132 gmm::copy(b, x); trans1; \
134 lapack_name(&t,&n,&nrhs,&(A(0,0)),&n,ipvt.pfirst(),&x[0],&n,&info); \
137 # define getrs_trans_n const char t = 'N'
138 # define getrs_trans_t const char t = 'T'
140 getrs_interface(lu_solve, getrs_trans_n, sgetrs_, BLAS_S)
141 getrs_interface(lu_solve, getrs_trans_n, dgetrs_, BLAS_D)
142 getrs_interface(lu_solve, getrs_trans_n, cgetrs_, BLAS_C)
143 getrs_interface(lu_solve, getrs_trans_n, zgetrs_, BLAS_Z)
144 getrs_interface(lu_solve_transposed, getrs_trans_t, sgetrs_, BLAS_S)
145 getrs_interface(lu_solve_transposed, getrs_trans_t, dgetrs_, BLAS_D)
146 getrs_interface(lu_solve_transposed, getrs_trans_t, cgetrs_, BLAS_C)
147 getrs_interface(lu_solve_transposed, getrs_trans_t, zgetrs_, BLAS_Z)
153 # define getri_interface(lapack_name, base_type) inline \
154 void lu_inverse(const dense_matrix<base_type > &LU, \
155 const lapack_ipvt &ipvt, const dense_matrix<base_type > &A_) { \
156 GMMLAPACK_TRACE("getri_interface"); \
157 dense_matrix<base_type >& \
158 A = const_cast<dense_matrix<base_type > &>(A_); \
159 BLAS_INT n = BLAS_INT(mat_nrows(A)), info(0), lwork(-1); \
163 lapack_name(&n, &A(0,0), &n, ipvt.pfirst(), &work1, &lwork, &info); \
164 lwork = int(gmm::real(work1)); \
165 std::vector<base_type> work(lwork); \
166 lapack_name(&n, &A(0,0), &n, ipvt.pfirst(), &work[0], &lwork,&info); \
170 getri_interface(sgetri_, BLAS_S)
171 getri_interface(dgetri_, BLAS_D)
172 getri_interface(cgetri_, BLAS_C)
173 getri_interface(zgetri_, BLAS_Z)
179 # define geqrf_interface(lapack_name1, base_type) inline \
180 void qr_factor(dense_matrix<base_type > &A){ \
181 GMMLAPACK_TRACE("geqrf_interface"); \
182 BLAS_INT m = BLAS_INT(mat_nrows(A)), n=BLAS_INT(mat_ncols(A)); \
183 BLAS_INT info(0), lwork(-1); \
186 std::vector<base_type > tau(n); \
187 lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work1 , &lwork, &info); \
188 lwork = BLAS_INT(gmm::real(work1)); \
189 std::vector<base_type > work(lwork); \
190 lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work[0], &lwork, &info); \
191 GMM_ASSERT1(!info, "QR factorization failed"); \
195 geqrf_interface(sgeqrf_, BLAS_S)
196 geqrf_interface(dgeqrf_, BLAS_D)
202 # define geqrf_interface2(lapack_name1, lapack_name2, base_type) inline \
203 void qr_factor(const dense_matrix<base_type > &A, \
204 dense_matrix<base_type > &Q, dense_matrix<base_type > &R) { \
205 GMMLAPACK_TRACE("geqrf_interface2"); \
206 BLAS_INT m = BLAS_INT(mat_nrows(A)), n=BLAS_INT(mat_ncols(A)); \
207 BLAS_INT info(0), lwork(-1); \
210 std::copy(A.begin(), A.end(), Q.begin()); \
211 std::vector<base_type > tau(n); \
212 lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work1 , &lwork, &info); \
213 lwork = BLAS_INT(gmm::real(work1)); \
214 std::vector<base_type > work(lwork); \
215 lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work[0], &lwork, &info); \
216 GMM_ASSERT1(!info, "QR factorization failed"); \
217 base_type *p = &R(0,0), *q = &Q(0,0); \
218 for (BLAS_INT j = 0; j < n; ++j, q += m-n) \
219 for (BLAS_INT i = 0; i < n; ++i, ++p, ++q) \
220 *p = (j < i) ? base_type(0) : *q; \
221 lapack_name2(&m, &n, &n, &Q(0,0), &m,&tau[0],&work[0],&lwork,&info); \
223 else gmm::clear(Q); \
226 geqrf_interface2(sgeqrf_, sorgqr_, BLAS_S)
227 geqrf_interface2(dgeqrf_, dorgqr_, BLAS_D)
228 geqrf_interface2(cgeqrf_, cungqr_, BLAS_C)
229 geqrf_interface2(zgeqrf_, zungqr_, BLAS_Z)
235 # define gees_interface(lapack_name, base_type) \
236 template <typename VECT> inline void implicit_qr_algorithm( \
237 const dense_matrix<base_type > &A, const VECT &eigval_, \
238 dense_matrix<base_type > &Q, \
239 double tol=gmm::default_tol(base_type()), bool compvect = true) { \
240 GMMLAPACK_TRACE("gees_interface"); \
241 typedef bool (*L_fp)(...); L_fp p = 0; \
242 BLAS_INT n=BLAS_INT(mat_nrows(A)), info(0), lwork(-1), sdim; \
245 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
246 char jobvs = (compvect ? 'V' : 'N'), sort = 'N'; \
247 std::vector<double> rwork(n), eigv1(n), eigv2(n); \
248 lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0], \
249 &eigv2[0], &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \
250 lwork = BLAS_INT(gmm::real(work1)); \
251 std::vector<base_type > work(lwork); \
252 lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0], \
253 &eigv2[0], &Q(0,0), &n, &work[0], &lwork, &rwork[0],&info);\
254 GMM_ASSERT1(!info, "QR algorithm failed"); \
255 extract_eig(H, const_cast<VECT &>(eigval_), tol); \
258 # define gees_interface2(lapack_name, base_type) \
259 template <typename VECT> inline void implicit_qr_algorithm( \
260 const dense_matrix<base_type > &A, const VECT &eigval_, \
261 dense_matrix<base_type > &Q, \
262 double tol=gmm::default_tol(base_type()), bool compvect = true) { \
263 GMMLAPACK_TRACE("gees_interface2"); \
264 typedef bool (*L_fp)(...); L_fp p = 0; \
265 BLAS_INT n=BLAS_INT(mat_nrows(A)), info(0), lwork(-1), sdim; \
268 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
269 char jobvs = (compvect ? 'V' : 'N'), sort = 'N'; \
270 std::vector<double> rwork(n), eigvv(n*2); \
271 lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0], \
272 &Q(0,0), &n, &work1, &lwork, &rwork[0], &rwork[0], &info); \
273 lwork = BLAS_INT(gmm::real(work1)); \
274 std::vector<base_type > work(lwork); \
275 lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0], \
276 &Q(0,0), &n, &work[0], &lwork, &rwork[0], &rwork[0],&info);\
277 GMM_ASSERT1(!info, "QR algorithm failed"); \
278 extract_eig(H, const_cast<VECT &>(eigval_), tol); \
281 gees_interface(sgees_, BLAS_S)
282 gees_interface(dgees_, BLAS_D)
283 gees_interface2(cgees_, BLAS_C)
284 gees_interface2(zgees_, BLAS_Z)
287 # define jobv_right char jobvl = 'N', jobvr = 'V';
288 # define jobv_left char jobvl = 'V', jobvr = 'N';
290 # define geev_interface(lapack_name, base_type, side) \
291 template <typename VECT> inline void geev_interface_ ## side( \
292 const dense_matrix<base_type > &A, const VECT &eigval_, \
293 dense_matrix<base_type > &Q) { \
294 GMMLAPACK_TRACE("geev_interface"); \
295 BLAS_INT n = BLAS_INT(mat_nrows(A)), info(0), lwork(-1); \
298 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
300 std::vector<base_type > eigvr(n), eigvi(n); \
301 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \
302 &Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info); \
303 lwork = BLAS_INT(gmm::real(work1)); \
304 std::vector<base_type > work(lwork); \
305 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \
306 &Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info); \
307 GMM_ASSERT1(!info, "QR algorithm failed"); \
308 gmm::copy(eigvr, gmm::real_part(const_cast<VECT &>(eigval_))); \
309 gmm::copy(eigvi, gmm::imag_part(const_cast<VECT &>(eigval_))); \
312 # define geev_interface2(lapack_name, base_type, side) \
313 template <typename VECT> inline void geev_interface_ ## side( \
314 const dense_matrix<base_type > &A, const VECT &eigval_, \
315 dense_matrix<base_type > &Q) { \
316 GMMLAPACK_TRACE("geev_interface"); \
317 BLAS_INT n = BLAS_INT(mat_nrows(A)), info(0), lwork(-1); \
320 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
322 std::vector<base_type::value_type> rwork(2*n); \
323 std::vector<base_type> eigv(n); \
324 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \
325 &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \
326 lwork = BLAS_INT(gmm::real(work1)); \
327 std::vector<base_type > work(lwork); \
328 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \
329 &Q(0,0), &n, &work[0], &lwork, &rwork[0], &info); \
330 GMM_ASSERT1(!info, "QR algorithm failed"); \
331 gmm::copy(eigv, const_cast<VECT &>(eigval_)); \
334 geev_interface(sgeev_, BLAS_S, right)
335 geev_interface(dgeev_, BLAS_D, right)
336 geev_interface2(cgeev_, BLAS_C, right)
337 geev_interface2(zgeev_, BLAS_Z, right)
339 geev_interface(sgeev_, BLAS_S, left)
340 geev_interface(dgeev_, BLAS_D, left)
341 geev_interface2(cgeev_, BLAS_C, left)
342 geev_interface2(zgeev_, BLAS_Z, left)
350 # define geesx_interface(lapack_name, base_type) inline \
351 void schur(dense_matrix<base_type> &A, \
352 dense_matrix<base_type> &S, \
353 dense_matrix<base_type> &Q) { \
354 GMMLAPACK_TRACE("geesx_interface"); \
355 BLAS_INT m = BLAS_INT(mat_nrows(A)), n = BLAS_INT(mat_ncols(A)); \
356 GMM_ASSERT1(m == n, "Schur decomposition requires square matrix"); \
357 char jobvs = 'V', sort = 'N', sense = 'N'; \
358 bool select = false; \
359 BLAS_INT lwork = 8*n, sdim = 0, liwork = 1; \
360 std::vector<base_type> work(lwork), wr(n), wi(n); \
361 std::vector<BLAS_INT> iwork(liwork); \
362 std::vector<BLAS_INT> bwork(1); \
363 resize(S, n, n); copy(A, S); \
365 base_type rconde(0), rcondv(0); \
367 lapack_name(&jobvs, &sort, &select, &sense, &n, &S(0,0), &n, \
368 &sdim, &wr[0], &wi[0], &Q(0,0), &n, &rconde, &rcondv, \
369 &work[0], &lwork, &iwork[0], &liwork, &bwork[0], &info);\
370 GMM_ASSERT1(!info, "SCHUR algorithm failed"); \
373 # define geesx_interface2(lapack_name, base_type) inline \
374 void schur(dense_matrix<base_type> &A, \
375 dense_matrix<base_type> &S, \
376 dense_matrix<base_type> &Q) { \
377 GMMLAPACK_TRACE("geesx_interface"); \
378 BLAS_INT m = BLAS_INT(mat_nrows(A)), n = BLAS_INT(mat_ncols(A)); \
379 GMM_ASSERT1(m == n, "Schur decomposition requires square matrix"); \
380 char jobvs = 'V', sort = 'N', sense = 'N'; \
381 bool select = false; \
382 BLAS_INT lwork = 8*n, sdim = 0; \
383 std::vector<base_type::value_type> rwork(lwork); \
384 std::vector<base_type> work(lwork), w(n); \
385 std::vector<BLAS_INT> bwork(1); \
386 resize(S, n, n); copy(A, S); \
388 base_type rconde(0), rcondv(0); \
390 lapack_name(&jobvs, &sort, &select, &sense, &n, &S(0,0), &n, \
391 &sdim, &w[0], &Q(0,0), &n, &rconde, &rcondv, \
392 &work[0], &lwork, &rwork[0], &bwork[0], &info); \
393 GMM_ASSERT1(!info, "SCHUR algorithm failed"); \
396 geesx_interface(sgeesx_, BLAS_S)
397 geesx_interface(dgeesx_, BLAS_D)
398 geesx_interface2(cgeesx_, BLAS_C)
399 geesx_interface2(zgeesx_, BLAS_Z)
401 template <
typename MAT>
402 void schur(
const MAT &A_, MAT &S, MAT &Q) {
413 # define gesvd_interface(lapack_name, base_type) inline \
414 void svd(dense_matrix<base_type> &X, \
415 dense_matrix<base_type> &U, \
416 dense_matrix<base_type> &Vtransposed, \
417 std::vector<base_type> &sigma) { \
418 GMMLAPACK_TRACE("gesvd_interface"); \
419 BLAS_INT m = BLAS_INT(mat_nrows(X)), n = BLAS_INT(mat_ncols(X)); \
420 BLAS_INT mn_min = m < n ? m : n; \
421 sigma.resize(mn_min); \
422 std::vector<base_type> work(15 * mn_min); \
423 BLAS_INT lwork = BLAS_INT(work.size()); \
425 resize(Vtransposed, n, n); \
428 lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0), \
429 &m, &Vtransposed(0,0), &n, &work[0], &lwork, &info); \
432 # define cgesvd_interface(lapack_name, base_type, base_type2) inline \
433 void svd(dense_matrix<base_type> &X, \
434 dense_matrix<base_type> &U, \
435 dense_matrix<base_type> &Vtransposed, \
436 std::vector<base_type2> &sigma) { \
437 GMMLAPACK_TRACE("gesvd_interface"); \
438 BLAS_INT m = BLAS_INT(mat_nrows(X)), n = BLAS_INT(mat_ncols(X)); \
439 BLAS_INT mn_min = m < n ? m : n; \
440 sigma.resize(mn_min); \
441 std::vector<base_type> work(15 * mn_min); \
442 std::vector<base_type2> rwork(5 * mn_min); \
443 BLAS_INT lwork = BLAS_INT(work.size()); \
445 resize(Vtransposed, n, n); \
448 lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0), \
449 &m, &Vtransposed(0,0), &n, &work[0], &lwork, \
453 gesvd_interface(sgesvd_, BLAS_S)
454 gesvd_interface(dgesvd_, BLAS_D)
455 cgesvd_interface(cgesvd_, BLAS_C, BLAS_S)
456 cgesvd_interface(zgesvd_, BLAS_Z, BLAS_D)
458 template <
typename MAT,
typename VEC>
459 void svd(
const MAT &X_, MAT &U, MAT &Vtransposed, VEC &sigma) {
461 svd(X, U, Vtransposed, sigma);
473 template <
typename MAT>
474 void schur(
const MAT &A_, MAT &S, MAT &Q)
476 GMM_ASSERT1(
false,
"Use of function schur(A,S,Q) requires GetFEM "
477 "to be built with Lapack");
482 #endif // GMM_USES_LAPACK
484 #endif // GMM_LAPACK_INTERFACE_H