1 2 /* 3 Inverts 2 by 2 matrix using partial pivoting. 4 5 Used by the sparse factorization routines in 6 src/mat/impls/baij/seq 7 8 9 This is a combination of the Linpack routines 10 dgefa() and dgedi() specialized for a size of 2. 11 12 */ 13 #include <petscsys.h> 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_2" 17 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 18 { 19 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3; 20 PetscInt k4,j3; 21 MatScalar *aa,*ax,*ay,work[4],stmp; 22 MatReal tmp,max; 23 24 /* gaussian elimination with partial pivoting */ 25 26 PetscFunctionBegin; 27 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 28 29 shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3])); 30 /* Parameter adjustments */ 31 a -= 3; 32 33 /*for (k = 1; k <= 1; ++k) {*/ 34 k = 1; 35 kp1 = k + 1; 36 k3 = 2*k; 37 k4 = k3 + k; 38 /* find l = pivot index */ 39 40 i__2 = 3 - k; 41 aa = &a[k4]; 42 max = PetscAbsScalar(aa[0]); 43 l = 1; 44 for (ll=1; ll<i__2; ll++) { 45 tmp = PetscAbsScalar(aa[ll]); 46 if (tmp > max) { max = tmp; l = ll+1;} 47 } 48 l += k - 1; 49 ipvt[k-1] = l; 50 51 if (a[l + k3] == 0.0) { 52 if (shift == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 53 else { 54 a[l + k3] = shift; 55 } 56 } 57 58 /* interchange if necessary */ 59 60 if (l != k) { 61 stmp = a[l + k3]; 62 a[l + k3] = a[k4]; 63 a[k4] = stmp; 64 } 65 66 /* compute multipliers */ 67 68 stmp = -1. / a[k4]; 69 i__2 = 2 - k; 70 aa = &a[1 + k4]; 71 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 72 73 /* row elimination with column indexing */ 74 75 ax = &a[k4+1]; 76 for (j = kp1; j <= 2; ++j) { 77 j3 = 2*j; 78 stmp = a[l + j3]; 79 if (l != k) { 80 a[l + j3] = a[k + j3]; 81 a[k + j3] = stmp; 82 } 83 84 i__3 = 2 - k; 85 ay = &a[1+k+j3]; 86 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 87 } 88 89 ipvt[1] = 2; 90 if (a[6] == 0.0) { 91 PetscErrorCode ierr; 92 if (allowzeropivot) { 93 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",1);CHKERRQ(ierr); 94 *zeropivotdetected = PETSC_TRUE; 95 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1); 96 } 97 98 /* 99 Now form the inverse 100 */ 101 102 /* compute inverse(u) */ 103 104 for (k = 1; k <= 2; ++k) { 105 k3 = 2*k; 106 k4 = k3 + k; 107 a[k4] = 1.0 / a[k4]; 108 stmp = -a[k4]; 109 i__2 = k - 1; 110 aa = &a[k3 + 1]; 111 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 112 kp1 = k + 1; 113 if (2 < kp1) continue; 114 ax = aa; 115 for (j = kp1; j <= 2; ++j) { 116 j3 = 2*j; 117 stmp = a[k + j3]; 118 a[k + j3] = 0.0; 119 ay = &a[j3 + 1]; 120 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 121 } 122 } 123 124 /* form inverse(u)*inverse(l) */ 125 k = 1; 126 k3 = 2*k; 127 kp1 = k + 1; 128 aa = a + k3; 129 for (i = kp1; i <= 2; ++i) { 130 work[i-1] = aa[i]; 131 aa[i] = 0.0; 132 } 133 for (j = kp1; j <= 2; ++j) { 134 stmp = work[j-1]; 135 ax = &a[2*j + 1]; 136 ay = &a[k3 + 1]; 137 ay[0] += stmp*ax[0]; 138 ay[1] += stmp*ax[1]; 139 } 140 l = ipvt[k-1]; 141 if (l != k) { 142 ax = &a[k3 + 1]; 143 ay = &a[2*l + 1]; 144 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 145 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 146 } 147 PetscFunctionReturn(0); 148 } 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_9" 152 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 153 { 154 PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3; 155 PetscInt k4,j3; 156 MatScalar *aa,*ax,*ay,work[81],stmp; 157 MatReal tmp,max; 158 159 /* gaussian elimination with partial pivoting */ 160 161 PetscFunctionBegin; 162 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 163 164 /* Parameter adjustments */ 165 a -= 10; 166 167 for (k = 1; k <= 8; ++k) { 168 kp1 = k + 1; 169 k3 = 9*k; 170 k4 = k3 + k; 171 /* find l = pivot index */ 172 173 i__2 = 10 - k; 174 aa = &a[k4]; 175 max = PetscAbsScalar(aa[0]); 176 l = 1; 177 for (ll=1; ll<i__2; ll++) { 178 tmp = PetscAbsScalar(aa[ll]); 179 if (tmp > max) { max = tmp; l = ll+1;} 180 } 181 l += k - 1; 182 ipvt[k-1] = l; 183 184 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 185 186 /* interchange if necessary */ 187 188 if (l != k) { 189 stmp = a[l + k3]; 190 a[l + k3] = a[k4]; 191 a[k4] = stmp; 192 } 193 194 /* compute multipliers */ 195 196 stmp = -1. / a[k4]; 197 i__2 = 9 - k; 198 aa = &a[1 + k4]; 199 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 200 201 /* row elimination with column indexing */ 202 203 ax = &a[k4+1]; 204 for (j = kp1; j <= 9; ++j) { 205 j3 = 9*j; 206 stmp = a[l + j3]; 207 if (l != k) { 208 a[l + j3] = a[k + j3]; 209 a[k + j3] = stmp; 210 } 211 212 i__3 = 9 - k; 213 ay = &a[1+k+j3]; 214 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 215 } 216 } 217 ipvt[8] = 9; 218 if (a[90] == 0.0) { 219 PetscErrorCode ierr; 220 if (allowzeropivot) { 221 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",6);CHKERRQ(ierr); 222 *zeropivotdetected = PETSC_TRUE; 223 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 224 } 225 226 /* 227 Now form the inverse 228 */ 229 230 /* compute inverse(u) */ 231 232 for (k = 1; k <= 9; ++k) { 233 k3 = 9*k; 234 k4 = k3 + k; 235 a[k4] = 1.0 / a[k4]; 236 stmp = -a[k4]; 237 i__2 = k - 1; 238 aa = &a[k3 + 1]; 239 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 240 kp1 = k + 1; 241 if (9 < kp1) continue; 242 ax = aa; 243 for (j = kp1; j <= 9; ++j) { 244 j3 = 9*j; 245 stmp = a[k + j3]; 246 a[k + j3] = 0.0; 247 ay = &a[j3 + 1]; 248 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 249 } 250 } 251 252 /* form inverse(u)*inverse(l) */ 253 254 for (kb = 1; kb <= 8; ++kb) { 255 k = 9 - kb; 256 k3 = 9*k; 257 kp1 = k + 1; 258 aa = a + k3; 259 for (i = kp1; i <= 9; ++i) { 260 work[i-1] = aa[i]; 261 aa[i] = 0.0; 262 } 263 for (j = kp1; j <= 9; ++j) { 264 stmp = work[j-1]; 265 ax = &a[9*j + 1]; 266 ay = &a[k3 + 1]; 267 ay[0] += stmp*ax[0]; 268 ay[1] += stmp*ax[1]; 269 ay[2] += stmp*ax[2]; 270 ay[3] += stmp*ax[3]; 271 ay[4] += stmp*ax[4]; 272 ay[5] += stmp*ax[5]; 273 ay[6] += stmp*ax[6]; 274 ay[7] += stmp*ax[7]; 275 ay[8] += stmp*ax[8]; 276 } 277 l = ipvt[k-1]; 278 if (l != k) { 279 ax = &a[k3 + 1]; 280 ay = &a[9*l + 1]; 281 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 282 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 283 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 284 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 285 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 286 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 287 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 288 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 289 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 290 } 291 } 292 PetscFunctionReturn(0); 293 } 294 295 /* 296 Inverts 15 by 15 matrix using partial pivoting. 297 298 Used by the sparse factorization routines in 299 src/mat/impls/baij/seq 300 301 This is a combination of the Linpack routines 302 dgefa() and dgedi() specialized for a size of 15. 303 304 */ 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_15" 308 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected) 309 { 310 PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3; 311 PetscInt k4,j3; 312 MatScalar *aa,*ax,*ay,stmp; 313 MatReal tmp,max; 314 315 /* gaussian elimination with partial pivoting */ 316 317 PetscFunctionBegin; 318 if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; 319 320 /* Parameter adjustments */ 321 a -= 16; 322 323 for (k = 1; k <= 14; ++k) { 324 kp1 = k + 1; 325 k3 = 15*k; 326 k4 = k3 + k; 327 /* find l = pivot index */ 328 329 i__2 = 16 - k; 330 aa = &a[k4]; 331 max = PetscAbsScalar(aa[0]); 332 l = 1; 333 for (ll=1; ll<i__2; ll++) { 334 tmp = PetscAbsScalar(aa[ll]); 335 if (tmp > max) { max = tmp; l = ll+1;} 336 } 337 l += k - 1; 338 ipvt[k-1] = l; 339 340 if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); 341 342 /* interchange if necessary */ 343 344 if (l != k) { 345 stmp = a[l + k3]; 346 a[l + k3] = a[k4]; 347 a[k4] = stmp; 348 } 349 350 /* compute multipliers */ 351 352 stmp = -1. / a[k4]; 353 i__2 = 15 - k; 354 aa = &a[1 + k4]; 355 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 356 357 /* row elimination with column indexing */ 358 359 ax = &a[k4+1]; 360 for (j = kp1; j <= 15; ++j) { 361 j3 = 15*j; 362 stmp = a[l + j3]; 363 if (l != k) { 364 a[l + j3] = a[k + j3]; 365 a[k + j3] = stmp; 366 } 367 368 i__3 = 15 - k; 369 ay = &a[1+k+j3]; 370 for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll]; 371 } 372 } 373 ipvt[14] = 15; 374 if (a[240] == 0.0) { 375 PetscErrorCode ierr; 376 if (allowzeropivot) { 377 ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",6);CHKERRQ(ierr); 378 *zeropivotdetected = PETSC_TRUE; 379 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6); 380 } 381 382 /* 383 Now form the inverse 384 */ 385 386 /* compute inverse(u) */ 387 388 for (k = 1; k <= 15; ++k) { 389 k3 = 15*k; 390 k4 = k3 + k; 391 a[k4] = 1.0 / a[k4]; 392 stmp = -a[k4]; 393 i__2 = k - 1; 394 aa = &a[k3 + 1]; 395 for (ll=0; ll<i__2; ll++) aa[ll] *= stmp; 396 kp1 = k + 1; 397 if (15 < kp1) continue; 398 ax = aa; 399 for (j = kp1; j <= 15; ++j) { 400 j3 = 15*j; 401 stmp = a[k + j3]; 402 a[k + j3] = 0.0; 403 ay = &a[j3 + 1]; 404 for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll]; 405 } 406 } 407 408 /* form inverse(u)*inverse(l) */ 409 410 for (kb = 1; kb <= 14; ++kb) { 411 k = 15 - kb; 412 k3 = 15*k; 413 kp1 = k + 1; 414 aa = a + k3; 415 for (i = kp1; i <= 15; ++i) { 416 work[i-1] = aa[i]; 417 aa[i] = 0.0; 418 } 419 for (j = kp1; j <= 15; ++j) { 420 stmp = work[j-1]; 421 ax = &a[15*j + 1]; 422 ay = &a[k3 + 1]; 423 ay[0] += stmp*ax[0]; 424 ay[1] += stmp*ax[1]; 425 ay[2] += stmp*ax[2]; 426 ay[3] += stmp*ax[3]; 427 ay[4] += stmp*ax[4]; 428 ay[5] += stmp*ax[5]; 429 ay[6] += stmp*ax[6]; 430 ay[7] += stmp*ax[7]; 431 ay[8] += stmp*ax[8]; 432 ay[9] += stmp*ax[9]; 433 ay[10] += stmp*ax[10]; 434 ay[11] += stmp*ax[11]; 435 ay[12] += stmp*ax[12]; 436 ay[13] += stmp*ax[13]; 437 ay[14] += stmp*ax[14]; 438 } 439 l = ipvt[k-1]; 440 if (l != k) { 441 ax = &a[k3 + 1]; 442 ay = &a[15*l + 1]; 443 stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp; 444 stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp; 445 stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp; 446 stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp; 447 stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp; 448 stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp; 449 stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp; 450 stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp; 451 stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp; 452 stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp; 453 stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp; 454 stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp; 455 stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp; 456 stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp; 457 stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp; 458 } 459 } 460 PetscFunctionReturn(0); 461 } 462