xref: /petsc/src/mat/impls/baij/seq/dgefa2.c (revision c80103dabbafe9c3104be5e968fb1ec50fcbf95c)
1 
2 /*
3      Inverts 2 by 2 matrix using partial pivoting.
4 
5        Used by the sparse factorization routines in
6      src/mat/impls/baij/seq
7 
8 
9        This is a combination of the Linpack routines
10     dgefa() and dgedi() specialized for a size of 2.
11 
12 */
13 #include <petscsys.h>
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_2"
17 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
18 {
19   PetscInt  i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
20   PetscInt  k4,j3;
21   MatScalar *aa,*ax,*ay,work[4],stmp;
22   MatReal   tmp,max;
23 
24   /* gaussian elimination with partial pivoting */
25 
26   PetscFunctionBegin;
27   if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
28 
29   shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
30   /* Parameter adjustments */
31   a -= 3;
32 
33   /*for (k = 1; k <= 1; ++k) {*/
34   k   = 1;
35   kp1 = k + 1;
36   k3  = 2*k;
37   k4  = k3 + k;
38   /* find l = pivot index */
39 
40   i__2 = 3 - k;
41   aa   = &a[k4];
42   max  = PetscAbsScalar(aa[0]);
43   l    = 1;
44   for (ll=1; ll<i__2; ll++) {
45     tmp = PetscAbsScalar(aa[ll]);
46     if (tmp > max) { max = tmp; l = ll+1;}
47   }
48   l        += k - 1;
49   ipvt[k-1] = l;
50 
51   if (a[l + k3] == 0.0) {
52     if (shift == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
53     else {
54       a[l + k3] = shift;
55     }
56   }
57 
58   /* interchange if necessary */
59 
60   if (l != k) {
61     stmp      = a[l + k3];
62     a[l + k3] = a[k4];
63     a[k4]     = stmp;
64   }
65 
66   /* compute multipliers */
67 
68   stmp = -1. / a[k4];
69   i__2 = 2 - k;
70   aa = &a[1 + k4];
71   for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
72 
73   /* row elimination with column indexing */
74 
75   ax = &a[k4+1];
76   for (j = kp1; j <= 2; ++j) {
77     j3   = 2*j;
78     stmp = a[l + j3];
79     if (l != k) {
80       a[l + j3] = a[k + j3];
81       a[k + j3] = stmp;
82     }
83 
84     i__3 = 2 - k;
85     ay   = &a[1+k+j3];
86     for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
87   }
88 
89   ipvt[1] = 2;
90   if (a[6] == 0.0) {
91     PetscErrorCode ierr;
92     if (allowzeropivot) {
93       ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",1);CHKERRQ(ierr);
94       *zeropivotdetected = PETSC_TRUE;
95     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
96   }
97 
98   /*
99    Now form the inverse
100   */
101 
102   /* compute inverse(u) */
103 
104   for (k = 1; k <= 2; ++k) {
105     k3    = 2*k;
106     k4    = k3 + k;
107     a[k4] = 1.0 / a[k4];
108     stmp  = -a[k4];
109     i__2  = k - 1;
110     aa    = &a[k3 + 1];
111     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
112     kp1 = k + 1;
113     if (2 < kp1) continue;
114     ax = aa;
115     for (j = kp1; j <= 2; ++j) {
116       j3        = 2*j;
117       stmp      = a[k + j3];
118       a[k + j3] = 0.0;
119       ay        = &a[j3 + 1];
120       for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
121     }
122   }
123 
124   /* form inverse(u)*inverse(l) */
125   k   = 1;
126   k3  = 2*k;
127   kp1 = k + 1;
128   aa  = a + k3;
129   for (i = kp1; i <= 2; ++i) {
130     work[i-1] = aa[i];
131     aa[i]     = 0.0;
132   }
133   for (j = kp1; j <= 2; ++j) {
134     stmp   = work[j-1];
135     ax     = &a[2*j + 1];
136     ay     = &a[k3 + 1];
137     ay[0] += stmp*ax[0];
138     ay[1] += stmp*ax[1];
139   }
140   l = ipvt[k-1];
141   if (l != k) {
142     ax   = &a[k3 + 1];
143     ay   = &a[2*l + 1];
144     stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
145     stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
146   }
147   PetscFunctionReturn(0);
148 }
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_9"
152 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
153 {
154   PetscInt  i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
155   PetscInt  k4,j3;
156   MatScalar *aa,*ax,*ay,work[81],stmp;
157   MatReal   tmp,max;
158 
159   /* gaussian elimination with partial pivoting */
160 
161   PetscFunctionBegin;
162   if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
163 
164   /* Parameter adjustments */
165   a -= 10;
166 
167   for (k = 1; k <= 8; ++k) {
168     kp1 = k + 1;
169     k3  = 9*k;
170     k4  = k3 + k;
171     /* find l = pivot index */
172 
173     i__2 = 10 - k;
174     aa   = &a[k4];
175     max  = PetscAbsScalar(aa[0]);
176     l    = 1;
177     for (ll=1; ll<i__2; ll++) {
178       tmp = PetscAbsScalar(aa[ll]);
179       if (tmp > max) { max = tmp; l = ll+1;}
180     }
181     l        += k - 1;
182     ipvt[k-1] = l;
183 
184     if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
185 
186     /* interchange if necessary */
187 
188     if (l != k) {
189       stmp      = a[l + k3];
190       a[l + k3] = a[k4];
191       a[k4]     = stmp;
192     }
193 
194     /* compute multipliers */
195 
196     stmp = -1. / a[k4];
197     i__2 = 9 - k;
198     aa = &a[1 + k4];
199     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
200 
201     /* row elimination with column indexing */
202 
203     ax = &a[k4+1];
204     for (j = kp1; j <= 9; ++j) {
205       j3   = 9*j;
206       stmp = a[l + j3];
207       if (l != k) {
208         a[l + j3] = a[k + j3];
209         a[k + j3] = stmp;
210       }
211 
212       i__3 = 9 - k;
213       ay = &a[1+k+j3];
214       for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
215     }
216   }
217   ipvt[8] = 9;
218   if (a[90] == 0.0) {
219     PetscErrorCode ierr;
220     if (allowzeropivot) {
221       ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",6);CHKERRQ(ierr);
222       *zeropivotdetected = PETSC_TRUE;
223     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
224   }
225 
226   /*
227    Now form the inverse
228   */
229 
230   /* compute inverse(u) */
231 
232   for (k = 1; k <= 9; ++k) {
233     k3    = 9*k;
234     k4    = k3 + k;
235     a[k4] = 1.0 / a[k4];
236     stmp  = -a[k4];
237     i__2  = k - 1;
238     aa    = &a[k3 + 1];
239     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
240     kp1 = k + 1;
241     if (9 < kp1) continue;
242     ax = aa;
243     for (j = kp1; j <= 9; ++j) {
244       j3        = 9*j;
245       stmp      = a[k + j3];
246       a[k + j3] = 0.0;
247       ay        = &a[j3 + 1];
248       for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
249     }
250   }
251 
252   /* form inverse(u)*inverse(l) */
253 
254   for (kb = 1; kb <= 8; ++kb) {
255     k   = 9 - kb;
256     k3  = 9*k;
257     kp1 = k + 1;
258     aa  = a + k3;
259     for (i = kp1; i <= 9; ++i) {
260       work[i-1] = aa[i];
261       aa[i]     = 0.0;
262     }
263     for (j = kp1; j <= 9; ++j) {
264       stmp   = work[j-1];
265       ax     = &a[9*j + 1];
266       ay     = &a[k3 + 1];
267       ay[0] += stmp*ax[0];
268       ay[1] += stmp*ax[1];
269       ay[2] += stmp*ax[2];
270       ay[3] += stmp*ax[3];
271       ay[4] += stmp*ax[4];
272       ay[5] += stmp*ax[5];
273       ay[6] += stmp*ax[6];
274       ay[7] += stmp*ax[7];
275       ay[8] += stmp*ax[8];
276     }
277     l = ipvt[k-1];
278     if (l != k) {
279       ax   = &a[k3 + 1];
280       ay   = &a[9*l + 1];
281       stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
282       stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
283       stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
284       stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
285       stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
286       stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
287       stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
288       stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
289       stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
290     }
291   }
292   PetscFunctionReturn(0);
293 }
294 
295 /*
296       Inverts 15 by 15 matrix using partial pivoting.
297 
298        Used by the sparse factorization routines in
299      src/mat/impls/baij/seq
300 
301        This is a combination of the Linpack routines
302     dgefa() and dgedi() specialized for a size of 15.
303 
304 */
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "PetscKernel_A_gets_inverse_A_15"
308 PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
309 {
310   PetscInt  i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
311   PetscInt  k4,j3;
312   MatScalar *aa,*ax,*ay,stmp;
313   MatReal   tmp,max;
314 
315   /* gaussian elimination with partial pivoting */
316 
317   PetscFunctionBegin;
318   if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
319 
320   /* Parameter adjustments */
321   a -= 16;
322 
323   for (k = 1; k <= 14; ++k) {
324     kp1 = k + 1;
325     k3  = 15*k;
326     k4  = k3 + k;
327     /* find l = pivot index */
328 
329     i__2 = 16 - k;
330     aa   = &a[k4];
331     max  = PetscAbsScalar(aa[0]);
332     l    = 1;
333     for (ll=1; ll<i__2; ll++) {
334       tmp = PetscAbsScalar(aa[ll]);
335       if (tmp > max) { max = tmp; l = ll+1;}
336     }
337     l        += k - 1;
338     ipvt[k-1] = l;
339 
340     if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
341 
342     /* interchange if necessary */
343 
344     if (l != k) {
345       stmp      = a[l + k3];
346       a[l + k3] = a[k4];
347       a[k4]     = stmp;
348     }
349 
350     /* compute multipliers */
351 
352     stmp = -1. / a[k4];
353     i__2 = 15 - k;
354     aa = &a[1 + k4];
355     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
356 
357     /* row elimination with column indexing */
358 
359     ax = &a[k4+1];
360     for (j = kp1; j <= 15; ++j) {
361       j3   = 15*j;
362       stmp = a[l + j3];
363       if (l != k) {
364         a[l + j3] = a[k + j3];
365         a[k + j3] = stmp;
366       }
367 
368       i__3 = 15 - k;
369       ay = &a[1+k+j3];
370       for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
371     }
372   }
373   ipvt[14] = 15;
374   if (a[240] == 0.0) {
375     PetscErrorCode ierr;
376     if (allowzeropivot) {
377       ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",6);CHKERRQ(ierr);
378       *zeropivotdetected = PETSC_TRUE;
379     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
380   }
381 
382   /*
383    Now form the inverse
384   */
385 
386   /* compute inverse(u) */
387 
388   for (k = 1; k <= 15; ++k) {
389     k3    = 15*k;
390     k4    = k3 + k;
391     a[k4] = 1.0 / a[k4];
392     stmp  = -a[k4];
393     i__2  = k - 1;
394     aa    = &a[k3 + 1];
395     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
396     kp1 = k + 1;
397     if (15 < kp1) continue;
398     ax = aa;
399     for (j = kp1; j <= 15; ++j) {
400       j3        = 15*j;
401       stmp      = a[k + j3];
402       a[k + j3] = 0.0;
403       ay        = &a[j3 + 1];
404       for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
405     }
406   }
407 
408   /* form inverse(u)*inverse(l) */
409 
410   for (kb = 1; kb <= 14; ++kb) {
411     k   = 15 - kb;
412     k3  = 15*k;
413     kp1 = k + 1;
414     aa  = a + k3;
415     for (i = kp1; i <= 15; ++i) {
416       work[i-1] = aa[i];
417       aa[i]     = 0.0;
418     }
419     for (j = kp1; j <= 15; ++j) {
420       stmp    = work[j-1];
421       ax      = &a[15*j + 1];
422       ay      = &a[k3 + 1];
423       ay[0]  += stmp*ax[0];
424       ay[1]  += stmp*ax[1];
425       ay[2]  += stmp*ax[2];
426       ay[3]  += stmp*ax[3];
427       ay[4]  += stmp*ax[4];
428       ay[5]  += stmp*ax[5];
429       ay[6]  += stmp*ax[6];
430       ay[7]  += stmp*ax[7];
431       ay[8]  += stmp*ax[8];
432       ay[9]  += stmp*ax[9];
433       ay[10] += stmp*ax[10];
434       ay[11] += stmp*ax[11];
435       ay[12] += stmp*ax[12];
436       ay[13] += stmp*ax[13];
437       ay[14] += stmp*ax[14];
438     }
439     l = ipvt[k-1];
440     if (l != k) {
441       ax   = &a[k3 + 1];
442       ay   = &a[15*l + 1];
443       stmp = ax[0];  ax[0]  = ay[0];  ay[0]  = stmp;
444       stmp = ax[1];  ax[1]  = ay[1];  ay[1]  = stmp;
445       stmp = ax[2];  ax[2]  = ay[2];  ay[2]  = stmp;
446       stmp = ax[3];  ax[3]  = ay[3];  ay[3]  = stmp;
447       stmp = ax[4];  ax[4]  = ay[4];  ay[4]  = stmp;
448       stmp = ax[5];  ax[5]  = ay[5];  ay[5]  = stmp;
449       stmp = ax[6];  ax[6]  = ay[6];  ay[6]  = stmp;
450       stmp = ax[7];  ax[7]  = ay[7];  ay[7]  = stmp;
451       stmp = ax[8];  ax[8]  = ay[8];  ay[8]  = stmp;
452       stmp = ax[9];  ax[9]  = ay[9];  ay[9]  = stmp;
453       stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
454       stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
455       stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
456       stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
457       stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
458     }
459   }
460   PetscFunctionReturn(0);
461 }
462