xref: /petsc/src/mat/impls/maij/maij.c (revision 2886d2b79abfef6f8a33d24869d53209e704bf9b)
1 /*$Id: maij.c,v 1.4 2000/06/01 20:10:27 bsmith Exp bsmith $*/
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include "src/mat/impls/aij/mpi/mpiaij.h"
20 
21 typedef struct {
22   int        dof;         /* number of components */
23   Mat        AIJ,OAIJ;    /* representation of interpolation for one component */
24   VecScatter ctx;         /* update ghost points for parallel case */
25   Vec        w;           /* work space for ghost values for parallel case */
26 } Mat_MAIJ;
27 
28 #undef __FUNC__
29 #define __FUNC__ /*<a name="MatDestroy_MAIJ"></a>*/"MatDestroy_MAIJ"
30 int MatDestroy_MAIJ(Mat A)
31 {
32   int      ierr;
33   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
34 
35   PetscFunctionBegin;
36   if (b->AIJ) {
37     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
38   }
39   if (b->OAIJ) {
40     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
41   }
42   if (b->ctx) {
43     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
44   }
45   if (b->w) {
46     ierr = VecDestroy(b->w);CHKERRQ(ierr);
47   }
48   ierr = PetscFree(b);CHKERRQ(ierr);
49   PetscHeaderDestroy(A);
50   PetscFunctionReturn(0);
51 }
52 
53 EXTERN_C_BEGIN
54 #undef __FUNC__
55 #define __FUNC__ /*<a name="MatCreate_MAIJ"></a>*/"MatCreate_MAIJ"
56 int MatCreate_MAIJ(Mat A)
57 {
58   int      ierr;
59   Mat_MAIJ *b;
60 
61   PetscFunctionBegin;
62   A->data             = (void*)(b = PetscNew(Mat_MAIJ));CHKPTRQ(b);
63   ierr = PetscMemzero(b,sizeof(Mat_MAIJ));CHKERRQ(ierr);
64   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
65   A->factor           = 0;
66   A->mapping          = 0;
67 
68   b->AIJ  = 0;
69   b->dof  = 0;
70   b->OAIJ = 0;
71   b->ctx  = 0;
72   b->w    = 0;
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /* ----------------------------------------------------------------------------------*/
78 EXTERN int MatMult_SeqAIJ(Mat,Vec,Vec);
79 EXTERN int MatMultAdd_SeqAIJ(Mat,Vec,Vec,Vec);
80 EXTERN int MatMultTranspose_SeqAIJ(Mat,Vec,Vec);
81 EXTERN int MatMultTransposeAdd_SeqAIJ(Mat,Vec,Vec,Vec);
82 
83 #undef __FUNC__
84 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_1"></a>*/"MatMult_SeqMAIJ_1"
85 int MatMult_SeqMAIJ_1(Mat A,Vec xx,Vec yy)
86 {
87   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
88   int      ierr;
89   PetscFunctionBegin;
90   ierr = MatMult_SeqAIJ(b->AIJ,xx,yy);
91   PetscFunctionReturn(0);
92 }
93 #undef __FUNC__
94 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_1"></a>*/"MatMultTranspose_SeqMAIJ_1"
95 int MatMultTranspose_SeqMAIJ_1(Mat A,Vec xx,Vec yy)
96 {
97   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
98   int      ierr;
99   PetscFunctionBegin;
100   ierr = MatMultTranspose_SeqAIJ(b->AIJ,xx,yy);
101   PetscFunctionReturn(0);
102 }
103 #undef __FUNC__
104 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_1"></a>*/"MatMultAdd_SeqMAIJ_1"
105 int MatMultAdd_SeqMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
106 {
107   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
108   int      ierr;
109   PetscFunctionBegin;
110   ierr = MatMultAdd_SeqAIJ(b->AIJ,xx,yy,zz);
111   PetscFunctionReturn(0);
112 }
113 #undef __FUNC__
114 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_1"></a>*/"MatMultTransposeAdd_SeqMAIJ_1"
115 int MatMultTransposeAdd_SeqMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
116 {
117   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
118   int      ierr;
119   PetscFunctionBegin;
120   ierr = MatMultTransposeAdd_SeqAIJ(b->AIJ,xx,yy,zz);
121   PetscFunctionReturn(0);
122 }
123 
124 /* --------------------------------------------------------------------------------------*/
125 #undef __FUNC__
126 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2"
127 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
128 {
129   Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
130   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
131   Scalar      *x,*y,*v,sum1, sum2;
132   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
133   int         n,i,jrow,j;
134 
135   PetscFunctionBegin;
136   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
137   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
138   x    = x + shift;    /* shift for Fortran start by 1 indexing */
139   idx  = a->j;
140   v    = a->a;
141   ii   = a->i;
142 
143   v    += shift; /* shift for Fortran start by 1 indexing */
144   idx  += shift;
145   for (i=0; i<m; i++) {
146     jrow = ii[i];
147     n    = ii[i+1] - jrow;
148     sum1  = 0.0;
149     sum2  = 0.0;
150     for (j=0; j<n; j++) {
151       sum1 += v[jrow]*x[2*idx[jrow]];
152       sum2 += v[jrow]*x[2*idx[jrow]+1];
153       jrow++;
154      }
155     y[2*i]   = sum1;
156     y[2*i+1] = sum2;
157   }
158 
159   PLogFlops(4*a->nz - 2*m);
160   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
161   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 #undef __FUNC__
166 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2"
167 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
168 {
169   Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
170   Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
171   Scalar     *x,*y,*v,alpha1,alpha2,zero = 0.0;
172   int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;
173 
174   PetscFunctionBegin;
175   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
176   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
177   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
178   y = y + shift; /* shift for Fortran start by 1 indexing */
179   for (i=0; i<m; i++) {
180     idx    = a->j + a->i[i] + shift;
181     v      = a->a + a->i[i] + shift;
182     n      = a->i[i+1] - a->i[i];
183     alpha1 = x[2*i];
184     alpha2 = x[2*i+1];
185     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
186   }
187   PLogFlops(4*a->nz - 2*a->n);
188   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
189   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
190   PetscFunctionReturn(0);
191 }
192 
193 #undef __FUNC__
194 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2"
195 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
196 {
197   Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
198   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
199   Scalar      *x,*y,*v,sum1, sum2;
200   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
201   int         n,i,jrow,j;
202 
203   PetscFunctionBegin;
204   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
205   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
206   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
207   x    = x + shift;    /* shift for Fortran start by 1 indexing */
208   idx  = a->j;
209   v    = a->a;
210   ii   = a->i;
211 
212   v    += shift; /* shift for Fortran start by 1 indexing */
213   idx  += shift;
214   for (i=0; i<m; i++) {
215     jrow = ii[i];
216     n    = ii[i+1] - jrow;
217     sum1  = 0.0;
218     sum2  = 0.0;
219     for (j=0; j<n; j++) {
220       sum1 += v[jrow]*x[2*idx[jrow]];
221       sum2 += v[jrow]*x[2*idx[jrow]+1];
222       jrow++;
223      }
224     y[2*i]   += sum1;
225     y[2*i+1] += sum2;
226   }
227 
228   PLogFlops(4*a->nz - 2*m);
229   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
230   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 #undef __FUNC__
234 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2"
235 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
236 {
237   Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
238   Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
239   Scalar     *x,*y,*v,alpha1,alpha2;
240   int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;
241 
242   PetscFunctionBegin;
243   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
244   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
245   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
246   y = y + shift; /* shift for Fortran start by 1 indexing */
247   for (i=0; i<m; i++) {
248     idx   = a->j + a->i[i] + shift;
249     v     = a->a + a->i[i] + shift;
250     n     = a->i[i+1] - a->i[i];
251     alpha1 = x[2*i];
252     alpha2 = x[2*i+1];
253     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
254   }
255   PLogFlops(4*a->nz - 2*a->n);
256   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
257   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 /* --------------------------------------------------------------------------------------*/
261 #undef __FUNC__
262 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3"
263 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
264 {
265   Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
266   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
267   Scalar      *x,*y,*v,sum1, sum2, sum3;
268   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
269   int         n,i,jrow,j;
270 
271   PetscFunctionBegin;
272   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
273   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
274   x    = x + shift;    /* shift for Fortran start by 1 indexing */
275   idx  = a->j;
276   v    = a->a;
277   ii   = a->i;
278 
279   v    += shift; /* shift for Fortran start by 1 indexing */
280   idx  += shift;
281   for (i=0; i<m; i++) {
282     jrow = ii[i];
283     n    = ii[i+1] - jrow;
284     sum1  = 0.0;
285     sum2  = 0.0;
286     sum3  = 0.0;
287     for (j=0; j<n; j++) {
288       sum1 += v[jrow]*x[3*idx[jrow]];
289       sum2 += v[jrow]*x[3*idx[jrow]+1];
290       sum3 += v[jrow]*x[3*idx[jrow]+2];
291       jrow++;
292      }
293     y[3*i]   = sum1;
294     y[3*i+1] = sum2;
295     y[3*i+2] = sum3;
296   }
297 
298   PLogFlops(6*a->nz - 3*m);
299   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
300   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303 
304 #undef __FUNC__
305 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3"
306 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
307 {
308   Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
309   Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
310   Scalar     *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
311   int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;
312 
313   PetscFunctionBegin;
314   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
315   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
316   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
317   y = y + shift; /* shift for Fortran start by 1 indexing */
318   for (i=0; i<m; i++) {
319     idx    = a->j + a->i[i] + shift;
320     v      = a->a + a->i[i] + shift;
321     n      = a->i[i+1] - a->i[i];
322     alpha1 = x[3*i];
323     alpha2 = x[3*i+1];
324     alpha3 = x[3*i+2];
325     while (n-->0) {
326       y[3*(*idx)]   += alpha1*(*v);
327       y[3*(*idx)+1] += alpha2*(*v);
328       y[3*(*idx)+2] += alpha3*(*v);
329       idx++; v++;
330     }
331   }
332   PLogFlops(6*a->nz - 3*a->n);
333   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
334   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
335   PetscFunctionReturn(0);
336 }
337 
338 #undef __FUNC__
339 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3"
340 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
341 {
342   Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
343   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
344   Scalar      *x,*y,*v,sum1, sum2, sum3;
345   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
346   int         n,i,jrow,j;
347 
348   PetscFunctionBegin;
349   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
352   x    = x + shift;    /* shift for Fortran start by 1 indexing */
353   idx  = a->j;
354   v    = a->a;
355   ii   = a->i;
356 
357   v    += shift; /* shift for Fortran start by 1 indexing */
358   idx  += shift;
359   for (i=0; i<m; i++) {
360     jrow = ii[i];
361     n    = ii[i+1] - jrow;
362     sum1  = 0.0;
363     sum2  = 0.0;
364     sum3  = 0.0;
365     for (j=0; j<n; j++) {
366       sum1 += v[jrow]*x[3*idx[jrow]];
367       sum2 += v[jrow]*x[3*idx[jrow]+1];
368       sum3 += v[jrow]*x[3*idx[jrow]+2];
369       jrow++;
370      }
371     y[3*i]   += sum1;
372     y[3*i+1] += sum2;
373     y[3*i+2] += sum3;
374   }
375 
376   PLogFlops(6*a->nz);
377   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
378   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
379   PetscFunctionReturn(0);
380 }
381 #undef __FUNC__
382 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3"
383 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
384 {
385   Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
386   Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
387   Scalar     *x,*y,*v,alpha1,alpha2,alpha3;
388   int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;
389 
390   PetscFunctionBegin;
391   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
392   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
393   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
394   y = y + shift; /* shift for Fortran start by 1 indexing */
395   for (i=0; i<m; i++) {
396     idx    = a->j + a->i[i] + shift;
397     v      = a->a + a->i[i] + shift;
398     n      = a->i[i+1] - a->i[i];
399     alpha1 = x[3*i];
400     alpha2 = x[3*i+1];
401     alpha3 = x[3*i+2];
402     while (n-->0) {
403       y[3*(*idx)]   += alpha1*(*v);
404       y[3*(*idx)+1] += alpha2*(*v);
405       y[3*(*idx)+2] += alpha3*(*v);
406       idx++; v++;
407     }
408   }
409   PLogFlops(6*a->nz);
410   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
411   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
412   PetscFunctionReturn(0);
413 }
414 
415 /* ------------------------------------------------------------------------------*/
416 #undef __FUNC__
417 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4"
418 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
419 {
420   Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
421   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
422   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
423   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
424   int         n,i,jrow,j;
425 
426   PetscFunctionBegin;
427   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
428   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
429   x    = x + shift;    /* shift for Fortran start by 1 indexing */
430   idx  = a->j;
431   v    = a->a;
432   ii   = a->i;
433 
434   v    += shift; /* shift for Fortran start by 1 indexing */
435   idx  += shift;
436   for (i=0; i<m; i++) {
437     jrow = ii[i];
438     n    = ii[i+1] - jrow;
439     sum1  = 0.0;
440     sum2  = 0.0;
441     sum3  = 0.0;
442     sum4  = 0.0;
443     for (j=0; j<n; j++) {
444       sum1 += v[jrow]*x[4*idx[jrow]];
445       sum2 += v[jrow]*x[4*idx[jrow]+1];
446       sum3 += v[jrow]*x[4*idx[jrow]+2];
447       sum4 += v[jrow]*x[4*idx[jrow]+3];
448       jrow++;
449      }
450     y[4*i]   = sum1;
451     y[4*i+1] = sum2;
452     y[4*i+2] = sum3;
453     y[4*i+3] = sum4;
454   }
455 
456   PLogFlops(8*a->nz - 4*m);
457   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
458   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNC__
463 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4"
464 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
465 {
466   Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
467   Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
468   Scalar     *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
469   int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;
470 
471   PetscFunctionBegin;
472   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
473   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
474   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
475   y = y + shift; /* shift for Fortran start by 1 indexing */
476   for (i=0; i<m; i++) {
477     idx    = a->j + a->i[i] + shift;
478     v      = a->a + a->i[i] + shift;
479     n      = a->i[i+1] - a->i[i];
480     alpha1 = x[4*i];
481     alpha2 = x[4*i+1];
482     alpha3 = x[4*i+2];
483     alpha4 = x[4*i+3];
484     while (n-->0) {
485       y[4*(*idx)]   += alpha1*(*v);
486       y[4*(*idx)+1] += alpha2*(*v);
487       y[4*(*idx)+2] += alpha3*(*v);
488       y[4*(*idx)+3] += alpha4*(*v);
489       idx++; v++;
490     }
491   }
492   PLogFlops(8*a->nz - 4*a->n);
493   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
494   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNC__
499 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4"
500 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
501 {
502   Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
503   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
504   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
505   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
506   int         n,i,jrow,j;
507 
508   PetscFunctionBegin;
509   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
510   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
511   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
512   x    = x + shift;    /* shift for Fortran start by 1 indexing */
513   idx  = a->j;
514   v    = a->a;
515   ii   = a->i;
516 
517   v    += shift; /* shift for Fortran start by 1 indexing */
518   idx  += shift;
519   for (i=0; i<m; i++) {
520     jrow = ii[i];
521     n    = ii[i+1] - jrow;
522     sum1  = 0.0;
523     sum2  = 0.0;
524     sum3  = 0.0;
525     sum4  = 0.0;
526     for (j=0; j<n; j++) {
527       sum1 += v[jrow]*x[4*idx[jrow]];
528       sum2 += v[jrow]*x[4*idx[jrow]+1];
529       sum3 += v[jrow]*x[4*idx[jrow]+2];
530       sum4 += v[jrow]*x[4*idx[jrow]+3];
531       jrow++;
532      }
533     y[4*i]   += sum1;
534     y[4*i+1] += sum2;
535     y[4*i+2] += sum3;
536     y[4*i+3] += sum4;
537   }
538 
539   PLogFlops(8*a->nz - 4*m);
540   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
541   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
542   PetscFunctionReturn(0);
543 }
544 #undef __FUNC__
545 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4"
546 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
547 {
548   Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
549   Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
550   Scalar     *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
551   int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;
552 
553   PetscFunctionBegin;
554   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
555   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
556   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
557   y = y + shift; /* shift for Fortran start by 1 indexing */
558   for (i=0; i<m; i++) {
559     idx    = a->j + a->i[i] + shift;
560     v      = a->a + a->i[i] + shift;
561     n      = a->i[i+1] - a->i[i];
562     alpha1 = x[4*i];
563     alpha2 = x[4*i+1];
564     alpha3 = x[4*i+2];
565     alpha4 = x[4*i+3];
566     while (n-->0) {
567       y[4*(*idx)]   += alpha1*(*v);
568       y[4*(*idx)+1] += alpha2*(*v);
569       y[4*(*idx)+2] += alpha3*(*v);
570       y[4*(*idx)+3] += alpha4*(*v);
571       idx++; v++;
572     }
573   }
574   PLogFlops(8*a->nz - 4*a->n);
575   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
576   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 
579 }
580 
581 /* ------------------------------------------------------------------------------*/
582 #undef __FUNC__
583 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5"
584 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
585 {
586   Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
587   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
588   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
589   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
590   int         n,i,jrow,j;
591 
592   PetscFunctionBegin;
593   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
594   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
595   x    = x + shift;    /* shift for Fortran start by 1 indexing */
596   idx  = a->j;
597   v    = a->a;
598   ii   = a->i;
599 
600   v    += shift; /* shift for Fortran start by 1 indexing */
601   idx  += shift;
602   for (i=0; i<m; i++) {
603     jrow = ii[i];
604     n    = ii[i+1] - jrow;
605     sum1  = 0.0;
606     sum2  = 0.0;
607     sum3  = 0.0;
608     sum4  = 0.0;
609     sum5  = 0.0;
610     for (j=0; j<n; j++) {
611       sum1 += v[jrow]*x[5*idx[jrow]];
612       sum2 += v[jrow]*x[5*idx[jrow]+1];
613       sum3 += v[jrow]*x[5*idx[jrow]+2];
614       sum4 += v[jrow]*x[5*idx[jrow]+3];
615       sum5 += v[jrow]*x[5*idx[jrow]+4];
616       jrow++;
617      }
618     y[5*i]   = sum1;
619     y[5*i+1] = sum2;
620     y[5*i+2] = sum3;
621     y[5*i+3] = sum4;
622     y[5*i+4] = sum5;
623   }
624 
625   PLogFlops(10*a->nz - 5*m);
626   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
627   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNC__
632 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5"
633 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
634 {
635   Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
636   Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
637   Scalar     *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
638   int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;
639 
640   PetscFunctionBegin;
641   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
642   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
643   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
644   y = y + shift; /* shift for Fortran start by 1 indexing */
645   for (i=0; i<m; i++) {
646     idx    = a->j + a->i[i] + shift;
647     v      = a->a + a->i[i] + shift;
648     n      = a->i[i+1] - a->i[i];
649     alpha1 = x[5*i];
650     alpha2 = x[5*i+1];
651     alpha3 = x[5*i+2];
652     alpha4 = x[5*i+3];
653     alpha5 = x[5*i+4];
654     while (n-->0) {
655       y[5*(*idx)]   += alpha1*(*v);
656       y[5*(*idx)+1] += alpha2*(*v);
657       y[5*(*idx)+2] += alpha3*(*v);
658       y[5*(*idx)+3] += alpha4*(*v);
659       y[5*(*idx)+4] += alpha5*(*v);
660       idx++; v++;
661     }
662   }
663   PLogFlops(10*a->nz - 5*a->n);
664   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
665   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
666   PetscFunctionReturn(0);
667 }
668 
669 #undef __FUNC__
670 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5"
671 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
672 {
673   Mat_MAIJ    *b = (Mat_MAIJ*)A->data;
674   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
675   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
676   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
677   int         n,i,jrow,j;
678 
679   PetscFunctionBegin;
680   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
681   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
682   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
683   x    = x + shift;    /* shift for Fortran start by 1 indexing */
684   idx  = a->j;
685   v    = a->a;
686   ii   = a->i;
687 
688   v    += shift; /* shift for Fortran start by 1 indexing */
689   idx  += shift;
690   for (i=0; i<m; i++) {
691     jrow = ii[i];
692     n    = ii[i+1] - jrow;
693     sum1  = 0.0;
694     sum2  = 0.0;
695     sum3  = 0.0;
696     sum4  = 0.0;
697     sum5  = 0.0;
698     for (j=0; j<n; j++) {
699       sum1 += v[jrow]*x[5*idx[jrow]];
700       sum2 += v[jrow]*x[5*idx[jrow]+1];
701       sum3 += v[jrow]*x[5*idx[jrow]+2];
702       sum4 += v[jrow]*x[5*idx[jrow]+3];
703       sum5 += v[jrow]*x[5*idx[jrow]+4];
704       jrow++;
705      }
706     y[5*i]   += sum1;
707     y[5*i+1] += sum2;
708     y[5*i+2] += sum3;
709     y[5*i+3] += sum4;
710     y[5*i+4] += sum5;
711   }
712 
713   PLogFlops(10*a->nz);
714   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
715   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 #undef __FUNC__
720 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5"
721 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
722 {
723   Mat_MAIJ   *b = (Mat_MAIJ*)A->data;
724   Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
725   Scalar     *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
726   int        ierr,m = a->m,n,i,*idx,shift = a->indexshift;
727 
728   PetscFunctionBegin;
729   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
730   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
731   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
732   y = y + shift; /* shift for Fortran start by 1 indexing */
733   for (i=0; i<m; i++) {
734     idx    = a->j + a->i[i] + shift;
735     v      = a->a + a->i[i] + shift;
736     n      = a->i[i+1] - a->i[i];
737     alpha1 = x[5*i];
738     alpha2 = x[5*i+1];
739     alpha3 = x[5*i+2];
740     alpha4 = x[5*i+3];
741     alpha5 = x[5*i+4];
742     while (n-->0) {
743       y[5*(*idx)]   += alpha1*(*v);
744       y[5*(*idx)+1] += alpha2*(*v);
745       y[5*(*idx)+2] += alpha3*(*v);
746       y[5*(*idx)+3] += alpha4*(*v);
747       y[5*(*idx)+4] += alpha5*(*v);
748       idx++; v++;
749     }
750   }
751   PLogFlops(10*a->nz);
752   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
753   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 /*===================================================================================*/
758 EXTERN int MatMult_MPIAIJ(Mat,Vec,Vec);
759 EXTERN int MatMultAdd_MPIAIJ(Mat,Vec,Vec,Vec);
760 EXTERN int MatMultTranspose_MPIAIJ(Mat,Vec,Vec);
761 EXTERN int MatMultTransposeAdd_MPIAIJ(Mat,Vec,Vec,Vec);
762 
763 #undef __FUNC__
764 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_1"></a>*/"MatMult_MPIMAIJ_1"
765 int MatMult_MPIMAIJ_1(Mat A,Vec xx,Vec yy)
766 {
767   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
768   int      ierr;
769   PetscFunctionBegin;
770   ierr = MatMult_MPIAIJ(b->AIJ,xx,yy);
771   PetscFunctionReturn(0);
772 }
773 #undef __FUNC__
774 #define __FUNC__ /*<a name="MatMultTranspose_MPIMAIJ_1"></a>*/"MatMultTranspose_MPIMAIJ_1"
775 int MatMultTranspose_MPIMAIJ_1(Mat A,Vec xx,Vec yy)
776 {
777   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
778   int      ierr;
779   PetscFunctionBegin;
780   ierr = MatMultTranspose_MPIAIJ(b->AIJ,xx,yy);
781   PetscFunctionReturn(0);
782 }
783 #undef __FUNC__
784 #define __FUNC__ /*<a name="MatMultAdd_MPIMAIJ_1"></a>*/"MatMultAdd_MPIMAIJ_1"
785 int MatMultAdd_MPIMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
786 {
787   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
788   int      ierr;
789   PetscFunctionBegin;
790   ierr = MatMultAdd_MPIAIJ(b->AIJ,xx,yy,zz);
791   PetscFunctionReturn(0);
792 }
793 #undef __FUNC__
794 #define __FUNC__ /*<a name="MatMultTransposeAdd_MPIMAIJ_1"></a>*/"MatMultTransposeAdd_MPIMAIJ_1"
795 int MatMultTransposeAdd_MPIMAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
796 {
797   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
798   int      ierr;
799   PetscFunctionBegin;
800   ierr = MatMultTransposeAdd_MPIAIJ(b->AIJ,xx,yy,zz);
801   PetscFunctionReturn(0);
802 }
803 
804 /* ---------------------------------------------------------------------------------- */
805 #undef __FUNC__
806 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_dof"></a>*/"MatMult_MPIMAIJ_dof"
807 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
808 {
809   Mat_MAIJ *b = (Mat_MAIJ*)A->data;
810   int      ierr;
811   PetscFunctionBegin;
812 
813   /* start the scatter */
814   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
815   ierr = MatMult_SeqMAIJ_2(A,xx,yy);CHKERRQ(ierr);
816   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
817   ierr = MatMultAdd_SeqMAIJ_1(A,b->w,yy,yy);CHKERRQ(ierr);
818   PetscFunctionReturn(0);
819 }
820 
821 /* ---------------------------------------------------------------------------------- */
822 #undef __FUNC__
823 #define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ"
824 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
825 {
826   int      ierr,size,n;
827   Mat_MAIJ *b;
828   Mat      B;
829 
830   PetscFunctionBegin;
831   ierr = MATCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
832   ierr = MatSetType(B,MATMAIJ);CHKERRQ(ierr);
833 
834   B->assembled    = PETSC_TRUE;
835   B->ops->destroy = MatDestroy_MAIJ;
836   b = (Mat_MAIJ*)B->data;
837 
838   b->AIJ = A;
839   b->dof = dof;
840   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
841   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
842   if (size == 1) {
843     if (dof == 1) {
844       B->ops->mult             = MatMult_SeqMAIJ_1;
845       B->ops->multadd          = MatMultAdd_SeqMAIJ_1;
846       B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_1;
847       B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_1;
848     } else if (dof == 2) {
849       B->ops->mult             = MatMult_SeqMAIJ_2;
850       B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
851       B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
852       B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
853     } else if (dof == 3) {
854       B->ops->mult             = MatMult_SeqMAIJ_3;
855       B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
856       B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
857       B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
858     } else if (dof == 4) {
859       B->ops->mult             = MatMult_SeqMAIJ_4;
860       B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
861       B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
862       B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
863     } else if (dof == 5) {
864       B->ops->mult             = MatMult_SeqMAIJ_5;
865       B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
866       B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
867       B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
868     } else {
869       SETERRQ1(1,1,"Cannot handle a dof of %d\n",dof);
870     }
871   } else {
872     if (dof == 1) {
873       B->ops->mult             = MatMult_MPIMAIJ_1;
874       B->ops->multadd          = MatMultAdd_MPIMAIJ_1;
875       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_1;
876       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_1;
877     } else {
878       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
879       IS         from,to;
880       Vec        gvec;
881       int        *garray,i;
882 
883       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
884       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
885 
886       /* create two temporary Index sets for build scatter gather */
887       garray = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(garray);
888       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
889       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
890       ierr = PetscFree(garray);CHKERRQ(ierr);
891       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
892 
893       /* create temporary global vector to generate scatter context */
894       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
895 
896       /* generate the scatter context */
897       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
898 
899       ierr = ISDestroy(from);CHKERRQ(ierr);
900       ierr = ISDestroy(to);CHKERRQ(ierr);
901       ierr = VecDestroy(gvec);CHKERRQ(ierr);
902 
903       B->ops->mult = MatMult_MPIMAIJ_dof;
904     }
905   }
906   *maij = B;
907   PetscFunctionReturn(0);
908 }
909 
910 
911 
912 
913 
914 
915 
916 
917 
918 
919 
920 
921