xref: /petsc/src/mat/impls/maij/maij.c (revision 35d8aa7f675fa4ee5c80aa031e65d3ef307a4891)
1 /*$Id: maij.c,v 1.10 2000/10/10 19:24:12 bsmith Exp bsmith $*/
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include "src/mat/impls/aij/mpi/mpiaij.h"
20 
21 typedef struct {
22   int        dof;         /* number of components */
23   Mat        AIJ;        /* representation of interpolation for one component */
24 } Mat_SeqMAIJ;
25 
26 typedef struct {
27   int        dof;         /* number of components */
28   Mat        AIJ,OAIJ;    /* representation of interpolation for one component */
29   Mat        A;
30   VecScatter ctx;         /* update ghost points for parallel case */
31   Vec        w;           /* work space for ghost values for parallel case */
32 } Mat_MPIMAIJ;
33 
34 #undef __FUNC__
35 #define __FUNC__ /*<a name="MatMAIJGetAIJ"></a>*/"MatMAIJGetAIJ"
36 int MatMAIJGetAIJ(Mat A,Mat *B)
37 {
38   int         ierr;
39   PetscTruth  ismpimaij,isseqmaij;
40 
41   PetscFunctionBegin;
42   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
43   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
44   if (ismpimaij) {
45     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
46 
47     *B = b->A;
48   } else if (isseqmaij) {
49     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
50 
51     *B = b->AIJ;
52   } else {
53     *B = A;
54   }
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNC__
59 #define __FUNC__ /*<a name="MatMAIJRedimension"></a>*/"MatMAIJRedimension"
60 int MatMAIJRedimension(Mat A,int dof,Mat *B)
61 {
62   int ierr;
63   Mat Aij;
64 
65   PetscFunctionBegin;
66   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
67   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
68   PetscFunctionReturn(0);
69 }
70 
71 #undef __FUNC__
72 #define __FUNC__ /*<a name="MatDestroy_SeqMAIJ"></a>*/"MatDestroy_SeqMAIJ"
73 int MatDestroy_SeqMAIJ(Mat A)
74 {
75   int         ierr;
76   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
77 
78   PetscFunctionBegin;
79   if (b->AIJ) {
80     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
81   }
82   ierr = PetscFree(b);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNC__
87 #define __FUNC__ /*<a name="MatDestroy_MPIMAIJ"></a>*/"MatDestroy_MPIMAIJ"
88 int MatDestroy_MPIMAIJ(Mat A)
89 {
90   int         ierr;
91   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
92 
93   PetscFunctionBegin;
94   if (b->AIJ) {
95     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
96   }
97   if (b->OAIJ) {
98     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
99   }
100   if (b->A) {
101     ierr = MatDestroy(b->A);CHKERRQ(ierr);
102   }
103   if (b->ctx) {
104     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
105   }
106   if (b->w) {
107     ierr = VecDestroy(b->w);CHKERRQ(ierr);
108   }
109   ierr = PetscFree(b);CHKERRQ(ierr);
110   PetscFunctionReturn(0);
111 }
112 
113 EXTERN_C_BEGIN
114 #undef __FUNC__
115 #define __FUNC__ /*<a name="MatCreate_MAIJ"></a>*/"MatCreate_MAIJ"
116 int MatCreate_MAIJ(Mat A)
117 {
118   int         ierr;
119   Mat_MPIMAIJ *b;
120 
121   PetscFunctionBegin;
122   A->data             = (void*)(b = PetscNew(Mat_MPIMAIJ));CHKPTRQ(b);
123   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
124   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
125   A->factor           = 0;
126   A->mapping          = 0;
127 
128   b->AIJ  = 0;
129   b->dof  = 0;
130   b->OAIJ = 0;
131   b->ctx  = 0;
132   b->w    = 0;
133   PetscFunctionReturn(0);
134 }
135 EXTERN_C_END
136 
137 /* --------------------------------------------------------------------------------------*/
138 #undef __FUNC__
139 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2"
140 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
141 {
142   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
143   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
144   Scalar      *x,*y,*v,sum1, sum2;
145   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
146   int         n,i,jrow,j;
147 
148   PetscFunctionBegin;
149   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
150   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
151   x    = x + shift;    /* shift for Fortran start by 1 indexing */
152   idx  = a->j;
153   v    = a->a;
154   ii   = a->i;
155 
156   v    += shift; /* shift for Fortran start by 1 indexing */
157   idx  += shift;
158   for (i=0; i<m; i++) {
159     jrow = ii[i];
160     n    = ii[i+1] - jrow;
161     sum1  = 0.0;
162     sum2  = 0.0;
163     for (j=0; j<n; j++) {
164       sum1 += v[jrow]*x[2*idx[jrow]];
165       sum2 += v[jrow]*x[2*idx[jrow]+1];
166       jrow++;
167      }
168     y[2*i]   = sum1;
169     y[2*i+1] = sum2;
170   }
171 
172   PLogFlops(4*a->nz - 2*m);
173   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNC__
179 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2"
180 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
181 {
182   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
183   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
184   Scalar      *x,*y,*v,alpha1,alpha2,zero = 0.0;
185   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
186 
187   PetscFunctionBegin;
188   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
189   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
190   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
191   y = y + shift; /* shift for Fortran start by 1 indexing */
192   for (i=0; i<m; i++) {
193     idx    = a->j + a->i[i] + shift;
194     v      = a->a + a->i[i] + shift;
195     n      = a->i[i+1] - a->i[i];
196     alpha1 = x[2*i];
197     alpha2 = x[2*i+1];
198     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
199   }
200   PLogFlops(4*a->nz - 2*b->AIJ->n);
201   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
202   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNC__
207 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2"
208 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
209 {
210   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
211   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
212   Scalar      *x,*y,*v,sum1, sum2;
213   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
214   int         n,i,jrow,j;
215 
216   PetscFunctionBegin;
217   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
218   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
219   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
220   x    = x + shift;    /* shift for Fortran start by 1 indexing */
221   idx  = a->j;
222   v    = a->a;
223   ii   = a->i;
224 
225   v    += shift; /* shift for Fortran start by 1 indexing */
226   idx  += shift;
227   for (i=0; i<m; i++) {
228     jrow = ii[i];
229     n    = ii[i+1] - jrow;
230     sum1  = 0.0;
231     sum2  = 0.0;
232     for (j=0; j<n; j++) {
233       sum1 += v[jrow]*x[2*idx[jrow]];
234       sum2 += v[jrow]*x[2*idx[jrow]+1];
235       jrow++;
236      }
237     y[2*i]   += sum1;
238     y[2*i+1] += sum2;
239   }
240 
241   PLogFlops(4*a->nz - 2*m);
242   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 #undef __FUNC__
247 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2"
248 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
249 {
250   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
251   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
252   Scalar      *x,*y,*v,alpha1,alpha2;
253   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
254 
255   PetscFunctionBegin;
256   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
257   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
258   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
259   y = y + shift; /* shift for Fortran start by 1 indexing */
260   for (i=0; i<m; i++) {
261     idx   = a->j + a->i[i] + shift;
262     v     = a->a + a->i[i] + shift;
263     n     = a->i[i+1] - a->i[i];
264     alpha1 = x[2*i];
265     alpha2 = x[2*i+1];
266     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
267   }
268   PLogFlops(4*a->nz - 2*b->AIJ->n);
269   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
270   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 /* --------------------------------------------------------------------------------------*/
274 #undef __FUNC__
275 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3"
276 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
277 {
278   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
279   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
280   Scalar      *x,*y,*v,sum1, sum2, sum3;
281   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
282   int         n,i,jrow,j;
283 
284   PetscFunctionBegin;
285   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
286   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
287   x    = x + shift;    /* shift for Fortran start by 1 indexing */
288   idx  = a->j;
289   v    = a->a;
290   ii   = a->i;
291 
292   v    += shift; /* shift for Fortran start by 1 indexing */
293   idx  += shift;
294   for (i=0; i<m; i++) {
295     jrow = ii[i];
296     n    = ii[i+1] - jrow;
297     sum1  = 0.0;
298     sum2  = 0.0;
299     sum3  = 0.0;
300     for (j=0; j<n; j++) {
301       sum1 += v[jrow]*x[3*idx[jrow]];
302       sum2 += v[jrow]*x[3*idx[jrow]+1];
303       sum3 += v[jrow]*x[3*idx[jrow]+2];
304       jrow++;
305      }
306     y[3*i]   = sum1;
307     y[3*i+1] = sum2;
308     y[3*i+2] = sum3;
309   }
310 
311   PLogFlops(6*a->nz - 3*m);
312   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
313   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNC__
318 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3"
319 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
320 {
321   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
322   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
323   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
324   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
325 
326   PetscFunctionBegin;
327   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
328   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
329   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
330   y = y + shift; /* shift for Fortran start by 1 indexing */
331   for (i=0; i<m; i++) {
332     idx    = a->j + a->i[i] + shift;
333     v      = a->a + a->i[i] + shift;
334     n      = a->i[i+1] - a->i[i];
335     alpha1 = x[3*i];
336     alpha2 = x[3*i+1];
337     alpha3 = x[3*i+2];
338     while (n-->0) {
339       y[3*(*idx)]   += alpha1*(*v);
340       y[3*(*idx)+1] += alpha2*(*v);
341       y[3*(*idx)+2] += alpha3*(*v);
342       idx++; v++;
343     }
344   }
345   PLogFlops(6*a->nz - 3*b->AIJ->n);
346   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
347   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNC__
352 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3"
353 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
354 {
355   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
356   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
357   Scalar      *x,*y,*v,sum1, sum2, sum3;
358   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
359   int         n,i,jrow,j;
360 
361   PetscFunctionBegin;
362   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
363   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
364   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
365   x    = x + shift;    /* shift for Fortran start by 1 indexing */
366   idx  = a->j;
367   v    = a->a;
368   ii   = a->i;
369 
370   v    += shift; /* shift for Fortran start by 1 indexing */
371   idx  += shift;
372   for (i=0; i<m; i++) {
373     jrow = ii[i];
374     n    = ii[i+1] - jrow;
375     sum1  = 0.0;
376     sum2  = 0.0;
377     sum3  = 0.0;
378     for (j=0; j<n; j++) {
379       sum1 += v[jrow]*x[3*idx[jrow]];
380       sum2 += v[jrow]*x[3*idx[jrow]+1];
381       sum3 += v[jrow]*x[3*idx[jrow]+2];
382       jrow++;
383      }
384     y[3*i]   += sum1;
385     y[3*i+1] += sum2;
386     y[3*i+2] += sum3;
387   }
388 
389   PLogFlops(6*a->nz);
390   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
391   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
392   PetscFunctionReturn(0);
393 }
394 #undef __FUNC__
395 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3"
396 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
397 {
398   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
399   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
400   Scalar      *x,*y,*v,alpha1,alpha2,alpha3;
401   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
402 
403   PetscFunctionBegin;
404   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
405   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
406   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
407   y = y + shift; /* shift for Fortran start by 1 indexing */
408   for (i=0; i<m; i++) {
409     idx    = a->j + a->i[i] + shift;
410     v      = a->a + a->i[i] + shift;
411     n      = a->i[i+1] - a->i[i];
412     alpha1 = x[3*i];
413     alpha2 = x[3*i+1];
414     alpha3 = x[3*i+2];
415     while (n-->0) {
416       y[3*(*idx)]   += alpha1*(*v);
417       y[3*(*idx)+1] += alpha2*(*v);
418       y[3*(*idx)+2] += alpha3*(*v);
419       idx++; v++;
420     }
421   }
422   PLogFlops(6*a->nz);
423   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
424   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 /* ------------------------------------------------------------------------------*/
429 #undef __FUNC__
430 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4"
431 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
432 {
433   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
434   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
435   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
436   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
437   int         n,i,jrow,j;
438 
439   PetscFunctionBegin;
440   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
441   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
442   x    = x + shift;    /* shift for Fortran start by 1 indexing */
443   idx  = a->j;
444   v    = a->a;
445   ii   = a->i;
446 
447   v    += shift; /* shift for Fortran start by 1 indexing */
448   idx  += shift;
449   for (i=0; i<m; i++) {
450     jrow = ii[i];
451     n    = ii[i+1] - jrow;
452     sum1  = 0.0;
453     sum2  = 0.0;
454     sum3  = 0.0;
455     sum4  = 0.0;
456     for (j=0; j<n; j++) {
457       sum1 += v[jrow]*x[4*idx[jrow]];
458       sum2 += v[jrow]*x[4*idx[jrow]+1];
459       sum3 += v[jrow]*x[4*idx[jrow]+2];
460       sum4 += v[jrow]*x[4*idx[jrow]+3];
461       jrow++;
462      }
463     y[4*i]   = sum1;
464     y[4*i+1] = sum2;
465     y[4*i+2] = sum3;
466     y[4*i+3] = sum4;
467   }
468 
469   PLogFlops(8*a->nz - 4*m);
470   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
471   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNC__
476 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4"
477 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
478 {
479   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
480   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
481   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
482   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
483 
484   PetscFunctionBegin;
485   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
486   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
487   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
488   y = y + shift; /* shift for Fortran start by 1 indexing */
489   for (i=0; i<m; i++) {
490     idx    = a->j + a->i[i] + shift;
491     v      = a->a + a->i[i] + shift;
492     n      = a->i[i+1] - a->i[i];
493     alpha1 = x[4*i];
494     alpha2 = x[4*i+1];
495     alpha3 = x[4*i+2];
496     alpha4 = x[4*i+3];
497     while (n-->0) {
498       y[4*(*idx)]   += alpha1*(*v);
499       y[4*(*idx)+1] += alpha2*(*v);
500       y[4*(*idx)+2] += alpha3*(*v);
501       y[4*(*idx)+3] += alpha4*(*v);
502       idx++; v++;
503     }
504   }
505   PLogFlops(8*a->nz - 4*b->AIJ->n);
506   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
507   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNC__
512 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4"
513 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
514 {
515   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
516   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
517   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
518   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
519   int         n,i,jrow,j;
520 
521   PetscFunctionBegin;
522   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
523   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
524   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
525   x    = x + shift;    /* shift for Fortran start by 1 indexing */
526   idx  = a->j;
527   v    = a->a;
528   ii   = a->i;
529 
530   v    += shift; /* shift for Fortran start by 1 indexing */
531   idx  += shift;
532   for (i=0; i<m; i++) {
533     jrow = ii[i];
534     n    = ii[i+1] - jrow;
535     sum1  = 0.0;
536     sum2  = 0.0;
537     sum3  = 0.0;
538     sum4  = 0.0;
539     for (j=0; j<n; j++) {
540       sum1 += v[jrow]*x[4*idx[jrow]];
541       sum2 += v[jrow]*x[4*idx[jrow]+1];
542       sum3 += v[jrow]*x[4*idx[jrow]+2];
543       sum4 += v[jrow]*x[4*idx[jrow]+3];
544       jrow++;
545      }
546     y[4*i]   += sum1;
547     y[4*i+1] += sum2;
548     y[4*i+2] += sum3;
549     y[4*i+3] += sum4;
550   }
551 
552   PLogFlops(8*a->nz - 4*m);
553   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
554   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 #undef __FUNC__
558 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4"
559 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
560 {
561   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
562   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
563   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
564   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
565 
566   PetscFunctionBegin;
567   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
568   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
569   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
570   y = y + shift; /* shift for Fortran start by 1 indexing */
571   for (i=0; i<m; i++) {
572     idx    = a->j + a->i[i] + shift;
573     v      = a->a + a->i[i] + shift;
574     n      = a->i[i+1] - a->i[i];
575     alpha1 = x[4*i];
576     alpha2 = x[4*i+1];
577     alpha3 = x[4*i+2];
578     alpha4 = x[4*i+3];
579     while (n-->0) {
580       y[4*(*idx)]   += alpha1*(*v);
581       y[4*(*idx)+1] += alpha2*(*v);
582       y[4*(*idx)+2] += alpha3*(*v);
583       y[4*(*idx)+3] += alpha4*(*v);
584       idx++; v++;
585     }
586   }
587   PLogFlops(8*a->nz - 4*b->AIJ->n);
588   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
589   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 
592 }
593 
594 /* ------------------------------------------------------------------------------*/
595 #undef __FUNC__
596 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5"
597 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
598 {
599   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
600   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
601   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
602   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
603   int         n,i,jrow,j;
604 
605   PetscFunctionBegin;
606   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
607   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
608   x    = x + shift;    /* shift for Fortran start by 1 indexing */
609   idx  = a->j;
610   v    = a->a;
611   ii   = a->i;
612 
613   v    += shift; /* shift for Fortran start by 1 indexing */
614   idx  += shift;
615   for (i=0; i<m; i++) {
616     jrow = ii[i];
617     n    = ii[i+1] - jrow;
618     sum1  = 0.0;
619     sum2  = 0.0;
620     sum3  = 0.0;
621     sum4  = 0.0;
622     sum5  = 0.0;
623     for (j=0; j<n; j++) {
624       sum1 += v[jrow]*x[5*idx[jrow]];
625       sum2 += v[jrow]*x[5*idx[jrow]+1];
626       sum3 += v[jrow]*x[5*idx[jrow]+2];
627       sum4 += v[jrow]*x[5*idx[jrow]+3];
628       sum5 += v[jrow]*x[5*idx[jrow]+4];
629       jrow++;
630      }
631     y[5*i]   = sum1;
632     y[5*i+1] = sum2;
633     y[5*i+2] = sum3;
634     y[5*i+3] = sum4;
635     y[5*i+4] = sum5;
636   }
637 
638   PLogFlops(10*a->nz - 5*m);
639   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
640   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 #undef __FUNC__
645 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5"
646 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
647 {
648   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
649   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
650   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
651   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
652 
653   PetscFunctionBegin;
654   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
655   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
656   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
657   y = y + shift; /* shift for Fortran start by 1 indexing */
658   for (i=0; i<m; i++) {
659     idx    = a->j + a->i[i] + shift;
660     v      = a->a + a->i[i] + shift;
661     n      = a->i[i+1] - a->i[i];
662     alpha1 = x[5*i];
663     alpha2 = x[5*i+1];
664     alpha3 = x[5*i+2];
665     alpha4 = x[5*i+3];
666     alpha5 = x[5*i+4];
667     while (n-->0) {
668       y[5*(*idx)]   += alpha1*(*v);
669       y[5*(*idx)+1] += alpha2*(*v);
670       y[5*(*idx)+2] += alpha3*(*v);
671       y[5*(*idx)+3] += alpha4*(*v);
672       y[5*(*idx)+4] += alpha5*(*v);
673       idx++; v++;
674     }
675   }
676   PLogFlops(10*a->nz - 5*b->AIJ->n);
677   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
678   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
679   PetscFunctionReturn(0);
680 }
681 
682 #undef __FUNC__
683 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5"
684 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
685 {
686   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
687   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
688   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
689   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
690   int         n,i,jrow,j;
691 
692   PetscFunctionBegin;
693   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
694   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
695   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
696   x    = x + shift;    /* shift for Fortran start by 1 indexing */
697   idx  = a->j;
698   v    = a->a;
699   ii   = a->i;
700 
701   v    += shift; /* shift for Fortran start by 1 indexing */
702   idx  += shift;
703   for (i=0; i<m; i++) {
704     jrow = ii[i];
705     n    = ii[i+1] - jrow;
706     sum1  = 0.0;
707     sum2  = 0.0;
708     sum3  = 0.0;
709     sum4  = 0.0;
710     sum5  = 0.0;
711     for (j=0; j<n; j++) {
712       sum1 += v[jrow]*x[5*idx[jrow]];
713       sum2 += v[jrow]*x[5*idx[jrow]+1];
714       sum3 += v[jrow]*x[5*idx[jrow]+2];
715       sum4 += v[jrow]*x[5*idx[jrow]+3];
716       sum5 += v[jrow]*x[5*idx[jrow]+4];
717       jrow++;
718      }
719     y[5*i]   += sum1;
720     y[5*i+1] += sum2;
721     y[5*i+2] += sum3;
722     y[5*i+3] += sum4;
723     y[5*i+4] += sum5;
724   }
725 
726   PLogFlops(10*a->nz);
727   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
728   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNC__
733 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5"
734 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
735 {
736   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
737   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
738   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
739   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
740 
741   PetscFunctionBegin;
742   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
743   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
744   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
745   y = y + shift; /* shift for Fortran start by 1 indexing */
746   for (i=0; i<m; i++) {
747     idx    = a->j + a->i[i] + shift;
748     v      = a->a + a->i[i] + shift;
749     n      = a->i[i+1] - a->i[i];
750     alpha1 = x[5*i];
751     alpha2 = x[5*i+1];
752     alpha3 = x[5*i+2];
753     alpha4 = x[5*i+3];
754     alpha5 = x[5*i+4];
755     while (n-->0) {
756       y[5*(*idx)]   += alpha1*(*v);
757       y[5*(*idx)+1] += alpha2*(*v);
758       y[5*(*idx)+2] += alpha3*(*v);
759       y[5*(*idx)+3] += alpha4*(*v);
760       y[5*(*idx)+4] += alpha5*(*v);
761       idx++; v++;
762     }
763   }
764   PLogFlops(10*a->nz);
765   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
766   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
767   PetscFunctionReturn(0);
768 }
769 
770 /*===================================================================================*/
771 #undef __FUNC__
772 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_dof"></a>*/"MatMult_MPIMAIJ_dof"
773 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
774 {
775   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
776   int         ierr;
777   PetscFunctionBegin;
778 
779   /* start the scatter */
780   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
781   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
782   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
783   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNC__
788 #define __FUNC__ /*<a name="MatMultTranspose_MPIMAIJ_dof"></a>*/"MatMultTranspose_MPIMAIJ_dof"
789 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
790 {
791   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
792   int         ierr;
793   PetscFunctionBegin;
794   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
795   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
796   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
797   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
798   PetscFunctionReturn(0);
799 }
800 
801 #undef __FUNC__
802 #define __FUNC__ /*<a name="MatMultAdd_MPIMAIJ_dof"></a>*/"MatMultAdd_MPIMAIJ_dof"
803 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
804 {
805   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
806   int         ierr;
807   PetscFunctionBegin;
808 
809   /* start the scatter */
810   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
811   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
812   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
813   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNC__
818 #define __FUNC__ /*<a name="MatMultTransposeAdd_MPIMAIJ_dof"></a>*/"MatMultTransposeAdd_MPIMAIJ_dof"
819 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
820 {
821   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
822   int         ierr;
823   PetscFunctionBegin;
824   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
825   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
826   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
827   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
828   PetscFunctionReturn(0);
829 }
830 
831 /* ---------------------------------------------------------------------------------- */
832 #undef __FUNC__
833 #define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ"
834 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
835 {
836   int         ierr,size,n;
837   Mat_MPIMAIJ *b;
838   Mat         B;
839 
840   PetscFunctionBegin;
841   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
842 
843   if (dof == 1) {
844     *maij = A;
845   } else {
846     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
847     B->assembled    = PETSC_TRUE;
848 
849     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
850     if (size == 1) {
851       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
852       B->ops->destroy = MatDestroy_SeqMAIJ;
853       b      = (Mat_MPIMAIJ*)B->data;
854       b->dof = dof;
855       b->AIJ = A;
856       if (dof == 2) {
857         B->ops->mult             = MatMult_SeqMAIJ_2;
858         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
859         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
860         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
861       } else if (dof == 3) {
862         B->ops->mult             = MatMult_SeqMAIJ_3;
863         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
864         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
865         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
866       } else if (dof == 4) {
867         B->ops->mult             = MatMult_SeqMAIJ_4;
868         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
869         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
870         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
871       } else if (dof == 5) {
872         B->ops->mult             = MatMult_SeqMAIJ_5;
873         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
874         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
875         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
876       } else {
877         SETERRQ1(1,"Cannot handle a dof of %d\n",dof);
878       }
879     } else {
880       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
881       IS         from,to;
882       Vec        gvec;
883       int        *garray,i;
884 
885       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
886       B->ops->destroy = MatDestroy_MPIMAIJ;
887       b      = (Mat_MPIMAIJ*)B->data;
888       b->dof = dof;
889       b->A   = A;
890       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
891       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
892 
893       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
894       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
895 
896       /* create two temporary Index sets for build scatter gather */
897       garray = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(garray);
898       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
899       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
900       ierr = PetscFree(garray);CHKERRQ(ierr);
901       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
902 
903       /* create temporary global vector to generate scatter context */
904       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
905 
906       /* generate the scatter context */
907       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
908 
909       ierr = ISDestroy(from);CHKERRQ(ierr);
910       ierr = ISDestroy(to);CHKERRQ(ierr);
911       ierr = VecDestroy(gvec);CHKERRQ(ierr);
912 
913       B->ops->mult             = MatMult_MPIMAIJ_dof;
914       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
915       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
916       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
917     }
918     *maij = B;
919   }
920   PetscFunctionReturn(0);
921 }
922 
923 
924 
925 
926 
927 
928 
929 
930 
931 
932 
933 
934