xref: /petsc/src/mat/impls/maij/maij.c (revision 7843d17afa8c1ec0c9a03b3c2ea6d3e3db42e252)
1 /*$Id: maij.c,v 1.9 2000/09/28 21:11:37 bsmith Exp bsmith $*/
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include "src/mat/impls/aij/mpi/mpiaij.h"
20 
21 typedef struct {
22   int        dof;         /* number of components */
23   Mat        AIJ;        /* representation of interpolation for one component */
24 } Mat_SeqMAIJ;
25 
26 typedef struct {
27   int        dof;         /* number of components */
28   Mat        AIJ,OAIJ;    /* representation of interpolation for one component */
29   Mat        A;
30   VecScatter ctx;         /* update ghost points for parallel case */
31   Vec        w;           /* work space for ghost values for parallel case */
32 } Mat_MPIMAIJ;
33 
34 #undef __FUNC__
35 #define __FUNC__ /*<a name="MatMAIJGetAIJ"></a>*/"MatMAIJGetAIJ"
36 int MatMAIJGetAIJ(Mat A,Mat *B)
37 {
38   int         ierr;
39   PetscTruth  ismpimaij,isseqmaij;
40 
41   PetscFunctionBegin;
42   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
43   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
44   if (ismpimaij) {
45     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
46 
47     *B = b->A;
48   } else if (isseqmaij) {
49     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
50 
51     *B = b->AIJ;
52   } else {
53     *B = A;
54   }
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNC__
59 #define __FUNC__ /*<a name="MatMAIJRedimension"></a>*/"MatMAIJRedimension"
60 int MatMAIJRedimension(Mat A,int dof,Mat *B)
61 {
62   int ierr;
63   Mat Aij;
64 
65   PetscFunctionBegin;
66   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
67   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
68   PetscFunctionReturn(0);
69 }
70 
71 #undef __FUNC__
72 #define __FUNC__ /*<a name="MatDestroy_SeqMAIJ"></a>*/"MatDestroy_SeqMAIJ"
73 int MatDestroy_SeqMAIJ(Mat A)
74 {
75   int         ierr;
76   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
77 
78   PetscFunctionBegin;
79   if (b->AIJ) {
80     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
81   }
82   ierr = PetscFree(b);CHKERRQ(ierr);
83   PetscHeaderDestroy(A);
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNC__
88 #define __FUNC__ /*<a name="MatDestroy_MPIMAIJ"></a>*/"MatDestroy_MPIMAIJ"
89 int MatDestroy_MPIMAIJ(Mat A)
90 {
91   int         ierr;
92   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
93 
94   PetscFunctionBegin;
95   if (b->AIJ) {
96     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
97   }
98   if (b->OAIJ) {
99     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
100   }
101   if (b->A) {
102     ierr = MatDestroy(b->A);CHKERRQ(ierr);
103   }
104   if (b->ctx) {
105     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
106   }
107   if (b->w) {
108     ierr = VecDestroy(b->w);CHKERRQ(ierr);
109   }
110   ierr = PetscFree(b);CHKERRQ(ierr);
111   PetscHeaderDestroy(A);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNC__
116 #define __FUNC__ /*<a name="MatGetSize_MAIJ"></a>*/"MatGetSize_MAIJ"
117 static int MatGetSize_MAIJ(Mat A,int *m,int *n)
118 {
119   PetscFunctionBegin;
120   if (m) *m = A->M;
121   if (n) *n = A->N;
122   PetscFunctionReturn(0);
123 }
124 
125 EXTERN_C_BEGIN
126 #undef __FUNC__
127 #define __FUNC__ /*<a name="MatCreate_MAIJ"></a>*/"MatCreate_MAIJ"
128 int MatCreate_MAIJ(Mat A)
129 {
130   int         ierr;
131   Mat_MPIMAIJ *b;
132 
133   PetscFunctionBegin;
134   A->data             = (void*)(b = PetscNew(Mat_MPIMAIJ));CHKPTRQ(b);
135   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
136   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
137   A->ops->getsize     = MatGetSize_MAIJ;
138   A->factor           = 0;
139   A->mapping          = 0;
140 
141   b->AIJ  = 0;
142   b->dof  = 0;
143   b->OAIJ = 0;
144   b->ctx  = 0;
145   b->w    = 0;
146   PetscFunctionReturn(0);
147 }
148 EXTERN_C_END
149 
150 /* --------------------------------------------------------------------------------------*/
151 #undef __FUNC__
152 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2"
153 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
154 {
155   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
156   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
157   Scalar      *x,*y,*v,sum1, sum2;
158   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
159   int         n,i,jrow,j;
160 
161   PetscFunctionBegin;
162   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
163   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
164   x    = x + shift;    /* shift for Fortran start by 1 indexing */
165   idx  = a->j;
166   v    = a->a;
167   ii   = a->i;
168 
169   v    += shift; /* shift for Fortran start by 1 indexing */
170   idx  += shift;
171   for (i=0; i<m; i++) {
172     jrow = ii[i];
173     n    = ii[i+1] - jrow;
174     sum1  = 0.0;
175     sum2  = 0.0;
176     for (j=0; j<n; j++) {
177       sum1 += v[jrow]*x[2*idx[jrow]];
178       sum2 += v[jrow]*x[2*idx[jrow]+1];
179       jrow++;
180      }
181     y[2*i]   = sum1;
182     y[2*i+1] = sum2;
183   }
184 
185   PLogFlops(4*a->nz - 2*m);
186   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
187   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNC__
192 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2"
193 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
194 {
195   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
196   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
197   Scalar      *x,*y,*v,alpha1,alpha2,zero = 0.0;
198   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
199 
200   PetscFunctionBegin;
201   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
202   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
203   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
204   y = y + shift; /* shift for Fortran start by 1 indexing */
205   for (i=0; i<m; i++) {
206     idx    = a->j + a->i[i] + shift;
207     v      = a->a + a->i[i] + shift;
208     n      = a->i[i+1] - a->i[i];
209     alpha1 = x[2*i];
210     alpha2 = x[2*i+1];
211     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
212   }
213   PLogFlops(4*a->nz - 2*a->n);
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNC__
220 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2"
221 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
222 {
223   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
224   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
225   Scalar      *x,*y,*v,sum1, sum2;
226   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
227   int         n,i,jrow,j;
228 
229   PetscFunctionBegin;
230   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
231   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
232   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
233   x    = x + shift;    /* shift for Fortran start by 1 indexing */
234   idx  = a->j;
235   v    = a->a;
236   ii   = a->i;
237 
238   v    += shift; /* shift for Fortran start by 1 indexing */
239   idx  += shift;
240   for (i=0; i<m; i++) {
241     jrow = ii[i];
242     n    = ii[i+1] - jrow;
243     sum1  = 0.0;
244     sum2  = 0.0;
245     for (j=0; j<n; j++) {
246       sum1 += v[jrow]*x[2*idx[jrow]];
247       sum2 += v[jrow]*x[2*idx[jrow]+1];
248       jrow++;
249      }
250     y[2*i]   += sum1;
251     y[2*i+1] += sum2;
252   }
253 
254   PLogFlops(4*a->nz - 2*m);
255   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
256   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
257   PetscFunctionReturn(0);
258 }
259 #undef __FUNC__
260 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2"
261 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
262 {
263   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
264   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
265   Scalar      *x,*y,*v,alpha1,alpha2;
266   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
267 
268   PetscFunctionBegin;
269   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
270   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
271   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
272   y = y + shift; /* shift for Fortran start by 1 indexing */
273   for (i=0; i<m; i++) {
274     idx   = a->j + a->i[i] + shift;
275     v     = a->a + a->i[i] + shift;
276     n     = a->i[i+1] - a->i[i];
277     alpha1 = x[2*i];
278     alpha2 = x[2*i+1];
279     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
280   }
281   PLogFlops(4*a->nz - 2*a->n);
282   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
283   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 /* --------------------------------------------------------------------------------------*/
287 #undef __FUNC__
288 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3"
289 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
290 {
291   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
292   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
293   Scalar      *x,*y,*v,sum1, sum2, sum3;
294   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
295   int         n,i,jrow,j;
296 
297   PetscFunctionBegin;
298   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
299   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
300   x    = x + shift;    /* shift for Fortran start by 1 indexing */
301   idx  = a->j;
302   v    = a->a;
303   ii   = a->i;
304 
305   v    += shift; /* shift for Fortran start by 1 indexing */
306   idx  += shift;
307   for (i=0; i<m; i++) {
308     jrow = ii[i];
309     n    = ii[i+1] - jrow;
310     sum1  = 0.0;
311     sum2  = 0.0;
312     sum3  = 0.0;
313     for (j=0; j<n; j++) {
314       sum1 += v[jrow]*x[3*idx[jrow]];
315       sum2 += v[jrow]*x[3*idx[jrow]+1];
316       sum3 += v[jrow]*x[3*idx[jrow]+2];
317       jrow++;
318      }
319     y[3*i]   = sum1;
320     y[3*i+1] = sum2;
321     y[3*i+2] = sum3;
322   }
323 
324   PLogFlops(6*a->nz - 3*m);
325   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
326   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNC__
331 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3"
332 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
333 {
334   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
335   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
336   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
337   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
338 
339   PetscFunctionBegin;
340   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
341   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
342   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
343   y = y + shift; /* shift for Fortran start by 1 indexing */
344   for (i=0; i<m; i++) {
345     idx    = a->j + a->i[i] + shift;
346     v      = a->a + a->i[i] + shift;
347     n      = a->i[i+1] - a->i[i];
348     alpha1 = x[3*i];
349     alpha2 = x[3*i+1];
350     alpha3 = x[3*i+2];
351     while (n-->0) {
352       y[3*(*idx)]   += alpha1*(*v);
353       y[3*(*idx)+1] += alpha2*(*v);
354       y[3*(*idx)+2] += alpha3*(*v);
355       idx++; v++;
356     }
357   }
358   PLogFlops(6*a->nz - 3*a->n);
359   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
360   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
361   PetscFunctionReturn(0);
362 }
363 
364 #undef __FUNC__
365 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3"
366 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
367 {
368   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
369   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
370   Scalar      *x,*y,*v,sum1, sum2, sum3;
371   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
372   int         n,i,jrow,j;
373 
374   PetscFunctionBegin;
375   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
376   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
377   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
378   x    = x + shift;    /* shift for Fortran start by 1 indexing */
379   idx  = a->j;
380   v    = a->a;
381   ii   = a->i;
382 
383   v    += shift; /* shift for Fortran start by 1 indexing */
384   idx  += shift;
385   for (i=0; i<m; i++) {
386     jrow = ii[i];
387     n    = ii[i+1] - jrow;
388     sum1  = 0.0;
389     sum2  = 0.0;
390     sum3  = 0.0;
391     for (j=0; j<n; j++) {
392       sum1 += v[jrow]*x[3*idx[jrow]];
393       sum2 += v[jrow]*x[3*idx[jrow]+1];
394       sum3 += v[jrow]*x[3*idx[jrow]+2];
395       jrow++;
396      }
397     y[3*i]   += sum1;
398     y[3*i+1] += sum2;
399     y[3*i+2] += sum3;
400   }
401 
402   PLogFlops(6*a->nz);
403   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
404   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 #undef __FUNC__
408 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3"
409 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
410 {
411   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
412   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
413   Scalar      *x,*y,*v,alpha1,alpha2,alpha3;
414   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
415 
416   PetscFunctionBegin;
417   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
418   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
419   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
420   y = y + shift; /* shift for Fortran start by 1 indexing */
421   for (i=0; i<m; i++) {
422     idx    = a->j + a->i[i] + shift;
423     v      = a->a + a->i[i] + shift;
424     n      = a->i[i+1] - a->i[i];
425     alpha1 = x[3*i];
426     alpha2 = x[3*i+1];
427     alpha3 = x[3*i+2];
428     while (n-->0) {
429       y[3*(*idx)]   += alpha1*(*v);
430       y[3*(*idx)+1] += alpha2*(*v);
431       y[3*(*idx)+2] += alpha3*(*v);
432       idx++; v++;
433     }
434   }
435   PLogFlops(6*a->nz);
436   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
437   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 /* ------------------------------------------------------------------------------*/
442 #undef __FUNC__
443 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4"
444 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
445 {
446   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
447   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
448   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
449   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
450   int         n,i,jrow,j;
451 
452   PetscFunctionBegin;
453   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
454   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
455   x    = x + shift;    /* shift for Fortran start by 1 indexing */
456   idx  = a->j;
457   v    = a->a;
458   ii   = a->i;
459 
460   v    += shift; /* shift for Fortran start by 1 indexing */
461   idx  += shift;
462   for (i=0; i<m; i++) {
463     jrow = ii[i];
464     n    = ii[i+1] - jrow;
465     sum1  = 0.0;
466     sum2  = 0.0;
467     sum3  = 0.0;
468     sum4  = 0.0;
469     for (j=0; j<n; j++) {
470       sum1 += v[jrow]*x[4*idx[jrow]];
471       sum2 += v[jrow]*x[4*idx[jrow]+1];
472       sum3 += v[jrow]*x[4*idx[jrow]+2];
473       sum4 += v[jrow]*x[4*idx[jrow]+3];
474       jrow++;
475      }
476     y[4*i]   = sum1;
477     y[4*i+1] = sum2;
478     y[4*i+2] = sum3;
479     y[4*i+3] = sum4;
480   }
481 
482   PLogFlops(8*a->nz - 4*m);
483   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
484   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNC__
489 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4"
490 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
491 {
492   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
493   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
494   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
495   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
496 
497   PetscFunctionBegin;
498   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
499   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
500   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
501   y = y + shift; /* shift for Fortran start by 1 indexing */
502   for (i=0; i<m; i++) {
503     idx    = a->j + a->i[i] + shift;
504     v      = a->a + a->i[i] + shift;
505     n      = a->i[i+1] - a->i[i];
506     alpha1 = x[4*i];
507     alpha2 = x[4*i+1];
508     alpha3 = x[4*i+2];
509     alpha4 = x[4*i+3];
510     while (n-->0) {
511       y[4*(*idx)]   += alpha1*(*v);
512       y[4*(*idx)+1] += alpha2*(*v);
513       y[4*(*idx)+2] += alpha3*(*v);
514       y[4*(*idx)+3] += alpha4*(*v);
515       idx++; v++;
516     }
517   }
518   PLogFlops(8*a->nz - 4*a->n);
519   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
520   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNC__
525 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4"
526 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
527 {
528   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
529   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
530   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
531   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
532   int         n,i,jrow,j;
533 
534   PetscFunctionBegin;
535   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
536   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
537   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
538   x    = x + shift;    /* shift for Fortran start by 1 indexing */
539   idx  = a->j;
540   v    = a->a;
541   ii   = a->i;
542 
543   v    += shift; /* shift for Fortran start by 1 indexing */
544   idx  += shift;
545   for (i=0; i<m; i++) {
546     jrow = ii[i];
547     n    = ii[i+1] - jrow;
548     sum1  = 0.0;
549     sum2  = 0.0;
550     sum3  = 0.0;
551     sum4  = 0.0;
552     for (j=0; j<n; j++) {
553       sum1 += v[jrow]*x[4*idx[jrow]];
554       sum2 += v[jrow]*x[4*idx[jrow]+1];
555       sum3 += v[jrow]*x[4*idx[jrow]+2];
556       sum4 += v[jrow]*x[4*idx[jrow]+3];
557       jrow++;
558      }
559     y[4*i]   += sum1;
560     y[4*i+1] += sum2;
561     y[4*i+2] += sum3;
562     y[4*i+3] += sum4;
563   }
564 
565   PLogFlops(8*a->nz - 4*m);
566   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
567   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 #undef __FUNC__
571 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4"
572 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
573 {
574   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
575   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
576   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
577   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
578 
579   PetscFunctionBegin;
580   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
581   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
582   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
583   y = y + shift; /* shift for Fortran start by 1 indexing */
584   for (i=0; i<m; i++) {
585     idx    = a->j + a->i[i] + shift;
586     v      = a->a + a->i[i] + shift;
587     n      = a->i[i+1] - a->i[i];
588     alpha1 = x[4*i];
589     alpha2 = x[4*i+1];
590     alpha3 = x[4*i+2];
591     alpha4 = x[4*i+3];
592     while (n-->0) {
593       y[4*(*idx)]   += alpha1*(*v);
594       y[4*(*idx)+1] += alpha2*(*v);
595       y[4*(*idx)+2] += alpha3*(*v);
596       y[4*(*idx)+3] += alpha4*(*v);
597       idx++; v++;
598     }
599   }
600   PLogFlops(8*a->nz - 4*a->n);
601   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
602   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 
605 }
606 
607 /* ------------------------------------------------------------------------------*/
608 #undef __FUNC__
609 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5"
610 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
611 {
612   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
613   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
614   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
615   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
616   int         n,i,jrow,j;
617 
618   PetscFunctionBegin;
619   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
620   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
621   x    = x + shift;    /* shift for Fortran start by 1 indexing */
622   idx  = a->j;
623   v    = a->a;
624   ii   = a->i;
625 
626   v    += shift; /* shift for Fortran start by 1 indexing */
627   idx  += shift;
628   for (i=0; i<m; i++) {
629     jrow = ii[i];
630     n    = ii[i+1] - jrow;
631     sum1  = 0.0;
632     sum2  = 0.0;
633     sum3  = 0.0;
634     sum4  = 0.0;
635     sum5  = 0.0;
636     for (j=0; j<n; j++) {
637       sum1 += v[jrow]*x[5*idx[jrow]];
638       sum2 += v[jrow]*x[5*idx[jrow]+1];
639       sum3 += v[jrow]*x[5*idx[jrow]+2];
640       sum4 += v[jrow]*x[5*idx[jrow]+3];
641       sum5 += v[jrow]*x[5*idx[jrow]+4];
642       jrow++;
643      }
644     y[5*i]   = sum1;
645     y[5*i+1] = sum2;
646     y[5*i+2] = sum3;
647     y[5*i+3] = sum4;
648     y[5*i+4] = sum5;
649   }
650 
651   PLogFlops(10*a->nz - 5*m);
652   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
653   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNC__
658 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5"
659 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
660 {
661   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
662   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
663   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
664   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
665 
666   PetscFunctionBegin;
667   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
668   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
669   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
670   y = y + shift; /* shift for Fortran start by 1 indexing */
671   for (i=0; i<m; i++) {
672     idx    = a->j + a->i[i] + shift;
673     v      = a->a + a->i[i] + shift;
674     n      = a->i[i+1] - a->i[i];
675     alpha1 = x[5*i];
676     alpha2 = x[5*i+1];
677     alpha3 = x[5*i+2];
678     alpha4 = x[5*i+3];
679     alpha5 = x[5*i+4];
680     while (n-->0) {
681       y[5*(*idx)]   += alpha1*(*v);
682       y[5*(*idx)+1] += alpha2*(*v);
683       y[5*(*idx)+2] += alpha3*(*v);
684       y[5*(*idx)+3] += alpha4*(*v);
685       y[5*(*idx)+4] += alpha5*(*v);
686       idx++; v++;
687     }
688   }
689   PLogFlops(10*a->nz - 5*a->n);
690   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
691   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNC__
696 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5"
697 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
698 {
699   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
700   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
701   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
702   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
703   int         n,i,jrow,j;
704 
705   PetscFunctionBegin;
706   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
707   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
708   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
709   x    = x + shift;    /* shift for Fortran start by 1 indexing */
710   idx  = a->j;
711   v    = a->a;
712   ii   = a->i;
713 
714   v    += shift; /* shift for Fortran start by 1 indexing */
715   idx  += shift;
716   for (i=0; i<m; i++) {
717     jrow = ii[i];
718     n    = ii[i+1] - jrow;
719     sum1  = 0.0;
720     sum2  = 0.0;
721     sum3  = 0.0;
722     sum4  = 0.0;
723     sum5  = 0.0;
724     for (j=0; j<n; j++) {
725       sum1 += v[jrow]*x[5*idx[jrow]];
726       sum2 += v[jrow]*x[5*idx[jrow]+1];
727       sum3 += v[jrow]*x[5*idx[jrow]+2];
728       sum4 += v[jrow]*x[5*idx[jrow]+3];
729       sum5 += v[jrow]*x[5*idx[jrow]+4];
730       jrow++;
731      }
732     y[5*i]   += sum1;
733     y[5*i+1] += sum2;
734     y[5*i+2] += sum3;
735     y[5*i+3] += sum4;
736     y[5*i+4] += sum5;
737   }
738 
739   PLogFlops(10*a->nz);
740   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
741   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNC__
746 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5"
747 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
748 {
749   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
750   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
751   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
752   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
753 
754   PetscFunctionBegin;
755   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
756   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
757   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
758   y = y + shift; /* shift for Fortran start by 1 indexing */
759   for (i=0; i<m; i++) {
760     idx    = a->j + a->i[i] + shift;
761     v      = a->a + a->i[i] + shift;
762     n      = a->i[i+1] - a->i[i];
763     alpha1 = x[5*i];
764     alpha2 = x[5*i+1];
765     alpha3 = x[5*i+2];
766     alpha4 = x[5*i+3];
767     alpha5 = x[5*i+4];
768     while (n-->0) {
769       y[5*(*idx)]   += alpha1*(*v);
770       y[5*(*idx)+1] += alpha2*(*v);
771       y[5*(*idx)+2] += alpha3*(*v);
772       y[5*(*idx)+3] += alpha4*(*v);
773       y[5*(*idx)+4] += alpha5*(*v);
774       idx++; v++;
775     }
776   }
777   PLogFlops(10*a->nz);
778   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
779   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782 
783 /*===================================================================================*/
784 #undef __FUNC__
785 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_dof"></a>*/"MatMult_MPIMAIJ_dof"
786 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
787 {
788   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
789   int         ierr;
790   PetscFunctionBegin;
791 
792   /* start the scatter */
793   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
794   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
795   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
796   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNC__
801 #define __FUNC__ /*<a name="MatMultTranspose_MPIMAIJ_dof"></a>*/"MatMultTranspose_MPIMAIJ_dof"
802 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
803 {
804   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
805   int         ierr;
806   PetscFunctionBegin;
807   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
808   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
809   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
810   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
811   PetscFunctionReturn(0);
812 }
813 
814 #undef __FUNC__
815 #define __FUNC__ /*<a name="MatMultAdd_MPIMAIJ_dof"></a>*/"MatMultAdd_MPIMAIJ_dof"
816 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
817 {
818   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
819   int         ierr;
820   PetscFunctionBegin;
821 
822   /* start the scatter */
823   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
824   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
825   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
826   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNC__
831 #define __FUNC__ /*<a name="MatMultTransposeAdd_MPIMAIJ_dof"></a>*/"MatMultTransposeAdd_MPIMAIJ_dof"
832 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
833 {
834   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
835   int         ierr;
836   PetscFunctionBegin;
837   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
838   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
839   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
840   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
841   PetscFunctionReturn(0);
842 }
843 
844 /* ---------------------------------------------------------------------------------- */
845 #undef __FUNC__
846 #define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ"
847 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
848 {
849   int         ierr,size,n;
850   Mat_MPIMAIJ *b;
851   Mat         B;
852 
853   PetscFunctionBegin;
854   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
855 
856   if (dof == 1) {
857     *maij = A;
858   } else {
859     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
860     B->assembled    = PETSC_TRUE;
861 
862     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
863     if (size == 1) {
864       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
865       B->ops->destroy = MatDestroy_SeqMAIJ;
866       b      = (Mat_MPIMAIJ*)B->data;
867       b->dof = dof;
868       b->AIJ = A;
869       if (dof == 2) {
870         B->ops->mult             = MatMult_SeqMAIJ_2;
871         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
872         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
873         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
874       } else if (dof == 3) {
875         B->ops->mult             = MatMult_SeqMAIJ_3;
876         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
877         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
878         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
879       } else if (dof == 4) {
880         B->ops->mult             = MatMult_SeqMAIJ_4;
881         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
882         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
883         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
884       } else if (dof == 5) {
885         B->ops->mult             = MatMult_SeqMAIJ_5;
886         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
887         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
888         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
889       } else {
890         SETERRQ1(1,"Cannot handle a dof of %d\n",dof);
891       }
892     } else {
893       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
894       IS         from,to;
895       Vec        gvec;
896       int        *garray,i;
897 
898       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
899       B->ops->destroy = MatDestroy_MPIMAIJ;
900       b      = (Mat_MPIMAIJ*)B->data;
901       b->dof = dof;
902       b->A   = A;
903       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
904       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
905 
906       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
907       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
908 
909       /* create two temporary Index sets for build scatter gather */
910       garray = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(garray);
911       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
912       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
913       ierr = PetscFree(garray);CHKERRQ(ierr);
914       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
915 
916       /* create temporary global vector to generate scatter context */
917       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
918 
919       /* generate the scatter context */
920       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
921 
922       ierr = ISDestroy(from);CHKERRQ(ierr);
923       ierr = ISDestroy(to);CHKERRQ(ierr);
924       ierr = VecDestroy(gvec);CHKERRQ(ierr);
925 
926       B->ops->mult             = MatMult_MPIMAIJ_dof;
927       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
928       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
929       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
930     }
931     *maij = B;
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 
937 
938 
939 
940 
941 
942 
943 
944 
945 
946 
947