xref: /petsc/src/mat/impls/maij/maij.c (revision f7cf7585ec4ab5bbe01f2b7df7e992ca325859cc)
1 /*$Id: maij.c,v 1.6 2000/06/27 17:46:45 bsmith Exp bsmith $*/
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include "src/mat/impls/aij/mpi/mpiaij.h"
20 
21 typedef struct {
22   int        dof;         /* number of components */
23   Mat        AIJ;        /* representation of interpolation for one component */
24 } Mat_SeqMAIJ;
25 
26 typedef struct {
27   int        dof;         /* number of components */
28   Mat        AIJ,OAIJ;    /* representation of interpolation for one component */
29   Mat        A;
30   VecScatter ctx;         /* update ghost points for parallel case */
31   Vec        w;           /* work space for ghost values for parallel case */
32 } Mat_MPIMAIJ;
33 
34 #undef __FUNC__
35 #define __FUNC__ /*<a name="MatDestroy_SeqMAIJ"></a>*/"MatDestroy_SeqMAIJ"
36 int MatDestroy_SeqMAIJ(Mat A)
37 {
38   int         ierr;
39   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
40 
41   PetscFunctionBegin;
42   if (b->AIJ) {
43     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
44   }
45   ierr = PetscFree(b);CHKERRQ(ierr);
46   PetscHeaderDestroy(A);
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNC__
51 #define __FUNC__ /*<a name="MatDestroy_MPIMAIJ"></a>*/"MatDestroy_MPIMAIJ"
52 int MatDestroy_MPIMAIJ(Mat A)
53 {
54   int         ierr;
55   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
56 
57   PetscFunctionBegin;
58   if (b->A) {
59     ierr = MatDestroy(b->A);CHKERRQ(ierr);
60   }
61   if (b->AIJ) {
62     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
63   }
64   if (b->OAIJ) {
65     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
66   }
67   if (b->ctx) {
68     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
69   }
70   if (b->w) {
71     ierr = VecDestroy(b->w);CHKERRQ(ierr);
72   }
73   ierr = PetscFree(b);CHKERRQ(ierr);
74   PetscHeaderDestroy(A);
75   PetscFunctionReturn(0);
76 }
77 
78 EXTERN_C_BEGIN
79 #undef __FUNC__
80 #define __FUNC__ /*<a name="MatCreate_MAIJ"></a>*/"MatCreate_MAIJ"
81 int MatCreate_MAIJ(Mat A)
82 {
83   int         ierr;
84   Mat_MPIMAIJ *b;
85 
86   PetscFunctionBegin;
87   A->data             = (void*)(b = PetscNew(Mat_MPIMAIJ));CHKPTRQ(b);
88   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
89   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
90   A->factor           = 0;
91   A->mapping          = 0;
92 
93   b->AIJ  = 0;
94   b->dof  = 0;
95   b->OAIJ = 0;
96   b->ctx  = 0;
97   b->w    = 0;
98   PetscFunctionReturn(0);
99 }
100 EXTERN_C_END
101 
102 /* --------------------------------------------------------------------------------------*/
103 #undef __FUNC__
104 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_2"></a>*/"MatMult_SeqMAIJ_2"
105 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
106 {
107   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
108   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
109   Scalar      *x,*y,*v,sum1, sum2;
110   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
111   int         n,i,jrow,j;
112 
113   PetscFunctionBegin;
114   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
115   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
116   x    = x + shift;    /* shift for Fortran start by 1 indexing */
117   idx  = a->j;
118   v    = a->a;
119   ii   = a->i;
120 
121   v    += shift; /* shift for Fortran start by 1 indexing */
122   idx  += shift;
123   for (i=0; i<m; i++) {
124     jrow = ii[i];
125     n    = ii[i+1] - jrow;
126     sum1  = 0.0;
127     sum2  = 0.0;
128     for (j=0; j<n; j++) {
129       sum1 += v[jrow]*x[2*idx[jrow]];
130       sum2 += v[jrow]*x[2*idx[jrow]+1];
131       jrow++;
132      }
133     y[2*i]   = sum1;
134     y[2*i+1] = sum2;
135   }
136 
137   PLogFlops(4*a->nz - 2*m);
138   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
139   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 #undef __FUNC__
144 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_2"></a>*/"MatMultTranspose_SeqMAIJ_2"
145 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
146 {
147   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
148   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
149   Scalar      *x,*y,*v,alpha1,alpha2,zero = 0.0;
150   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
151 
152   PetscFunctionBegin;
153   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
154   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
155   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
156   y = y + shift; /* shift for Fortran start by 1 indexing */
157   for (i=0; i<m; i++) {
158     idx    = a->j + a->i[i] + shift;
159     v      = a->a + a->i[i] + shift;
160     n      = a->i[i+1] - a->i[i];
161     alpha1 = x[2*i];
162     alpha2 = x[2*i+1];
163     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
164   }
165   PLogFlops(4*a->nz - 2*a->n);
166   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
167   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNC__
172 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_2"></a>*/"MatMultAdd_SeqMAIJ_2"
173 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
174 {
175   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
176   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
177   Scalar      *x,*y,*v,sum1, sum2;
178   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
179   int         n,i,jrow,j;
180 
181   PetscFunctionBegin;
182   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
183   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
184   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
185   x    = x + shift;    /* shift for Fortran start by 1 indexing */
186   idx  = a->j;
187   v    = a->a;
188   ii   = a->i;
189 
190   v    += shift; /* shift for Fortran start by 1 indexing */
191   idx  += shift;
192   for (i=0; i<m; i++) {
193     jrow = ii[i];
194     n    = ii[i+1] - jrow;
195     sum1  = 0.0;
196     sum2  = 0.0;
197     for (j=0; j<n; j++) {
198       sum1 += v[jrow]*x[2*idx[jrow]];
199       sum2 += v[jrow]*x[2*idx[jrow]+1];
200       jrow++;
201      }
202     y[2*i]   += sum1;
203     y[2*i+1] += sum2;
204   }
205 
206   PLogFlops(4*a->nz - 2*m);
207   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 #undef __FUNC__
212 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_2"></a>*/"MatMultTransposeAdd_SeqMAIJ_2"
213 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
214 {
215   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
216   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
217   Scalar      *x,*y,*v,alpha1,alpha2;
218   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
219 
220   PetscFunctionBegin;
221   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
222   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
223   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
224   y = y + shift; /* shift for Fortran start by 1 indexing */
225   for (i=0; i<m; i++) {
226     idx   = a->j + a->i[i] + shift;
227     v     = a->a + a->i[i] + shift;
228     n     = a->i[i+1] - a->i[i];
229     alpha1 = x[2*i];
230     alpha2 = x[2*i+1];
231     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
232   }
233   PLogFlops(4*a->nz - 2*a->n);
234   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
235   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 /* --------------------------------------------------------------------------------------*/
239 #undef __FUNC__
240 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_3"></a>*/"MatMult_SeqMAIJ_3"
241 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
242 {
243   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
244   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
245   Scalar      *x,*y,*v,sum1, sum2, sum3;
246   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
247   int         n,i,jrow,j;
248 
249   PetscFunctionBegin;
250   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
251   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
252   x    = x + shift;    /* shift for Fortran start by 1 indexing */
253   idx  = a->j;
254   v    = a->a;
255   ii   = a->i;
256 
257   v    += shift; /* shift for Fortran start by 1 indexing */
258   idx  += shift;
259   for (i=0; i<m; i++) {
260     jrow = ii[i];
261     n    = ii[i+1] - jrow;
262     sum1  = 0.0;
263     sum2  = 0.0;
264     sum3  = 0.0;
265     for (j=0; j<n; j++) {
266       sum1 += v[jrow]*x[3*idx[jrow]];
267       sum2 += v[jrow]*x[3*idx[jrow]+1];
268       sum3 += v[jrow]*x[3*idx[jrow]+2];
269       jrow++;
270      }
271     y[3*i]   = sum1;
272     y[3*i+1] = sum2;
273     y[3*i+2] = sum3;
274   }
275 
276   PLogFlops(6*a->nz - 3*m);
277   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
278   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNC__
283 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_3"></a>*/"MatMultTranspose_SeqMAIJ_3"
284 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
285 {
286   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
287   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
288   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
289   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
290 
291   PetscFunctionBegin;
292   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
293   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
294   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
295   y = y + shift; /* shift for Fortran start by 1 indexing */
296   for (i=0; i<m; i++) {
297     idx    = a->j + a->i[i] + shift;
298     v      = a->a + a->i[i] + shift;
299     n      = a->i[i+1] - a->i[i];
300     alpha1 = x[3*i];
301     alpha2 = x[3*i+1];
302     alpha3 = x[3*i+2];
303     while (n-->0) {
304       y[3*(*idx)]   += alpha1*(*v);
305       y[3*(*idx)+1] += alpha2*(*v);
306       y[3*(*idx)+2] += alpha3*(*v);
307       idx++; v++;
308     }
309   }
310   PLogFlops(6*a->nz - 3*a->n);
311   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
312   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 #undef __FUNC__
317 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_3"></a>*/"MatMultAdd_SeqMAIJ_3"
318 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
319 {
320   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
321   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
322   Scalar      *x,*y,*v,sum1, sum2, sum3;
323   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
324   int         n,i,jrow,j;
325 
326   PetscFunctionBegin;
327   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
328   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
329   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
330   x    = x + shift;    /* shift for Fortran start by 1 indexing */
331   idx  = a->j;
332   v    = a->a;
333   ii   = a->i;
334 
335   v    += shift; /* shift for Fortran start by 1 indexing */
336   idx  += shift;
337   for (i=0; i<m; i++) {
338     jrow = ii[i];
339     n    = ii[i+1] - jrow;
340     sum1  = 0.0;
341     sum2  = 0.0;
342     sum3  = 0.0;
343     for (j=0; j<n; j++) {
344       sum1 += v[jrow]*x[3*idx[jrow]];
345       sum2 += v[jrow]*x[3*idx[jrow]+1];
346       sum3 += v[jrow]*x[3*idx[jrow]+2];
347       jrow++;
348      }
349     y[3*i]   += sum1;
350     y[3*i+1] += sum2;
351     y[3*i+2] += sum3;
352   }
353 
354   PLogFlops(6*a->nz);
355   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
356   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 #undef __FUNC__
360 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_3"></a>*/"MatMultTransposeAdd_SeqMAIJ_3"
361 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
362 {
363   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
364   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
365   Scalar      *x,*y,*v,alpha1,alpha2,alpha3;
366   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
367 
368   PetscFunctionBegin;
369   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
370   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
371   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
372   y = y + shift; /* shift for Fortran start by 1 indexing */
373   for (i=0; i<m; i++) {
374     idx    = a->j + a->i[i] + shift;
375     v      = a->a + a->i[i] + shift;
376     n      = a->i[i+1] - a->i[i];
377     alpha1 = x[3*i];
378     alpha2 = x[3*i+1];
379     alpha3 = x[3*i+2];
380     while (n-->0) {
381       y[3*(*idx)]   += alpha1*(*v);
382       y[3*(*idx)+1] += alpha2*(*v);
383       y[3*(*idx)+2] += alpha3*(*v);
384       idx++; v++;
385     }
386   }
387   PLogFlops(6*a->nz);
388   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
389   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
393 /* ------------------------------------------------------------------------------*/
394 #undef __FUNC__
395 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_4"></a>*/"MatMult_SeqMAIJ_4"
396 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
397 {
398   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
399   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
400   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
401   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
402   int         n,i,jrow,j;
403 
404   PetscFunctionBegin;
405   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
406   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
407   x    = x + shift;    /* shift for Fortran start by 1 indexing */
408   idx  = a->j;
409   v    = a->a;
410   ii   = a->i;
411 
412   v    += shift; /* shift for Fortran start by 1 indexing */
413   idx  += shift;
414   for (i=0; i<m; i++) {
415     jrow = ii[i];
416     n    = ii[i+1] - jrow;
417     sum1  = 0.0;
418     sum2  = 0.0;
419     sum3  = 0.0;
420     sum4  = 0.0;
421     for (j=0; j<n; j++) {
422       sum1 += v[jrow]*x[4*idx[jrow]];
423       sum2 += v[jrow]*x[4*idx[jrow]+1];
424       sum3 += v[jrow]*x[4*idx[jrow]+2];
425       sum4 += v[jrow]*x[4*idx[jrow]+3];
426       jrow++;
427      }
428     y[4*i]   = sum1;
429     y[4*i+1] = sum2;
430     y[4*i+2] = sum3;
431     y[4*i+3] = sum4;
432   }
433 
434   PLogFlops(8*a->nz - 4*m);
435   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
436   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
437   PetscFunctionReturn(0);
438 }
439 
440 #undef __FUNC__
441 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_4"></a>*/"MatMultTranspose_SeqMAIJ_4"
442 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
443 {
444   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
445   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
446   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
447   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
448 
449   PetscFunctionBegin;
450   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
451   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
452   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
453   y = y + shift; /* shift for Fortran start by 1 indexing */
454   for (i=0; i<m; i++) {
455     idx    = a->j + a->i[i] + shift;
456     v      = a->a + a->i[i] + shift;
457     n      = a->i[i+1] - a->i[i];
458     alpha1 = x[4*i];
459     alpha2 = x[4*i+1];
460     alpha3 = x[4*i+2];
461     alpha4 = x[4*i+3];
462     while (n-->0) {
463       y[4*(*idx)]   += alpha1*(*v);
464       y[4*(*idx)+1] += alpha2*(*v);
465       y[4*(*idx)+2] += alpha3*(*v);
466       y[4*(*idx)+3] += alpha4*(*v);
467       idx++; v++;
468     }
469   }
470   PLogFlops(8*a->nz - 4*a->n);
471   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
472   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNC__
477 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_4"></a>*/"MatMultAdd_SeqMAIJ_4"
478 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
479 {
480   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
481   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
482   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
483   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
484   int         n,i,jrow,j;
485 
486   PetscFunctionBegin;
487   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
488   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
489   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
490   x    = x + shift;    /* shift for Fortran start by 1 indexing */
491   idx  = a->j;
492   v    = a->a;
493   ii   = a->i;
494 
495   v    += shift; /* shift for Fortran start by 1 indexing */
496   idx  += shift;
497   for (i=0; i<m; i++) {
498     jrow = ii[i];
499     n    = ii[i+1] - jrow;
500     sum1  = 0.0;
501     sum2  = 0.0;
502     sum3  = 0.0;
503     sum4  = 0.0;
504     for (j=0; j<n; j++) {
505       sum1 += v[jrow]*x[4*idx[jrow]];
506       sum2 += v[jrow]*x[4*idx[jrow]+1];
507       sum3 += v[jrow]*x[4*idx[jrow]+2];
508       sum4 += v[jrow]*x[4*idx[jrow]+3];
509       jrow++;
510      }
511     y[4*i]   += sum1;
512     y[4*i+1] += sum2;
513     y[4*i+2] += sum3;
514     y[4*i+3] += sum4;
515   }
516 
517   PLogFlops(8*a->nz - 4*m);
518   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
519   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 #undef __FUNC__
523 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_4"></a>*/"MatMultTransposeAdd_SeqMAIJ_4"
524 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
525 {
526   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
527   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
528   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
529   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
530 
531   PetscFunctionBegin;
532   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
533   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
534   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
535   y = y + shift; /* shift for Fortran start by 1 indexing */
536   for (i=0; i<m; i++) {
537     idx    = a->j + a->i[i] + shift;
538     v      = a->a + a->i[i] + shift;
539     n      = a->i[i+1] - a->i[i];
540     alpha1 = x[4*i];
541     alpha2 = x[4*i+1];
542     alpha3 = x[4*i+2];
543     alpha4 = x[4*i+3];
544     while (n-->0) {
545       y[4*(*idx)]   += alpha1*(*v);
546       y[4*(*idx)+1] += alpha2*(*v);
547       y[4*(*idx)+2] += alpha3*(*v);
548       y[4*(*idx)+3] += alpha4*(*v);
549       idx++; v++;
550     }
551   }
552   PLogFlops(8*a->nz - 4*a->n);
553   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
554   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 
557 }
558 
559 /* ------------------------------------------------------------------------------*/
560 #undef __FUNC__
561 #define __FUNC__ /*<a name="MatMult_SeqMAIJ_5"></a>*/"MatMult_SeqMAIJ_5"
562 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
563 {
564   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
565   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
566   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
567   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
568   int         n,i,jrow,j;
569 
570   PetscFunctionBegin;
571   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
572   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
573   x    = x + shift;    /* shift for Fortran start by 1 indexing */
574   idx  = a->j;
575   v    = a->a;
576   ii   = a->i;
577 
578   v    += shift; /* shift for Fortran start by 1 indexing */
579   idx  += shift;
580   for (i=0; i<m; i++) {
581     jrow = ii[i];
582     n    = ii[i+1] - jrow;
583     sum1  = 0.0;
584     sum2  = 0.0;
585     sum3  = 0.0;
586     sum4  = 0.0;
587     sum5  = 0.0;
588     for (j=0; j<n; j++) {
589       sum1 += v[jrow]*x[5*idx[jrow]];
590       sum2 += v[jrow]*x[5*idx[jrow]+1];
591       sum3 += v[jrow]*x[5*idx[jrow]+2];
592       sum4 += v[jrow]*x[5*idx[jrow]+3];
593       sum5 += v[jrow]*x[5*idx[jrow]+4];
594       jrow++;
595      }
596     y[5*i]   = sum1;
597     y[5*i+1] = sum2;
598     y[5*i+2] = sum3;
599     y[5*i+3] = sum4;
600     y[5*i+4] = sum5;
601   }
602 
603   PLogFlops(10*a->nz - 5*m);
604   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
605   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
608 
609 #undef __FUNC__
610 #define __FUNC__ /*<a name="MatMultTranspose_SeqMAIJ_5"></a>*/"MatMultTranspose_SeqMAIJ_5"
611 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
612 {
613   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
614   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
615   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
616   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
617 
618   PetscFunctionBegin;
619   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
620   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
621   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
622   y = y + shift; /* shift for Fortran start by 1 indexing */
623   for (i=0; i<m; i++) {
624     idx    = a->j + a->i[i] + shift;
625     v      = a->a + a->i[i] + shift;
626     n      = a->i[i+1] - a->i[i];
627     alpha1 = x[5*i];
628     alpha2 = x[5*i+1];
629     alpha3 = x[5*i+2];
630     alpha4 = x[5*i+3];
631     alpha5 = x[5*i+4];
632     while (n-->0) {
633       y[5*(*idx)]   += alpha1*(*v);
634       y[5*(*idx)+1] += alpha2*(*v);
635       y[5*(*idx)+2] += alpha3*(*v);
636       y[5*(*idx)+3] += alpha4*(*v);
637       y[5*(*idx)+4] += alpha5*(*v);
638       idx++; v++;
639     }
640   }
641   PLogFlops(10*a->nz - 5*a->n);
642   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
643   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 #undef __FUNC__
648 #define __FUNC__ /*<a name="MatMultAdd_SeqMAIJ_5"></a>*/"MatMultAdd_SeqMAIJ_5"
649 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
650 {
651   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
652   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
653   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
654   int         ierr,m = a->m,*idx,shift = a->indexshift,*ii;
655   int         n,i,jrow,j;
656 
657   PetscFunctionBegin;
658   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
659   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
660   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
661   x    = x + shift;    /* shift for Fortran start by 1 indexing */
662   idx  = a->j;
663   v    = a->a;
664   ii   = a->i;
665 
666   v    += shift; /* shift for Fortran start by 1 indexing */
667   idx  += shift;
668   for (i=0; i<m; i++) {
669     jrow = ii[i];
670     n    = ii[i+1] - jrow;
671     sum1  = 0.0;
672     sum2  = 0.0;
673     sum3  = 0.0;
674     sum4  = 0.0;
675     sum5  = 0.0;
676     for (j=0; j<n; j++) {
677       sum1 += v[jrow]*x[5*idx[jrow]];
678       sum2 += v[jrow]*x[5*idx[jrow]+1];
679       sum3 += v[jrow]*x[5*idx[jrow]+2];
680       sum4 += v[jrow]*x[5*idx[jrow]+3];
681       sum5 += v[jrow]*x[5*idx[jrow]+4];
682       jrow++;
683      }
684     y[5*i]   += sum1;
685     y[5*i+1] += sum2;
686     y[5*i+2] += sum3;
687     y[5*i+3] += sum4;
688     y[5*i+4] += sum5;
689   }
690 
691   PLogFlops(10*a->nz);
692   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
693   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNC__
698 #define __FUNC__ /*<a name="MatMultTransposeAdd_SeqMAIJ_5"></a>*/"MatMultTransposeAdd_SeqMAIJ_5"
699 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
700 {
701   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
702   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
703   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
704   int         ierr,m = a->m,n,i,*idx,shift = a->indexshift;
705 
706   PetscFunctionBegin;
707   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
708   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
709   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
710   y = y + shift; /* shift for Fortran start by 1 indexing */
711   for (i=0; i<m; i++) {
712     idx    = a->j + a->i[i] + shift;
713     v      = a->a + a->i[i] + shift;
714     n      = a->i[i+1] - a->i[i];
715     alpha1 = x[5*i];
716     alpha2 = x[5*i+1];
717     alpha3 = x[5*i+2];
718     alpha4 = x[5*i+3];
719     alpha5 = x[5*i+4];
720     while (n-->0) {
721       y[5*(*idx)]   += alpha1*(*v);
722       y[5*(*idx)+1] += alpha2*(*v);
723       y[5*(*idx)+2] += alpha3*(*v);
724       y[5*(*idx)+3] += alpha4*(*v);
725       y[5*(*idx)+4] += alpha5*(*v);
726       idx++; v++;
727     }
728   }
729   PLogFlops(10*a->nz);
730   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
731   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
732   PetscFunctionReturn(0);
733 }
734 
735 /*===================================================================================*/
736 #undef __FUNC__
737 #define __FUNC__ /*<a name="MatMult_MPIMAIJ_dof"></a>*/"MatMult_MPIMAIJ_dof"
738 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
739 {
740   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
741   int         ierr;
742   PetscFunctionBegin;
743 
744   /* start the scatter */
745   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
746   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
747   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
748   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNC__
753 #define __FUNC__ /*<a name="MatMultTranspose_MPIMAIJ_dof"></a>*/"MatMultTranspose_MPIMAIJ_dof"
754 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
755 {
756   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
757   int         ierr;
758   PetscFunctionBegin;
759   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
760   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
761   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
762   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNC__
767 #define __FUNC__ /*<a name="MatMultAdd_MPIMAIJ_dof"></a>*/"MatMultAdd_MPIMAIJ_dof"
768 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
769 {
770   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
771   int         ierr;
772   PetscFunctionBegin;
773 
774   /* start the scatter */
775   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
776   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
777   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
778   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNC__
783 #define __FUNC__ /*<a name="MatMultTransposeAdd_MPIMAIJ_dof"></a>*/"MatMultTransposeAdd_MPIMAIJ_dof"
784 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
785 {
786   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
787   int         ierr;
788   PetscFunctionBegin;
789   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
790   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
791   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
792   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 
796 /* ---------------------------------------------------------------------------------- */
797 #undef __FUNC__
798 #define __FUNC__ /*<a name="MatCreateMAIJ"></a>*/"MatCreateMAIJ"
799 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
800 {
801   int         ierr,size,n;
802   Mat_MPIMAIJ *b;
803   Mat         B;
804 
805   PetscFunctionBegin;
806   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
807 
808   if (dof == 1) {
809     *maij = A;
810   } else {
811     ierr = MATCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
812     ierr = MatSetType(B,MATMAIJ);CHKERRQ(ierr);
813     B->assembled    = PETSC_TRUE;
814     b = (Mat_MPIMAIJ*)B->data;
815     b->dof = dof;
816 
817     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
818     if (size == 1) {
819       B->ops->destroy = MatDestroy_SeqMAIJ;
820       b->AIJ = A;
821       if (dof == 2) {
822         B->ops->mult             = MatMult_SeqMAIJ_2;
823         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
824         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
825         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
826       } else if (dof == 3) {
827         B->ops->mult             = MatMult_SeqMAIJ_3;
828         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
829         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
830         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
831       } else if (dof == 4) {
832         B->ops->mult             = MatMult_SeqMAIJ_4;
833         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
834         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
835         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
836       } else if (dof == 5) {
837         B->ops->mult             = MatMult_SeqMAIJ_5;
838         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
839         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
840         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
841       } else {
842         SETERRQ1(1,1,"Cannot handle a dof of %d\n",dof);
843       }
844     } else {
845       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
846       IS         from,to;
847       Vec        gvec;
848       int        *garray,i;
849 
850       b->A = A;
851       B->ops->destroy = MatDestroy_MPIMAIJ;
852       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
853       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
854 
855       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
856       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
857 
858       /* create two temporary Index sets for build scatter gather */
859       garray = (int*)PetscMalloc((n+1)*sizeof(int));CHKPTRQ(garray);
860       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
861       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
862       ierr = PetscFree(garray);CHKERRQ(ierr);
863       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
864 
865       /* create temporary global vector to generate scatter context */
866       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
867 
868       /* generate the scatter context */
869       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
870 
871       ierr = ISDestroy(from);CHKERRQ(ierr);
872       ierr = ISDestroy(to);CHKERRQ(ierr);
873       ierr = VecDestroy(gvec);CHKERRQ(ierr);
874 
875       B->ops->mult             = MatMult_MPIMAIJ_dof;
876       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
877       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
878       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
879     }
880     *maij = B;
881   }
882   PetscFunctionReturn(0);
883 }
884 
885 
886 
887 
888 
889 
890 
891 
892 
893 
894 
895 
896