xref: /petsc/src/mat/impls/maij/maij.c (revision 15e8a5b3e3ad67526f50ce4d2e8f9da59aa8d948)
1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include "src/mat/impls/maij/maij.h"
20 #include "src/vec/vecimpl.h"
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatMAIJGetAIJ"
24 int MatMAIJGetAIJ(Mat A,Mat *B)
25 {
26   int         ierr;
27   PetscTruth  ismpimaij,isseqmaij;
28 
29   PetscFunctionBegin;
30   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32   if (ismpimaij) {
33     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34 
35     *B = b->A;
36   } else if (isseqmaij) {
37     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38 
39     *B = b->AIJ;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatMAIJRedimension"
48 int MatMAIJRedimension(Mat A,int dof,Mat *B)
49 {
50   int ierr;
51   Mat Aij;
52 
53   PetscFunctionBegin;
54   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatDestroy_SeqMAIJ"
61 int MatDestroy_SeqMAIJ(Mat A)
62 {
63   int         ierr;
64   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
65 
66   PetscFunctionBegin;
67   if (b->AIJ) {
68     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
69   }
70   ierr = PetscFree(b);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatDestroy_MPIMAIJ"
76 int MatDestroy_MPIMAIJ(Mat A)
77 {
78   int         ierr;
79   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
80 
81   PetscFunctionBegin;
82   if (b->AIJ) {
83     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
84   }
85   if (b->OAIJ) {
86     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
87   }
88   if (b->A) {
89     ierr = MatDestroy(b->A);CHKERRQ(ierr);
90   }
91   if (b->ctx) {
92     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
93   }
94   if (b->w) {
95     ierr = VecDestroy(b->w);CHKERRQ(ierr);
96   }
97   ierr = PetscFree(b);CHKERRQ(ierr);
98   PetscFunctionReturn(0);
99 }
100 
101 EXTERN_C_BEGIN
102 #undef __FUNCT__
103 #define __FUNCT__ "MatCreate_MAIJ"
104 int MatCreate_MAIJ(Mat A)
105 {
106   int         ierr;
107   Mat_MPIMAIJ *b;
108 
109   PetscFunctionBegin;
110   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
111   A->data  = (void*)b;
112   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
113   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
114   A->factor           = 0;
115   A->mapping          = 0;
116 
117   b->AIJ  = 0;
118   b->dof  = 0;
119   b->OAIJ = 0;
120   b->ctx  = 0;
121   b->w    = 0;
122   PetscFunctionReturn(0);
123 }
124 EXTERN_C_END
125 
126 /* --------------------------------------------------------------------------------------*/
127 #undef __FUNCT__
128 #define __FUNCT__ "MatMult_SeqMAIJ_2"
129 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
130 {
131   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
132   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
133   PetscScalar   *x,*y,*v,sum1, sum2;
134   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
135   int           n,i,jrow,j;
136 
137   PetscFunctionBegin;
138   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
139   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
140   x    = x + shift;    /* shift for Fortran start by 1 indexing */
141   idx  = a->j;
142   v    = a->a;
143   ii   = a->i;
144 
145   v    += shift; /* shift for Fortran start by 1 indexing */
146   idx  += shift;
147   for (i=0; i<m; i++) {
148     jrow = ii[i];
149     n    = ii[i+1] - jrow;
150     sum1  = 0.0;
151     sum2  = 0.0;
152     for (j=0; j<n; j++) {
153       sum1 += v[jrow]*x[2*idx[jrow]];
154       sum2 += v[jrow]*x[2*idx[jrow]+1];
155       jrow++;
156      }
157     y[2*i]   = sum1;
158     y[2*i+1] = sum2;
159   }
160 
161   PetscLogFlops(4*a->nz - 2*m);
162   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
163   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
169 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
170 {
171   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
172   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
173   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
174   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
175 
176   PetscFunctionBegin;
177   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
178   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
179   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
180   y = y + shift; /* shift for Fortran start by 1 indexing */
181   for (i=0; i<m; i++) {
182     idx    = a->j + a->i[i] + shift;
183     v      = a->a + a->i[i] + shift;
184     n      = a->i[i+1] - a->i[i];
185     alpha1 = x[2*i];
186     alpha2 = x[2*i+1];
187     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
188   }
189   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
190   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
191   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
197 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
198 {
199   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
200   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
201   PetscScalar   *x,*y,*v,sum1, sum2;
202   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
203   int           n,i,jrow,j;
204 
205   PetscFunctionBegin;
206   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
207   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
208   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
209   x    = x + shift;    /* shift for Fortran start by 1 indexing */
210   idx  = a->j;
211   v    = a->a;
212   ii   = a->i;
213 
214   v    += shift; /* shift for Fortran start by 1 indexing */
215   idx  += shift;
216   for (i=0; i<m; i++) {
217     jrow = ii[i];
218     n    = ii[i+1] - jrow;
219     sum1  = 0.0;
220     sum2  = 0.0;
221     for (j=0; j<n; j++) {
222       sum1 += v[jrow]*x[2*idx[jrow]];
223       sum2 += v[jrow]*x[2*idx[jrow]+1];
224       jrow++;
225      }
226     y[2*i]   += sum1;
227     y[2*i+1] += sum2;
228   }
229 
230   PetscLogFlops(4*a->nz - 2*m);
231   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
232   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
233   PetscFunctionReturn(0);
234 }
235 #undef __FUNCT__
236 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
237 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
238 {
239   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
240   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
241   PetscScalar   *x,*y,*v,alpha1,alpha2;
242   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
243 
244   PetscFunctionBegin;
245   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
246   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
247   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
248   y = y + shift; /* shift for Fortran start by 1 indexing */
249   for (i=0; i<m; i++) {
250     idx   = a->j + a->i[i] + shift;
251     v     = a->a + a->i[i] + shift;
252     n     = a->i[i+1] - a->i[i];
253     alpha1 = x[2*i];
254     alpha2 = x[2*i+1];
255     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
256   }
257   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
258   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
259   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 /* --------------------------------------------------------------------------------------*/
263 #undef __FUNCT__
264 #define __FUNCT__ "MatMult_SeqMAIJ_3"
265 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
266 {
267   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
268   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
269   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
270   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
271   int           n,i,jrow,j;
272 
273   PetscFunctionBegin;
274   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
275   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
276   x    = x + shift;    /* shift for Fortran start by 1 indexing */
277   idx  = a->j;
278   v    = a->a;
279   ii   = a->i;
280 
281   v    += shift; /* shift for Fortran start by 1 indexing */
282   idx  += shift;
283   for (i=0; i<m; i++) {
284     jrow = ii[i];
285     n    = ii[i+1] - jrow;
286     sum1  = 0.0;
287     sum2  = 0.0;
288     sum3  = 0.0;
289     for (j=0; j<n; j++) {
290       sum1 += v[jrow]*x[3*idx[jrow]];
291       sum2 += v[jrow]*x[3*idx[jrow]+1];
292       sum3 += v[jrow]*x[3*idx[jrow]+2];
293       jrow++;
294      }
295     y[3*i]   = sum1;
296     y[3*i+1] = sum2;
297     y[3*i+2] = sum3;
298   }
299 
300   PetscLogFlops(6*a->nz - 3*m);
301   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
302   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
308 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
309 {
310   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
311   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
312   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
313   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
314 
315   PetscFunctionBegin;
316   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
317   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
318   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
319   y = y + shift; /* shift for Fortran start by 1 indexing */
320   for (i=0; i<m; i++) {
321     idx    = a->j + a->i[i] + shift;
322     v      = a->a + a->i[i] + shift;
323     n      = a->i[i+1] - a->i[i];
324     alpha1 = x[3*i];
325     alpha2 = x[3*i+1];
326     alpha3 = x[3*i+2];
327     while (n-->0) {
328       y[3*(*idx)]   += alpha1*(*v);
329       y[3*(*idx)+1] += alpha2*(*v);
330       y[3*(*idx)+2] += alpha3*(*v);
331       idx++; v++;
332     }
333   }
334   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
335   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
336   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
340 #undef __FUNCT__
341 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
342 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
343 {
344   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
345   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
346   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
347   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
348   int           n,i,jrow,j;
349 
350   PetscFunctionBegin;
351   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
352   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
353   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
354   x    = x + shift;    /* shift for Fortran start by 1 indexing */
355   idx  = a->j;
356   v    = a->a;
357   ii   = a->i;
358 
359   v    += shift; /* shift for Fortran start by 1 indexing */
360   idx  += shift;
361   for (i=0; i<m; i++) {
362     jrow = ii[i];
363     n    = ii[i+1] - jrow;
364     sum1  = 0.0;
365     sum2  = 0.0;
366     sum3  = 0.0;
367     for (j=0; j<n; j++) {
368       sum1 += v[jrow]*x[3*idx[jrow]];
369       sum2 += v[jrow]*x[3*idx[jrow]+1];
370       sum3 += v[jrow]*x[3*idx[jrow]+2];
371       jrow++;
372      }
373     y[3*i]   += sum1;
374     y[3*i+1] += sum2;
375     y[3*i+2] += sum3;
376   }
377 
378   PetscLogFlops(6*a->nz);
379   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
380   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
381   PetscFunctionReturn(0);
382 }
383 #undef __FUNCT__
384 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
385 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
386 {
387   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
388   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
389   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
390   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
391 
392   PetscFunctionBegin;
393   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
394   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
395   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
396   y = y + shift; /* shift for Fortran start by 1 indexing */
397   for (i=0; i<m; i++) {
398     idx    = a->j + a->i[i] + shift;
399     v      = a->a + a->i[i] + shift;
400     n      = a->i[i+1] - a->i[i];
401     alpha1 = x[3*i];
402     alpha2 = x[3*i+1];
403     alpha3 = x[3*i+2];
404     while (n-->0) {
405       y[3*(*idx)]   += alpha1*(*v);
406       y[3*(*idx)+1] += alpha2*(*v);
407       y[3*(*idx)+2] += alpha3*(*v);
408       idx++; v++;
409     }
410   }
411   PetscLogFlops(6*a->nz);
412   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
413   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 /* ------------------------------------------------------------------------------*/
418 #undef __FUNCT__
419 #define __FUNCT__ "MatMult_SeqMAIJ_4"
420 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
421 {
422   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
423   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
424   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
425   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
426   int           n,i,jrow,j;
427 
428   PetscFunctionBegin;
429   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
430   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
431   x    = x + shift;    /* shift for Fortran start by 1 indexing */
432   idx  = a->j;
433   v    = a->a;
434   ii   = a->i;
435 
436   v    += shift; /* shift for Fortran start by 1 indexing */
437   idx  += shift;
438   for (i=0; i<m; i++) {
439     jrow = ii[i];
440     n    = ii[i+1] - jrow;
441     sum1  = 0.0;
442     sum2  = 0.0;
443     sum3  = 0.0;
444     sum4  = 0.0;
445     for (j=0; j<n; j++) {
446       sum1 += v[jrow]*x[4*idx[jrow]];
447       sum2 += v[jrow]*x[4*idx[jrow]+1];
448       sum3 += v[jrow]*x[4*idx[jrow]+2];
449       sum4 += v[jrow]*x[4*idx[jrow]+3];
450       jrow++;
451      }
452     y[4*i]   = sum1;
453     y[4*i+1] = sum2;
454     y[4*i+2] = sum3;
455     y[4*i+3] = sum4;
456   }
457 
458   PetscLogFlops(8*a->nz - 4*m);
459   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
460   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
466 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
467 {
468   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
469   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
470   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
471   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
472 
473   PetscFunctionBegin;
474   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
475   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
476   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
477   y = y + shift; /* shift for Fortran start by 1 indexing */
478   for (i=0; i<m; i++) {
479     idx    = a->j + a->i[i] + shift;
480     v      = a->a + a->i[i] + shift;
481     n      = a->i[i+1] - a->i[i];
482     alpha1 = x[4*i];
483     alpha2 = x[4*i+1];
484     alpha3 = x[4*i+2];
485     alpha4 = x[4*i+3];
486     while (n-->0) {
487       y[4*(*idx)]   += alpha1*(*v);
488       y[4*(*idx)+1] += alpha2*(*v);
489       y[4*(*idx)+2] += alpha3*(*v);
490       y[4*(*idx)+3] += alpha4*(*v);
491       idx++; v++;
492     }
493   }
494   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
495   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
496   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
502 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
503 {
504   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
505   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
506   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
507   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
508   int           n,i,jrow,j;
509 
510   PetscFunctionBegin;
511   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
512   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
513   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
514   x    = x + shift;    /* shift for Fortran start by 1 indexing */
515   idx  = a->j;
516   v    = a->a;
517   ii   = a->i;
518 
519   v    += shift; /* shift for Fortran start by 1 indexing */
520   idx  += shift;
521   for (i=0; i<m; i++) {
522     jrow = ii[i];
523     n    = ii[i+1] - jrow;
524     sum1  = 0.0;
525     sum2  = 0.0;
526     sum3  = 0.0;
527     sum4  = 0.0;
528     for (j=0; j<n; j++) {
529       sum1 += v[jrow]*x[4*idx[jrow]];
530       sum2 += v[jrow]*x[4*idx[jrow]+1];
531       sum3 += v[jrow]*x[4*idx[jrow]+2];
532       sum4 += v[jrow]*x[4*idx[jrow]+3];
533       jrow++;
534      }
535     y[4*i]   += sum1;
536     y[4*i+1] += sum2;
537     y[4*i+2] += sum3;
538     y[4*i+3] += sum4;
539   }
540 
541   PetscLogFlops(8*a->nz - 4*m);
542   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 #undef __FUNCT__
547 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
548 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
549 {
550   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
551   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
552   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
553   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
554 
555   PetscFunctionBegin;
556   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
557   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
558   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
559   y = y + shift; /* shift for Fortran start by 1 indexing */
560   for (i=0; i<m; i++) {
561     idx    = a->j + a->i[i] + shift;
562     v      = a->a + a->i[i] + shift;
563     n      = a->i[i+1] - a->i[i];
564     alpha1 = x[4*i];
565     alpha2 = x[4*i+1];
566     alpha3 = x[4*i+2];
567     alpha4 = x[4*i+3];
568     while (n-->0) {
569       y[4*(*idx)]   += alpha1*(*v);
570       y[4*(*idx)+1] += alpha2*(*v);
571       y[4*(*idx)+2] += alpha3*(*v);
572       y[4*(*idx)+3] += alpha4*(*v);
573       idx++; v++;
574     }
575   }
576   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
577   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
578   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 /* ------------------------------------------------------------------------------*/
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatMult_SeqMAIJ_5"
585 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
586 {
587   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
588   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
589   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
590   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
591   int           n,i,jrow,j;
592 
593   PetscFunctionBegin;
594   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
595   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
596   x    = x + shift;    /* shift for Fortran start by 1 indexing */
597   idx  = a->j;
598   v    = a->a;
599   ii   = a->i;
600 
601   v    += shift; /* shift for Fortran start by 1 indexing */
602   idx  += shift;
603   for (i=0; i<m; i++) {
604     jrow = ii[i];
605     n    = ii[i+1] - jrow;
606     sum1  = 0.0;
607     sum2  = 0.0;
608     sum3  = 0.0;
609     sum4  = 0.0;
610     sum5  = 0.0;
611     for (j=0; j<n; j++) {
612       sum1 += v[jrow]*x[5*idx[jrow]];
613       sum2 += v[jrow]*x[5*idx[jrow]+1];
614       sum3 += v[jrow]*x[5*idx[jrow]+2];
615       sum4 += v[jrow]*x[5*idx[jrow]+3];
616       sum5 += v[jrow]*x[5*idx[jrow]+4];
617       jrow++;
618      }
619     y[5*i]   = sum1;
620     y[5*i+1] = sum2;
621     y[5*i+2] = sum3;
622     y[5*i+3] = sum4;
623     y[5*i+4] = sum5;
624   }
625 
626   PetscLogFlops(10*a->nz - 5*m);
627   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
628   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
629   PetscFunctionReturn(0);
630 }
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
634 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
635 {
636   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
637   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
638   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
639   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
640 
641   PetscFunctionBegin;
642   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
643   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
644   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
645   y = y + shift; /* shift for Fortran start by 1 indexing */
646   for (i=0; i<m; i++) {
647     idx    = a->j + a->i[i] + shift;
648     v      = a->a + a->i[i] + shift;
649     n      = a->i[i+1] - a->i[i];
650     alpha1 = x[5*i];
651     alpha2 = x[5*i+1];
652     alpha3 = x[5*i+2];
653     alpha4 = x[5*i+3];
654     alpha5 = x[5*i+4];
655     while (n-->0) {
656       y[5*(*idx)]   += alpha1*(*v);
657       y[5*(*idx)+1] += alpha2*(*v);
658       y[5*(*idx)+2] += alpha3*(*v);
659       y[5*(*idx)+3] += alpha4*(*v);
660       y[5*(*idx)+4] += alpha5*(*v);
661       idx++; v++;
662     }
663   }
664   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
665   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
666   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
672 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
673 {
674   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
675   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
676   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
677   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
678   int           n,i,jrow,j;
679 
680   PetscFunctionBegin;
681   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
682   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
683   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
684   x    = x + shift;    /* shift for Fortran start by 1 indexing */
685   idx  = a->j;
686   v    = a->a;
687   ii   = a->i;
688 
689   v    += shift; /* shift for Fortran start by 1 indexing */
690   idx  += shift;
691   for (i=0; i<m; i++) {
692     jrow = ii[i];
693     n    = ii[i+1] - jrow;
694     sum1  = 0.0;
695     sum2  = 0.0;
696     sum3  = 0.0;
697     sum4  = 0.0;
698     sum5  = 0.0;
699     for (j=0; j<n; j++) {
700       sum1 += v[jrow]*x[5*idx[jrow]];
701       sum2 += v[jrow]*x[5*idx[jrow]+1];
702       sum3 += v[jrow]*x[5*idx[jrow]+2];
703       sum4 += v[jrow]*x[5*idx[jrow]+3];
704       sum5 += v[jrow]*x[5*idx[jrow]+4];
705       jrow++;
706      }
707     y[5*i]   += sum1;
708     y[5*i+1] += sum2;
709     y[5*i+2] += sum3;
710     y[5*i+3] += sum4;
711     y[5*i+4] += sum5;
712   }
713 
714   PetscLogFlops(10*a->nz);
715   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
716   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNCT__
721 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
722 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
723 {
724   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
725   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
726   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
727   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
728 
729   PetscFunctionBegin;
730   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
731   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
732   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
733   y = y + shift; /* shift for Fortran start by 1 indexing */
734   for (i=0; i<m; i++) {
735     idx    = a->j + a->i[i] + shift;
736     v      = a->a + a->i[i] + shift;
737     n      = a->i[i+1] - a->i[i];
738     alpha1 = x[5*i];
739     alpha2 = x[5*i+1];
740     alpha3 = x[5*i+2];
741     alpha4 = x[5*i+3];
742     alpha5 = x[5*i+4];
743     while (n-->0) {
744       y[5*(*idx)]   += alpha1*(*v);
745       y[5*(*idx)+1] += alpha2*(*v);
746       y[5*(*idx)+2] += alpha3*(*v);
747       y[5*(*idx)+3] += alpha4*(*v);
748       y[5*(*idx)+4] += alpha5*(*v);
749       idx++; v++;
750     }
751   }
752   PetscLogFlops(10*a->nz);
753   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
754   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 /* ------------------------------------------------------------------------------*/
759 #undef __FUNCT__
760 #define __FUNCT__ "MatMult_SeqMAIJ_6"
761 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
762 {
763   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
764   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
765   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
766   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
767   int           n,i,jrow,j;
768 
769   PetscFunctionBegin;
770   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
771   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
772   x    = x + shift;    /* shift for Fortran start by 1 indexing */
773   idx  = a->j;
774   v    = a->a;
775   ii   = a->i;
776 
777   v    += shift; /* shift for Fortran start by 1 indexing */
778   idx  += shift;
779   for (i=0; i<m; i++) {
780     jrow = ii[i];
781     n    = ii[i+1] - jrow;
782     sum1  = 0.0;
783     sum2  = 0.0;
784     sum3  = 0.0;
785     sum4  = 0.0;
786     sum5  = 0.0;
787     sum6  = 0.0;
788     for (j=0; j<n; j++) {
789       sum1 += v[jrow]*x[6*idx[jrow]];
790       sum2 += v[jrow]*x[6*idx[jrow]+1];
791       sum3 += v[jrow]*x[6*idx[jrow]+2];
792       sum4 += v[jrow]*x[6*idx[jrow]+3];
793       sum5 += v[jrow]*x[6*idx[jrow]+4];
794       sum6 += v[jrow]*x[6*idx[jrow]+5];
795       jrow++;
796      }
797     y[6*i]   = sum1;
798     y[6*i+1] = sum2;
799     y[6*i+2] = sum3;
800     y[6*i+3] = sum4;
801     y[6*i+4] = sum5;
802     y[6*i+5] = sum6;
803   }
804 
805   PetscLogFlops(12*a->nz - 6*m);
806   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
807   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
813 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
814 {
815   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
816   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
817   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
818   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
819 
820   PetscFunctionBegin;
821   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
822   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
823   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
824   y = y + shift; /* shift for Fortran start by 1 indexing */
825   for (i=0; i<m; i++) {
826     idx    = a->j + a->i[i] + shift;
827     v      = a->a + a->i[i] + shift;
828     n      = a->i[i+1] - a->i[i];
829     alpha1 = x[6*i];
830     alpha2 = x[6*i+1];
831     alpha3 = x[6*i+2];
832     alpha4 = x[6*i+3];
833     alpha5 = x[6*i+4];
834     alpha6 = x[6*i+5];
835     while (n-->0) {
836       y[6*(*idx)]   += alpha1*(*v);
837       y[6*(*idx)+1] += alpha2*(*v);
838       y[6*(*idx)+2] += alpha3*(*v);
839       y[6*(*idx)+3] += alpha4*(*v);
840       y[6*(*idx)+4] += alpha5*(*v);
841       y[6*(*idx)+5] += alpha6*(*v);
842       idx++; v++;
843     }
844   }
845   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
846   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
847   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
853 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
854 {
855   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
856   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
857   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
858   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
859   int           n,i,jrow,j;
860 
861   PetscFunctionBegin;
862   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
863   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
864   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
865   x    = x + shift;    /* shift for Fortran start by 1 indexing */
866   idx  = a->j;
867   v    = a->a;
868   ii   = a->i;
869 
870   v    += shift; /* shift for Fortran start by 1 indexing */
871   idx  += shift;
872   for (i=0; i<m; i++) {
873     jrow = ii[i];
874     n    = ii[i+1] - jrow;
875     sum1  = 0.0;
876     sum2  = 0.0;
877     sum3  = 0.0;
878     sum4  = 0.0;
879     sum5  = 0.0;
880     sum6  = 0.0;
881     for (j=0; j<n; j++) {
882       sum1 += v[jrow]*x[6*idx[jrow]];
883       sum2 += v[jrow]*x[6*idx[jrow]+1];
884       sum3 += v[jrow]*x[6*idx[jrow]+2];
885       sum4 += v[jrow]*x[6*idx[jrow]+3];
886       sum5 += v[jrow]*x[6*idx[jrow]+4];
887       sum6 += v[jrow]*x[6*idx[jrow]+5];
888       jrow++;
889      }
890     y[6*i]   += sum1;
891     y[6*i+1] += sum2;
892     y[6*i+2] += sum3;
893     y[6*i+3] += sum4;
894     y[6*i+4] += sum5;
895     y[6*i+5] += sum6;
896   }
897 
898   PetscLogFlops(12*a->nz);
899   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
900   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
906 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
907 {
908   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
909   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
910   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
911   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
912 
913   PetscFunctionBegin;
914   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
915   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
916   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
917   y = y + shift; /* shift for Fortran start by 1 indexing */
918   for (i=0; i<m; i++) {
919     idx    = a->j + a->i[i] + shift;
920     v      = a->a + a->i[i] + shift;
921     n      = a->i[i+1] - a->i[i];
922     alpha1 = x[6*i];
923     alpha2 = x[6*i+1];
924     alpha3 = x[6*i+2];
925     alpha4 = x[6*i+3];
926     alpha5 = x[6*i+4];
927     alpha6 = x[6*i+5];
928     while (n-->0) {
929       y[6*(*idx)]   += alpha1*(*v);
930       y[6*(*idx)+1] += alpha2*(*v);
931       y[6*(*idx)+2] += alpha3*(*v);
932       y[6*(*idx)+3] += alpha4*(*v);
933       y[6*(*idx)+4] += alpha5*(*v);
934       y[6*(*idx)+5] += alpha6*(*v);
935       idx++; v++;
936     }
937   }
938   PetscLogFlops(12*a->nz);
939   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
940   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
941   PetscFunctionReturn(0);
942 }
943 
944 /* ------------------------------------------------------------------------------*/
945 #undef __FUNCT__
946 #define __FUNCT__ "MatMult_SeqMAIJ_8"
947 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
948 {
949   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
950   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
951   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
952   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
953   int           n,i,jrow,j;
954 
955   PetscFunctionBegin;
956   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
957   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
958   x    = x + shift;    /* shift for Fortran start by 1 indexing */
959   idx  = a->j;
960   v    = a->a;
961   ii   = a->i;
962 
963   v    += shift; /* shift for Fortran start by 1 indexing */
964   idx  += shift;
965   for (i=0; i<m; i++) {
966     jrow = ii[i];
967     n    = ii[i+1] - jrow;
968     sum1  = 0.0;
969     sum2  = 0.0;
970     sum3  = 0.0;
971     sum4  = 0.0;
972     sum5  = 0.0;
973     sum6  = 0.0;
974     sum7  = 0.0;
975     sum8  = 0.0;
976     for (j=0; j<n; j++) {
977       sum1 += v[jrow]*x[8*idx[jrow]];
978       sum2 += v[jrow]*x[8*idx[jrow]+1];
979       sum3 += v[jrow]*x[8*idx[jrow]+2];
980       sum4 += v[jrow]*x[8*idx[jrow]+3];
981       sum5 += v[jrow]*x[8*idx[jrow]+4];
982       sum6 += v[jrow]*x[8*idx[jrow]+5];
983       sum7 += v[jrow]*x[8*idx[jrow]+6];
984       sum8 += v[jrow]*x[8*idx[jrow]+7];
985       jrow++;
986      }
987     y[8*i]   = sum1;
988     y[8*i+1] = sum2;
989     y[8*i+2] = sum3;
990     y[8*i+3] = sum4;
991     y[8*i+4] = sum5;
992     y[8*i+5] = sum6;
993     y[8*i+6] = sum7;
994     y[8*i+7] = sum8;
995   }
996 
997   PetscLogFlops(16*a->nz - 8*m);
998   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
999   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1005 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1006 {
1007   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1008   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1009   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1010   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1014   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1015   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1016   y = y + shift; /* shift for Fortran start by 1 indexing */
1017   for (i=0; i<m; i++) {
1018     idx    = a->j + a->i[i] + shift;
1019     v      = a->a + a->i[i] + shift;
1020     n      = a->i[i+1] - a->i[i];
1021     alpha1 = x[8*i];
1022     alpha2 = x[8*i+1];
1023     alpha3 = x[8*i+2];
1024     alpha4 = x[8*i+3];
1025     alpha5 = x[8*i+4];
1026     alpha6 = x[8*i+5];
1027     alpha7 = x[8*i+6];
1028     alpha8 = x[8*i+7];
1029     while (n-->0) {
1030       y[8*(*idx)]   += alpha1*(*v);
1031       y[8*(*idx)+1] += alpha2*(*v);
1032       y[8*(*idx)+2] += alpha3*(*v);
1033       y[8*(*idx)+3] += alpha4*(*v);
1034       y[8*(*idx)+4] += alpha5*(*v);
1035       y[8*(*idx)+5] += alpha6*(*v);
1036       y[8*(*idx)+6] += alpha7*(*v);
1037       y[8*(*idx)+7] += alpha8*(*v);
1038       idx++; v++;
1039     }
1040   }
1041   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1042   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1043   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 #undef __FUNCT__
1048 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1049 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1050 {
1051   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1052   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1053   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1054   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
1055   int           n,i,jrow,j;
1056 
1057   PetscFunctionBegin;
1058   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1059   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1060   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1061   x    = x + shift;    /* shift for Fortran start by 1 indexing */
1062   idx  = a->j;
1063   v    = a->a;
1064   ii   = a->i;
1065 
1066   v    += shift; /* shift for Fortran start by 1 indexing */
1067   idx  += shift;
1068   for (i=0; i<m; i++) {
1069     jrow = ii[i];
1070     n    = ii[i+1] - jrow;
1071     sum1  = 0.0;
1072     sum2  = 0.0;
1073     sum3  = 0.0;
1074     sum4  = 0.0;
1075     sum5  = 0.0;
1076     sum6  = 0.0;
1077     sum7  = 0.0;
1078     sum8  = 0.0;
1079     for (j=0; j<n; j++) {
1080       sum1 += v[jrow]*x[8*idx[jrow]];
1081       sum2 += v[jrow]*x[8*idx[jrow]+1];
1082       sum3 += v[jrow]*x[8*idx[jrow]+2];
1083       sum4 += v[jrow]*x[8*idx[jrow]+3];
1084       sum5 += v[jrow]*x[8*idx[jrow]+4];
1085       sum6 += v[jrow]*x[8*idx[jrow]+5];
1086       sum7 += v[jrow]*x[8*idx[jrow]+6];
1087       sum8 += v[jrow]*x[8*idx[jrow]+7];
1088       jrow++;
1089      }
1090     y[8*i]   += sum1;
1091     y[8*i+1] += sum2;
1092     y[8*i+2] += sum3;
1093     y[8*i+3] += sum4;
1094     y[8*i+4] += sum5;
1095     y[8*i+5] += sum6;
1096     y[8*i+6] += sum7;
1097     y[8*i+7] += sum8;
1098   }
1099 
1100   PetscLogFlops(16*a->nz);
1101   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1102   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1108 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1111   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1112   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1113   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
1114 
1115   PetscFunctionBegin;
1116   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1117   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1118   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1119   y = y + shift; /* shift for Fortran start by 1 indexing */
1120   for (i=0; i<m; i++) {
1121     idx    = a->j + a->i[i] + shift;
1122     v      = a->a + a->i[i] + shift;
1123     n      = a->i[i+1] - a->i[i];
1124     alpha1 = x[8*i];
1125     alpha2 = x[8*i+1];
1126     alpha3 = x[8*i+2];
1127     alpha4 = x[8*i+3];
1128     alpha5 = x[8*i+4];
1129     alpha6 = x[8*i+5];
1130     alpha7 = x[8*i+6];
1131     alpha8 = x[8*i+7];
1132     while (n-->0) {
1133       y[8*(*idx)]   += alpha1*(*v);
1134       y[8*(*idx)+1] += alpha2*(*v);
1135       y[8*(*idx)+2] += alpha3*(*v);
1136       y[8*(*idx)+3] += alpha4*(*v);
1137       y[8*(*idx)+4] += alpha5*(*v);
1138       y[8*(*idx)+5] += alpha6*(*v);
1139       y[8*(*idx)+6] += alpha7*(*v);
1140       y[8*(*idx)+7] += alpha8*(*v);
1141       idx++; v++;
1142     }
1143   }
1144   PetscLogFlops(16*a->nz);
1145   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1146   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 /*--------------------------------------------------------------------------------------------*/
1151 #undef __FUNCT__
1152 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1153 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1154 {
1155   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1156   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1157   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1158   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1159   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
1160   int           n,i,jrow,j;
1161 
1162   PetscFunctionBegin;
1163   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1164   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1165   x    = x + shift;    /* shift for Fortran start by 1 indexing */
1166   idx  = a->j;
1167   v    = a->a;
1168   ii   = a->i;
1169 
1170   v    += shift; /* shift for Fortran start by 1 indexing */
1171   idx  += shift;
1172   for (i=0; i<m; i++) {
1173     jrow = ii[i];
1174     n    = ii[i+1] - jrow;
1175     sum1  = 0.0;
1176     sum2  = 0.0;
1177     sum3  = 0.0;
1178     sum4  = 0.0;
1179     sum5  = 0.0;
1180     sum6  = 0.0;
1181     sum7  = 0.0;
1182     sum8  = 0.0;
1183     sum9  = 0.0;
1184     sum10 = 0.0;
1185     sum11 = 0.0;
1186     sum12 = 0.0;
1187     sum13 = 0.0;
1188     sum14 = 0.0;
1189     sum15 = 0.0;
1190     sum16 = 0.0;
1191     for (j=0; j<n; j++) {
1192       sum1  += v[jrow]*x[16*idx[jrow]];
1193       sum2  += v[jrow]*x[16*idx[jrow]+1];
1194       sum3  += v[jrow]*x[16*idx[jrow]+2];
1195       sum4  += v[jrow]*x[16*idx[jrow]+3];
1196       sum5  += v[jrow]*x[16*idx[jrow]+4];
1197       sum6  += v[jrow]*x[16*idx[jrow]+5];
1198       sum7  += v[jrow]*x[16*idx[jrow]+6];
1199       sum8  += v[jrow]*x[16*idx[jrow]+7];
1200       sum9  += v[jrow]*x[16*idx[jrow]+8];
1201       sum10 += v[jrow]*x[16*idx[jrow]+9];
1202       sum11 += v[jrow]*x[16*idx[jrow]+10];
1203       sum12 += v[jrow]*x[16*idx[jrow]+11];
1204       sum13 += v[jrow]*x[16*idx[jrow]+12];
1205       sum14 += v[jrow]*x[16*idx[jrow]+13];
1206       sum15 += v[jrow]*x[16*idx[jrow]+14];
1207       sum16 += v[jrow]*x[16*idx[jrow]+15];
1208       jrow++;
1209      }
1210     y[16*i]    = sum1;
1211     y[16*i+1]  = sum2;
1212     y[16*i+2]  = sum3;
1213     y[16*i+3]  = sum4;
1214     y[16*i+4]  = sum5;
1215     y[16*i+5]  = sum6;
1216     y[16*i+6]  = sum7;
1217     y[16*i+7]  = sum8;
1218     y[16*i+8]  = sum9;
1219     y[16*i+9]  = sum10;
1220     y[16*i+10] = sum11;
1221     y[16*i+11] = sum12;
1222     y[16*i+12] = sum13;
1223     y[16*i+13] = sum14;
1224     y[16*i+14] = sum15;
1225     y[16*i+15] = sum16;
1226   }
1227 
1228   PetscLogFlops(32*a->nz - 16*m);
1229   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1230   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1236 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1237 {
1238   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1239   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1240   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1241   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1242   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
1243 
1244   PetscFunctionBegin;
1245   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1246   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1247   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1248   y = y + shift; /* shift for Fortran start by 1 indexing */
1249   for (i=0; i<m; i++) {
1250     idx    = a->j + a->i[i] + shift;
1251     v      = a->a + a->i[i] + shift;
1252     n      = a->i[i+1] - a->i[i];
1253     alpha1  = x[16*i];
1254     alpha2  = x[16*i+1];
1255     alpha3  = x[16*i+2];
1256     alpha4  = x[16*i+3];
1257     alpha5  = x[16*i+4];
1258     alpha6  = x[16*i+5];
1259     alpha7  = x[16*i+6];
1260     alpha8  = x[16*i+7];
1261     alpha9  = x[16*i+8];
1262     alpha10 = x[16*i+9];
1263     alpha11 = x[16*i+10];
1264     alpha12 = x[16*i+11];
1265     alpha13 = x[16*i+12];
1266     alpha14 = x[16*i+13];
1267     alpha15 = x[16*i+14];
1268     alpha16 = x[16*i+15];
1269     while (n-->0) {
1270       y[16*(*idx)]    += alpha1*(*v);
1271       y[16*(*idx)+1]  += alpha2*(*v);
1272       y[16*(*idx)+2]  += alpha3*(*v);
1273       y[16*(*idx)+3]  += alpha4*(*v);
1274       y[16*(*idx)+4]  += alpha5*(*v);
1275       y[16*(*idx)+5]  += alpha6*(*v);
1276       y[16*(*idx)+6]  += alpha7*(*v);
1277       y[16*(*idx)+7]  += alpha8*(*v);
1278       y[16*(*idx)+8]  += alpha9*(*v);
1279       y[16*(*idx)+9]  += alpha10*(*v);
1280       y[16*(*idx)+10] += alpha11*(*v);
1281       y[16*(*idx)+11] += alpha12*(*v);
1282       y[16*(*idx)+12] += alpha13*(*v);
1283       y[16*(*idx)+13] += alpha14*(*v);
1284       y[16*(*idx)+14] += alpha15*(*v);
1285       y[16*(*idx)+15] += alpha16*(*v);
1286       idx++; v++;
1287     }
1288   }
1289   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1290   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1291   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1297 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1298 {
1299   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1300   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1301   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1302   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1303   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
1304   int           n,i,jrow,j;
1305 
1306   PetscFunctionBegin;
1307   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1308   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1309   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1310   x    = x + shift;    /* shift for Fortran start by 1 indexing */
1311   idx  = a->j;
1312   v    = a->a;
1313   ii   = a->i;
1314 
1315   v    += shift; /* shift for Fortran start by 1 indexing */
1316   idx  += shift;
1317   for (i=0; i<m; i++) {
1318     jrow = ii[i];
1319     n    = ii[i+1] - jrow;
1320     sum1  = 0.0;
1321     sum2  = 0.0;
1322     sum3  = 0.0;
1323     sum4  = 0.0;
1324     sum5  = 0.0;
1325     sum6  = 0.0;
1326     sum7  = 0.0;
1327     sum8  = 0.0;
1328     sum9  = 0.0;
1329     sum10 = 0.0;
1330     sum11 = 0.0;
1331     sum12 = 0.0;
1332     sum13 = 0.0;
1333     sum14 = 0.0;
1334     sum15 = 0.0;
1335     sum16 = 0.0;
1336     for (j=0; j<n; j++) {
1337       sum1  += v[jrow]*x[16*idx[jrow]];
1338       sum2  += v[jrow]*x[16*idx[jrow]+1];
1339       sum3  += v[jrow]*x[16*idx[jrow]+2];
1340       sum4  += v[jrow]*x[16*idx[jrow]+3];
1341       sum5  += v[jrow]*x[16*idx[jrow]+4];
1342       sum6  += v[jrow]*x[16*idx[jrow]+5];
1343       sum7  += v[jrow]*x[16*idx[jrow]+6];
1344       sum8  += v[jrow]*x[16*idx[jrow]+7];
1345       sum9  += v[jrow]*x[16*idx[jrow]+8];
1346       sum10 += v[jrow]*x[16*idx[jrow]+9];
1347       sum11 += v[jrow]*x[16*idx[jrow]+10];
1348       sum12 += v[jrow]*x[16*idx[jrow]+11];
1349       sum13 += v[jrow]*x[16*idx[jrow]+12];
1350       sum14 += v[jrow]*x[16*idx[jrow]+13];
1351       sum15 += v[jrow]*x[16*idx[jrow]+14];
1352       sum16 += v[jrow]*x[16*idx[jrow]+15];
1353       jrow++;
1354      }
1355     y[16*i]    += sum1;
1356     y[16*i+1]  += sum2;
1357     y[16*i+2]  += sum3;
1358     y[16*i+3]  += sum4;
1359     y[16*i+4]  += sum5;
1360     y[16*i+5]  += sum6;
1361     y[16*i+6]  += sum7;
1362     y[16*i+7]  += sum8;
1363     y[16*i+8]  += sum9;
1364     y[16*i+9]  += sum10;
1365     y[16*i+10] += sum11;
1366     y[16*i+11] += sum12;
1367     y[16*i+12] += sum13;
1368     y[16*i+13] += sum14;
1369     y[16*i+14] += sum15;
1370     y[16*i+15] += sum16;
1371   }
1372 
1373   PetscLogFlops(32*a->nz);
1374   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1375   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1381 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1382 {
1383   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1384   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1385   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1386   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1387   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
1388 
1389   PetscFunctionBegin;
1390   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1391   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1392   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1393   y = y + shift; /* shift for Fortran start by 1 indexing */
1394   for (i=0; i<m; i++) {
1395     idx    = a->j + a->i[i] + shift;
1396     v      = a->a + a->i[i] + shift;
1397     n      = a->i[i+1] - a->i[i];
1398     alpha1 = x[16*i];
1399     alpha2 = x[16*i+1];
1400     alpha3 = x[16*i+2];
1401     alpha4 = x[16*i+3];
1402     alpha5 = x[16*i+4];
1403     alpha6 = x[16*i+5];
1404     alpha7 = x[16*i+6];
1405     alpha8 = x[16*i+7];
1406     alpha9  = x[16*i+8];
1407     alpha10 = x[16*i+9];
1408     alpha11 = x[16*i+10];
1409     alpha12 = x[16*i+11];
1410     alpha13 = x[16*i+12];
1411     alpha14 = x[16*i+13];
1412     alpha15 = x[16*i+14];
1413     alpha16 = x[16*i+15];
1414     while (n-->0) {
1415       y[16*(*idx)]   += alpha1*(*v);
1416       y[16*(*idx)+1] += alpha2*(*v);
1417       y[16*(*idx)+2] += alpha3*(*v);
1418       y[16*(*idx)+3] += alpha4*(*v);
1419       y[16*(*idx)+4] += alpha5*(*v);
1420       y[16*(*idx)+5] += alpha6*(*v);
1421       y[16*(*idx)+6] += alpha7*(*v);
1422       y[16*(*idx)+7] += alpha8*(*v);
1423       y[16*(*idx)+8]  += alpha9*(*v);
1424       y[16*(*idx)+9]  += alpha10*(*v);
1425       y[16*(*idx)+10] += alpha11*(*v);
1426       y[16*(*idx)+11] += alpha12*(*v);
1427       y[16*(*idx)+12] += alpha13*(*v);
1428       y[16*(*idx)+13] += alpha14*(*v);
1429       y[16*(*idx)+14] += alpha15*(*v);
1430       y[16*(*idx)+15] += alpha16*(*v);
1431       idx++; v++;
1432     }
1433   }
1434   PetscLogFlops(32*a->nz);
1435   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1436   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 /*===================================================================================*/
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1443 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1444 {
1445   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1446   int         ierr;
1447   PetscFunctionBegin;
1448 
1449   /* start the scatter */
1450   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1451   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1452   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1453   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1459 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1460 {
1461   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1462   int         ierr;
1463   PetscFunctionBegin;
1464   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1465   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1466   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1467   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1473 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1474 {
1475   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1476   int         ierr;
1477   PetscFunctionBegin;
1478 
1479   /* start the scatter */
1480   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1481   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1482   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1483   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1489 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1490 {
1491   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1492   int         ierr;
1493   PetscFunctionBegin;
1494   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1495   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1496   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1497   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 /* ---------------------------------------------------------------------------------- */
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatCreateMAIJ"
1504 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1505 {
1506   int         ierr,size,n;
1507   Mat_MPIMAIJ *b;
1508   Mat         B;
1509 
1510   PetscFunctionBegin;
1511   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1512 
1513   if (dof == 1) {
1514     *maij = A;
1515   } else {
1516     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1517     B->assembled    = PETSC_TRUE;
1518 
1519     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1520     if (size == 1) {
1521       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
1522       B->ops->destroy = MatDestroy_SeqMAIJ;
1523       b      = (Mat_MPIMAIJ*)B->data;
1524       b->dof = dof;
1525       b->AIJ = A;
1526       if (dof == 2) {
1527         B->ops->mult             = MatMult_SeqMAIJ_2;
1528         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1529         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1530         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1531       } else if (dof == 3) {
1532         B->ops->mult             = MatMult_SeqMAIJ_3;
1533         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1534         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1535         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1536       } else if (dof == 4) {
1537         B->ops->mult             = MatMult_SeqMAIJ_4;
1538         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1539         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1540         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1541       } else if (dof == 5) {
1542         B->ops->mult             = MatMult_SeqMAIJ_5;
1543         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1544         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1545         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1546       } else if (dof == 6) {
1547         B->ops->mult             = MatMult_SeqMAIJ_6;
1548         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1549         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1550         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1551       } else if (dof == 8) {
1552         B->ops->mult             = MatMult_SeqMAIJ_8;
1553         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1554         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1555         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1556       } else if (dof == 16) {
1557         B->ops->mult             = MatMult_SeqMAIJ_16;
1558         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1559         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1560         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1561       } else {
1562         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1563       }
1564     } else {
1565       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1566       IS         from,to;
1567       Vec        gvec;
1568       int        *garray,i;
1569 
1570       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1571       B->ops->destroy = MatDestroy_MPIMAIJ;
1572       b      = (Mat_MPIMAIJ*)B->data;
1573       b->dof = dof;
1574       b->A   = A;
1575       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
1576       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
1577 
1578       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1579       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1580 
1581       /* create two temporary Index sets for build scatter gather */
1582       ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr);
1583       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1584       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1585       ierr = PetscFree(garray);CHKERRQ(ierr);
1586       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1587 
1588       /* create temporary global vector to generate scatter context */
1589       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1590 
1591       /* generate the scatter context */
1592       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1593 
1594       ierr = ISDestroy(from);CHKERRQ(ierr);
1595       ierr = ISDestroy(to);CHKERRQ(ierr);
1596       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1597 
1598       B->ops->mult             = MatMult_MPIMAIJ_dof;
1599       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1600       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1601       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1602     }
1603     *maij = B;
1604   }
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 
1609 
1610 
1611 
1612 
1613 
1614 
1615 
1616 
1617 
1618 
1619