xref: /petsc/src/mat/impls/maij/maij.c (revision 456192e25527e6dfd94efe19f31bdabe85ed0893)
1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include "src/mat/impls/maij/maij.h"
20 
21 #undef __FUNCT__
22 #define __FUNCT__ "MatMAIJGetAIJ"
23 int MatMAIJGetAIJ(Mat A,Mat *B)
24 {
25   int         ierr;
26   PetscTruth  ismpimaij,isseqmaij;
27 
28   PetscFunctionBegin;
29   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
30   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
31   if (ismpimaij) {
32     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
33 
34     *B = b->A;
35   } else if (isseqmaij) {
36     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
37 
38     *B = b->AIJ;
39   } else {
40     *B = A;
41   }
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatMAIJRedimension"
47 int MatMAIJRedimension(Mat A,int dof,Mat *B)
48 {
49   int ierr;
50   Mat Aij;
51 
52   PetscFunctionBegin;
53   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
54   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDestroy_SeqMAIJ"
60 int MatDestroy_SeqMAIJ(Mat A)
61 {
62   int         ierr;
63   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
64 
65   PetscFunctionBegin;
66   if (b->AIJ) {
67     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
68   }
69   ierr = PetscFree(b);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatDestroy_MPIMAIJ"
75 int MatDestroy_MPIMAIJ(Mat A)
76 {
77   int         ierr;
78   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
79 
80   PetscFunctionBegin;
81   if (b->AIJ) {
82     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
83   }
84   if (b->OAIJ) {
85     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
86   }
87   if (b->A) {
88     ierr = MatDestroy(b->A);CHKERRQ(ierr);
89   }
90   if (b->ctx) {
91     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
92   }
93   if (b->w) {
94     ierr = VecDestroy(b->w);CHKERRQ(ierr);
95   }
96   ierr = PetscFree(b);CHKERRQ(ierr);
97   PetscFunctionReturn(0);
98 }
99 
100 EXTERN_C_BEGIN
101 #undef __FUNCT__
102 #define __FUNCT__ "MatCreate_MAIJ"
103 int MatCreate_MAIJ(Mat A)
104 {
105   int         ierr;
106   Mat_MPIMAIJ *b;
107 
108   PetscFunctionBegin;
109   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
110   A->data  = (void*)b;
111   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
112   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
113   A->factor           = 0;
114   A->mapping          = 0;
115 
116   b->AIJ  = 0;
117   b->dof  = 0;
118   b->OAIJ = 0;
119   b->ctx  = 0;
120   b->w    = 0;
121   PetscFunctionReturn(0);
122 }
123 EXTERN_C_END
124 
125 /* --------------------------------------------------------------------------------------*/
126 #undef __FUNCT__
127 #define __FUNCT__ "MatMult_SeqMAIJ_2"
128 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
129 {
130   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
131   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
132   PetscScalar   *x,*y,*v,sum1, sum2;
133   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
134   int           n,i,jrow,j;
135 
136   PetscFunctionBegin;
137   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
138   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
139   x    = x + shift;    /* shift for Fortran start by 1 indexing */
140   idx  = a->j;
141   v    = a->a;
142   ii   = a->i;
143 
144   v    += shift; /* shift for Fortran start by 1 indexing */
145   idx  += shift;
146   for (i=0; i<m; i++) {
147     jrow = ii[i];
148     n    = ii[i+1] - jrow;
149     sum1  = 0.0;
150     sum2  = 0.0;
151     for (j=0; j<n; j++) {
152       sum1 += v[jrow]*x[2*idx[jrow]];
153       sum2 += v[jrow]*x[2*idx[jrow]+1];
154       jrow++;
155      }
156     y[2*i]   = sum1;
157     y[2*i+1] = sum2;
158   }
159 
160   PetscLogFlops(4*a->nz - 2*m);
161   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
162   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 #undef __FUNCT__
167 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
168 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
169 {
170   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
171   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
172   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
173   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
174 
175   PetscFunctionBegin;
176   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
177   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
178   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
179   y = y + shift; /* shift for Fortran start by 1 indexing */
180   for (i=0; i<m; i++) {
181     idx    = a->j + a->i[i] + shift;
182     v      = a->a + a->i[i] + shift;
183     n      = a->i[i+1] - a->i[i];
184     alpha1 = x[2*i];
185     alpha2 = x[2*i+1];
186     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
187   }
188   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
189   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
190   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
191   PetscFunctionReturn(0);
192 }
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
196 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
197 {
198   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
199   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
200   PetscScalar   *x,*y,*v,sum1, sum2;
201   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
202   int           n,i,jrow,j;
203 
204   PetscFunctionBegin;
205   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
206   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
207   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
208   x    = x + shift;    /* shift for Fortran start by 1 indexing */
209   idx  = a->j;
210   v    = a->a;
211   ii   = a->i;
212 
213   v    += shift; /* shift for Fortran start by 1 indexing */
214   idx  += shift;
215   for (i=0; i<m; i++) {
216     jrow = ii[i];
217     n    = ii[i+1] - jrow;
218     sum1  = 0.0;
219     sum2  = 0.0;
220     for (j=0; j<n; j++) {
221       sum1 += v[jrow]*x[2*idx[jrow]];
222       sum2 += v[jrow]*x[2*idx[jrow]+1];
223       jrow++;
224      }
225     y[2*i]   += sum1;
226     y[2*i+1] += sum2;
227   }
228 
229   PetscLogFlops(4*a->nz - 2*m);
230   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
231   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
232   PetscFunctionReturn(0);
233 }
234 #undef __FUNCT__
235 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
236 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
237 {
238   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
239   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
240   PetscScalar   *x,*y,*v,alpha1,alpha2;
241   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
242 
243   PetscFunctionBegin;
244   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
245   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
246   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
247   y = y + shift; /* shift for Fortran start by 1 indexing */
248   for (i=0; i<m; i++) {
249     idx   = a->j + a->i[i] + shift;
250     v     = a->a + a->i[i] + shift;
251     n     = a->i[i+1] - a->i[i];
252     alpha1 = x[2*i];
253     alpha2 = x[2*i+1];
254     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
255   }
256   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
257   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
258   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
259   PetscFunctionReturn(0);
260 }
261 /* --------------------------------------------------------------------------------------*/
262 #undef __FUNCT__
263 #define __FUNCT__ "MatMult_SeqMAIJ_3"
264 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
265 {
266   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
267   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
268   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
269   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
270   int           n,i,jrow,j;
271 
272   PetscFunctionBegin;
273   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
274   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
275   x    = x + shift;    /* shift for Fortran start by 1 indexing */
276   idx  = a->j;
277   v    = a->a;
278   ii   = a->i;
279 
280   v    += shift; /* shift for Fortran start by 1 indexing */
281   idx  += shift;
282   for (i=0; i<m; i++) {
283     jrow = ii[i];
284     n    = ii[i+1] - jrow;
285     sum1  = 0.0;
286     sum2  = 0.0;
287     sum3  = 0.0;
288     for (j=0; j<n; j++) {
289       sum1 += v[jrow]*x[3*idx[jrow]];
290       sum2 += v[jrow]*x[3*idx[jrow]+1];
291       sum3 += v[jrow]*x[3*idx[jrow]+2];
292       jrow++;
293      }
294     y[3*i]   = sum1;
295     y[3*i+1] = sum2;
296     y[3*i+2] = sum3;
297   }
298 
299   PetscLogFlops(6*a->nz - 3*m);
300   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
301   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
302   PetscFunctionReturn(0);
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
307 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
308 {
309   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
310   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
311   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
312   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
313 
314   PetscFunctionBegin;
315   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
316   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
317   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
318   y = y + shift; /* shift for Fortran start by 1 indexing */
319   for (i=0; i<m; i++) {
320     idx    = a->j + a->i[i] + shift;
321     v      = a->a + a->i[i] + shift;
322     n      = a->i[i+1] - a->i[i];
323     alpha1 = x[3*i];
324     alpha2 = x[3*i+1];
325     alpha3 = x[3*i+2];
326     while (n-->0) {
327       y[3*(*idx)]   += alpha1*(*v);
328       y[3*(*idx)+1] += alpha2*(*v);
329       y[3*(*idx)+2] += alpha3*(*v);
330       idx++; v++;
331     }
332   }
333   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
334   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
335   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
341 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
342 {
343   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
344   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
345   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
346   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
347   int           n,i,jrow,j;
348 
349   PetscFunctionBegin;
350   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
353   x    = x + shift;    /* shift for Fortran start by 1 indexing */
354   idx  = a->j;
355   v    = a->a;
356   ii   = a->i;
357 
358   v    += shift; /* shift for Fortran start by 1 indexing */
359   idx  += shift;
360   for (i=0; i<m; i++) {
361     jrow = ii[i];
362     n    = ii[i+1] - jrow;
363     sum1  = 0.0;
364     sum2  = 0.0;
365     sum3  = 0.0;
366     for (j=0; j<n; j++) {
367       sum1 += v[jrow]*x[3*idx[jrow]];
368       sum2 += v[jrow]*x[3*idx[jrow]+1];
369       sum3 += v[jrow]*x[3*idx[jrow]+2];
370       jrow++;
371      }
372     y[3*i]   += sum1;
373     y[3*i+1] += sum2;
374     y[3*i+2] += sum3;
375   }
376 
377   PetscLogFlops(6*a->nz);
378   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
379   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
380   PetscFunctionReturn(0);
381 }
382 #undef __FUNCT__
383 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
384 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
385 {
386   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
387   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
388   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
389   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
390 
391   PetscFunctionBegin;
392   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
393   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
394   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
395   y = y + shift; /* shift for Fortran start by 1 indexing */
396   for (i=0; i<m; i++) {
397     idx    = a->j + a->i[i] + shift;
398     v      = a->a + a->i[i] + shift;
399     n      = a->i[i+1] - a->i[i];
400     alpha1 = x[3*i];
401     alpha2 = x[3*i+1];
402     alpha3 = x[3*i+2];
403     while (n-->0) {
404       y[3*(*idx)]   += alpha1*(*v);
405       y[3*(*idx)+1] += alpha2*(*v);
406       y[3*(*idx)+2] += alpha3*(*v);
407       idx++; v++;
408     }
409   }
410   PetscLogFlops(6*a->nz);
411   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
412   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
413   PetscFunctionReturn(0);
414 }
415 
416 /* ------------------------------------------------------------------------------*/
417 #undef __FUNCT__
418 #define __FUNCT__ "MatMult_SeqMAIJ_4"
419 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
420 {
421   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
422   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
423   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
424   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
425   int           n,i,jrow,j;
426 
427   PetscFunctionBegin;
428   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
430   x    = x + shift;    /* shift for Fortran start by 1 indexing */
431   idx  = a->j;
432   v    = a->a;
433   ii   = a->i;
434 
435   v    += shift; /* shift for Fortran start by 1 indexing */
436   idx  += shift;
437   for (i=0; i<m; i++) {
438     jrow = ii[i];
439     n    = ii[i+1] - jrow;
440     sum1  = 0.0;
441     sum2  = 0.0;
442     sum3  = 0.0;
443     sum4  = 0.0;
444     for (j=0; j<n; j++) {
445       sum1 += v[jrow]*x[4*idx[jrow]];
446       sum2 += v[jrow]*x[4*idx[jrow]+1];
447       sum3 += v[jrow]*x[4*idx[jrow]+2];
448       sum4 += v[jrow]*x[4*idx[jrow]+3];
449       jrow++;
450      }
451     y[4*i]   = sum1;
452     y[4*i+1] = sum2;
453     y[4*i+2] = sum3;
454     y[4*i+3] = sum4;
455   }
456 
457   PetscLogFlops(8*a->nz - 4*m);
458   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
465 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
466 {
467   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
468   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
469   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
470   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
471 
472   PetscFunctionBegin;
473   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
474   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
475   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
476   y = y + shift; /* shift for Fortran start by 1 indexing */
477   for (i=0; i<m; i++) {
478     idx    = a->j + a->i[i] + shift;
479     v      = a->a + a->i[i] + shift;
480     n      = a->i[i+1] - a->i[i];
481     alpha1 = x[4*i];
482     alpha2 = x[4*i+1];
483     alpha3 = x[4*i+2];
484     alpha4 = x[4*i+3];
485     while (n-->0) {
486       y[4*(*idx)]   += alpha1*(*v);
487       y[4*(*idx)+1] += alpha2*(*v);
488       y[4*(*idx)+2] += alpha3*(*v);
489       y[4*(*idx)+3] += alpha4*(*v);
490       idx++; v++;
491     }
492   }
493   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
494   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
495   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNCT__
500 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
501 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
502 {
503   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
504   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
505   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
506   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
507   int           n,i,jrow,j;
508 
509   PetscFunctionBegin;
510   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
511   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
512   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
513   x    = x + shift;    /* shift for Fortran start by 1 indexing */
514   idx  = a->j;
515   v    = a->a;
516   ii   = a->i;
517 
518   v    += shift; /* shift for Fortran start by 1 indexing */
519   idx  += shift;
520   for (i=0; i<m; i++) {
521     jrow = ii[i];
522     n    = ii[i+1] - jrow;
523     sum1  = 0.0;
524     sum2  = 0.0;
525     sum3  = 0.0;
526     sum4  = 0.0;
527     for (j=0; j<n; j++) {
528       sum1 += v[jrow]*x[4*idx[jrow]];
529       sum2 += v[jrow]*x[4*idx[jrow]+1];
530       sum3 += v[jrow]*x[4*idx[jrow]+2];
531       sum4 += v[jrow]*x[4*idx[jrow]+3];
532       jrow++;
533      }
534     y[4*i]   += sum1;
535     y[4*i+1] += sum2;
536     y[4*i+2] += sum3;
537     y[4*i+3] += sum4;
538   }
539 
540   PetscLogFlops(8*a->nz - 4*m);
541   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
542   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 #undef __FUNCT__
546 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
547 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
548 {
549   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
550   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
551   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
552   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
553 
554   PetscFunctionBegin;
555   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
556   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
557   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
558   y = y + shift; /* shift for Fortran start by 1 indexing */
559   for (i=0; i<m; i++) {
560     idx    = a->j + a->i[i] + shift;
561     v      = a->a + a->i[i] + shift;
562     n      = a->i[i+1] - a->i[i];
563     alpha1 = x[4*i];
564     alpha2 = x[4*i+1];
565     alpha3 = x[4*i+2];
566     alpha4 = x[4*i+3];
567     while (n-->0) {
568       y[4*(*idx)]   += alpha1*(*v);
569       y[4*(*idx)+1] += alpha2*(*v);
570       y[4*(*idx)+2] += alpha3*(*v);
571       y[4*(*idx)+3] += alpha4*(*v);
572       idx++; v++;
573     }
574   }
575   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
576   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 /* ------------------------------------------------------------------------------*/
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatMult_SeqMAIJ_5"
584 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
585 {
586   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
587   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
588   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
589   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
590   int           n,i,jrow,j;
591 
592   PetscFunctionBegin;
593   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
594   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
595   x    = x + shift;    /* shift for Fortran start by 1 indexing */
596   idx  = a->j;
597   v    = a->a;
598   ii   = a->i;
599 
600   v    += shift; /* shift for Fortran start by 1 indexing */
601   idx  += shift;
602   for (i=0; i<m; i++) {
603     jrow = ii[i];
604     n    = ii[i+1] - jrow;
605     sum1  = 0.0;
606     sum2  = 0.0;
607     sum3  = 0.0;
608     sum4  = 0.0;
609     sum5  = 0.0;
610     for (j=0; j<n; j++) {
611       sum1 += v[jrow]*x[5*idx[jrow]];
612       sum2 += v[jrow]*x[5*idx[jrow]+1];
613       sum3 += v[jrow]*x[5*idx[jrow]+2];
614       sum4 += v[jrow]*x[5*idx[jrow]+3];
615       sum5 += v[jrow]*x[5*idx[jrow]+4];
616       jrow++;
617      }
618     y[5*i]   = sum1;
619     y[5*i+1] = sum2;
620     y[5*i+2] = sum3;
621     y[5*i+3] = sum4;
622     y[5*i+4] = sum5;
623   }
624 
625   PetscLogFlops(10*a->nz - 5*m);
626   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
627   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
633 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
634 {
635   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
636   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
637   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
638   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
639 
640   PetscFunctionBegin;
641   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
642   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
643   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
644   y = y + shift; /* shift for Fortran start by 1 indexing */
645   for (i=0; i<m; i++) {
646     idx    = a->j + a->i[i] + shift;
647     v      = a->a + a->i[i] + shift;
648     n      = a->i[i+1] - a->i[i];
649     alpha1 = x[5*i];
650     alpha2 = x[5*i+1];
651     alpha3 = x[5*i+2];
652     alpha4 = x[5*i+3];
653     alpha5 = x[5*i+4];
654     while (n-->0) {
655       y[5*(*idx)]   += alpha1*(*v);
656       y[5*(*idx)+1] += alpha2*(*v);
657       y[5*(*idx)+2] += alpha3*(*v);
658       y[5*(*idx)+3] += alpha4*(*v);
659       y[5*(*idx)+4] += alpha5*(*v);
660       idx++; v++;
661     }
662   }
663   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
664   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
665   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
666   PetscFunctionReturn(0);
667 }
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
671 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
672 {
673   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
674   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
675   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
676   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
677   int           n,i,jrow,j;
678 
679   PetscFunctionBegin;
680   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
681   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
682   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
683   x    = x + shift;    /* shift for Fortran start by 1 indexing */
684   idx  = a->j;
685   v    = a->a;
686   ii   = a->i;
687 
688   v    += shift; /* shift for Fortran start by 1 indexing */
689   idx  += shift;
690   for (i=0; i<m; i++) {
691     jrow = ii[i];
692     n    = ii[i+1] - jrow;
693     sum1  = 0.0;
694     sum2  = 0.0;
695     sum3  = 0.0;
696     sum4  = 0.0;
697     sum5  = 0.0;
698     for (j=0; j<n; j++) {
699       sum1 += v[jrow]*x[5*idx[jrow]];
700       sum2 += v[jrow]*x[5*idx[jrow]+1];
701       sum3 += v[jrow]*x[5*idx[jrow]+2];
702       sum4 += v[jrow]*x[5*idx[jrow]+3];
703       sum5 += v[jrow]*x[5*idx[jrow]+4];
704       jrow++;
705      }
706     y[5*i]   += sum1;
707     y[5*i+1] += sum2;
708     y[5*i+2] += sum3;
709     y[5*i+3] += sum4;
710     y[5*i+4] += sum5;
711   }
712 
713   PetscLogFlops(10*a->nz);
714   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
715   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 #undef __FUNCT__
720 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
721 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
722 {
723   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
724   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
725   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
726   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
727 
728   PetscFunctionBegin;
729   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
730   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
731   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
732   y = y + shift; /* shift for Fortran start by 1 indexing */
733   for (i=0; i<m; i++) {
734     idx    = a->j + a->i[i] + shift;
735     v      = a->a + a->i[i] + shift;
736     n      = a->i[i+1] - a->i[i];
737     alpha1 = x[5*i];
738     alpha2 = x[5*i+1];
739     alpha3 = x[5*i+2];
740     alpha4 = x[5*i+3];
741     alpha5 = x[5*i+4];
742     while (n-->0) {
743       y[5*(*idx)]   += alpha1*(*v);
744       y[5*(*idx)+1] += alpha2*(*v);
745       y[5*(*idx)+2] += alpha3*(*v);
746       y[5*(*idx)+3] += alpha4*(*v);
747       y[5*(*idx)+4] += alpha5*(*v);
748       idx++; v++;
749     }
750   }
751   PetscLogFlops(10*a->nz);
752   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
753   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 /* ------------------------------------------------------------------------------*/
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMult_SeqMAIJ_6"
760 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
761 {
762   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
763   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
764   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
765   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
766   int           n,i,jrow,j;
767 
768   PetscFunctionBegin;
769   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
770   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
771   x    = x + shift;    /* shift for Fortran start by 1 indexing */
772   idx  = a->j;
773   v    = a->a;
774   ii   = a->i;
775 
776   v    += shift; /* shift for Fortran start by 1 indexing */
777   idx  += shift;
778   for (i=0; i<m; i++) {
779     jrow = ii[i];
780     n    = ii[i+1] - jrow;
781     sum1  = 0.0;
782     sum2  = 0.0;
783     sum3  = 0.0;
784     sum4  = 0.0;
785     sum5  = 0.0;
786     sum6  = 0.0;
787     for (j=0; j<n; j++) {
788       sum1 += v[jrow]*x[6*idx[jrow]];
789       sum2 += v[jrow]*x[6*idx[jrow]+1];
790       sum3 += v[jrow]*x[6*idx[jrow]+2];
791       sum4 += v[jrow]*x[6*idx[jrow]+3];
792       sum5 += v[jrow]*x[6*idx[jrow]+4];
793       sum6 += v[jrow]*x[6*idx[jrow]+5];
794       jrow++;
795      }
796     y[6*i]   = sum1;
797     y[6*i+1] = sum2;
798     y[6*i+2] = sum3;
799     y[6*i+3] = sum4;
800     y[6*i+4] = sum5;
801     y[6*i+5] = sum6;
802   }
803 
804   PetscLogFlops(12*a->nz - 6*m);
805   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
806   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
812 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
813 {
814   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
815   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
816   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
817   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
818 
819   PetscFunctionBegin;
820   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
821   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
822   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
823   y = y + shift; /* shift for Fortran start by 1 indexing */
824   for (i=0; i<m; i++) {
825     idx    = a->j + a->i[i] + shift;
826     v      = a->a + a->i[i] + shift;
827     n      = a->i[i+1] - a->i[i];
828     alpha1 = x[6*i];
829     alpha2 = x[6*i+1];
830     alpha3 = x[6*i+2];
831     alpha4 = x[6*i+3];
832     alpha5 = x[6*i+4];
833     alpha6 = x[6*i+5];
834     while (n-->0) {
835       y[6*(*idx)]   += alpha1*(*v);
836       y[6*(*idx)+1] += alpha2*(*v);
837       y[6*(*idx)+2] += alpha3*(*v);
838       y[6*(*idx)+3] += alpha4*(*v);
839       y[6*(*idx)+4] += alpha5*(*v);
840       y[6*(*idx)+5] += alpha6*(*v);
841       idx++; v++;
842     }
843   }
844   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
845   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
846   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
847   PetscFunctionReturn(0);
848 }
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
852 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
853 {
854   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
855   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
856   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
857   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
858   int           n,i,jrow,j;
859 
860   PetscFunctionBegin;
861   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
862   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
863   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
864   x    = x + shift;    /* shift for Fortran start by 1 indexing */
865   idx  = a->j;
866   v    = a->a;
867   ii   = a->i;
868 
869   v    += shift; /* shift for Fortran start by 1 indexing */
870   idx  += shift;
871   for (i=0; i<m; i++) {
872     jrow = ii[i];
873     n    = ii[i+1] - jrow;
874     sum1  = 0.0;
875     sum2  = 0.0;
876     sum3  = 0.0;
877     sum4  = 0.0;
878     sum5  = 0.0;
879     sum6  = 0.0;
880     for (j=0; j<n; j++) {
881       sum1 += v[jrow]*x[6*idx[jrow]];
882       sum2 += v[jrow]*x[6*idx[jrow]+1];
883       sum3 += v[jrow]*x[6*idx[jrow]+2];
884       sum4 += v[jrow]*x[6*idx[jrow]+3];
885       sum5 += v[jrow]*x[6*idx[jrow]+4];
886       sum6 += v[jrow]*x[6*idx[jrow]+5];
887       jrow++;
888      }
889     y[6*i]   += sum1;
890     y[6*i+1] += sum2;
891     y[6*i+2] += sum3;
892     y[6*i+3] += sum4;
893     y[6*i+4] += sum5;
894     y[6*i+5] += sum6;
895   }
896 
897   PetscLogFlops(12*a->nz);
898   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
899   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNCT__
904 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
905 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
906 {
907   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
908   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
909   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
910   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
911 
912   PetscFunctionBegin;
913   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
914   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
915   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
916   y = y + shift; /* shift for Fortran start by 1 indexing */
917   for (i=0; i<m; i++) {
918     idx    = a->j + a->i[i] + shift;
919     v      = a->a + a->i[i] + shift;
920     n      = a->i[i+1] - a->i[i];
921     alpha1 = x[6*i];
922     alpha2 = x[6*i+1];
923     alpha3 = x[6*i+2];
924     alpha4 = x[6*i+3];
925     alpha5 = x[6*i+4];
926     alpha6 = x[6*i+5];
927     while (n-->0) {
928       y[6*(*idx)]   += alpha1*(*v);
929       y[6*(*idx)+1] += alpha2*(*v);
930       y[6*(*idx)+2] += alpha3*(*v);
931       y[6*(*idx)+3] += alpha4*(*v);
932       y[6*(*idx)+4] += alpha5*(*v);
933       y[6*(*idx)+5] += alpha6*(*v);
934       idx++; v++;
935     }
936   }
937   PetscLogFlops(12*a->nz);
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /* ------------------------------------------------------------------------------*/
944 #undef __FUNCT__
945 #define __FUNCT__ "MatMult_SeqMAIJ_8"
946 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
947 {
948   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
949   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
950   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
951   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
952   int           n,i,jrow,j;
953 
954   PetscFunctionBegin;
955   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
956   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
957   x    = x + shift;    /* shift for Fortran start by 1 indexing */
958   idx  = a->j;
959   v    = a->a;
960   ii   = a->i;
961 
962   v    += shift; /* shift for Fortran start by 1 indexing */
963   idx  += shift;
964   for (i=0; i<m; i++) {
965     jrow = ii[i];
966     n    = ii[i+1] - jrow;
967     sum1  = 0.0;
968     sum2  = 0.0;
969     sum3  = 0.0;
970     sum4  = 0.0;
971     sum5  = 0.0;
972     sum6  = 0.0;
973     sum7  = 0.0;
974     sum8  = 0.0;
975     for (j=0; j<n; j++) {
976       sum1 += v[jrow]*x[8*idx[jrow]];
977       sum2 += v[jrow]*x[8*idx[jrow]+1];
978       sum3 += v[jrow]*x[8*idx[jrow]+2];
979       sum4 += v[jrow]*x[8*idx[jrow]+3];
980       sum5 += v[jrow]*x[8*idx[jrow]+4];
981       sum6 += v[jrow]*x[8*idx[jrow]+5];
982       sum7 += v[jrow]*x[8*idx[jrow]+6];
983       sum8 += v[jrow]*x[8*idx[jrow]+7];
984       jrow++;
985      }
986     y[8*i]   = sum1;
987     y[8*i+1] = sum2;
988     y[8*i+2] = sum3;
989     y[8*i+3] = sum4;
990     y[8*i+4] = sum5;
991     y[8*i+5] = sum6;
992     y[8*i+6] = sum7;
993     y[8*i+7] = sum8;
994   }
995 
996   PetscLogFlops(16*a->nz - 8*m);
997   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
998   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1004 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1005 {
1006   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1007   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1008   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1009   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
1010 
1011   PetscFunctionBegin;
1012   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1013   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1014   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1015   y = y + shift; /* shift for Fortran start by 1 indexing */
1016   for (i=0; i<m; i++) {
1017     idx    = a->j + a->i[i] + shift;
1018     v      = a->a + a->i[i] + shift;
1019     n      = a->i[i+1] - a->i[i];
1020     alpha1 = x[8*i];
1021     alpha2 = x[8*i+1];
1022     alpha3 = x[8*i+2];
1023     alpha4 = x[8*i+3];
1024     alpha5 = x[8*i+4];
1025     alpha6 = x[8*i+5];
1026     alpha7 = x[8*i+6];
1027     alpha8 = x[8*i+7];
1028     while (n-->0) {
1029       y[8*(*idx)]   += alpha1*(*v);
1030       y[8*(*idx)+1] += alpha2*(*v);
1031       y[8*(*idx)+2] += alpha3*(*v);
1032       y[8*(*idx)+3] += alpha4*(*v);
1033       y[8*(*idx)+4] += alpha5*(*v);
1034       y[8*(*idx)+5] += alpha6*(*v);
1035       y[8*(*idx)+6] += alpha7*(*v);
1036       y[8*(*idx)+7] += alpha8*(*v);
1037       idx++; v++;
1038     }
1039   }
1040   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1041   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1042   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1048 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1049 {
1050   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1051   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1052   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1053   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
1054   int           n,i,jrow,j;
1055 
1056   PetscFunctionBegin;
1057   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1058   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1059   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1060   x    = x + shift;    /* shift for Fortran start by 1 indexing */
1061   idx  = a->j;
1062   v    = a->a;
1063   ii   = a->i;
1064 
1065   v    += shift; /* shift for Fortran start by 1 indexing */
1066   idx  += shift;
1067   for (i=0; i<m; i++) {
1068     jrow = ii[i];
1069     n    = ii[i+1] - jrow;
1070     sum1  = 0.0;
1071     sum2  = 0.0;
1072     sum3  = 0.0;
1073     sum4  = 0.0;
1074     sum5  = 0.0;
1075     sum6  = 0.0;
1076     sum7  = 0.0;
1077     sum8  = 0.0;
1078     for (j=0; j<n; j++) {
1079       sum1 += v[jrow]*x[8*idx[jrow]];
1080       sum2 += v[jrow]*x[8*idx[jrow]+1];
1081       sum3 += v[jrow]*x[8*idx[jrow]+2];
1082       sum4 += v[jrow]*x[8*idx[jrow]+3];
1083       sum5 += v[jrow]*x[8*idx[jrow]+4];
1084       sum6 += v[jrow]*x[8*idx[jrow]+5];
1085       sum7 += v[jrow]*x[8*idx[jrow]+6];
1086       sum8 += v[jrow]*x[8*idx[jrow]+7];
1087       jrow++;
1088      }
1089     y[8*i]   += sum1;
1090     y[8*i+1] += sum2;
1091     y[8*i+2] += sum3;
1092     y[8*i+3] += sum4;
1093     y[8*i+4] += sum5;
1094     y[8*i+5] += sum6;
1095     y[8*i+6] += sum7;
1096     y[8*i+7] += sum8;
1097   }
1098 
1099   PetscLogFlops(16*a->nz);
1100   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1101   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1107 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1108 {
1109   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1110   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1111   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1112   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
1113 
1114   PetscFunctionBegin;
1115   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1116   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1117   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1118   y = y + shift; /* shift for Fortran start by 1 indexing */
1119   for (i=0; i<m; i++) {
1120     idx    = a->j + a->i[i] + shift;
1121     v      = a->a + a->i[i] + shift;
1122     n      = a->i[i+1] - a->i[i];
1123     alpha1 = x[8*i];
1124     alpha2 = x[8*i+1];
1125     alpha3 = x[8*i+2];
1126     alpha4 = x[8*i+3];
1127     alpha5 = x[8*i+4];
1128     alpha6 = x[8*i+5];
1129     alpha7 = x[8*i+6];
1130     alpha8 = x[8*i+7];
1131     while (n-->0) {
1132       y[8*(*idx)]   += alpha1*(*v);
1133       y[8*(*idx)+1] += alpha2*(*v);
1134       y[8*(*idx)+2] += alpha3*(*v);
1135       y[8*(*idx)+3] += alpha4*(*v);
1136       y[8*(*idx)+4] += alpha5*(*v);
1137       y[8*(*idx)+5] += alpha6*(*v);
1138       y[8*(*idx)+6] += alpha7*(*v);
1139       y[8*(*idx)+7] += alpha8*(*v);
1140       idx++; v++;
1141     }
1142   }
1143   PetscLogFlops(16*a->nz);
1144   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1145   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 /*===================================================================================*/
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1152 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1153 {
1154   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1155   int         ierr;
1156   PetscFunctionBegin;
1157 
1158   /* start the scatter */
1159   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1160   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1161   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1162   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1168 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1169 {
1170   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1171   int         ierr;
1172   PetscFunctionBegin;
1173   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1174   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1175   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1176   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1182 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1183 {
1184   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1185   int         ierr;
1186   PetscFunctionBegin;
1187 
1188   /* start the scatter */
1189   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1190   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1191   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1192   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1198 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1199 {
1200   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1201   int         ierr;
1202   PetscFunctionBegin;
1203   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1204   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1205   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1206   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 /* ---------------------------------------------------------------------------------- */
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatCreateMAIJ"
1213 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1214 {
1215   int         ierr,size,n;
1216   Mat_MPIMAIJ *b;
1217   Mat         B;
1218 
1219   PetscFunctionBegin;
1220   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1221 
1222   if (dof == 1) {
1223     *maij = A;
1224   } else {
1225     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1226     B->assembled    = PETSC_TRUE;
1227 
1228     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1229     if (size == 1) {
1230       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
1231       B->ops->destroy = MatDestroy_SeqMAIJ;
1232       b      = (Mat_MPIMAIJ*)B->data;
1233       b->dof = dof;
1234       b->AIJ = A;
1235       if (dof == 2) {
1236         B->ops->mult             = MatMult_SeqMAIJ_2;
1237         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1238         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1239         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1240       } else if (dof == 3) {
1241         B->ops->mult             = MatMult_SeqMAIJ_3;
1242         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1243         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1244         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1245       } else if (dof == 4) {
1246         B->ops->mult             = MatMult_SeqMAIJ_4;
1247         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1248         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1249         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1250       } else if (dof == 5) {
1251         B->ops->mult             = MatMult_SeqMAIJ_5;
1252         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1253         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1254         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1255       } else if (dof == 6) {
1256         B->ops->mult             = MatMult_SeqMAIJ_6;
1257         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1258         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1259         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1260       } else if (dof == 8) {
1261         B->ops->mult             = MatMult_SeqMAIJ_8;
1262         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1263         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1264         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1265       } else {
1266         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1267       }
1268     } else {
1269       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1270       IS         from,to;
1271       Vec        gvec;
1272       int        *garray,i;
1273 
1274       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1275       B->ops->destroy = MatDestroy_MPIMAIJ;
1276       b      = (Mat_MPIMAIJ*)B->data;
1277       b->dof = dof;
1278       b->A   = A;
1279       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
1280       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
1281 
1282       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1283       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1284 
1285       /* create two temporary Index sets for build scatter gather */
1286       ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr);
1287       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1288       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1289       ierr = PetscFree(garray);CHKERRQ(ierr);
1290       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1291 
1292       /* create temporary global vector to generate scatter context */
1293       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1294 
1295       /* generate the scatter context */
1296       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1297 
1298       ierr = ISDestroy(from);CHKERRQ(ierr);
1299       ierr = ISDestroy(to);CHKERRQ(ierr);
1300       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1301 
1302       B->ops->mult             = MatMult_MPIMAIJ_dof;
1303       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1304       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1305       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1306     }
1307     *maij = B;
1308   }
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 
1313 
1314 
1315 
1316 
1317 
1318 
1319 
1320 
1321 
1322 
1323