xref: /petsc/src/mat/impls/maij/maij.c (revision 63b9fa5e258ef37d5bcff491ebacde33f30ed49b)
1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include "src/mat/impls/maij/maij.h"
20 #include "src/vec/vecimpl.h"
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatMAIJGetAIJ"
24 int MatMAIJGetAIJ(Mat A,Mat *B)
25 {
26   int         ierr;
27   PetscTruth  ismpimaij,isseqmaij;
28 
29   PetscFunctionBegin;
30   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32   if (ismpimaij) {
33     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34 
35     *B = b->A;
36   } else if (isseqmaij) {
37     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38 
39     *B = b->AIJ;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatMAIJRedimension"
48 int MatMAIJRedimension(Mat A,int dof,Mat *B)
49 {
50   int ierr;
51   Mat Aij;
52 
53   PetscFunctionBegin;
54   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatDestroy_SeqMAIJ"
61 int MatDestroy_SeqMAIJ(Mat A)
62 {
63   int         ierr;
64   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
65 
66   PetscFunctionBegin;
67   if (b->AIJ) {
68     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
69   }
70   ierr = PetscFree(b);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatDestroy_MPIMAIJ"
76 int MatDestroy_MPIMAIJ(Mat A)
77 {
78   int         ierr;
79   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
80 
81   PetscFunctionBegin;
82   if (b->AIJ) {
83     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
84   }
85   if (b->OAIJ) {
86     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
87   }
88   if (b->A) {
89     ierr = MatDestroy(b->A);CHKERRQ(ierr);
90   }
91   if (b->ctx) {
92     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
93   }
94   if (b->w) {
95     ierr = VecDestroy(b->w);CHKERRQ(ierr);
96   }
97   ierr = PetscFree(b);CHKERRQ(ierr);
98   PetscFunctionReturn(0);
99 }
100 
101 EXTERN_C_BEGIN
102 #undef __FUNCT__
103 #define __FUNCT__ "MatCreate_MAIJ"
104 int MatCreate_MAIJ(Mat A)
105 {
106   int         ierr;
107   Mat_MPIMAIJ *b;
108 
109   PetscFunctionBegin;
110   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
111   A->data  = (void*)b;
112   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
113   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
114   A->factor           = 0;
115   A->mapping          = 0;
116 
117   b->AIJ  = 0;
118   b->dof  = 0;
119   b->OAIJ = 0;
120   b->ctx  = 0;
121   b->w    = 0;
122   PetscFunctionReturn(0);
123 }
124 EXTERN_C_END
125 
126 /* --------------------------------------------------------------------------------------*/
127 #undef __FUNCT__
128 #define __FUNCT__ "MatMult_SeqMAIJ_2"
129 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
130 {
131   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
132   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
133   PetscScalar   *x,*y,*v,sum1, sum2;
134   int           ierr,m = b->AIJ->m,*idx,*ii;
135   int           n,i,jrow,j;
136 
137   PetscFunctionBegin;
138   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
139   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
140   idx  = a->j;
141   v    = a->a;
142   ii   = a->i;
143 
144   for (i=0; i<m; i++) {
145     jrow = ii[i];
146     n    = ii[i+1] - jrow;
147     sum1  = 0.0;
148     sum2  = 0.0;
149     for (j=0; j<n; j++) {
150       sum1 += v[jrow]*x[2*idx[jrow]];
151       sum2 += v[jrow]*x[2*idx[jrow]+1];
152       jrow++;
153      }
154     y[2*i]   = sum1;
155     y[2*i+1] = sum2;
156   }
157 
158   PetscLogFlops(4*a->nz - 2*m);
159   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
160   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
161   PetscFunctionReturn(0);
162 }
163 
164 #undef __FUNCT__
165 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
166 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
167 {
168   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
169   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
170   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
171   int           ierr,m = b->AIJ->m,n,i,*idx;
172 
173   PetscFunctionBegin;
174   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
175   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
176   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
177 
178   for (i=0; i<m; i++) {
179     idx    = a->j + a->i[i] ;
180     v      = a->a + a->i[i] ;
181     n      = a->i[i+1] - a->i[i];
182     alpha1 = x[2*i];
183     alpha2 = x[2*i+1];
184     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
185   }
186   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
187   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
188   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
194 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
195 {
196   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
197   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
198   PetscScalar   *x,*y,*v,sum1, sum2;
199   int           ierr,m = b->AIJ->m,*idx,*ii;
200   int           n,i,jrow,j;
201 
202   PetscFunctionBegin;
203   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
204   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
205   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
206   idx  = a->j;
207   v    = a->a;
208   ii   = a->i;
209 
210   for (i=0; i<m; i++) {
211     jrow = ii[i];
212     n    = ii[i+1] - jrow;
213     sum1  = 0.0;
214     sum2  = 0.0;
215     for (j=0; j<n; j++) {
216       sum1 += v[jrow]*x[2*idx[jrow]];
217       sum2 += v[jrow]*x[2*idx[jrow]+1];
218       jrow++;
219      }
220     y[2*i]   += sum1;
221     y[2*i+1] += sum2;
222   }
223 
224   PetscLogFlops(4*a->nz - 2*m);
225   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
226   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
227   PetscFunctionReturn(0);
228 }
229 #undef __FUNCT__
230 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
231 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
232 {
233   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
234   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
235   PetscScalar   *x,*y,*v,alpha1,alpha2;
236   int           ierr,m = b->AIJ->m,n,i,*idx;
237 
238   PetscFunctionBegin;
239   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
240   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
241   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
242 
243   for (i=0; i<m; i++) {
244     idx   = a->j + a->i[i] ;
245     v     = a->a + a->i[i] ;
246     n     = a->i[i+1] - a->i[i];
247     alpha1 = x[2*i];
248     alpha2 = x[2*i+1];
249     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
250   }
251   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
252   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
253   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
254   PetscFunctionReturn(0);
255 }
256 /* --------------------------------------------------------------------------------------*/
257 #undef __FUNCT__
258 #define __FUNCT__ "MatMult_SeqMAIJ_3"
259 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
260 {
261   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
262   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
263   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
264   int           ierr,m = b->AIJ->m,*idx,*ii;
265   int           n,i,jrow,j;
266 
267   PetscFunctionBegin;
268   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
269   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
270   idx  = a->j;
271   v    = a->a;
272   ii   = a->i;
273 
274   for (i=0; i<m; i++) {
275     jrow = ii[i];
276     n    = ii[i+1] - jrow;
277     sum1  = 0.0;
278     sum2  = 0.0;
279     sum3  = 0.0;
280     for (j=0; j<n; j++) {
281       sum1 += v[jrow]*x[3*idx[jrow]];
282       sum2 += v[jrow]*x[3*idx[jrow]+1];
283       sum3 += v[jrow]*x[3*idx[jrow]+2];
284       jrow++;
285      }
286     y[3*i]   = sum1;
287     y[3*i+1] = sum2;
288     y[3*i+2] = sum3;
289   }
290 
291   PetscLogFlops(6*a->nz - 3*m);
292   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
293   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
299 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
300 {
301   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
302   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
303   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
304   int           ierr,m = b->AIJ->m,n,i,*idx;
305 
306   PetscFunctionBegin;
307   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
308   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
309   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
310 
311   for (i=0; i<m; i++) {
312     idx    = a->j + a->i[i];
313     v      = a->a + a->i[i];
314     n      = a->i[i+1] - a->i[i];
315     alpha1 = x[3*i];
316     alpha2 = x[3*i+1];
317     alpha3 = x[3*i+2];
318     while (n-->0) {
319       y[3*(*idx)]   += alpha1*(*v);
320       y[3*(*idx)+1] += alpha2*(*v);
321       y[3*(*idx)+2] += alpha3*(*v);
322       idx++; v++;
323     }
324   }
325   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
326   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
327   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 #undef __FUNCT__
332 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
333 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
334 {
335   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
336   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
337   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
338   int           ierr,m = b->AIJ->m,*idx,*ii;
339   int           n,i,jrow,j;
340 
341   PetscFunctionBegin;
342   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
343   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
345   idx  = a->j;
346   v    = a->a;
347   ii   = a->i;
348 
349   for (i=0; i<m; i++) {
350     jrow = ii[i];
351     n    = ii[i+1] - jrow;
352     sum1  = 0.0;
353     sum2  = 0.0;
354     sum3  = 0.0;
355     for (j=0; j<n; j++) {
356       sum1 += v[jrow]*x[3*idx[jrow]];
357       sum2 += v[jrow]*x[3*idx[jrow]+1];
358       sum3 += v[jrow]*x[3*idx[jrow]+2];
359       jrow++;
360      }
361     y[3*i]   += sum1;
362     y[3*i+1] += sum2;
363     y[3*i+2] += sum3;
364   }
365 
366   PetscLogFlops(6*a->nz);
367   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
368   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 #undef __FUNCT__
372 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
373 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
374 {
375   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
376   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
377   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
378   int           ierr,m = b->AIJ->m,n,i,*idx;
379 
380   PetscFunctionBegin;
381   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
382   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
383   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
384   for (i=0; i<m; i++) {
385     idx    = a->j + a->i[i] ;
386     v      = a->a + a->i[i] ;
387     n      = a->i[i+1] - a->i[i];
388     alpha1 = x[3*i];
389     alpha2 = x[3*i+1];
390     alpha3 = x[3*i+2];
391     while (n-->0) {
392       y[3*(*idx)]   += alpha1*(*v);
393       y[3*(*idx)+1] += alpha2*(*v);
394       y[3*(*idx)+2] += alpha3*(*v);
395       idx++; v++;
396     }
397   }
398   PetscLogFlops(6*a->nz);
399   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
400   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 /* ------------------------------------------------------------------------------*/
405 #undef __FUNCT__
406 #define __FUNCT__ "MatMult_SeqMAIJ_4"
407 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
408 {
409   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
410   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
411   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
412   int           ierr,m = b->AIJ->m,*idx,*ii;
413   int           n,i,jrow,j;
414 
415   PetscFunctionBegin;
416   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
417   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
418   idx  = a->j;
419   v    = a->a;
420   ii   = a->i;
421 
422   for (i=0; i<m; i++) {
423     jrow = ii[i];
424     n    = ii[i+1] - jrow;
425     sum1  = 0.0;
426     sum2  = 0.0;
427     sum3  = 0.0;
428     sum4  = 0.0;
429     for (j=0; j<n; j++) {
430       sum1 += v[jrow]*x[4*idx[jrow]];
431       sum2 += v[jrow]*x[4*idx[jrow]+1];
432       sum3 += v[jrow]*x[4*idx[jrow]+2];
433       sum4 += v[jrow]*x[4*idx[jrow]+3];
434       jrow++;
435      }
436     y[4*i]   = sum1;
437     y[4*i+1] = sum2;
438     y[4*i+2] = sum3;
439     y[4*i+3] = sum4;
440   }
441 
442   PetscLogFlops(8*a->nz - 4*m);
443   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
444   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
450 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
451 {
452   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
453   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
454   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
455   int           ierr,m = b->AIJ->m,n,i,*idx;
456 
457   PetscFunctionBegin;
458   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
459   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
460   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
461   for (i=0; i<m; i++) {
462     idx    = a->j + a->i[i] ;
463     v      = a->a + a->i[i] ;
464     n      = a->i[i+1] - a->i[i];
465     alpha1 = x[4*i];
466     alpha2 = x[4*i+1];
467     alpha3 = x[4*i+2];
468     alpha4 = x[4*i+3];
469     while (n-->0) {
470       y[4*(*idx)]   += alpha1*(*v);
471       y[4*(*idx)+1] += alpha2*(*v);
472       y[4*(*idx)+2] += alpha3*(*v);
473       y[4*(*idx)+3] += alpha4*(*v);
474       idx++; v++;
475     }
476   }
477   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
478   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
479   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
485 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
486 {
487   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
488   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
489   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
490   int           ierr,m = b->AIJ->m,*idx,*ii;
491   int           n,i,jrow,j;
492 
493   PetscFunctionBegin;
494   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
495   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
496   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
497   idx  = a->j;
498   v    = a->a;
499   ii   = a->i;
500 
501   for (i=0; i<m; i++) {
502     jrow = ii[i];
503     n    = ii[i+1] - jrow;
504     sum1  = 0.0;
505     sum2  = 0.0;
506     sum3  = 0.0;
507     sum4  = 0.0;
508     for (j=0; j<n; j++) {
509       sum1 += v[jrow]*x[4*idx[jrow]];
510       sum2 += v[jrow]*x[4*idx[jrow]+1];
511       sum3 += v[jrow]*x[4*idx[jrow]+2];
512       sum4 += v[jrow]*x[4*idx[jrow]+3];
513       jrow++;
514      }
515     y[4*i]   += sum1;
516     y[4*i+1] += sum2;
517     y[4*i+2] += sum3;
518     y[4*i+3] += sum4;
519   }
520 
521   PetscLogFlops(8*a->nz - 4*m);
522   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
523   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 #undef __FUNCT__
527 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
528 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
529 {
530   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
531   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
532   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
533   int           ierr,m = b->AIJ->m,n,i,*idx;
534 
535   PetscFunctionBegin;
536   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
537   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
538   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
539 
540   for (i=0; i<m; i++) {
541     idx    = a->j + a->i[i] ;
542     v      = a->a + a->i[i] ;
543     n      = a->i[i+1] - a->i[i];
544     alpha1 = x[4*i];
545     alpha2 = x[4*i+1];
546     alpha3 = x[4*i+2];
547     alpha4 = x[4*i+3];
548     while (n-->0) {
549       y[4*(*idx)]   += alpha1*(*v);
550       y[4*(*idx)+1] += alpha2*(*v);
551       y[4*(*idx)+2] += alpha3*(*v);
552       y[4*(*idx)+3] += alpha4*(*v);
553       idx++; v++;
554     }
555   }
556   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
557   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
558   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
559   PetscFunctionReturn(0);
560 }
561 /* ------------------------------------------------------------------------------*/
562 
563 #undef __FUNCT__
564 #define __FUNCT__ "MatMult_SeqMAIJ_5"
565 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
566 {
567   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
568   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
569   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
570   int           ierr,m = b->AIJ->m,*idx,*ii;
571   int           n,i,jrow,j;
572 
573   PetscFunctionBegin;
574   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
575   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
576   idx  = a->j;
577   v    = a->a;
578   ii   = a->i;
579 
580   for (i=0; i<m; i++) {
581     jrow = ii[i];
582     n    = ii[i+1] - jrow;
583     sum1  = 0.0;
584     sum2  = 0.0;
585     sum3  = 0.0;
586     sum4  = 0.0;
587     sum5  = 0.0;
588     for (j=0; j<n; j++) {
589       sum1 += v[jrow]*x[5*idx[jrow]];
590       sum2 += v[jrow]*x[5*idx[jrow]+1];
591       sum3 += v[jrow]*x[5*idx[jrow]+2];
592       sum4 += v[jrow]*x[5*idx[jrow]+3];
593       sum5 += v[jrow]*x[5*idx[jrow]+4];
594       jrow++;
595      }
596     y[5*i]   = sum1;
597     y[5*i+1] = sum2;
598     y[5*i+2] = sum3;
599     y[5*i+3] = sum4;
600     y[5*i+4] = sum5;
601   }
602 
603   PetscLogFlops(10*a->nz - 5*m);
604   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
605   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
608 
609 #undef __FUNCT__
610 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
611 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
612 {
613   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
614   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
615   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
616   int           ierr,m = b->AIJ->m,n,i,*idx;
617 
618   PetscFunctionBegin;
619   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
620   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
621   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
622 
623   for (i=0; i<m; i++) {
624     idx    = a->j + a->i[i] ;
625     v      = a->a + a->i[i] ;
626     n      = a->i[i+1] - a->i[i];
627     alpha1 = x[5*i];
628     alpha2 = x[5*i+1];
629     alpha3 = x[5*i+2];
630     alpha4 = x[5*i+3];
631     alpha5 = x[5*i+4];
632     while (n-->0) {
633       y[5*(*idx)]   += alpha1*(*v);
634       y[5*(*idx)+1] += alpha2*(*v);
635       y[5*(*idx)+2] += alpha3*(*v);
636       y[5*(*idx)+3] += alpha4*(*v);
637       y[5*(*idx)+4] += alpha5*(*v);
638       idx++; v++;
639     }
640   }
641   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
642   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
643   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 #undef __FUNCT__
648 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
649 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
650 {
651   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
652   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
653   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
654   int           ierr,m = b->AIJ->m,*idx,*ii;
655   int           n,i,jrow,j;
656 
657   PetscFunctionBegin;
658   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
659   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
660   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
661   idx  = a->j;
662   v    = a->a;
663   ii   = a->i;
664 
665   for (i=0; i<m; i++) {
666     jrow = ii[i];
667     n    = ii[i+1] - jrow;
668     sum1  = 0.0;
669     sum2  = 0.0;
670     sum3  = 0.0;
671     sum4  = 0.0;
672     sum5  = 0.0;
673     for (j=0; j<n; j++) {
674       sum1 += v[jrow]*x[5*idx[jrow]];
675       sum2 += v[jrow]*x[5*idx[jrow]+1];
676       sum3 += v[jrow]*x[5*idx[jrow]+2];
677       sum4 += v[jrow]*x[5*idx[jrow]+3];
678       sum5 += v[jrow]*x[5*idx[jrow]+4];
679       jrow++;
680      }
681     y[5*i]   += sum1;
682     y[5*i+1] += sum2;
683     y[5*i+2] += sum3;
684     y[5*i+3] += sum4;
685     y[5*i+4] += sum5;
686   }
687 
688   PetscLogFlops(10*a->nz);
689   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
690   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
696 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
697 {
698   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
699   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
700   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
701   int           ierr,m = b->AIJ->m,n,i,*idx;
702 
703   PetscFunctionBegin;
704   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
705   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
706   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
707 
708   for (i=0; i<m; i++) {
709     idx    = a->j + a->i[i] ;
710     v      = a->a + a->i[i] ;
711     n      = a->i[i+1] - a->i[i];
712     alpha1 = x[5*i];
713     alpha2 = x[5*i+1];
714     alpha3 = x[5*i+2];
715     alpha4 = x[5*i+3];
716     alpha5 = x[5*i+4];
717     while (n-->0) {
718       y[5*(*idx)]   += alpha1*(*v);
719       y[5*(*idx)+1] += alpha2*(*v);
720       y[5*(*idx)+2] += alpha3*(*v);
721       y[5*(*idx)+3] += alpha4*(*v);
722       y[5*(*idx)+4] += alpha5*(*v);
723       idx++; v++;
724     }
725   }
726   PetscLogFlops(10*a->nz);
727   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
728   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 /* ------------------------------------------------------------------------------*/
733 #undef __FUNCT__
734 #define __FUNCT__ "MatMult_SeqMAIJ_6"
735 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
736 {
737   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
738   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
739   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
740   int           ierr,m = b->AIJ->m,*idx,*ii;
741   int           n,i,jrow,j;
742 
743   PetscFunctionBegin;
744   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
745   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
746   idx  = a->j;
747   v    = a->a;
748   ii   = a->i;
749 
750   for (i=0; i<m; i++) {
751     jrow = ii[i];
752     n    = ii[i+1] - jrow;
753     sum1  = 0.0;
754     sum2  = 0.0;
755     sum3  = 0.0;
756     sum4  = 0.0;
757     sum5  = 0.0;
758     sum6  = 0.0;
759     for (j=0; j<n; j++) {
760       sum1 += v[jrow]*x[6*idx[jrow]];
761       sum2 += v[jrow]*x[6*idx[jrow]+1];
762       sum3 += v[jrow]*x[6*idx[jrow]+2];
763       sum4 += v[jrow]*x[6*idx[jrow]+3];
764       sum5 += v[jrow]*x[6*idx[jrow]+4];
765       sum6 += v[jrow]*x[6*idx[jrow]+5];
766       jrow++;
767      }
768     y[6*i]   = sum1;
769     y[6*i+1] = sum2;
770     y[6*i+2] = sum3;
771     y[6*i+3] = sum4;
772     y[6*i+4] = sum5;
773     y[6*i+5] = sum6;
774   }
775 
776   PetscLogFlops(12*a->nz - 6*m);
777   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
778   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
784 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
785 {
786   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
787   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
788   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
789   int           ierr,m = b->AIJ->m,n,i,*idx;
790 
791   PetscFunctionBegin;
792   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
793   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
794   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
795 
796   for (i=0; i<m; i++) {
797     idx    = a->j + a->i[i] ;
798     v      = a->a + a->i[i] ;
799     n      = a->i[i+1] - a->i[i];
800     alpha1 = x[6*i];
801     alpha2 = x[6*i+1];
802     alpha3 = x[6*i+2];
803     alpha4 = x[6*i+3];
804     alpha5 = x[6*i+4];
805     alpha6 = x[6*i+5];
806     while (n-->0) {
807       y[6*(*idx)]   += alpha1*(*v);
808       y[6*(*idx)+1] += alpha2*(*v);
809       y[6*(*idx)+2] += alpha3*(*v);
810       y[6*(*idx)+3] += alpha4*(*v);
811       y[6*(*idx)+4] += alpha5*(*v);
812       y[6*(*idx)+5] += alpha6*(*v);
813       idx++; v++;
814     }
815   }
816   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
817   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
818   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
824 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
827   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
828   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
829   int           ierr,m = b->AIJ->m,*idx,*ii;
830   int           n,i,jrow,j;
831 
832   PetscFunctionBegin;
833   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
834   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
835   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
836   idx  = a->j;
837   v    = a->a;
838   ii   = a->i;
839 
840   for (i=0; i<m; i++) {
841     jrow = ii[i];
842     n    = ii[i+1] - jrow;
843     sum1  = 0.0;
844     sum2  = 0.0;
845     sum3  = 0.0;
846     sum4  = 0.0;
847     sum5  = 0.0;
848     sum6  = 0.0;
849     for (j=0; j<n; j++) {
850       sum1 += v[jrow]*x[6*idx[jrow]];
851       sum2 += v[jrow]*x[6*idx[jrow]+1];
852       sum3 += v[jrow]*x[6*idx[jrow]+2];
853       sum4 += v[jrow]*x[6*idx[jrow]+3];
854       sum5 += v[jrow]*x[6*idx[jrow]+4];
855       sum6 += v[jrow]*x[6*idx[jrow]+5];
856       jrow++;
857      }
858     y[6*i]   += sum1;
859     y[6*i+1] += sum2;
860     y[6*i+2] += sum3;
861     y[6*i+3] += sum4;
862     y[6*i+4] += sum5;
863     y[6*i+5] += sum6;
864   }
865 
866   PetscLogFlops(12*a->nz);
867   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
868   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 #undef __FUNCT__
873 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
874 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
875 {
876   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
877   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
878   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
879   int           ierr,m = b->AIJ->m,n,i,*idx;
880 
881   PetscFunctionBegin;
882   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
883   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
884   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
885 
886   for (i=0; i<m; i++) {
887     idx    = a->j + a->i[i] ;
888     v      = a->a + a->i[i] ;
889     n      = a->i[i+1] - a->i[i];
890     alpha1 = x[6*i];
891     alpha2 = x[6*i+1];
892     alpha3 = x[6*i+2];
893     alpha4 = x[6*i+3];
894     alpha5 = x[6*i+4];
895     alpha6 = x[6*i+5];
896     while (n-->0) {
897       y[6*(*idx)]   += alpha1*(*v);
898       y[6*(*idx)+1] += alpha2*(*v);
899       y[6*(*idx)+2] += alpha3*(*v);
900       y[6*(*idx)+3] += alpha4*(*v);
901       y[6*(*idx)+4] += alpha5*(*v);
902       y[6*(*idx)+5] += alpha6*(*v);
903       idx++; v++;
904     }
905   }
906   PetscLogFlops(12*a->nz);
907   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
908   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
911 
912 /* ------------------------------------------------------------------------------*/
913 #undef __FUNCT__
914 #define __FUNCT__ "MatMult_SeqMAIJ_8"
915 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
916 {
917   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
918   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
919   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
920   int           ierr,m = b->AIJ->m,*idx,*ii;
921   int           n,i,jrow,j;
922 
923   PetscFunctionBegin;
924   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
925   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
926   idx  = a->j;
927   v    = a->a;
928   ii   = a->i;
929 
930   for (i=0; i<m; i++) {
931     jrow = ii[i];
932     n    = ii[i+1] - jrow;
933     sum1  = 0.0;
934     sum2  = 0.0;
935     sum3  = 0.0;
936     sum4  = 0.0;
937     sum5  = 0.0;
938     sum6  = 0.0;
939     sum7  = 0.0;
940     sum8  = 0.0;
941     for (j=0; j<n; j++) {
942       sum1 += v[jrow]*x[8*idx[jrow]];
943       sum2 += v[jrow]*x[8*idx[jrow]+1];
944       sum3 += v[jrow]*x[8*idx[jrow]+2];
945       sum4 += v[jrow]*x[8*idx[jrow]+3];
946       sum5 += v[jrow]*x[8*idx[jrow]+4];
947       sum6 += v[jrow]*x[8*idx[jrow]+5];
948       sum7 += v[jrow]*x[8*idx[jrow]+6];
949       sum8 += v[jrow]*x[8*idx[jrow]+7];
950       jrow++;
951      }
952     y[8*i]   = sum1;
953     y[8*i+1] = sum2;
954     y[8*i+2] = sum3;
955     y[8*i+3] = sum4;
956     y[8*i+4] = sum5;
957     y[8*i+5] = sum6;
958     y[8*i+6] = sum7;
959     y[8*i+7] = sum8;
960   }
961 
962   PetscLogFlops(16*a->nz - 8*m);
963   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
964   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
970 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
971 {
972   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
973   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
974   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
975   int           ierr,m = b->AIJ->m,n,i,*idx;
976 
977   PetscFunctionBegin;
978   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
979   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
980   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
981 
982   for (i=0; i<m; i++) {
983     idx    = a->j + a->i[i] ;
984     v      = a->a + a->i[i] ;
985     n      = a->i[i+1] - a->i[i];
986     alpha1 = x[8*i];
987     alpha2 = x[8*i+1];
988     alpha3 = x[8*i+2];
989     alpha4 = x[8*i+3];
990     alpha5 = x[8*i+4];
991     alpha6 = x[8*i+5];
992     alpha7 = x[8*i+6];
993     alpha8 = x[8*i+7];
994     while (n-->0) {
995       y[8*(*idx)]   += alpha1*(*v);
996       y[8*(*idx)+1] += alpha2*(*v);
997       y[8*(*idx)+2] += alpha3*(*v);
998       y[8*(*idx)+3] += alpha4*(*v);
999       y[8*(*idx)+4] += alpha5*(*v);
1000       y[8*(*idx)+5] += alpha6*(*v);
1001       y[8*(*idx)+6] += alpha7*(*v);
1002       y[8*(*idx)+7] += alpha8*(*v);
1003       idx++; v++;
1004     }
1005   }
1006   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1007   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1008   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1014 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1015 {
1016   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1017   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1018   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1019   int           ierr,m = b->AIJ->m,*idx,*ii;
1020   int           n,i,jrow,j;
1021 
1022   PetscFunctionBegin;
1023   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1024   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1025   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1026   idx  = a->j;
1027   v    = a->a;
1028   ii   = a->i;
1029 
1030   for (i=0; i<m; i++) {
1031     jrow = ii[i];
1032     n    = ii[i+1] - jrow;
1033     sum1  = 0.0;
1034     sum2  = 0.0;
1035     sum3  = 0.0;
1036     sum4  = 0.0;
1037     sum5  = 0.0;
1038     sum6  = 0.0;
1039     sum7  = 0.0;
1040     sum8  = 0.0;
1041     for (j=0; j<n; j++) {
1042       sum1 += v[jrow]*x[8*idx[jrow]];
1043       sum2 += v[jrow]*x[8*idx[jrow]+1];
1044       sum3 += v[jrow]*x[8*idx[jrow]+2];
1045       sum4 += v[jrow]*x[8*idx[jrow]+3];
1046       sum5 += v[jrow]*x[8*idx[jrow]+4];
1047       sum6 += v[jrow]*x[8*idx[jrow]+5];
1048       sum7 += v[jrow]*x[8*idx[jrow]+6];
1049       sum8 += v[jrow]*x[8*idx[jrow]+7];
1050       jrow++;
1051      }
1052     y[8*i]   += sum1;
1053     y[8*i+1] += sum2;
1054     y[8*i+2] += sum3;
1055     y[8*i+3] += sum4;
1056     y[8*i+4] += sum5;
1057     y[8*i+5] += sum6;
1058     y[8*i+6] += sum7;
1059     y[8*i+7] += sum8;
1060   }
1061 
1062   PetscLogFlops(16*a->nz);
1063   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1064   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1070 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1071 {
1072   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1073   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1074   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1075   int           ierr,m = b->AIJ->m,n,i,*idx;
1076 
1077   PetscFunctionBegin;
1078   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1079   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1080   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1081   for (i=0; i<m; i++) {
1082     idx    = a->j + a->i[i] ;
1083     v      = a->a + a->i[i] ;
1084     n      = a->i[i+1] - a->i[i];
1085     alpha1 = x[8*i];
1086     alpha2 = x[8*i+1];
1087     alpha3 = x[8*i+2];
1088     alpha4 = x[8*i+3];
1089     alpha5 = x[8*i+4];
1090     alpha6 = x[8*i+5];
1091     alpha7 = x[8*i+6];
1092     alpha8 = x[8*i+7];
1093     while (n-->0) {
1094       y[8*(*idx)]   += alpha1*(*v);
1095       y[8*(*idx)+1] += alpha2*(*v);
1096       y[8*(*idx)+2] += alpha3*(*v);
1097       y[8*(*idx)+3] += alpha4*(*v);
1098       y[8*(*idx)+4] += alpha5*(*v);
1099       y[8*(*idx)+5] += alpha6*(*v);
1100       y[8*(*idx)+6] += alpha7*(*v);
1101       y[8*(*idx)+7] += alpha8*(*v);
1102       idx++; v++;
1103     }
1104   }
1105   PetscLogFlops(16*a->nz);
1106   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1107   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 /*--------------------------------------------------------------------------------------------*/
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1114 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1115 {
1116   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1117   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1118   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1119   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1120   int           ierr,m = b->AIJ->m,*idx,*ii;
1121   int           n,i,jrow,j;
1122 
1123   PetscFunctionBegin;
1124   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1125   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1126   idx  = a->j;
1127   v    = a->a;
1128   ii   = a->i;
1129 
1130   for (i=0; i<m; i++) {
1131     jrow = ii[i];
1132     n    = ii[i+1] - jrow;
1133     sum1  = 0.0;
1134     sum2  = 0.0;
1135     sum3  = 0.0;
1136     sum4  = 0.0;
1137     sum5  = 0.0;
1138     sum6  = 0.0;
1139     sum7  = 0.0;
1140     sum8  = 0.0;
1141     sum9  = 0.0;
1142     sum10 = 0.0;
1143     sum11 = 0.0;
1144     sum12 = 0.0;
1145     sum13 = 0.0;
1146     sum14 = 0.0;
1147     sum15 = 0.0;
1148     sum16 = 0.0;
1149     for (j=0; j<n; j++) {
1150       sum1  += v[jrow]*x[16*idx[jrow]];
1151       sum2  += v[jrow]*x[16*idx[jrow]+1];
1152       sum3  += v[jrow]*x[16*idx[jrow]+2];
1153       sum4  += v[jrow]*x[16*idx[jrow]+3];
1154       sum5  += v[jrow]*x[16*idx[jrow]+4];
1155       sum6  += v[jrow]*x[16*idx[jrow]+5];
1156       sum7  += v[jrow]*x[16*idx[jrow]+6];
1157       sum8  += v[jrow]*x[16*idx[jrow]+7];
1158       sum9  += v[jrow]*x[16*idx[jrow]+8];
1159       sum10 += v[jrow]*x[16*idx[jrow]+9];
1160       sum11 += v[jrow]*x[16*idx[jrow]+10];
1161       sum12 += v[jrow]*x[16*idx[jrow]+11];
1162       sum13 += v[jrow]*x[16*idx[jrow]+12];
1163       sum14 += v[jrow]*x[16*idx[jrow]+13];
1164       sum15 += v[jrow]*x[16*idx[jrow]+14];
1165       sum16 += v[jrow]*x[16*idx[jrow]+15];
1166       jrow++;
1167      }
1168     y[16*i]    = sum1;
1169     y[16*i+1]  = sum2;
1170     y[16*i+2]  = sum3;
1171     y[16*i+3]  = sum4;
1172     y[16*i+4]  = sum5;
1173     y[16*i+5]  = sum6;
1174     y[16*i+6]  = sum7;
1175     y[16*i+7]  = sum8;
1176     y[16*i+8]  = sum9;
1177     y[16*i+9]  = sum10;
1178     y[16*i+10] = sum11;
1179     y[16*i+11] = sum12;
1180     y[16*i+12] = sum13;
1181     y[16*i+13] = sum14;
1182     y[16*i+14] = sum15;
1183     y[16*i+15] = sum16;
1184   }
1185 
1186   PetscLogFlops(32*a->nz - 16*m);
1187   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1188   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1194 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1195 {
1196   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1197   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1198   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1199   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1200   int           ierr,m = b->AIJ->m,n,i,*idx;
1201 
1202   PetscFunctionBegin;
1203   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1204   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1205   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1206 
1207   for (i=0; i<m; i++) {
1208     idx    = a->j + a->i[i] ;
1209     v      = a->a + a->i[i] ;
1210     n      = a->i[i+1] - a->i[i];
1211     alpha1  = x[16*i];
1212     alpha2  = x[16*i+1];
1213     alpha3  = x[16*i+2];
1214     alpha4  = x[16*i+3];
1215     alpha5  = x[16*i+4];
1216     alpha6  = x[16*i+5];
1217     alpha7  = x[16*i+6];
1218     alpha8  = x[16*i+7];
1219     alpha9  = x[16*i+8];
1220     alpha10 = x[16*i+9];
1221     alpha11 = x[16*i+10];
1222     alpha12 = x[16*i+11];
1223     alpha13 = x[16*i+12];
1224     alpha14 = x[16*i+13];
1225     alpha15 = x[16*i+14];
1226     alpha16 = x[16*i+15];
1227     while (n-->0) {
1228       y[16*(*idx)]    += alpha1*(*v);
1229       y[16*(*idx)+1]  += alpha2*(*v);
1230       y[16*(*idx)+2]  += alpha3*(*v);
1231       y[16*(*idx)+3]  += alpha4*(*v);
1232       y[16*(*idx)+4]  += alpha5*(*v);
1233       y[16*(*idx)+5]  += alpha6*(*v);
1234       y[16*(*idx)+6]  += alpha7*(*v);
1235       y[16*(*idx)+7]  += alpha8*(*v);
1236       y[16*(*idx)+8]  += alpha9*(*v);
1237       y[16*(*idx)+9]  += alpha10*(*v);
1238       y[16*(*idx)+10] += alpha11*(*v);
1239       y[16*(*idx)+11] += alpha12*(*v);
1240       y[16*(*idx)+12] += alpha13*(*v);
1241       y[16*(*idx)+13] += alpha14*(*v);
1242       y[16*(*idx)+14] += alpha15*(*v);
1243       y[16*(*idx)+15] += alpha16*(*v);
1244       idx++; v++;
1245     }
1246   }
1247   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1248   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1249   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1255 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1256 {
1257   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1258   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1259   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1260   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1261   int           ierr,m = b->AIJ->m,*idx,*ii;
1262   int           n,i,jrow,j;
1263 
1264   PetscFunctionBegin;
1265   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1266   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1267   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1268   idx  = a->j;
1269   v    = a->a;
1270   ii   = a->i;
1271 
1272   for (i=0; i<m; i++) {
1273     jrow = ii[i];
1274     n    = ii[i+1] - jrow;
1275     sum1  = 0.0;
1276     sum2  = 0.0;
1277     sum3  = 0.0;
1278     sum4  = 0.0;
1279     sum5  = 0.0;
1280     sum6  = 0.0;
1281     sum7  = 0.0;
1282     sum8  = 0.0;
1283     sum9  = 0.0;
1284     sum10 = 0.0;
1285     sum11 = 0.0;
1286     sum12 = 0.0;
1287     sum13 = 0.0;
1288     sum14 = 0.0;
1289     sum15 = 0.0;
1290     sum16 = 0.0;
1291     for (j=0; j<n; j++) {
1292       sum1  += v[jrow]*x[16*idx[jrow]];
1293       sum2  += v[jrow]*x[16*idx[jrow]+1];
1294       sum3  += v[jrow]*x[16*idx[jrow]+2];
1295       sum4  += v[jrow]*x[16*idx[jrow]+3];
1296       sum5  += v[jrow]*x[16*idx[jrow]+4];
1297       sum6  += v[jrow]*x[16*idx[jrow]+5];
1298       sum7  += v[jrow]*x[16*idx[jrow]+6];
1299       sum8  += v[jrow]*x[16*idx[jrow]+7];
1300       sum9  += v[jrow]*x[16*idx[jrow]+8];
1301       sum10 += v[jrow]*x[16*idx[jrow]+9];
1302       sum11 += v[jrow]*x[16*idx[jrow]+10];
1303       sum12 += v[jrow]*x[16*idx[jrow]+11];
1304       sum13 += v[jrow]*x[16*idx[jrow]+12];
1305       sum14 += v[jrow]*x[16*idx[jrow]+13];
1306       sum15 += v[jrow]*x[16*idx[jrow]+14];
1307       sum16 += v[jrow]*x[16*idx[jrow]+15];
1308       jrow++;
1309      }
1310     y[16*i]    += sum1;
1311     y[16*i+1]  += sum2;
1312     y[16*i+2]  += sum3;
1313     y[16*i+3]  += sum4;
1314     y[16*i+4]  += sum5;
1315     y[16*i+5]  += sum6;
1316     y[16*i+6]  += sum7;
1317     y[16*i+7]  += sum8;
1318     y[16*i+8]  += sum9;
1319     y[16*i+9]  += sum10;
1320     y[16*i+10] += sum11;
1321     y[16*i+11] += sum12;
1322     y[16*i+12] += sum13;
1323     y[16*i+13] += sum14;
1324     y[16*i+14] += sum15;
1325     y[16*i+15] += sum16;
1326   }
1327 
1328   PetscLogFlops(32*a->nz);
1329   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1330   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1336 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1337 {
1338   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1339   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1340   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1341   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1342   int           ierr,m = b->AIJ->m,n,i,*idx;
1343 
1344   PetscFunctionBegin;
1345   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1346   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1347   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1348   for (i=0; i<m; i++) {
1349     idx    = a->j + a->i[i] ;
1350     v      = a->a + a->i[i] ;
1351     n      = a->i[i+1] - a->i[i];
1352     alpha1 = x[16*i];
1353     alpha2 = x[16*i+1];
1354     alpha3 = x[16*i+2];
1355     alpha4 = x[16*i+3];
1356     alpha5 = x[16*i+4];
1357     alpha6 = x[16*i+5];
1358     alpha7 = x[16*i+6];
1359     alpha8 = x[16*i+7];
1360     alpha9  = x[16*i+8];
1361     alpha10 = x[16*i+9];
1362     alpha11 = x[16*i+10];
1363     alpha12 = x[16*i+11];
1364     alpha13 = x[16*i+12];
1365     alpha14 = x[16*i+13];
1366     alpha15 = x[16*i+14];
1367     alpha16 = x[16*i+15];
1368     while (n-->0) {
1369       y[16*(*idx)]   += alpha1*(*v);
1370       y[16*(*idx)+1] += alpha2*(*v);
1371       y[16*(*idx)+2] += alpha3*(*v);
1372       y[16*(*idx)+3] += alpha4*(*v);
1373       y[16*(*idx)+4] += alpha5*(*v);
1374       y[16*(*idx)+5] += alpha6*(*v);
1375       y[16*(*idx)+6] += alpha7*(*v);
1376       y[16*(*idx)+7] += alpha8*(*v);
1377       y[16*(*idx)+8]  += alpha9*(*v);
1378       y[16*(*idx)+9]  += alpha10*(*v);
1379       y[16*(*idx)+10] += alpha11*(*v);
1380       y[16*(*idx)+11] += alpha12*(*v);
1381       y[16*(*idx)+12] += alpha13*(*v);
1382       y[16*(*idx)+13] += alpha14*(*v);
1383       y[16*(*idx)+14] += alpha15*(*v);
1384       y[16*(*idx)+15] += alpha16*(*v);
1385       idx++; v++;
1386     }
1387   }
1388   PetscLogFlops(32*a->nz);
1389   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1390   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 /*===================================================================================*/
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1397 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1398 {
1399   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1400   int         ierr;
1401   PetscFunctionBegin;
1402 
1403   /* start the scatter */
1404   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1405   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1406   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1407   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1413 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1414 {
1415   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1416   int         ierr;
1417   PetscFunctionBegin;
1418   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1419   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1420   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1421   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1427 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1428 {
1429   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1430   int         ierr;
1431   PetscFunctionBegin;
1432 
1433   /* start the scatter */
1434   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1435   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1436   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1437   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1443 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1444 {
1445   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1446   int         ierr;
1447   PetscFunctionBegin;
1448   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1449   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1450   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1451   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 /* ---------------------------------------------------------------------------------- */
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatCreateMAIJ"
1458 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1459 {
1460   int         ierr,size,n;
1461   Mat_MPIMAIJ *b;
1462   Mat         B;
1463 
1464   PetscFunctionBegin;
1465   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1466 
1467   if (dof == 1) {
1468     *maij = A;
1469   } else {
1470     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1471     B->assembled    = PETSC_TRUE;
1472 
1473     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1474     if (size == 1) {
1475       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
1476       B->ops->destroy = MatDestroy_SeqMAIJ;
1477       b      = (Mat_MPIMAIJ*)B->data;
1478       b->dof = dof;
1479       b->AIJ = A;
1480       if (dof == 2) {
1481         B->ops->mult             = MatMult_SeqMAIJ_2;
1482         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1483         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1484         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1485       } else if (dof == 3) {
1486         B->ops->mult             = MatMult_SeqMAIJ_3;
1487         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1488         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1489         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1490       } else if (dof == 4) {
1491         B->ops->mult             = MatMult_SeqMAIJ_4;
1492         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1493         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1494         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1495       } else if (dof == 5) {
1496         B->ops->mult             = MatMult_SeqMAIJ_5;
1497         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1498         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1499         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1500       } else if (dof == 6) {
1501         B->ops->mult             = MatMult_SeqMAIJ_6;
1502         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1503         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1504         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1505       } else if (dof == 8) {
1506         B->ops->mult             = MatMult_SeqMAIJ_8;
1507         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1508         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1509         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1510       } else if (dof == 16) {
1511         B->ops->mult             = MatMult_SeqMAIJ_16;
1512         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1513         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1514         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1515       } else {
1516         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1517       }
1518     } else {
1519       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1520       IS         from,to;
1521       Vec        gvec;
1522       int        *garray,i;
1523 
1524       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1525       B->ops->destroy = MatDestroy_MPIMAIJ;
1526       b      = (Mat_MPIMAIJ*)B->data;
1527       b->dof = dof;
1528       b->A   = A;
1529       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
1530       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
1531 
1532       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1533       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1534 
1535       /* create two temporary Index sets for build scatter gather */
1536       ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr);
1537       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1538       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1539       ierr = PetscFree(garray);CHKERRQ(ierr);
1540       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1541 
1542       /* create temporary global vector to generate scatter context */
1543       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1544 
1545       /* generate the scatter context */
1546       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1547 
1548       ierr = ISDestroy(from);CHKERRQ(ierr);
1549       ierr = ISDestroy(to);CHKERRQ(ierr);
1550       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1551 
1552       B->ops->mult             = MatMult_MPIMAIJ_dof;
1553       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1554       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1555       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1556     }
1557     *maij = B;
1558   }
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 
1563 
1564 
1565 
1566 
1567 
1568 
1569 
1570 
1571 
1572 
1573