xref: /petsc/src/mat/impls/maij/maij.c (revision a2ec6df8280ae283e8b0f45d20ebe04f5d3dfc7b)
1 /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include "src/mat/impls/maij/maij.h"
20 #include "src/vec/vecimpl.h"
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatMAIJGetAIJ"
24 int MatMAIJGetAIJ(Mat A,Mat *B)
25 {
26   int         ierr;
27   PetscTruth  ismpimaij,isseqmaij;
28 
29   PetscFunctionBegin;
30   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
31   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
32   if (ismpimaij) {
33     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
34 
35     *B = b->A;
36   } else if (isseqmaij) {
37     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
38 
39     *B = b->AIJ;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatMAIJRedimension"
48 int MatMAIJRedimension(Mat A,int dof,Mat *B)
49 {
50   int ierr;
51   Mat Aij;
52 
53   PetscFunctionBegin;
54   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
55   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatDestroy_SeqMAIJ"
61 int MatDestroy_SeqMAIJ(Mat A)
62 {
63   int         ierr;
64   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
65 
66   PetscFunctionBegin;
67   if (b->AIJ) {
68     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
69   }
70   ierr = PetscFree(b);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatDestroy_MPIMAIJ"
76 int MatDestroy_MPIMAIJ(Mat A)
77 {
78   int         ierr;
79   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
80 
81   PetscFunctionBegin;
82   if (b->AIJ) {
83     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
84   }
85   if (b->OAIJ) {
86     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
87   }
88   if (b->A) {
89     ierr = MatDestroy(b->A);CHKERRQ(ierr);
90   }
91   if (b->ctx) {
92     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
93   }
94   if (b->w) {
95     ierr = VecDestroy(b->w);CHKERRQ(ierr);
96   }
97   ierr = PetscFree(b);CHKERRQ(ierr);
98   PetscFunctionReturn(0);
99 }
100 
101 /*MC
102   MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
103   multicomponent problems, interpolating or restricting each component the same way independently.
104   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
105 
106   Operations provided:
107 . MatMult
108 . MatMultTranspose
109 . MatMultAdd
110 . MatMultTransposeAdd
111 
112   Level: advanced
113 
114 .seealso: MatCreateSeqDense
115 M*/
116 
117 EXTERN_C_BEGIN
118 #undef __FUNCT__
119 #define __FUNCT__ "MatCreate_MAIJ"
120 int MatCreate_MAIJ(Mat A)
121 {
122   int         ierr;
123   Mat_MPIMAIJ *b;
124 
125   PetscFunctionBegin;
126   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
127   A->data  = (void*)b;
128   ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr);
129   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
130   A->factor           = 0;
131   A->mapping          = 0;
132 
133   b->AIJ  = 0;
134   b->dof  = 0;
135   b->OAIJ = 0;
136   b->ctx  = 0;
137   b->w    = 0;
138   PetscFunctionReturn(0);
139 }
140 EXTERN_C_END
141 
142 /* --------------------------------------------------------------------------------------*/
143 #undef __FUNCT__
144 #define __FUNCT__ "MatMult_SeqMAIJ_2"
145 int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
146 {
147   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
148   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
149   PetscScalar   *x,*y,*v,sum1, sum2;
150   int           ierr,m = b->AIJ->m,*idx,*ii;
151   int           n,i,jrow,j;
152 
153   PetscFunctionBegin;
154   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
155   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
156   idx  = a->j;
157   v    = a->a;
158   ii   = a->i;
159 
160   for (i=0; i<m; i++) {
161     jrow = ii[i];
162     n    = ii[i+1] - jrow;
163     sum1  = 0.0;
164     sum2  = 0.0;
165     for (j=0; j<n; j++) {
166       sum1 += v[jrow]*x[2*idx[jrow]];
167       sum2 += v[jrow]*x[2*idx[jrow]+1];
168       jrow++;
169      }
170     y[2*i]   = sum1;
171     y[2*i+1] = sum2;
172   }
173 
174   PetscLogFlops(4*a->nz - 2*m);
175   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
176   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
182 int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183 {
184   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
185   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
186   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
187   int           ierr,m = b->AIJ->m,n,i,*idx;
188 
189   PetscFunctionBegin;
190   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
191   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
192   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
193 
194   for (i=0; i<m; i++) {
195     idx    = a->j + a->i[i] ;
196     v      = a->a + a->i[i] ;
197     n      = a->i[i+1] - a->i[i];
198     alpha1 = x[2*i];
199     alpha2 = x[2*i+1];
200     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
201   }
202   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
203   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
204   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
208 #undef __FUNCT__
209 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
210 int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
211 {
212   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
213   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
214   PetscScalar   *x,*y,*v,sum1, sum2;
215   int           ierr,m = b->AIJ->m,*idx,*ii;
216   int           n,i,jrow,j;
217 
218   PetscFunctionBegin;
219   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
220   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
221   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
222   idx  = a->j;
223   v    = a->a;
224   ii   = a->i;
225 
226   for (i=0; i<m; i++) {
227     jrow = ii[i];
228     n    = ii[i+1] - jrow;
229     sum1  = 0.0;
230     sum2  = 0.0;
231     for (j=0; j<n; j++) {
232       sum1 += v[jrow]*x[2*idx[jrow]];
233       sum2 += v[jrow]*x[2*idx[jrow]+1];
234       jrow++;
235      }
236     y[2*i]   += sum1;
237     y[2*i+1] += sum2;
238   }
239 
240   PetscLogFlops(4*a->nz - 2*m);
241   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
242   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 #undef __FUNCT__
246 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
247 int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
248 {
249   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
250   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
251   PetscScalar   *x,*y,*v,alpha1,alpha2;
252   int           ierr,m = b->AIJ->m,n,i,*idx;
253 
254   PetscFunctionBegin;
255   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
256   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
257   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
258 
259   for (i=0; i<m; i++) {
260     idx   = a->j + a->i[i] ;
261     v     = a->a + a->i[i] ;
262     n     = a->i[i+1] - a->i[i];
263     alpha1 = x[2*i];
264     alpha2 = x[2*i+1];
265     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
266   }
267   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
268   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
269   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 /* --------------------------------------------------------------------------------------*/
273 #undef __FUNCT__
274 #define __FUNCT__ "MatMult_SeqMAIJ_3"
275 int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
276 {
277   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
278   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
279   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
280   int           ierr,m = b->AIJ->m,*idx,*ii;
281   int           n,i,jrow,j;
282 
283   PetscFunctionBegin;
284   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
285   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
286   idx  = a->j;
287   v    = a->a;
288   ii   = a->i;
289 
290   for (i=0; i<m; i++) {
291     jrow = ii[i];
292     n    = ii[i+1] - jrow;
293     sum1  = 0.0;
294     sum2  = 0.0;
295     sum3  = 0.0;
296     for (j=0; j<n; j++) {
297       sum1 += v[jrow]*x[3*idx[jrow]];
298       sum2 += v[jrow]*x[3*idx[jrow]+1];
299       sum3 += v[jrow]*x[3*idx[jrow]+2];
300       jrow++;
301      }
302     y[3*i]   = sum1;
303     y[3*i+1] = sum2;
304     y[3*i+2] = sum3;
305   }
306 
307   PetscLogFlops(6*a->nz - 3*m);
308   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
309   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNCT__
314 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
315 int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316 {
317   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
318   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
319   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
320   int           ierr,m = b->AIJ->m,n,i,*idx;
321 
322   PetscFunctionBegin;
323   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
324   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
325   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
326 
327   for (i=0; i<m; i++) {
328     idx    = a->j + a->i[i];
329     v      = a->a + a->i[i];
330     n      = a->i[i+1] - a->i[i];
331     alpha1 = x[3*i];
332     alpha2 = x[3*i+1];
333     alpha3 = x[3*i+2];
334     while (n-->0) {
335       y[3*(*idx)]   += alpha1*(*v);
336       y[3*(*idx)+1] += alpha2*(*v);
337       y[3*(*idx)+2] += alpha3*(*v);
338       idx++; v++;
339     }
340   }
341   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
342   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
343   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
349 int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
350 {
351   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
352   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
353   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
354   int           ierr,m = b->AIJ->m,*idx,*ii;
355   int           n,i,jrow,j;
356 
357   PetscFunctionBegin;
358   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
359   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
360   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
361   idx  = a->j;
362   v    = a->a;
363   ii   = a->i;
364 
365   for (i=0; i<m; i++) {
366     jrow = ii[i];
367     n    = ii[i+1] - jrow;
368     sum1  = 0.0;
369     sum2  = 0.0;
370     sum3  = 0.0;
371     for (j=0; j<n; j++) {
372       sum1 += v[jrow]*x[3*idx[jrow]];
373       sum2 += v[jrow]*x[3*idx[jrow]+1];
374       sum3 += v[jrow]*x[3*idx[jrow]+2];
375       jrow++;
376      }
377     y[3*i]   += sum1;
378     y[3*i+1] += sum2;
379     y[3*i+2] += sum3;
380   }
381 
382   PetscLogFlops(6*a->nz);
383   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
384   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 #undef __FUNCT__
388 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
389 int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
390 {
391   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
392   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
393   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
394   int           ierr,m = b->AIJ->m,n,i,*idx;
395 
396   PetscFunctionBegin;
397   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
398   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
399   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
400   for (i=0; i<m; i++) {
401     idx    = a->j + a->i[i] ;
402     v      = a->a + a->i[i] ;
403     n      = a->i[i+1] - a->i[i];
404     alpha1 = x[3*i];
405     alpha2 = x[3*i+1];
406     alpha3 = x[3*i+2];
407     while (n-->0) {
408       y[3*(*idx)]   += alpha1*(*v);
409       y[3*(*idx)+1] += alpha2*(*v);
410       y[3*(*idx)+2] += alpha3*(*v);
411       idx++; v++;
412     }
413   }
414   PetscLogFlops(6*a->nz);
415   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
416   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /* ------------------------------------------------------------------------------*/
421 #undef __FUNCT__
422 #define __FUNCT__ "MatMult_SeqMAIJ_4"
423 int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
424 {
425   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
426   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
427   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
428   int           ierr,m = b->AIJ->m,*idx,*ii;
429   int           n,i,jrow,j;
430 
431   PetscFunctionBegin;
432   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
434   idx  = a->j;
435   v    = a->a;
436   ii   = a->i;
437 
438   for (i=0; i<m; i++) {
439     jrow = ii[i];
440     n    = ii[i+1] - jrow;
441     sum1  = 0.0;
442     sum2  = 0.0;
443     sum3  = 0.0;
444     sum4  = 0.0;
445     for (j=0; j<n; j++) {
446       sum1 += v[jrow]*x[4*idx[jrow]];
447       sum2 += v[jrow]*x[4*idx[jrow]+1];
448       sum3 += v[jrow]*x[4*idx[jrow]+2];
449       sum4 += v[jrow]*x[4*idx[jrow]+3];
450       jrow++;
451      }
452     y[4*i]   = sum1;
453     y[4*i+1] = sum2;
454     y[4*i+2] = sum3;
455     y[4*i+3] = sum4;
456   }
457 
458   PetscLogFlops(8*a->nz - 4*m);
459   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
460   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
466 int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
467 {
468   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
469   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
470   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
471   int           ierr,m = b->AIJ->m,n,i,*idx;
472 
473   PetscFunctionBegin;
474   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
475   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
476   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
477   for (i=0; i<m; i++) {
478     idx    = a->j + a->i[i] ;
479     v      = a->a + a->i[i] ;
480     n      = a->i[i+1] - a->i[i];
481     alpha1 = x[4*i];
482     alpha2 = x[4*i+1];
483     alpha3 = x[4*i+2];
484     alpha4 = x[4*i+3];
485     while (n-->0) {
486       y[4*(*idx)]   += alpha1*(*v);
487       y[4*(*idx)+1] += alpha2*(*v);
488       y[4*(*idx)+2] += alpha3*(*v);
489       y[4*(*idx)+3] += alpha4*(*v);
490       idx++; v++;
491     }
492   }
493   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
494   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
495   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNCT__
500 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
501 int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
502 {
503   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
504   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
505   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
506   int           ierr,m = b->AIJ->m,*idx,*ii;
507   int           n,i,jrow,j;
508 
509   PetscFunctionBegin;
510   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
511   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
512   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
513   idx  = a->j;
514   v    = a->a;
515   ii   = a->i;
516 
517   for (i=0; i<m; i++) {
518     jrow = ii[i];
519     n    = ii[i+1] - jrow;
520     sum1  = 0.0;
521     sum2  = 0.0;
522     sum3  = 0.0;
523     sum4  = 0.0;
524     for (j=0; j<n; j++) {
525       sum1 += v[jrow]*x[4*idx[jrow]];
526       sum2 += v[jrow]*x[4*idx[jrow]+1];
527       sum3 += v[jrow]*x[4*idx[jrow]+2];
528       sum4 += v[jrow]*x[4*idx[jrow]+3];
529       jrow++;
530      }
531     y[4*i]   += sum1;
532     y[4*i+1] += sum2;
533     y[4*i+2] += sum3;
534     y[4*i+3] += sum4;
535   }
536 
537   PetscLogFlops(8*a->nz - 4*m);
538   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
539   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
540   PetscFunctionReturn(0);
541 }
542 #undef __FUNCT__
543 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
544 int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
545 {
546   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
547   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
548   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
549   int           ierr,m = b->AIJ->m,n,i,*idx;
550 
551   PetscFunctionBegin;
552   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
553   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
554   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
555 
556   for (i=0; i<m; i++) {
557     idx    = a->j + a->i[i] ;
558     v      = a->a + a->i[i] ;
559     n      = a->i[i+1] - a->i[i];
560     alpha1 = x[4*i];
561     alpha2 = x[4*i+1];
562     alpha3 = x[4*i+2];
563     alpha4 = x[4*i+3];
564     while (n-->0) {
565       y[4*(*idx)]   += alpha1*(*v);
566       y[4*(*idx)+1] += alpha2*(*v);
567       y[4*(*idx)+2] += alpha3*(*v);
568       y[4*(*idx)+3] += alpha4*(*v);
569       idx++; v++;
570     }
571   }
572   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
573   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
574   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577 /* ------------------------------------------------------------------------------*/
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_SeqMAIJ_5"
581 int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
582 {
583   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
584   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
585   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
586   int           ierr,m = b->AIJ->m,*idx,*ii;
587   int           n,i,jrow,j;
588 
589   PetscFunctionBegin;
590   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
591   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
592   idx  = a->j;
593   v    = a->a;
594   ii   = a->i;
595 
596   for (i=0; i<m; i++) {
597     jrow = ii[i];
598     n    = ii[i+1] - jrow;
599     sum1  = 0.0;
600     sum2  = 0.0;
601     sum3  = 0.0;
602     sum4  = 0.0;
603     sum5  = 0.0;
604     for (j=0; j<n; j++) {
605       sum1 += v[jrow]*x[5*idx[jrow]];
606       sum2 += v[jrow]*x[5*idx[jrow]+1];
607       sum3 += v[jrow]*x[5*idx[jrow]+2];
608       sum4 += v[jrow]*x[5*idx[jrow]+3];
609       sum5 += v[jrow]*x[5*idx[jrow]+4];
610       jrow++;
611      }
612     y[5*i]   = sum1;
613     y[5*i+1] = sum2;
614     y[5*i+2] = sum3;
615     y[5*i+3] = sum4;
616     y[5*i+4] = sum5;
617   }
618 
619   PetscLogFlops(10*a->nz - 5*m);
620   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
621   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
627 int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
628 {
629   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
630   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
631   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
632   int           ierr,m = b->AIJ->m,n,i,*idx;
633 
634   PetscFunctionBegin;
635   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
636   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
637   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
638 
639   for (i=0; i<m; i++) {
640     idx    = a->j + a->i[i] ;
641     v      = a->a + a->i[i] ;
642     n      = a->i[i+1] - a->i[i];
643     alpha1 = x[5*i];
644     alpha2 = x[5*i+1];
645     alpha3 = x[5*i+2];
646     alpha4 = x[5*i+3];
647     alpha5 = x[5*i+4];
648     while (n-->0) {
649       y[5*(*idx)]   += alpha1*(*v);
650       y[5*(*idx)+1] += alpha2*(*v);
651       y[5*(*idx)+2] += alpha3*(*v);
652       y[5*(*idx)+3] += alpha4*(*v);
653       y[5*(*idx)+4] += alpha5*(*v);
654       idx++; v++;
655     }
656   }
657   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
658   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
659   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
660   PetscFunctionReturn(0);
661 }
662 
663 #undef __FUNCT__
664 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
665 int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
666 {
667   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
668   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
669   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
670   int           ierr,m = b->AIJ->m,*idx,*ii;
671   int           n,i,jrow,j;
672 
673   PetscFunctionBegin;
674   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
675   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
676   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
677   idx  = a->j;
678   v    = a->a;
679   ii   = a->i;
680 
681   for (i=0; i<m; i++) {
682     jrow = ii[i];
683     n    = ii[i+1] - jrow;
684     sum1  = 0.0;
685     sum2  = 0.0;
686     sum3  = 0.0;
687     sum4  = 0.0;
688     sum5  = 0.0;
689     for (j=0; j<n; j++) {
690       sum1 += v[jrow]*x[5*idx[jrow]];
691       sum2 += v[jrow]*x[5*idx[jrow]+1];
692       sum3 += v[jrow]*x[5*idx[jrow]+2];
693       sum4 += v[jrow]*x[5*idx[jrow]+3];
694       sum5 += v[jrow]*x[5*idx[jrow]+4];
695       jrow++;
696      }
697     y[5*i]   += sum1;
698     y[5*i+1] += sum2;
699     y[5*i+2] += sum3;
700     y[5*i+3] += sum4;
701     y[5*i+4] += sum5;
702   }
703 
704   PetscLogFlops(10*a->nz);
705   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
706   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
712 int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
713 {
714   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
715   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
716   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
717   int           ierr,m = b->AIJ->m,n,i,*idx;
718 
719   PetscFunctionBegin;
720   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
721   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
722   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
723 
724   for (i=0; i<m; i++) {
725     idx    = a->j + a->i[i] ;
726     v      = a->a + a->i[i] ;
727     n      = a->i[i+1] - a->i[i];
728     alpha1 = x[5*i];
729     alpha2 = x[5*i+1];
730     alpha3 = x[5*i+2];
731     alpha4 = x[5*i+3];
732     alpha5 = x[5*i+4];
733     while (n-->0) {
734       y[5*(*idx)]   += alpha1*(*v);
735       y[5*(*idx)+1] += alpha2*(*v);
736       y[5*(*idx)+2] += alpha3*(*v);
737       y[5*(*idx)+3] += alpha4*(*v);
738       y[5*(*idx)+4] += alpha5*(*v);
739       idx++; v++;
740     }
741   }
742   PetscLogFlops(10*a->nz);
743   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
744   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
745   PetscFunctionReturn(0);
746 }
747 
748 /* ------------------------------------------------------------------------------*/
749 #undef __FUNCT__
750 #define __FUNCT__ "MatMult_SeqMAIJ_6"
751 int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
752 {
753   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
754   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
755   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
756   int           ierr,m = b->AIJ->m,*idx,*ii;
757   int           n,i,jrow,j;
758 
759   PetscFunctionBegin;
760   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
761   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
762   idx  = a->j;
763   v    = a->a;
764   ii   = a->i;
765 
766   for (i=0; i<m; i++) {
767     jrow = ii[i];
768     n    = ii[i+1] - jrow;
769     sum1  = 0.0;
770     sum2  = 0.0;
771     sum3  = 0.0;
772     sum4  = 0.0;
773     sum5  = 0.0;
774     sum6  = 0.0;
775     for (j=0; j<n; j++) {
776       sum1 += v[jrow]*x[6*idx[jrow]];
777       sum2 += v[jrow]*x[6*idx[jrow]+1];
778       sum3 += v[jrow]*x[6*idx[jrow]+2];
779       sum4 += v[jrow]*x[6*idx[jrow]+3];
780       sum5 += v[jrow]*x[6*idx[jrow]+4];
781       sum6 += v[jrow]*x[6*idx[jrow]+5];
782       jrow++;
783      }
784     y[6*i]   = sum1;
785     y[6*i+1] = sum2;
786     y[6*i+2] = sum3;
787     y[6*i+3] = sum4;
788     y[6*i+4] = sum5;
789     y[6*i+5] = sum6;
790   }
791 
792   PetscLogFlops(12*a->nz - 6*m);
793   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
794   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
800 int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
801 {
802   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
803   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
804   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
805   int           ierr,m = b->AIJ->m,n,i,*idx;
806 
807   PetscFunctionBegin;
808   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
809   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
810   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
811 
812   for (i=0; i<m; i++) {
813     idx    = a->j + a->i[i] ;
814     v      = a->a + a->i[i] ;
815     n      = a->i[i+1] - a->i[i];
816     alpha1 = x[6*i];
817     alpha2 = x[6*i+1];
818     alpha3 = x[6*i+2];
819     alpha4 = x[6*i+3];
820     alpha5 = x[6*i+4];
821     alpha6 = x[6*i+5];
822     while (n-->0) {
823       y[6*(*idx)]   += alpha1*(*v);
824       y[6*(*idx)+1] += alpha2*(*v);
825       y[6*(*idx)+2] += alpha3*(*v);
826       y[6*(*idx)+3] += alpha4*(*v);
827       y[6*(*idx)+4] += alpha5*(*v);
828       y[6*(*idx)+5] += alpha6*(*v);
829       idx++; v++;
830     }
831   }
832   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
833   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
834   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
840 int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
841 {
842   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
843   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
844   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
845   int           ierr,m = b->AIJ->m,*idx,*ii;
846   int           n,i,jrow,j;
847 
848   PetscFunctionBegin;
849   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
850   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
851   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
852   idx  = a->j;
853   v    = a->a;
854   ii   = a->i;
855 
856   for (i=0; i<m; i++) {
857     jrow = ii[i];
858     n    = ii[i+1] - jrow;
859     sum1  = 0.0;
860     sum2  = 0.0;
861     sum3  = 0.0;
862     sum4  = 0.0;
863     sum5  = 0.0;
864     sum6  = 0.0;
865     for (j=0; j<n; j++) {
866       sum1 += v[jrow]*x[6*idx[jrow]];
867       sum2 += v[jrow]*x[6*idx[jrow]+1];
868       sum3 += v[jrow]*x[6*idx[jrow]+2];
869       sum4 += v[jrow]*x[6*idx[jrow]+3];
870       sum5 += v[jrow]*x[6*idx[jrow]+4];
871       sum6 += v[jrow]*x[6*idx[jrow]+5];
872       jrow++;
873      }
874     y[6*i]   += sum1;
875     y[6*i+1] += sum2;
876     y[6*i+2] += sum3;
877     y[6*i+3] += sum4;
878     y[6*i+4] += sum5;
879     y[6*i+5] += sum6;
880   }
881 
882   PetscLogFlops(12*a->nz);
883   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
884   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
885   PetscFunctionReturn(0);
886 }
887 
888 #undef __FUNCT__
889 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
890 int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
891 {
892   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
893   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
894   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
895   int           ierr,m = b->AIJ->m,n,i,*idx;
896 
897   PetscFunctionBegin;
898   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
899   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
900   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
901 
902   for (i=0; i<m; i++) {
903     idx    = a->j + a->i[i] ;
904     v      = a->a + a->i[i] ;
905     n      = a->i[i+1] - a->i[i];
906     alpha1 = x[6*i];
907     alpha2 = x[6*i+1];
908     alpha3 = x[6*i+2];
909     alpha4 = x[6*i+3];
910     alpha5 = x[6*i+4];
911     alpha6 = x[6*i+5];
912     while (n-->0) {
913       y[6*(*idx)]   += alpha1*(*v);
914       y[6*(*idx)+1] += alpha2*(*v);
915       y[6*(*idx)+2] += alpha3*(*v);
916       y[6*(*idx)+3] += alpha4*(*v);
917       y[6*(*idx)+4] += alpha5*(*v);
918       y[6*(*idx)+5] += alpha6*(*v);
919       idx++; v++;
920     }
921   }
922   PetscLogFlops(12*a->nz);
923   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
924   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 /* ------------------------------------------------------------------------------*/
929 #undef __FUNCT__
930 #define __FUNCT__ "MatMult_SeqMAIJ_8"
931 int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
932 {
933   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
934   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
935   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
936   int           ierr,m = b->AIJ->m,*idx,*ii;
937   int           n,i,jrow,j;
938 
939   PetscFunctionBegin;
940   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
941   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
942   idx  = a->j;
943   v    = a->a;
944   ii   = a->i;
945 
946   for (i=0; i<m; i++) {
947     jrow = ii[i];
948     n    = ii[i+1] - jrow;
949     sum1  = 0.0;
950     sum2  = 0.0;
951     sum3  = 0.0;
952     sum4  = 0.0;
953     sum5  = 0.0;
954     sum6  = 0.0;
955     sum7  = 0.0;
956     sum8  = 0.0;
957     for (j=0; j<n; j++) {
958       sum1 += v[jrow]*x[8*idx[jrow]];
959       sum2 += v[jrow]*x[8*idx[jrow]+1];
960       sum3 += v[jrow]*x[8*idx[jrow]+2];
961       sum4 += v[jrow]*x[8*idx[jrow]+3];
962       sum5 += v[jrow]*x[8*idx[jrow]+4];
963       sum6 += v[jrow]*x[8*idx[jrow]+5];
964       sum7 += v[jrow]*x[8*idx[jrow]+6];
965       sum8 += v[jrow]*x[8*idx[jrow]+7];
966       jrow++;
967      }
968     y[8*i]   = sum1;
969     y[8*i+1] = sum2;
970     y[8*i+2] = sum3;
971     y[8*i+3] = sum4;
972     y[8*i+4] = sum5;
973     y[8*i+5] = sum6;
974     y[8*i+6] = sum7;
975     y[8*i+7] = sum8;
976   }
977 
978   PetscLogFlops(16*a->nz - 8*m);
979   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
980   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
986 int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
987 {
988   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
989   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
990   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
991   int           ierr,m = b->AIJ->m,n,i,*idx;
992 
993   PetscFunctionBegin;
994   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
995   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
996   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
997 
998   for (i=0; i<m; i++) {
999     idx    = a->j + a->i[i] ;
1000     v      = a->a + a->i[i] ;
1001     n      = a->i[i+1] - a->i[i];
1002     alpha1 = x[8*i];
1003     alpha2 = x[8*i+1];
1004     alpha3 = x[8*i+2];
1005     alpha4 = x[8*i+3];
1006     alpha5 = x[8*i+4];
1007     alpha6 = x[8*i+5];
1008     alpha7 = x[8*i+6];
1009     alpha8 = x[8*i+7];
1010     while (n-->0) {
1011       y[8*(*idx)]   += alpha1*(*v);
1012       y[8*(*idx)+1] += alpha2*(*v);
1013       y[8*(*idx)+2] += alpha3*(*v);
1014       y[8*(*idx)+3] += alpha4*(*v);
1015       y[8*(*idx)+4] += alpha5*(*v);
1016       y[8*(*idx)+5] += alpha6*(*v);
1017       y[8*(*idx)+6] += alpha7*(*v);
1018       y[8*(*idx)+7] += alpha8*(*v);
1019       idx++; v++;
1020     }
1021   }
1022   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1023   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1024   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1030 int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1031 {
1032   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1033   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1034   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1035   int           ierr,m = b->AIJ->m,*idx,*ii;
1036   int           n,i,jrow,j;
1037 
1038   PetscFunctionBegin;
1039   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1040   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1041   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1042   idx  = a->j;
1043   v    = a->a;
1044   ii   = a->i;
1045 
1046   for (i=0; i<m; i++) {
1047     jrow = ii[i];
1048     n    = ii[i+1] - jrow;
1049     sum1  = 0.0;
1050     sum2  = 0.0;
1051     sum3  = 0.0;
1052     sum4  = 0.0;
1053     sum5  = 0.0;
1054     sum6  = 0.0;
1055     sum7  = 0.0;
1056     sum8  = 0.0;
1057     for (j=0; j<n; j++) {
1058       sum1 += v[jrow]*x[8*idx[jrow]];
1059       sum2 += v[jrow]*x[8*idx[jrow]+1];
1060       sum3 += v[jrow]*x[8*idx[jrow]+2];
1061       sum4 += v[jrow]*x[8*idx[jrow]+3];
1062       sum5 += v[jrow]*x[8*idx[jrow]+4];
1063       sum6 += v[jrow]*x[8*idx[jrow]+5];
1064       sum7 += v[jrow]*x[8*idx[jrow]+6];
1065       sum8 += v[jrow]*x[8*idx[jrow]+7];
1066       jrow++;
1067      }
1068     y[8*i]   += sum1;
1069     y[8*i+1] += sum2;
1070     y[8*i+2] += sum3;
1071     y[8*i+3] += sum4;
1072     y[8*i+4] += sum5;
1073     y[8*i+5] += sum6;
1074     y[8*i+6] += sum7;
1075     y[8*i+7] += sum8;
1076   }
1077 
1078   PetscLogFlops(16*a->nz);
1079   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1080   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1086 int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1087 {
1088   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1089   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1090   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1091   int           ierr,m = b->AIJ->m,n,i,*idx;
1092 
1093   PetscFunctionBegin;
1094   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1095   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1096   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1097   for (i=0; i<m; i++) {
1098     idx    = a->j + a->i[i] ;
1099     v      = a->a + a->i[i] ;
1100     n      = a->i[i+1] - a->i[i];
1101     alpha1 = x[8*i];
1102     alpha2 = x[8*i+1];
1103     alpha3 = x[8*i+2];
1104     alpha4 = x[8*i+3];
1105     alpha5 = x[8*i+4];
1106     alpha6 = x[8*i+5];
1107     alpha7 = x[8*i+6];
1108     alpha8 = x[8*i+7];
1109     while (n-->0) {
1110       y[8*(*idx)]   += alpha1*(*v);
1111       y[8*(*idx)+1] += alpha2*(*v);
1112       y[8*(*idx)+2] += alpha3*(*v);
1113       y[8*(*idx)+3] += alpha4*(*v);
1114       y[8*(*idx)+4] += alpha5*(*v);
1115       y[8*(*idx)+5] += alpha6*(*v);
1116       y[8*(*idx)+6] += alpha7*(*v);
1117       y[8*(*idx)+7] += alpha8*(*v);
1118       idx++; v++;
1119     }
1120   }
1121   PetscLogFlops(16*a->nz);
1122   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1123   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 /*--------------------------------------------------------------------------------------------*/
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1130 int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1131 {
1132   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1133   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1134   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1135   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1136   int           ierr,m = b->AIJ->m,*idx,*ii;
1137   int           n,i,jrow,j;
1138 
1139   PetscFunctionBegin;
1140   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1141   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1142   idx  = a->j;
1143   v    = a->a;
1144   ii   = a->i;
1145 
1146   for (i=0; i<m; i++) {
1147     jrow = ii[i];
1148     n    = ii[i+1] - jrow;
1149     sum1  = 0.0;
1150     sum2  = 0.0;
1151     sum3  = 0.0;
1152     sum4  = 0.0;
1153     sum5  = 0.0;
1154     sum6  = 0.0;
1155     sum7  = 0.0;
1156     sum8  = 0.0;
1157     sum9  = 0.0;
1158     sum10 = 0.0;
1159     sum11 = 0.0;
1160     sum12 = 0.0;
1161     sum13 = 0.0;
1162     sum14 = 0.0;
1163     sum15 = 0.0;
1164     sum16 = 0.0;
1165     for (j=0; j<n; j++) {
1166       sum1  += v[jrow]*x[16*idx[jrow]];
1167       sum2  += v[jrow]*x[16*idx[jrow]+1];
1168       sum3  += v[jrow]*x[16*idx[jrow]+2];
1169       sum4  += v[jrow]*x[16*idx[jrow]+3];
1170       sum5  += v[jrow]*x[16*idx[jrow]+4];
1171       sum6  += v[jrow]*x[16*idx[jrow]+5];
1172       sum7  += v[jrow]*x[16*idx[jrow]+6];
1173       sum8  += v[jrow]*x[16*idx[jrow]+7];
1174       sum9  += v[jrow]*x[16*idx[jrow]+8];
1175       sum10 += v[jrow]*x[16*idx[jrow]+9];
1176       sum11 += v[jrow]*x[16*idx[jrow]+10];
1177       sum12 += v[jrow]*x[16*idx[jrow]+11];
1178       sum13 += v[jrow]*x[16*idx[jrow]+12];
1179       sum14 += v[jrow]*x[16*idx[jrow]+13];
1180       sum15 += v[jrow]*x[16*idx[jrow]+14];
1181       sum16 += v[jrow]*x[16*idx[jrow]+15];
1182       jrow++;
1183      }
1184     y[16*i]    = sum1;
1185     y[16*i+1]  = sum2;
1186     y[16*i+2]  = sum3;
1187     y[16*i+3]  = sum4;
1188     y[16*i+4]  = sum5;
1189     y[16*i+5]  = sum6;
1190     y[16*i+6]  = sum7;
1191     y[16*i+7]  = sum8;
1192     y[16*i+8]  = sum9;
1193     y[16*i+9]  = sum10;
1194     y[16*i+10] = sum11;
1195     y[16*i+11] = sum12;
1196     y[16*i+12] = sum13;
1197     y[16*i+13] = sum14;
1198     y[16*i+14] = sum15;
1199     y[16*i+15] = sum16;
1200   }
1201 
1202   PetscLogFlops(32*a->nz - 16*m);
1203   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1204   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1210 int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1211 {
1212   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1213   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1214   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1215   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1216   int           ierr,m = b->AIJ->m,n,i,*idx;
1217 
1218   PetscFunctionBegin;
1219   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
1220   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1221   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
1222 
1223   for (i=0; i<m; i++) {
1224     idx    = a->j + a->i[i] ;
1225     v      = a->a + a->i[i] ;
1226     n      = a->i[i+1] - a->i[i];
1227     alpha1  = x[16*i];
1228     alpha2  = x[16*i+1];
1229     alpha3  = x[16*i+2];
1230     alpha4  = x[16*i+3];
1231     alpha5  = x[16*i+4];
1232     alpha6  = x[16*i+5];
1233     alpha7  = x[16*i+6];
1234     alpha8  = x[16*i+7];
1235     alpha9  = x[16*i+8];
1236     alpha10 = x[16*i+9];
1237     alpha11 = x[16*i+10];
1238     alpha12 = x[16*i+11];
1239     alpha13 = x[16*i+12];
1240     alpha14 = x[16*i+13];
1241     alpha15 = x[16*i+14];
1242     alpha16 = x[16*i+15];
1243     while (n-->0) {
1244       y[16*(*idx)]    += alpha1*(*v);
1245       y[16*(*idx)+1]  += alpha2*(*v);
1246       y[16*(*idx)+2]  += alpha3*(*v);
1247       y[16*(*idx)+3]  += alpha4*(*v);
1248       y[16*(*idx)+4]  += alpha5*(*v);
1249       y[16*(*idx)+5]  += alpha6*(*v);
1250       y[16*(*idx)+6]  += alpha7*(*v);
1251       y[16*(*idx)+7]  += alpha8*(*v);
1252       y[16*(*idx)+8]  += alpha9*(*v);
1253       y[16*(*idx)+9]  += alpha10*(*v);
1254       y[16*(*idx)+10] += alpha11*(*v);
1255       y[16*(*idx)+11] += alpha12*(*v);
1256       y[16*(*idx)+12] += alpha13*(*v);
1257       y[16*(*idx)+13] += alpha14*(*v);
1258       y[16*(*idx)+14] += alpha15*(*v);
1259       y[16*(*idx)+15] += alpha16*(*v);
1260       idx++; v++;
1261     }
1262   }
1263   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1264   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1265   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1271 int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1272 {
1273   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1274   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1275   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1276   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1277   int           ierr,m = b->AIJ->m,*idx,*ii;
1278   int           n,i,jrow,j;
1279 
1280   PetscFunctionBegin;
1281   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1282   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1283   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1284   idx  = a->j;
1285   v    = a->a;
1286   ii   = a->i;
1287 
1288   for (i=0; i<m; i++) {
1289     jrow = ii[i];
1290     n    = ii[i+1] - jrow;
1291     sum1  = 0.0;
1292     sum2  = 0.0;
1293     sum3  = 0.0;
1294     sum4  = 0.0;
1295     sum5  = 0.0;
1296     sum6  = 0.0;
1297     sum7  = 0.0;
1298     sum8  = 0.0;
1299     sum9  = 0.0;
1300     sum10 = 0.0;
1301     sum11 = 0.0;
1302     sum12 = 0.0;
1303     sum13 = 0.0;
1304     sum14 = 0.0;
1305     sum15 = 0.0;
1306     sum16 = 0.0;
1307     for (j=0; j<n; j++) {
1308       sum1  += v[jrow]*x[16*idx[jrow]];
1309       sum2  += v[jrow]*x[16*idx[jrow]+1];
1310       sum3  += v[jrow]*x[16*idx[jrow]+2];
1311       sum4  += v[jrow]*x[16*idx[jrow]+3];
1312       sum5  += v[jrow]*x[16*idx[jrow]+4];
1313       sum6  += v[jrow]*x[16*idx[jrow]+5];
1314       sum7  += v[jrow]*x[16*idx[jrow]+6];
1315       sum8  += v[jrow]*x[16*idx[jrow]+7];
1316       sum9  += v[jrow]*x[16*idx[jrow]+8];
1317       sum10 += v[jrow]*x[16*idx[jrow]+9];
1318       sum11 += v[jrow]*x[16*idx[jrow]+10];
1319       sum12 += v[jrow]*x[16*idx[jrow]+11];
1320       sum13 += v[jrow]*x[16*idx[jrow]+12];
1321       sum14 += v[jrow]*x[16*idx[jrow]+13];
1322       sum15 += v[jrow]*x[16*idx[jrow]+14];
1323       sum16 += v[jrow]*x[16*idx[jrow]+15];
1324       jrow++;
1325      }
1326     y[16*i]    += sum1;
1327     y[16*i+1]  += sum2;
1328     y[16*i+2]  += sum3;
1329     y[16*i+3]  += sum4;
1330     y[16*i+4]  += sum5;
1331     y[16*i+5]  += sum6;
1332     y[16*i+6]  += sum7;
1333     y[16*i+7]  += sum8;
1334     y[16*i+8]  += sum9;
1335     y[16*i+9]  += sum10;
1336     y[16*i+10] += sum11;
1337     y[16*i+11] += sum12;
1338     y[16*i+12] += sum13;
1339     y[16*i+13] += sum14;
1340     y[16*i+14] += sum15;
1341     y[16*i+15] += sum16;
1342   }
1343 
1344   PetscLogFlops(32*a->nz);
1345   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1346   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
1352 int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1353 {
1354   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1355   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1356   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1357   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1358   int           ierr,m = b->AIJ->m,n,i,*idx;
1359 
1360   PetscFunctionBegin;
1361   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1362   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1363   ierr = VecGetArrayFast(zz,&y);CHKERRQ(ierr);
1364   for (i=0; i<m; i++) {
1365     idx    = a->j + a->i[i] ;
1366     v      = a->a + a->i[i] ;
1367     n      = a->i[i+1] - a->i[i];
1368     alpha1 = x[16*i];
1369     alpha2 = x[16*i+1];
1370     alpha3 = x[16*i+2];
1371     alpha4 = x[16*i+3];
1372     alpha5 = x[16*i+4];
1373     alpha6 = x[16*i+5];
1374     alpha7 = x[16*i+6];
1375     alpha8 = x[16*i+7];
1376     alpha9  = x[16*i+8];
1377     alpha10 = x[16*i+9];
1378     alpha11 = x[16*i+10];
1379     alpha12 = x[16*i+11];
1380     alpha13 = x[16*i+12];
1381     alpha14 = x[16*i+13];
1382     alpha15 = x[16*i+14];
1383     alpha16 = x[16*i+15];
1384     while (n-->0) {
1385       y[16*(*idx)]   += alpha1*(*v);
1386       y[16*(*idx)+1] += alpha2*(*v);
1387       y[16*(*idx)+2] += alpha3*(*v);
1388       y[16*(*idx)+3] += alpha4*(*v);
1389       y[16*(*idx)+4] += alpha5*(*v);
1390       y[16*(*idx)+5] += alpha6*(*v);
1391       y[16*(*idx)+6] += alpha7*(*v);
1392       y[16*(*idx)+7] += alpha8*(*v);
1393       y[16*(*idx)+8]  += alpha9*(*v);
1394       y[16*(*idx)+9]  += alpha10*(*v);
1395       y[16*(*idx)+10] += alpha11*(*v);
1396       y[16*(*idx)+11] += alpha12*(*v);
1397       y[16*(*idx)+12] += alpha13*(*v);
1398       y[16*(*idx)+13] += alpha14*(*v);
1399       y[16*(*idx)+14] += alpha15*(*v);
1400       y[16*(*idx)+15] += alpha16*(*v);
1401       idx++; v++;
1402     }
1403   }
1404   PetscLogFlops(32*a->nz);
1405   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1406   ierr = VecRestoreArrayFast(zz,&y);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 /*===================================================================================*/
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
1413 int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1414 {
1415   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1416   int         ierr;
1417   PetscFunctionBegin;
1418 
1419   /* start the scatter */
1420   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1421   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
1422   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1423   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
1429 int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1430 {
1431   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1432   int         ierr;
1433   PetscFunctionBegin;
1434   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1435   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1436   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
1437   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
1443 int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1444 {
1445   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1446   int         ierr;
1447   PetscFunctionBegin;
1448 
1449   /* start the scatter */
1450   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1451   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1452   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
1453   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr);
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
1459 int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1460 {
1461   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1462   int         ierr;
1463   PetscFunctionBegin;
1464   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
1465   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1466   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
1467   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 /* ---------------------------------------------------------------------------------- */
1472 /*@C
1473   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1474   operations for multicomponent problems.  It interpolates each component the same
1475   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
1476   and MATMPIAIJ for distributed matrices.
1477 
1478   Operations provided:
1479 . MatMult
1480 . MatMultTranspose
1481 . MatMultAdd
1482 . MatMultTransposeAdd
1483 
1484   Level: advanced
1485 
1486 M*/
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatCreateMAIJ"
1489 int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1490 {
1491   int         ierr,size,n;
1492   Mat_MPIMAIJ *b;
1493   Mat         B;
1494 
1495   PetscFunctionBegin;
1496   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
1497 
1498   if (dof == 1) {
1499     *maij = A;
1500   } else {
1501     ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr);
1502     B->assembled    = PETSC_TRUE;
1503 
1504     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1505     if (size == 1) {
1506       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
1507       B->ops->destroy = MatDestroy_SeqMAIJ;
1508       b      = (Mat_MPIMAIJ*)B->data;
1509       b->dof = dof;
1510       b->AIJ = A;
1511       if (dof == 2) {
1512         B->ops->mult             = MatMult_SeqMAIJ_2;
1513         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1514         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1515         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1516       } else if (dof == 3) {
1517         B->ops->mult             = MatMult_SeqMAIJ_3;
1518         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1519         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1520         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1521       } else if (dof == 4) {
1522         B->ops->mult             = MatMult_SeqMAIJ_4;
1523         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1524         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1525         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1526       } else if (dof == 5) {
1527         B->ops->mult             = MatMult_SeqMAIJ_5;
1528         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1529         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1530         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1531       } else if (dof == 6) {
1532         B->ops->mult             = MatMult_SeqMAIJ_6;
1533         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1534         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1535         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1536       } else if (dof == 8) {
1537         B->ops->mult             = MatMult_SeqMAIJ_8;
1538         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1539         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1540         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1541       } else if (dof == 16) {
1542         B->ops->mult             = MatMult_SeqMAIJ_16;
1543         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1544         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1545         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1546       } else {
1547         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1548       }
1549     } else {
1550       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1551       IS         from,to;
1552       Vec        gvec;
1553       int        *garray,i;
1554 
1555       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
1556       B->ops->destroy = MatDestroy_MPIMAIJ;
1557       b      = (Mat_MPIMAIJ*)B->data;
1558       b->dof = dof;
1559       b->A   = A;
1560       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
1561       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
1562 
1563       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
1564       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
1565 
1566       /* create two temporary Index sets for build scatter gather */
1567       ierr = PetscMalloc((n+1)*sizeof(int),&garray);CHKERRQ(ierr);
1568       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1569       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
1570       ierr = PetscFree(garray);CHKERRQ(ierr);
1571       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
1572 
1573       /* create temporary global vector to generate scatter context */
1574       ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr);
1575 
1576       /* generate the scatter context */
1577       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
1578 
1579       ierr = ISDestroy(from);CHKERRQ(ierr);
1580       ierr = ISDestroy(to);CHKERRQ(ierr);
1581       ierr = VecDestroy(gvec);CHKERRQ(ierr);
1582 
1583       B->ops->mult             = MatMult_MPIMAIJ_dof;
1584       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1585       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1586       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1587     }
1588     *maij = B;
1589   }
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 
1594 
1595 
1596 
1597 
1598 
1599 
1600 
1601 
1602 
1603 
1604