xref: /petsc/src/mat/impls/maij/maij.c (revision 285fb4e2b69b3de46a0633bd0adc6a7f684caa1e)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes:
36     The reference count on the AIJ matrix is not increased so you should not destroy it.
37 
38 .seealso: MatCreateMAIJ()
39 @*/
40 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41 {
42   PetscErrorCode ierr;
43   PetscBool      ismpimaij,isseqmaij;
44 
45   PetscFunctionBegin;
46   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
47   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
48   if (ismpimaij) {
49     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
50 
51     *B = b->A;
52   } else if (isseqmaij) {
53     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
54 
55     *B = b->AIJ;
56   } else {
57     *B = A;
58   }
59   PetscFunctionReturn(0);
60 }
61 
62 /*@
63    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
64 
65    Logically Collective
66 
67    Input Parameter:
68 +  A - the MAIJ matrix
69 -  dof - the block size for the new matrix
70 
71    Output Parameter:
72 .  B - the new MAIJ matrix
73 
74    Level: advanced
75 
76 .seealso: MatCreateMAIJ()
77 @*/
78 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79 {
80   PetscErrorCode ierr;
81   Mat            Aij = NULL;
82 
83   PetscFunctionBegin;
84   PetscValidLogicalCollectiveInt(A,dof,2);
85   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
86   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
91 {
92   PetscErrorCode ierr;
93   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
94 
95   PetscFunctionBegin;
96   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
97   ierr = PetscFree(A->data);CHKERRQ(ierr);
98   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
99   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
100   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);CHKERRQ(ierr);
101   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 PetscErrorCode MatSetUp_MAIJ(Mat A)
106 {
107   PetscFunctionBegin;
108   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
109   PetscFunctionReturn(0);
110 }
111 
112 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
113 {
114   PetscErrorCode ierr;
115   Mat            B;
116 
117   PetscFunctionBegin;
118   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
119   ierr = MatView(B,viewer);CHKERRQ(ierr);
120   ierr = MatDestroy(&B);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
125 {
126   PetscErrorCode ierr;
127   Mat            B;
128 
129   PetscFunctionBegin;
130   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
131   ierr = MatView(B,viewer);CHKERRQ(ierr);
132   ierr = MatDestroy(&B);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
137 {
138   PetscErrorCode ierr;
139   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
140 
141   PetscFunctionBegin;
142   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
143   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
144   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
145   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
146   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
147   ierr = PetscFree(A->data);CHKERRQ(ierr);
148   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
149   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
150   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
151   PetscFunctionReturn(0);
152 }
153 
154 /*MC
155   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
156   multicomponent problems, interpolating or restricting each component the same way independently.
157   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
158 
159   Operations provided:
160 . MatMult
161 . MatMultTranspose
162 . MatMultAdd
163 . MatMultTransposeAdd
164 
165   Level: advanced
166 
167 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
168 M*/
169 
170 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
171 {
172   PetscErrorCode ierr;
173   Mat_MPIMAIJ    *b;
174   PetscMPIInt    size;
175 
176   PetscFunctionBegin;
177   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
178   A->data  = (void*)b;
179 
180   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
181 
182   A->ops->setup = MatSetUp_MAIJ;
183 
184   b->AIJ  = 0;
185   b->dof  = 0;
186   b->OAIJ = 0;
187   b->ctx  = 0;
188   b->w    = 0;
189   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
190   if (size == 1) {
191     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
192   } else {
193     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
194   }
195   A->preallocated  = PETSC_TRUE;
196   A->assembled     = PETSC_TRUE;
197   PetscFunctionReturn(0);
198 }
199 
200 /* --------------------------------------------------------------------------------------*/
201 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
202 {
203   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
204   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
205   const PetscScalar *x,*v;
206   PetscScalar       *y, sum1, sum2;
207   PetscErrorCode    ierr;
208   PetscInt          nonzerorow=0,n,i,jrow,j;
209   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
210 
211   PetscFunctionBegin;
212   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
213   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
214   idx  = a->j;
215   v    = a->a;
216   ii   = a->i;
217 
218   for (i=0; i<m; i++) {
219     jrow  = ii[i];
220     n     = ii[i+1] - jrow;
221     sum1  = 0.0;
222     sum2  = 0.0;
223 
224     nonzerorow += (n>0);
225     for (j=0; j<n; j++) {
226       sum1 += v[jrow]*x[2*idx[jrow]];
227       sum2 += v[jrow]*x[2*idx[jrow]+1];
228       jrow++;
229     }
230     y[2*i]   = sum1;
231     y[2*i+1] = sum2;
232   }
233 
234   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
235   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
236   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
240 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
241 {
242   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
243   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
244   const PetscScalar *x,*v;
245   PetscScalar       *y,alpha1,alpha2;
246   PetscErrorCode    ierr;
247   PetscInt          n,i;
248   const PetscInt    m = b->AIJ->rmap->n,*idx;
249 
250   PetscFunctionBegin;
251   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
252   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
253   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
254 
255   for (i=0; i<m; i++) {
256     idx    = a->j + a->i[i];
257     v      = a->a + a->i[i];
258     n      = a->i[i+1] - a->i[i];
259     alpha1 = x[2*i];
260     alpha2 = x[2*i+1];
261     while (n-->0) {
262       y[2*(*idx)]   += alpha1*(*v);
263       y[2*(*idx)+1] += alpha2*(*v);
264       idx++; v++;
265     }
266   }
267   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
268   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
269   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
274 {
275   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
276   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
277   const PetscScalar *x,*v;
278   PetscScalar       *y,sum1, sum2;
279   PetscErrorCode    ierr;
280   PetscInt          n,i,jrow,j;
281   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
282 
283   PetscFunctionBegin;
284   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
285   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
286   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
287   idx  = a->j;
288   v    = a->a;
289   ii   = a->i;
290 
291   for (i=0; i<m; i++) {
292     jrow = ii[i];
293     n    = ii[i+1] - jrow;
294     sum1 = 0.0;
295     sum2 = 0.0;
296     for (j=0; j<n; j++) {
297       sum1 += v[jrow]*x[2*idx[jrow]];
298       sum2 += v[jrow]*x[2*idx[jrow]+1];
299       jrow++;
300     }
301     y[2*i]   += sum1;
302     y[2*i+1] += sum2;
303   }
304 
305   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
306   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
307   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
308   PetscFunctionReturn(0);
309 }
310 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
311 {
312   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
313   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
314   const PetscScalar *x,*v;
315   PetscScalar       *y,alpha1,alpha2;
316   PetscErrorCode    ierr;
317   PetscInt          n,i;
318   const PetscInt    m = b->AIJ->rmap->n,*idx;
319 
320   PetscFunctionBegin;
321   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
322   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
323   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
324 
325   for (i=0; i<m; i++) {
326     idx    = a->j + a->i[i];
327     v      = a->a + a->i[i];
328     n      = a->i[i+1] - a->i[i];
329     alpha1 = x[2*i];
330     alpha2 = x[2*i+1];
331     while (n-->0) {
332       y[2*(*idx)]   += alpha1*(*v);
333       y[2*(*idx)+1] += alpha2*(*v);
334       idx++; v++;
335     }
336   }
337   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
338   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
339   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 /* --------------------------------------------------------------------------------------*/
343 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
344 {
345   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
346   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
347   const PetscScalar *x,*v;
348   PetscScalar       *y,sum1, sum2, sum3;
349   PetscErrorCode    ierr;
350   PetscInt          nonzerorow=0,n,i,jrow,j;
351   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
352 
353   PetscFunctionBegin;
354   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
355   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
356   idx  = a->j;
357   v    = a->a;
358   ii   = a->i;
359 
360   for (i=0; i<m; i++) {
361     jrow  = ii[i];
362     n     = ii[i+1] - jrow;
363     sum1  = 0.0;
364     sum2  = 0.0;
365     sum3  = 0.0;
366 
367     nonzerorow += (n>0);
368     for (j=0; j<n; j++) {
369       sum1 += v[jrow]*x[3*idx[jrow]];
370       sum2 += v[jrow]*x[3*idx[jrow]+1];
371       sum3 += v[jrow]*x[3*idx[jrow]+2];
372       jrow++;
373      }
374     y[3*i]   = sum1;
375     y[3*i+1] = sum2;
376     y[3*i+2] = sum3;
377   }
378 
379   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
380   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
381   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
382   PetscFunctionReturn(0);
383 }
384 
385 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
386 {
387   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
388   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
389   const PetscScalar *x,*v;
390   PetscScalar       *y,alpha1,alpha2,alpha3;
391   PetscErrorCode    ierr;
392   PetscInt          n,i;
393   const PetscInt    m = b->AIJ->rmap->n,*idx;
394 
395   PetscFunctionBegin;
396   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
397   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
398   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
399 
400   for (i=0; i<m; i++) {
401     idx    = a->j + a->i[i];
402     v      = a->a + a->i[i];
403     n      = a->i[i+1] - a->i[i];
404     alpha1 = x[3*i];
405     alpha2 = x[3*i+1];
406     alpha3 = x[3*i+2];
407     while (n-->0) {
408       y[3*(*idx)]   += alpha1*(*v);
409       y[3*(*idx)+1] += alpha2*(*v);
410       y[3*(*idx)+2] += alpha3*(*v);
411       idx++; v++;
412     }
413   }
414   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
415   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
416   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
421 {
422   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
423   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
424   const PetscScalar *x,*v;
425   PetscScalar       *y,sum1, sum2, sum3;
426   PetscErrorCode    ierr;
427   PetscInt          n,i,jrow,j;
428   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
429 
430   PetscFunctionBegin;
431   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
432   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
434   idx  = a->j;
435   v    = a->a;
436   ii   = a->i;
437 
438   for (i=0; i<m; i++) {
439     jrow = ii[i];
440     n    = ii[i+1] - jrow;
441     sum1 = 0.0;
442     sum2 = 0.0;
443     sum3 = 0.0;
444     for (j=0; j<n; j++) {
445       sum1 += v[jrow]*x[3*idx[jrow]];
446       sum2 += v[jrow]*x[3*idx[jrow]+1];
447       sum3 += v[jrow]*x[3*idx[jrow]+2];
448       jrow++;
449     }
450     y[3*i]   += sum1;
451     y[3*i+1] += sum2;
452     y[3*i+2] += sum3;
453   }
454 
455   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
456   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
457   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
461 {
462   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
463   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
464   const PetscScalar *x,*v;
465   PetscScalar       *y,alpha1,alpha2,alpha3;
466   PetscErrorCode    ierr;
467   PetscInt          n,i;
468   const PetscInt    m = b->AIJ->rmap->n,*idx;
469 
470   PetscFunctionBegin;
471   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
472   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
473   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
474   for (i=0; i<m; i++) {
475     idx    = a->j + a->i[i];
476     v      = a->a + a->i[i];
477     n      = a->i[i+1] - a->i[i];
478     alpha1 = x[3*i];
479     alpha2 = x[3*i+1];
480     alpha3 = x[3*i+2];
481     while (n-->0) {
482       y[3*(*idx)]   += alpha1*(*v);
483       y[3*(*idx)+1] += alpha2*(*v);
484       y[3*(*idx)+2] += alpha3*(*v);
485       idx++; v++;
486     }
487   }
488   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
489   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
490   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
491   PetscFunctionReturn(0);
492 }
493 
494 /* ------------------------------------------------------------------------------*/
495 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
496 {
497   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
498   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
499   const PetscScalar *x,*v;
500   PetscScalar       *y,sum1, sum2, sum3, sum4;
501   PetscErrorCode    ierr;
502   PetscInt          nonzerorow=0,n,i,jrow,j;
503   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
504 
505   PetscFunctionBegin;
506   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
507   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
508   idx  = a->j;
509   v    = a->a;
510   ii   = a->i;
511 
512   for (i=0; i<m; i++) {
513     jrow        = ii[i];
514     n           = ii[i+1] - jrow;
515     sum1        = 0.0;
516     sum2        = 0.0;
517     sum3        = 0.0;
518     sum4        = 0.0;
519     nonzerorow += (n>0);
520     for (j=0; j<n; j++) {
521       sum1 += v[jrow]*x[4*idx[jrow]];
522       sum2 += v[jrow]*x[4*idx[jrow]+1];
523       sum3 += v[jrow]*x[4*idx[jrow]+2];
524       sum4 += v[jrow]*x[4*idx[jrow]+3];
525       jrow++;
526     }
527     y[4*i]   = sum1;
528     y[4*i+1] = sum2;
529     y[4*i+2] = sum3;
530     y[4*i+3] = sum4;
531   }
532 
533   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
534   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
535   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
540 {
541   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
542   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
543   const PetscScalar *x,*v;
544   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
545   PetscErrorCode    ierr;
546   PetscInt          n,i;
547   const PetscInt    m = b->AIJ->rmap->n,*idx;
548 
549   PetscFunctionBegin;
550   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
551   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
552   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
553   for (i=0; i<m; i++) {
554     idx    = a->j + a->i[i];
555     v      = a->a + a->i[i];
556     n      = a->i[i+1] - a->i[i];
557     alpha1 = x[4*i];
558     alpha2 = x[4*i+1];
559     alpha3 = x[4*i+2];
560     alpha4 = x[4*i+3];
561     while (n-->0) {
562       y[4*(*idx)]   += alpha1*(*v);
563       y[4*(*idx)+1] += alpha2*(*v);
564       y[4*(*idx)+2] += alpha3*(*v);
565       y[4*(*idx)+3] += alpha4*(*v);
566       idx++; v++;
567     }
568   }
569   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
570   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
571   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
576 {
577   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
578   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
579   const PetscScalar *x,*v;
580   PetscScalar       *y,sum1, sum2, sum3, sum4;
581   PetscErrorCode    ierr;
582   PetscInt          n,i,jrow,j;
583   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
584 
585   PetscFunctionBegin;
586   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
587   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
588   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
589   idx  = a->j;
590   v    = a->a;
591   ii   = a->i;
592 
593   for (i=0; i<m; i++) {
594     jrow = ii[i];
595     n    = ii[i+1] - jrow;
596     sum1 = 0.0;
597     sum2 = 0.0;
598     sum3 = 0.0;
599     sum4 = 0.0;
600     for (j=0; j<n; j++) {
601       sum1 += v[jrow]*x[4*idx[jrow]];
602       sum2 += v[jrow]*x[4*idx[jrow]+1];
603       sum3 += v[jrow]*x[4*idx[jrow]+2];
604       sum4 += v[jrow]*x[4*idx[jrow]+3];
605       jrow++;
606     }
607     y[4*i]   += sum1;
608     y[4*i+1] += sum2;
609     y[4*i+2] += sum3;
610     y[4*i+3] += sum4;
611   }
612 
613   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
614   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
615   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
619 {
620   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
621   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
622   const PetscScalar *x,*v;
623   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
624   PetscErrorCode    ierr;
625   PetscInt          n,i;
626   const PetscInt    m = b->AIJ->rmap->n,*idx;
627 
628   PetscFunctionBegin;
629   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
630   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
631   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
632 
633   for (i=0; i<m; i++) {
634     idx    = a->j + a->i[i];
635     v      = a->a + a->i[i];
636     n      = a->i[i+1] - a->i[i];
637     alpha1 = x[4*i];
638     alpha2 = x[4*i+1];
639     alpha3 = x[4*i+2];
640     alpha4 = x[4*i+3];
641     while (n-->0) {
642       y[4*(*idx)]   += alpha1*(*v);
643       y[4*(*idx)+1] += alpha2*(*v);
644       y[4*(*idx)+2] += alpha3*(*v);
645       y[4*(*idx)+3] += alpha4*(*v);
646       idx++; v++;
647     }
648   }
649   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
650   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
651   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 /* ------------------------------------------------------------------------------*/
655 
656 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
657 {
658   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
659   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
660   const PetscScalar *x,*v;
661   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
662   PetscErrorCode    ierr;
663   PetscInt          nonzerorow=0,n,i,jrow,j;
664   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
665 
666   PetscFunctionBegin;
667   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
668   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
669   idx  = a->j;
670   v    = a->a;
671   ii   = a->i;
672 
673   for (i=0; i<m; i++) {
674     jrow  = ii[i];
675     n     = ii[i+1] - jrow;
676     sum1  = 0.0;
677     sum2  = 0.0;
678     sum3  = 0.0;
679     sum4  = 0.0;
680     sum5  = 0.0;
681 
682     nonzerorow += (n>0);
683     for (j=0; j<n; j++) {
684       sum1 += v[jrow]*x[5*idx[jrow]];
685       sum2 += v[jrow]*x[5*idx[jrow]+1];
686       sum3 += v[jrow]*x[5*idx[jrow]+2];
687       sum4 += v[jrow]*x[5*idx[jrow]+3];
688       sum5 += v[jrow]*x[5*idx[jrow]+4];
689       jrow++;
690     }
691     y[5*i]   = sum1;
692     y[5*i+1] = sum2;
693     y[5*i+2] = sum3;
694     y[5*i+3] = sum4;
695     y[5*i+4] = sum5;
696   }
697 
698   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
699   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
700   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
705 {
706   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
707   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
708   const PetscScalar *x,*v;
709   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
710   PetscErrorCode    ierr;
711   PetscInt          n,i;
712   const PetscInt    m = b->AIJ->rmap->n,*idx;
713 
714   PetscFunctionBegin;
715   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
716   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
717   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
718 
719   for (i=0; i<m; i++) {
720     idx    = a->j + a->i[i];
721     v      = a->a + a->i[i];
722     n      = a->i[i+1] - a->i[i];
723     alpha1 = x[5*i];
724     alpha2 = x[5*i+1];
725     alpha3 = x[5*i+2];
726     alpha4 = x[5*i+3];
727     alpha5 = x[5*i+4];
728     while (n-->0) {
729       y[5*(*idx)]   += alpha1*(*v);
730       y[5*(*idx)+1] += alpha2*(*v);
731       y[5*(*idx)+2] += alpha3*(*v);
732       y[5*(*idx)+3] += alpha4*(*v);
733       y[5*(*idx)+4] += alpha5*(*v);
734       idx++; v++;
735     }
736   }
737   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
738   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
739   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
744 {
745   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
746   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
747   const PetscScalar *x,*v;
748   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
749   PetscErrorCode    ierr;
750   PetscInt          n,i,jrow,j;
751   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
752 
753   PetscFunctionBegin;
754   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
755   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
756   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
757   idx  = a->j;
758   v    = a->a;
759   ii   = a->i;
760 
761   for (i=0; i<m; i++) {
762     jrow = ii[i];
763     n    = ii[i+1] - jrow;
764     sum1 = 0.0;
765     sum2 = 0.0;
766     sum3 = 0.0;
767     sum4 = 0.0;
768     sum5 = 0.0;
769     for (j=0; j<n; j++) {
770       sum1 += v[jrow]*x[5*idx[jrow]];
771       sum2 += v[jrow]*x[5*idx[jrow]+1];
772       sum3 += v[jrow]*x[5*idx[jrow]+2];
773       sum4 += v[jrow]*x[5*idx[jrow]+3];
774       sum5 += v[jrow]*x[5*idx[jrow]+4];
775       jrow++;
776     }
777     y[5*i]   += sum1;
778     y[5*i+1] += sum2;
779     y[5*i+2] += sum3;
780     y[5*i+3] += sum4;
781     y[5*i+4] += sum5;
782   }
783 
784   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
785   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
786   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
787   PetscFunctionReturn(0);
788 }
789 
790 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
791 {
792   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
793   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
794   const PetscScalar *x,*v;
795   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
796   PetscErrorCode    ierr;
797   PetscInt          n,i;
798   const PetscInt    m = b->AIJ->rmap->n,*idx;
799 
800   PetscFunctionBegin;
801   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
802   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
803   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
804 
805   for (i=0; i<m; i++) {
806     idx    = a->j + a->i[i];
807     v      = a->a + a->i[i];
808     n      = a->i[i+1] - a->i[i];
809     alpha1 = x[5*i];
810     alpha2 = x[5*i+1];
811     alpha3 = x[5*i+2];
812     alpha4 = x[5*i+3];
813     alpha5 = x[5*i+4];
814     while (n-->0) {
815       y[5*(*idx)]   += alpha1*(*v);
816       y[5*(*idx)+1] += alpha2*(*v);
817       y[5*(*idx)+2] += alpha3*(*v);
818       y[5*(*idx)+3] += alpha4*(*v);
819       y[5*(*idx)+4] += alpha5*(*v);
820       idx++; v++;
821     }
822   }
823   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
824   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
825   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 /* ------------------------------------------------------------------------------*/
830 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
831 {
832   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
833   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
834   const PetscScalar *x,*v;
835   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
836   PetscErrorCode    ierr;
837   PetscInt          nonzerorow=0,n,i,jrow,j;
838   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
839 
840   PetscFunctionBegin;
841   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
842   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
843   idx  = a->j;
844   v    = a->a;
845   ii   = a->i;
846 
847   for (i=0; i<m; i++) {
848     jrow  = ii[i];
849     n     = ii[i+1] - jrow;
850     sum1  = 0.0;
851     sum2  = 0.0;
852     sum3  = 0.0;
853     sum4  = 0.0;
854     sum5  = 0.0;
855     sum6  = 0.0;
856 
857     nonzerorow += (n>0);
858     for (j=0; j<n; j++) {
859       sum1 += v[jrow]*x[6*idx[jrow]];
860       sum2 += v[jrow]*x[6*idx[jrow]+1];
861       sum3 += v[jrow]*x[6*idx[jrow]+2];
862       sum4 += v[jrow]*x[6*idx[jrow]+3];
863       sum5 += v[jrow]*x[6*idx[jrow]+4];
864       sum6 += v[jrow]*x[6*idx[jrow]+5];
865       jrow++;
866     }
867     y[6*i]   = sum1;
868     y[6*i+1] = sum2;
869     y[6*i+2] = sum3;
870     y[6*i+3] = sum4;
871     y[6*i+4] = sum5;
872     y[6*i+5] = sum6;
873   }
874 
875   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
876   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
877   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
878   PetscFunctionReturn(0);
879 }
880 
881 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
882 {
883   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
884   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
885   const PetscScalar *x,*v;
886   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
887   PetscErrorCode    ierr;
888   PetscInt          n,i;
889   const PetscInt    m = b->AIJ->rmap->n,*idx;
890 
891   PetscFunctionBegin;
892   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
893   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
894   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
895 
896   for (i=0; i<m; i++) {
897     idx    = a->j + a->i[i];
898     v      = a->a + a->i[i];
899     n      = a->i[i+1] - a->i[i];
900     alpha1 = x[6*i];
901     alpha2 = x[6*i+1];
902     alpha3 = x[6*i+2];
903     alpha4 = x[6*i+3];
904     alpha5 = x[6*i+4];
905     alpha6 = x[6*i+5];
906     while (n-->0) {
907       y[6*(*idx)]   += alpha1*(*v);
908       y[6*(*idx)+1] += alpha2*(*v);
909       y[6*(*idx)+2] += alpha3*(*v);
910       y[6*(*idx)+3] += alpha4*(*v);
911       y[6*(*idx)+4] += alpha5*(*v);
912       y[6*(*idx)+5] += alpha6*(*v);
913       idx++; v++;
914     }
915   }
916   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
917   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
918   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
921 
922 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
923 {
924   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
925   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
926   const PetscScalar *x,*v;
927   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
928   PetscErrorCode    ierr;
929   PetscInt          n,i,jrow,j;
930   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
931 
932   PetscFunctionBegin;
933   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
934   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
935   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
936   idx  = a->j;
937   v    = a->a;
938   ii   = a->i;
939 
940   for (i=0; i<m; i++) {
941     jrow = ii[i];
942     n    = ii[i+1] - jrow;
943     sum1 = 0.0;
944     sum2 = 0.0;
945     sum3 = 0.0;
946     sum4 = 0.0;
947     sum5 = 0.0;
948     sum6 = 0.0;
949     for (j=0; j<n; j++) {
950       sum1 += v[jrow]*x[6*idx[jrow]];
951       sum2 += v[jrow]*x[6*idx[jrow]+1];
952       sum3 += v[jrow]*x[6*idx[jrow]+2];
953       sum4 += v[jrow]*x[6*idx[jrow]+3];
954       sum5 += v[jrow]*x[6*idx[jrow]+4];
955       sum6 += v[jrow]*x[6*idx[jrow]+5];
956       jrow++;
957     }
958     y[6*i]   += sum1;
959     y[6*i+1] += sum2;
960     y[6*i+2] += sum3;
961     y[6*i+3] += sum4;
962     y[6*i+4] += sum5;
963     y[6*i+5] += sum6;
964   }
965 
966   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
967   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
968   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
973 {
974   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
975   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
976   const PetscScalar *x,*v;
977   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
978   PetscErrorCode    ierr;
979   PetscInt          n,i;
980   const PetscInt    m = b->AIJ->rmap->n,*idx;
981 
982   PetscFunctionBegin;
983   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
984   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
985   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
986 
987   for (i=0; i<m; i++) {
988     idx    = a->j + a->i[i];
989     v      = a->a + a->i[i];
990     n      = a->i[i+1] - a->i[i];
991     alpha1 = x[6*i];
992     alpha2 = x[6*i+1];
993     alpha3 = x[6*i+2];
994     alpha4 = x[6*i+3];
995     alpha5 = x[6*i+4];
996     alpha6 = x[6*i+5];
997     while (n-->0) {
998       y[6*(*idx)]   += alpha1*(*v);
999       y[6*(*idx)+1] += alpha2*(*v);
1000       y[6*(*idx)+2] += alpha3*(*v);
1001       y[6*(*idx)+3] += alpha4*(*v);
1002       y[6*(*idx)+4] += alpha5*(*v);
1003       y[6*(*idx)+5] += alpha6*(*v);
1004       idx++; v++;
1005     }
1006   }
1007   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1008   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1009   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 /* ------------------------------------------------------------------------------*/
1014 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1015 {
1016   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1017   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1018   const PetscScalar *x,*v;
1019   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1020   PetscErrorCode    ierr;
1021   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1022   PetscInt          nonzerorow=0,n,i,jrow,j;
1023 
1024   PetscFunctionBegin;
1025   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1026   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1027   idx  = a->j;
1028   v    = a->a;
1029   ii   = a->i;
1030 
1031   for (i=0; i<m; i++) {
1032     jrow  = ii[i];
1033     n     = ii[i+1] - jrow;
1034     sum1  = 0.0;
1035     sum2  = 0.0;
1036     sum3  = 0.0;
1037     sum4  = 0.0;
1038     sum5  = 0.0;
1039     sum6  = 0.0;
1040     sum7  = 0.0;
1041 
1042     nonzerorow += (n>0);
1043     for (j=0; j<n; j++) {
1044       sum1 += v[jrow]*x[7*idx[jrow]];
1045       sum2 += v[jrow]*x[7*idx[jrow]+1];
1046       sum3 += v[jrow]*x[7*idx[jrow]+2];
1047       sum4 += v[jrow]*x[7*idx[jrow]+3];
1048       sum5 += v[jrow]*x[7*idx[jrow]+4];
1049       sum6 += v[jrow]*x[7*idx[jrow]+5];
1050       sum7 += v[jrow]*x[7*idx[jrow]+6];
1051       jrow++;
1052     }
1053     y[7*i]   = sum1;
1054     y[7*i+1] = sum2;
1055     y[7*i+2] = sum3;
1056     y[7*i+3] = sum4;
1057     y[7*i+4] = sum5;
1058     y[7*i+5] = sum6;
1059     y[7*i+6] = sum7;
1060   }
1061 
1062   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1063   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1064   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1069 {
1070   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1071   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1072   const PetscScalar *x,*v;
1073   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1074   PetscErrorCode    ierr;
1075   const PetscInt    m = b->AIJ->rmap->n,*idx;
1076   PetscInt          n,i;
1077 
1078   PetscFunctionBegin;
1079   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1080   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1081   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1082 
1083   for (i=0; i<m; i++) {
1084     idx    = a->j + a->i[i];
1085     v      = a->a + a->i[i];
1086     n      = a->i[i+1] - a->i[i];
1087     alpha1 = x[7*i];
1088     alpha2 = x[7*i+1];
1089     alpha3 = x[7*i+2];
1090     alpha4 = x[7*i+3];
1091     alpha5 = x[7*i+4];
1092     alpha6 = x[7*i+5];
1093     alpha7 = x[7*i+6];
1094     while (n-->0) {
1095       y[7*(*idx)]   += alpha1*(*v);
1096       y[7*(*idx)+1] += alpha2*(*v);
1097       y[7*(*idx)+2] += alpha3*(*v);
1098       y[7*(*idx)+3] += alpha4*(*v);
1099       y[7*(*idx)+4] += alpha5*(*v);
1100       y[7*(*idx)+5] += alpha6*(*v);
1101       y[7*(*idx)+6] += alpha7*(*v);
1102       idx++; v++;
1103     }
1104   }
1105   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1106   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1107   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1112 {
1113   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1114   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1115   const PetscScalar *x,*v;
1116   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1117   PetscErrorCode    ierr;
1118   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1119   PetscInt          n,i,jrow,j;
1120 
1121   PetscFunctionBegin;
1122   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1123   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1124   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1125   idx  = a->j;
1126   v    = a->a;
1127   ii   = a->i;
1128 
1129   for (i=0; i<m; i++) {
1130     jrow = ii[i];
1131     n    = ii[i+1] - jrow;
1132     sum1 = 0.0;
1133     sum2 = 0.0;
1134     sum3 = 0.0;
1135     sum4 = 0.0;
1136     sum5 = 0.0;
1137     sum6 = 0.0;
1138     sum7 = 0.0;
1139     for (j=0; j<n; j++) {
1140       sum1 += v[jrow]*x[7*idx[jrow]];
1141       sum2 += v[jrow]*x[7*idx[jrow]+1];
1142       sum3 += v[jrow]*x[7*idx[jrow]+2];
1143       sum4 += v[jrow]*x[7*idx[jrow]+3];
1144       sum5 += v[jrow]*x[7*idx[jrow]+4];
1145       sum6 += v[jrow]*x[7*idx[jrow]+5];
1146       sum7 += v[jrow]*x[7*idx[jrow]+6];
1147       jrow++;
1148     }
1149     y[7*i]   += sum1;
1150     y[7*i+1] += sum2;
1151     y[7*i+2] += sum3;
1152     y[7*i+3] += sum4;
1153     y[7*i+4] += sum5;
1154     y[7*i+5] += sum6;
1155     y[7*i+6] += sum7;
1156   }
1157 
1158   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1159   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1160   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1165 {
1166   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1167   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1168   const PetscScalar *x,*v;
1169   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1170   PetscErrorCode    ierr;
1171   const PetscInt    m = b->AIJ->rmap->n,*idx;
1172   PetscInt          n,i;
1173 
1174   PetscFunctionBegin;
1175   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1176   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1177   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1178   for (i=0; i<m; i++) {
1179     idx    = a->j + a->i[i];
1180     v      = a->a + a->i[i];
1181     n      = a->i[i+1] - a->i[i];
1182     alpha1 = x[7*i];
1183     alpha2 = x[7*i+1];
1184     alpha3 = x[7*i+2];
1185     alpha4 = x[7*i+3];
1186     alpha5 = x[7*i+4];
1187     alpha6 = x[7*i+5];
1188     alpha7 = x[7*i+6];
1189     while (n-->0) {
1190       y[7*(*idx)]   += alpha1*(*v);
1191       y[7*(*idx)+1] += alpha2*(*v);
1192       y[7*(*idx)+2] += alpha3*(*v);
1193       y[7*(*idx)+3] += alpha4*(*v);
1194       y[7*(*idx)+4] += alpha5*(*v);
1195       y[7*(*idx)+5] += alpha6*(*v);
1196       y[7*(*idx)+6] += alpha7*(*v);
1197       idx++; v++;
1198     }
1199   }
1200   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1201   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1202   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1207 {
1208   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1209   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1210   const PetscScalar *x,*v;
1211   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1212   PetscErrorCode    ierr;
1213   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1214   PetscInt          nonzerorow=0,n,i,jrow,j;
1215 
1216   PetscFunctionBegin;
1217   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1218   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1219   idx  = a->j;
1220   v    = a->a;
1221   ii   = a->i;
1222 
1223   for (i=0; i<m; i++) {
1224     jrow  = ii[i];
1225     n     = ii[i+1] - jrow;
1226     sum1  = 0.0;
1227     sum2  = 0.0;
1228     sum3  = 0.0;
1229     sum4  = 0.0;
1230     sum5  = 0.0;
1231     sum6  = 0.0;
1232     sum7  = 0.0;
1233     sum8  = 0.0;
1234 
1235     nonzerorow += (n>0);
1236     for (j=0; j<n; j++) {
1237       sum1 += v[jrow]*x[8*idx[jrow]];
1238       sum2 += v[jrow]*x[8*idx[jrow]+1];
1239       sum3 += v[jrow]*x[8*idx[jrow]+2];
1240       sum4 += v[jrow]*x[8*idx[jrow]+3];
1241       sum5 += v[jrow]*x[8*idx[jrow]+4];
1242       sum6 += v[jrow]*x[8*idx[jrow]+5];
1243       sum7 += v[jrow]*x[8*idx[jrow]+6];
1244       sum8 += v[jrow]*x[8*idx[jrow]+7];
1245       jrow++;
1246     }
1247     y[8*i]   = sum1;
1248     y[8*i+1] = sum2;
1249     y[8*i+2] = sum3;
1250     y[8*i+3] = sum4;
1251     y[8*i+4] = sum5;
1252     y[8*i+5] = sum6;
1253     y[8*i+6] = sum7;
1254     y[8*i+7] = sum8;
1255   }
1256 
1257   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1258   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1259   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1264 {
1265   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1266   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1267   const PetscScalar *x,*v;
1268   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1269   PetscErrorCode    ierr;
1270   const PetscInt    m = b->AIJ->rmap->n,*idx;
1271   PetscInt          n,i;
1272 
1273   PetscFunctionBegin;
1274   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1275   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1276   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1277 
1278   for (i=0; i<m; i++) {
1279     idx    = a->j + a->i[i];
1280     v      = a->a + a->i[i];
1281     n      = a->i[i+1] - a->i[i];
1282     alpha1 = x[8*i];
1283     alpha2 = x[8*i+1];
1284     alpha3 = x[8*i+2];
1285     alpha4 = x[8*i+3];
1286     alpha5 = x[8*i+4];
1287     alpha6 = x[8*i+5];
1288     alpha7 = x[8*i+6];
1289     alpha8 = x[8*i+7];
1290     while (n-->0) {
1291       y[8*(*idx)]   += alpha1*(*v);
1292       y[8*(*idx)+1] += alpha2*(*v);
1293       y[8*(*idx)+2] += alpha3*(*v);
1294       y[8*(*idx)+3] += alpha4*(*v);
1295       y[8*(*idx)+4] += alpha5*(*v);
1296       y[8*(*idx)+5] += alpha6*(*v);
1297       y[8*(*idx)+6] += alpha7*(*v);
1298       y[8*(*idx)+7] += alpha8*(*v);
1299       idx++; v++;
1300     }
1301   }
1302   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1303   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1304   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1309 {
1310   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1311   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1312   const PetscScalar *x,*v;
1313   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1314   PetscErrorCode    ierr;
1315   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1316   PetscInt          n,i,jrow,j;
1317 
1318   PetscFunctionBegin;
1319   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1320   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1321   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1322   idx  = a->j;
1323   v    = a->a;
1324   ii   = a->i;
1325 
1326   for (i=0; i<m; i++) {
1327     jrow = ii[i];
1328     n    = ii[i+1] - jrow;
1329     sum1 = 0.0;
1330     sum2 = 0.0;
1331     sum3 = 0.0;
1332     sum4 = 0.0;
1333     sum5 = 0.0;
1334     sum6 = 0.0;
1335     sum7 = 0.0;
1336     sum8 = 0.0;
1337     for (j=0; j<n; j++) {
1338       sum1 += v[jrow]*x[8*idx[jrow]];
1339       sum2 += v[jrow]*x[8*idx[jrow]+1];
1340       sum3 += v[jrow]*x[8*idx[jrow]+2];
1341       sum4 += v[jrow]*x[8*idx[jrow]+3];
1342       sum5 += v[jrow]*x[8*idx[jrow]+4];
1343       sum6 += v[jrow]*x[8*idx[jrow]+5];
1344       sum7 += v[jrow]*x[8*idx[jrow]+6];
1345       sum8 += v[jrow]*x[8*idx[jrow]+7];
1346       jrow++;
1347     }
1348     y[8*i]   += sum1;
1349     y[8*i+1] += sum2;
1350     y[8*i+2] += sum3;
1351     y[8*i+3] += sum4;
1352     y[8*i+4] += sum5;
1353     y[8*i+5] += sum6;
1354     y[8*i+6] += sum7;
1355     y[8*i+7] += sum8;
1356   }
1357 
1358   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1359   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1360   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1365 {
1366   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1367   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1368   const PetscScalar *x,*v;
1369   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1370   PetscErrorCode    ierr;
1371   const PetscInt    m = b->AIJ->rmap->n,*idx;
1372   PetscInt          n,i;
1373 
1374   PetscFunctionBegin;
1375   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1376   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1377   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1378   for (i=0; i<m; i++) {
1379     idx    = a->j + a->i[i];
1380     v      = a->a + a->i[i];
1381     n      = a->i[i+1] - a->i[i];
1382     alpha1 = x[8*i];
1383     alpha2 = x[8*i+1];
1384     alpha3 = x[8*i+2];
1385     alpha4 = x[8*i+3];
1386     alpha5 = x[8*i+4];
1387     alpha6 = x[8*i+5];
1388     alpha7 = x[8*i+6];
1389     alpha8 = x[8*i+7];
1390     while (n-->0) {
1391       y[8*(*idx)]   += alpha1*(*v);
1392       y[8*(*idx)+1] += alpha2*(*v);
1393       y[8*(*idx)+2] += alpha3*(*v);
1394       y[8*(*idx)+3] += alpha4*(*v);
1395       y[8*(*idx)+4] += alpha5*(*v);
1396       y[8*(*idx)+5] += alpha6*(*v);
1397       y[8*(*idx)+6] += alpha7*(*v);
1398       y[8*(*idx)+7] += alpha8*(*v);
1399       idx++; v++;
1400     }
1401   }
1402   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1403   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1404   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 /* ------------------------------------------------------------------------------*/
1409 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1410 {
1411   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1412   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1413   const PetscScalar *x,*v;
1414   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1415   PetscErrorCode    ierr;
1416   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1417   PetscInt          nonzerorow=0,n,i,jrow,j;
1418 
1419   PetscFunctionBegin;
1420   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1421   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1422   idx  = a->j;
1423   v    = a->a;
1424   ii   = a->i;
1425 
1426   for (i=0; i<m; i++) {
1427     jrow  = ii[i];
1428     n     = ii[i+1] - jrow;
1429     sum1  = 0.0;
1430     sum2  = 0.0;
1431     sum3  = 0.0;
1432     sum4  = 0.0;
1433     sum5  = 0.0;
1434     sum6  = 0.0;
1435     sum7  = 0.0;
1436     sum8  = 0.0;
1437     sum9  = 0.0;
1438 
1439     nonzerorow += (n>0);
1440     for (j=0; j<n; j++) {
1441       sum1 += v[jrow]*x[9*idx[jrow]];
1442       sum2 += v[jrow]*x[9*idx[jrow]+1];
1443       sum3 += v[jrow]*x[9*idx[jrow]+2];
1444       sum4 += v[jrow]*x[9*idx[jrow]+3];
1445       sum5 += v[jrow]*x[9*idx[jrow]+4];
1446       sum6 += v[jrow]*x[9*idx[jrow]+5];
1447       sum7 += v[jrow]*x[9*idx[jrow]+6];
1448       sum8 += v[jrow]*x[9*idx[jrow]+7];
1449       sum9 += v[jrow]*x[9*idx[jrow]+8];
1450       jrow++;
1451     }
1452     y[9*i]   = sum1;
1453     y[9*i+1] = sum2;
1454     y[9*i+2] = sum3;
1455     y[9*i+3] = sum4;
1456     y[9*i+4] = sum5;
1457     y[9*i+5] = sum6;
1458     y[9*i+6] = sum7;
1459     y[9*i+7] = sum8;
1460     y[9*i+8] = sum9;
1461   }
1462 
1463   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1464   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1465   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 /* ------------------------------------------------------------------------------*/
1470 
1471 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1472 {
1473   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1474   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1475   const PetscScalar *x,*v;
1476   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1477   PetscErrorCode    ierr;
1478   const PetscInt    m = b->AIJ->rmap->n,*idx;
1479   PetscInt          n,i;
1480 
1481   PetscFunctionBegin;
1482   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1483   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1484   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1485 
1486   for (i=0; i<m; i++) {
1487     idx    = a->j + a->i[i];
1488     v      = a->a + a->i[i];
1489     n      = a->i[i+1] - a->i[i];
1490     alpha1 = x[9*i];
1491     alpha2 = x[9*i+1];
1492     alpha3 = x[9*i+2];
1493     alpha4 = x[9*i+3];
1494     alpha5 = x[9*i+4];
1495     alpha6 = x[9*i+5];
1496     alpha7 = x[9*i+6];
1497     alpha8 = x[9*i+7];
1498     alpha9 = x[9*i+8];
1499     while (n-->0) {
1500       y[9*(*idx)]   += alpha1*(*v);
1501       y[9*(*idx)+1] += alpha2*(*v);
1502       y[9*(*idx)+2] += alpha3*(*v);
1503       y[9*(*idx)+3] += alpha4*(*v);
1504       y[9*(*idx)+4] += alpha5*(*v);
1505       y[9*(*idx)+5] += alpha6*(*v);
1506       y[9*(*idx)+6] += alpha7*(*v);
1507       y[9*(*idx)+7] += alpha8*(*v);
1508       y[9*(*idx)+8] += alpha9*(*v);
1509       idx++; v++;
1510     }
1511   }
1512   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1513   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1514   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1519 {
1520   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1521   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1522   const PetscScalar *x,*v;
1523   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1524   PetscErrorCode    ierr;
1525   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1526   PetscInt          n,i,jrow,j;
1527 
1528   PetscFunctionBegin;
1529   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1530   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1531   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1532   idx  = a->j;
1533   v    = a->a;
1534   ii   = a->i;
1535 
1536   for (i=0; i<m; i++) {
1537     jrow = ii[i];
1538     n    = ii[i+1] - jrow;
1539     sum1 = 0.0;
1540     sum2 = 0.0;
1541     sum3 = 0.0;
1542     sum4 = 0.0;
1543     sum5 = 0.0;
1544     sum6 = 0.0;
1545     sum7 = 0.0;
1546     sum8 = 0.0;
1547     sum9 = 0.0;
1548     for (j=0; j<n; j++) {
1549       sum1 += v[jrow]*x[9*idx[jrow]];
1550       sum2 += v[jrow]*x[9*idx[jrow]+1];
1551       sum3 += v[jrow]*x[9*idx[jrow]+2];
1552       sum4 += v[jrow]*x[9*idx[jrow]+3];
1553       sum5 += v[jrow]*x[9*idx[jrow]+4];
1554       sum6 += v[jrow]*x[9*idx[jrow]+5];
1555       sum7 += v[jrow]*x[9*idx[jrow]+6];
1556       sum8 += v[jrow]*x[9*idx[jrow]+7];
1557       sum9 += v[jrow]*x[9*idx[jrow]+8];
1558       jrow++;
1559     }
1560     y[9*i]   += sum1;
1561     y[9*i+1] += sum2;
1562     y[9*i+2] += sum3;
1563     y[9*i+3] += sum4;
1564     y[9*i+4] += sum5;
1565     y[9*i+5] += sum6;
1566     y[9*i+6] += sum7;
1567     y[9*i+7] += sum8;
1568     y[9*i+8] += sum9;
1569   }
1570 
1571   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1572   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1573   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1578 {
1579   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1580   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1581   const PetscScalar *x,*v;
1582   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1583   PetscErrorCode    ierr;
1584   const PetscInt    m = b->AIJ->rmap->n,*idx;
1585   PetscInt          n,i;
1586 
1587   PetscFunctionBegin;
1588   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1589   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1590   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1591   for (i=0; i<m; i++) {
1592     idx    = a->j + a->i[i];
1593     v      = a->a + a->i[i];
1594     n      = a->i[i+1] - a->i[i];
1595     alpha1 = x[9*i];
1596     alpha2 = x[9*i+1];
1597     alpha3 = x[9*i+2];
1598     alpha4 = x[9*i+3];
1599     alpha5 = x[9*i+4];
1600     alpha6 = x[9*i+5];
1601     alpha7 = x[9*i+6];
1602     alpha8 = x[9*i+7];
1603     alpha9 = x[9*i+8];
1604     while (n-->0) {
1605       y[9*(*idx)]   += alpha1*(*v);
1606       y[9*(*idx)+1] += alpha2*(*v);
1607       y[9*(*idx)+2] += alpha3*(*v);
1608       y[9*(*idx)+3] += alpha4*(*v);
1609       y[9*(*idx)+4] += alpha5*(*v);
1610       y[9*(*idx)+5] += alpha6*(*v);
1611       y[9*(*idx)+6] += alpha7*(*v);
1612       y[9*(*idx)+7] += alpha8*(*v);
1613       y[9*(*idx)+8] += alpha9*(*v);
1614       idx++; v++;
1615     }
1616   }
1617   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1618   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1619   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1620   PetscFunctionReturn(0);
1621 }
1622 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1623 {
1624   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1625   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1626   const PetscScalar *x,*v;
1627   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1628   PetscErrorCode    ierr;
1629   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1630   PetscInt          nonzerorow=0,n,i,jrow,j;
1631 
1632   PetscFunctionBegin;
1633   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1634   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1635   idx  = a->j;
1636   v    = a->a;
1637   ii   = a->i;
1638 
1639   for (i=0; i<m; i++) {
1640     jrow  = ii[i];
1641     n     = ii[i+1] - jrow;
1642     sum1  = 0.0;
1643     sum2  = 0.0;
1644     sum3  = 0.0;
1645     sum4  = 0.0;
1646     sum5  = 0.0;
1647     sum6  = 0.0;
1648     sum7  = 0.0;
1649     sum8  = 0.0;
1650     sum9  = 0.0;
1651     sum10 = 0.0;
1652 
1653     nonzerorow += (n>0);
1654     for (j=0; j<n; j++) {
1655       sum1  += v[jrow]*x[10*idx[jrow]];
1656       sum2  += v[jrow]*x[10*idx[jrow]+1];
1657       sum3  += v[jrow]*x[10*idx[jrow]+2];
1658       sum4  += v[jrow]*x[10*idx[jrow]+3];
1659       sum5  += v[jrow]*x[10*idx[jrow]+4];
1660       sum6  += v[jrow]*x[10*idx[jrow]+5];
1661       sum7  += v[jrow]*x[10*idx[jrow]+6];
1662       sum8  += v[jrow]*x[10*idx[jrow]+7];
1663       sum9  += v[jrow]*x[10*idx[jrow]+8];
1664       sum10 += v[jrow]*x[10*idx[jrow]+9];
1665       jrow++;
1666     }
1667     y[10*i]   = sum1;
1668     y[10*i+1] = sum2;
1669     y[10*i+2] = sum3;
1670     y[10*i+3] = sum4;
1671     y[10*i+4] = sum5;
1672     y[10*i+5] = sum6;
1673     y[10*i+6] = sum7;
1674     y[10*i+7] = sum8;
1675     y[10*i+8] = sum9;
1676     y[10*i+9] = sum10;
1677   }
1678 
1679   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1680   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1681   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1686 {
1687   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1688   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1689   const PetscScalar *x,*v;
1690   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1691   PetscErrorCode    ierr;
1692   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1693   PetscInt          n,i,jrow,j;
1694 
1695   PetscFunctionBegin;
1696   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1697   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1698   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1699   idx  = a->j;
1700   v    = a->a;
1701   ii   = a->i;
1702 
1703   for (i=0; i<m; i++) {
1704     jrow  = ii[i];
1705     n     = ii[i+1] - jrow;
1706     sum1  = 0.0;
1707     sum2  = 0.0;
1708     sum3  = 0.0;
1709     sum4  = 0.0;
1710     sum5  = 0.0;
1711     sum6  = 0.0;
1712     sum7  = 0.0;
1713     sum8  = 0.0;
1714     sum9  = 0.0;
1715     sum10 = 0.0;
1716     for (j=0; j<n; j++) {
1717       sum1  += v[jrow]*x[10*idx[jrow]];
1718       sum2  += v[jrow]*x[10*idx[jrow]+1];
1719       sum3  += v[jrow]*x[10*idx[jrow]+2];
1720       sum4  += v[jrow]*x[10*idx[jrow]+3];
1721       sum5  += v[jrow]*x[10*idx[jrow]+4];
1722       sum6  += v[jrow]*x[10*idx[jrow]+5];
1723       sum7  += v[jrow]*x[10*idx[jrow]+6];
1724       sum8  += v[jrow]*x[10*idx[jrow]+7];
1725       sum9  += v[jrow]*x[10*idx[jrow]+8];
1726       sum10 += v[jrow]*x[10*idx[jrow]+9];
1727       jrow++;
1728     }
1729     y[10*i]   += sum1;
1730     y[10*i+1] += sum2;
1731     y[10*i+2] += sum3;
1732     y[10*i+3] += sum4;
1733     y[10*i+4] += sum5;
1734     y[10*i+5] += sum6;
1735     y[10*i+6] += sum7;
1736     y[10*i+7] += sum8;
1737     y[10*i+8] += sum9;
1738     y[10*i+9] += sum10;
1739   }
1740 
1741   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1742   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1743   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1748 {
1749   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1750   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1751   const PetscScalar *x,*v;
1752   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1753   PetscErrorCode    ierr;
1754   const PetscInt    m = b->AIJ->rmap->n,*idx;
1755   PetscInt          n,i;
1756 
1757   PetscFunctionBegin;
1758   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1759   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1760   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1761 
1762   for (i=0; i<m; i++) {
1763     idx     = a->j + a->i[i];
1764     v       = a->a + a->i[i];
1765     n       = a->i[i+1] - a->i[i];
1766     alpha1  = x[10*i];
1767     alpha2  = x[10*i+1];
1768     alpha3  = x[10*i+2];
1769     alpha4  = x[10*i+3];
1770     alpha5  = x[10*i+4];
1771     alpha6  = x[10*i+5];
1772     alpha7  = x[10*i+6];
1773     alpha8  = x[10*i+7];
1774     alpha9  = x[10*i+8];
1775     alpha10 = x[10*i+9];
1776     while (n-->0) {
1777       y[10*(*idx)]   += alpha1*(*v);
1778       y[10*(*idx)+1] += alpha2*(*v);
1779       y[10*(*idx)+2] += alpha3*(*v);
1780       y[10*(*idx)+3] += alpha4*(*v);
1781       y[10*(*idx)+4] += alpha5*(*v);
1782       y[10*(*idx)+5] += alpha6*(*v);
1783       y[10*(*idx)+6] += alpha7*(*v);
1784       y[10*(*idx)+7] += alpha8*(*v);
1785       y[10*(*idx)+8] += alpha9*(*v);
1786       y[10*(*idx)+9] += alpha10*(*v);
1787       idx++; v++;
1788     }
1789   }
1790   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1791   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1792   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1793   PetscFunctionReturn(0);
1794 }
1795 
1796 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1797 {
1798   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1799   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1800   const PetscScalar *x,*v;
1801   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1802   PetscErrorCode    ierr;
1803   const PetscInt    m = b->AIJ->rmap->n,*idx;
1804   PetscInt          n,i;
1805 
1806   PetscFunctionBegin;
1807   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1808   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1809   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1810   for (i=0; i<m; i++) {
1811     idx     = a->j + a->i[i];
1812     v       = a->a + a->i[i];
1813     n       = a->i[i+1] - a->i[i];
1814     alpha1  = x[10*i];
1815     alpha2  = x[10*i+1];
1816     alpha3  = x[10*i+2];
1817     alpha4  = x[10*i+3];
1818     alpha5  = x[10*i+4];
1819     alpha6  = x[10*i+5];
1820     alpha7  = x[10*i+6];
1821     alpha8  = x[10*i+7];
1822     alpha9  = x[10*i+8];
1823     alpha10 = x[10*i+9];
1824     while (n-->0) {
1825       y[10*(*idx)]   += alpha1*(*v);
1826       y[10*(*idx)+1] += alpha2*(*v);
1827       y[10*(*idx)+2] += alpha3*(*v);
1828       y[10*(*idx)+3] += alpha4*(*v);
1829       y[10*(*idx)+4] += alpha5*(*v);
1830       y[10*(*idx)+5] += alpha6*(*v);
1831       y[10*(*idx)+6] += alpha7*(*v);
1832       y[10*(*idx)+7] += alpha8*(*v);
1833       y[10*(*idx)+8] += alpha9*(*v);
1834       y[10*(*idx)+9] += alpha10*(*v);
1835       idx++; v++;
1836     }
1837   }
1838   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1839   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1840   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 
1845 /*--------------------------------------------------------------------------------------------*/
1846 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1847 {
1848   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1849   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1850   const PetscScalar *x,*v;
1851   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1852   PetscErrorCode    ierr;
1853   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1854   PetscInt          nonzerorow=0,n,i,jrow,j;
1855 
1856   PetscFunctionBegin;
1857   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1858   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1859   idx  = a->j;
1860   v    = a->a;
1861   ii   = a->i;
1862 
1863   for (i=0; i<m; i++) {
1864     jrow  = ii[i];
1865     n     = ii[i+1] - jrow;
1866     sum1  = 0.0;
1867     sum2  = 0.0;
1868     sum3  = 0.0;
1869     sum4  = 0.0;
1870     sum5  = 0.0;
1871     sum6  = 0.0;
1872     sum7  = 0.0;
1873     sum8  = 0.0;
1874     sum9  = 0.0;
1875     sum10 = 0.0;
1876     sum11 = 0.0;
1877 
1878     nonzerorow += (n>0);
1879     for (j=0; j<n; j++) {
1880       sum1  += v[jrow]*x[11*idx[jrow]];
1881       sum2  += v[jrow]*x[11*idx[jrow]+1];
1882       sum3  += v[jrow]*x[11*idx[jrow]+2];
1883       sum4  += v[jrow]*x[11*idx[jrow]+3];
1884       sum5  += v[jrow]*x[11*idx[jrow]+4];
1885       sum6  += v[jrow]*x[11*idx[jrow]+5];
1886       sum7  += v[jrow]*x[11*idx[jrow]+6];
1887       sum8  += v[jrow]*x[11*idx[jrow]+7];
1888       sum9  += v[jrow]*x[11*idx[jrow]+8];
1889       sum10 += v[jrow]*x[11*idx[jrow]+9];
1890       sum11 += v[jrow]*x[11*idx[jrow]+10];
1891       jrow++;
1892     }
1893     y[11*i]    = sum1;
1894     y[11*i+1]  = sum2;
1895     y[11*i+2]  = sum3;
1896     y[11*i+3]  = sum4;
1897     y[11*i+4]  = sum5;
1898     y[11*i+5]  = sum6;
1899     y[11*i+6]  = sum7;
1900     y[11*i+7]  = sum8;
1901     y[11*i+8]  = sum9;
1902     y[11*i+9]  = sum10;
1903     y[11*i+10] = sum11;
1904   }
1905 
1906   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1907   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1908   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1913 {
1914   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1915   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1916   const PetscScalar *x,*v;
1917   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1918   PetscErrorCode    ierr;
1919   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1920   PetscInt          n,i,jrow,j;
1921 
1922   PetscFunctionBegin;
1923   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1924   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1925   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1926   idx  = a->j;
1927   v    = a->a;
1928   ii   = a->i;
1929 
1930   for (i=0; i<m; i++) {
1931     jrow  = ii[i];
1932     n     = ii[i+1] - jrow;
1933     sum1  = 0.0;
1934     sum2  = 0.0;
1935     sum3  = 0.0;
1936     sum4  = 0.0;
1937     sum5  = 0.0;
1938     sum6  = 0.0;
1939     sum7  = 0.0;
1940     sum8  = 0.0;
1941     sum9  = 0.0;
1942     sum10 = 0.0;
1943     sum11 = 0.0;
1944     for (j=0; j<n; j++) {
1945       sum1  += v[jrow]*x[11*idx[jrow]];
1946       sum2  += v[jrow]*x[11*idx[jrow]+1];
1947       sum3  += v[jrow]*x[11*idx[jrow]+2];
1948       sum4  += v[jrow]*x[11*idx[jrow]+3];
1949       sum5  += v[jrow]*x[11*idx[jrow]+4];
1950       sum6  += v[jrow]*x[11*idx[jrow]+5];
1951       sum7  += v[jrow]*x[11*idx[jrow]+6];
1952       sum8  += v[jrow]*x[11*idx[jrow]+7];
1953       sum9  += v[jrow]*x[11*idx[jrow]+8];
1954       sum10 += v[jrow]*x[11*idx[jrow]+9];
1955       sum11 += v[jrow]*x[11*idx[jrow]+10];
1956       jrow++;
1957     }
1958     y[11*i]    += sum1;
1959     y[11*i+1]  += sum2;
1960     y[11*i+2]  += sum3;
1961     y[11*i+3]  += sum4;
1962     y[11*i+4]  += sum5;
1963     y[11*i+5]  += sum6;
1964     y[11*i+6]  += sum7;
1965     y[11*i+7]  += sum8;
1966     y[11*i+8]  += sum9;
1967     y[11*i+9]  += sum10;
1968     y[11*i+10] += sum11;
1969   }
1970 
1971   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
1972   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1973   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1978 {
1979   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1980   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1981   const PetscScalar *x,*v;
1982   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1983   PetscErrorCode    ierr;
1984   const PetscInt    m = b->AIJ->rmap->n,*idx;
1985   PetscInt          n,i;
1986 
1987   PetscFunctionBegin;
1988   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1989   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1990   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1991 
1992   for (i=0; i<m; i++) {
1993     idx     = a->j + a->i[i];
1994     v       = a->a + a->i[i];
1995     n       = a->i[i+1] - a->i[i];
1996     alpha1  = x[11*i];
1997     alpha2  = x[11*i+1];
1998     alpha3  = x[11*i+2];
1999     alpha4  = x[11*i+3];
2000     alpha5  = x[11*i+4];
2001     alpha6  = x[11*i+5];
2002     alpha7  = x[11*i+6];
2003     alpha8  = x[11*i+7];
2004     alpha9  = x[11*i+8];
2005     alpha10 = x[11*i+9];
2006     alpha11 = x[11*i+10];
2007     while (n-->0) {
2008       y[11*(*idx)]    += alpha1*(*v);
2009       y[11*(*idx)+1]  += alpha2*(*v);
2010       y[11*(*idx)+2]  += alpha3*(*v);
2011       y[11*(*idx)+3]  += alpha4*(*v);
2012       y[11*(*idx)+4]  += alpha5*(*v);
2013       y[11*(*idx)+5]  += alpha6*(*v);
2014       y[11*(*idx)+6]  += alpha7*(*v);
2015       y[11*(*idx)+7]  += alpha8*(*v);
2016       y[11*(*idx)+8]  += alpha9*(*v);
2017       y[11*(*idx)+9]  += alpha10*(*v);
2018       y[11*(*idx)+10] += alpha11*(*v);
2019       idx++; v++;
2020     }
2021   }
2022   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2023   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2024   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2029 {
2030   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2031   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2032   const PetscScalar *x,*v;
2033   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2034   PetscErrorCode    ierr;
2035   const PetscInt    m = b->AIJ->rmap->n,*idx;
2036   PetscInt          n,i;
2037 
2038   PetscFunctionBegin;
2039   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2040   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2041   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2042   for (i=0; i<m; i++) {
2043     idx     = a->j + a->i[i];
2044     v       = a->a + a->i[i];
2045     n       = a->i[i+1] - a->i[i];
2046     alpha1  = x[11*i];
2047     alpha2  = x[11*i+1];
2048     alpha3  = x[11*i+2];
2049     alpha4  = x[11*i+3];
2050     alpha5  = x[11*i+4];
2051     alpha6  = x[11*i+5];
2052     alpha7  = x[11*i+6];
2053     alpha8  = x[11*i+7];
2054     alpha9  = x[11*i+8];
2055     alpha10 = x[11*i+9];
2056     alpha11 = x[11*i+10];
2057     while (n-->0) {
2058       y[11*(*idx)]    += alpha1*(*v);
2059       y[11*(*idx)+1]  += alpha2*(*v);
2060       y[11*(*idx)+2]  += alpha3*(*v);
2061       y[11*(*idx)+3]  += alpha4*(*v);
2062       y[11*(*idx)+4]  += alpha5*(*v);
2063       y[11*(*idx)+5]  += alpha6*(*v);
2064       y[11*(*idx)+6]  += alpha7*(*v);
2065       y[11*(*idx)+7]  += alpha8*(*v);
2066       y[11*(*idx)+8]  += alpha9*(*v);
2067       y[11*(*idx)+9]  += alpha10*(*v);
2068       y[11*(*idx)+10] += alpha11*(*v);
2069       idx++; v++;
2070     }
2071   }
2072   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2073   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2074   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 
2079 /*--------------------------------------------------------------------------------------------*/
2080 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2081 {
2082   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2083   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2084   const PetscScalar *x,*v;
2085   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2086   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2087   PetscErrorCode    ierr;
2088   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2089   PetscInt          nonzerorow=0,n,i,jrow,j;
2090 
2091   PetscFunctionBegin;
2092   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2093   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2094   idx  = a->j;
2095   v    = a->a;
2096   ii   = a->i;
2097 
2098   for (i=0; i<m; i++) {
2099     jrow  = ii[i];
2100     n     = ii[i+1] - jrow;
2101     sum1  = 0.0;
2102     sum2  = 0.0;
2103     sum3  = 0.0;
2104     sum4  = 0.0;
2105     sum5  = 0.0;
2106     sum6  = 0.0;
2107     sum7  = 0.0;
2108     sum8  = 0.0;
2109     sum9  = 0.0;
2110     sum10 = 0.0;
2111     sum11 = 0.0;
2112     sum12 = 0.0;
2113     sum13 = 0.0;
2114     sum14 = 0.0;
2115     sum15 = 0.0;
2116     sum16 = 0.0;
2117 
2118     nonzerorow += (n>0);
2119     for (j=0; j<n; j++) {
2120       sum1  += v[jrow]*x[16*idx[jrow]];
2121       sum2  += v[jrow]*x[16*idx[jrow]+1];
2122       sum3  += v[jrow]*x[16*idx[jrow]+2];
2123       sum4  += v[jrow]*x[16*idx[jrow]+3];
2124       sum5  += v[jrow]*x[16*idx[jrow]+4];
2125       sum6  += v[jrow]*x[16*idx[jrow]+5];
2126       sum7  += v[jrow]*x[16*idx[jrow]+6];
2127       sum8  += v[jrow]*x[16*idx[jrow]+7];
2128       sum9  += v[jrow]*x[16*idx[jrow]+8];
2129       sum10 += v[jrow]*x[16*idx[jrow]+9];
2130       sum11 += v[jrow]*x[16*idx[jrow]+10];
2131       sum12 += v[jrow]*x[16*idx[jrow]+11];
2132       sum13 += v[jrow]*x[16*idx[jrow]+12];
2133       sum14 += v[jrow]*x[16*idx[jrow]+13];
2134       sum15 += v[jrow]*x[16*idx[jrow]+14];
2135       sum16 += v[jrow]*x[16*idx[jrow]+15];
2136       jrow++;
2137     }
2138     y[16*i]    = sum1;
2139     y[16*i+1]  = sum2;
2140     y[16*i+2]  = sum3;
2141     y[16*i+3]  = sum4;
2142     y[16*i+4]  = sum5;
2143     y[16*i+5]  = sum6;
2144     y[16*i+6]  = sum7;
2145     y[16*i+7]  = sum8;
2146     y[16*i+8]  = sum9;
2147     y[16*i+9]  = sum10;
2148     y[16*i+10] = sum11;
2149     y[16*i+11] = sum12;
2150     y[16*i+12] = sum13;
2151     y[16*i+13] = sum14;
2152     y[16*i+14] = sum15;
2153     y[16*i+15] = sum16;
2154   }
2155 
2156   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2157   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2158   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2163 {
2164   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2165   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2166   const PetscScalar *x,*v;
2167   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2168   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2169   PetscErrorCode    ierr;
2170   const PetscInt    m = b->AIJ->rmap->n,*idx;
2171   PetscInt          n,i;
2172 
2173   PetscFunctionBegin;
2174   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2175   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2176   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2177 
2178   for (i=0; i<m; i++) {
2179     idx     = a->j + a->i[i];
2180     v       = a->a + a->i[i];
2181     n       = a->i[i+1] - a->i[i];
2182     alpha1  = x[16*i];
2183     alpha2  = x[16*i+1];
2184     alpha3  = x[16*i+2];
2185     alpha4  = x[16*i+3];
2186     alpha5  = x[16*i+4];
2187     alpha6  = x[16*i+5];
2188     alpha7  = x[16*i+6];
2189     alpha8  = x[16*i+7];
2190     alpha9  = x[16*i+8];
2191     alpha10 = x[16*i+9];
2192     alpha11 = x[16*i+10];
2193     alpha12 = x[16*i+11];
2194     alpha13 = x[16*i+12];
2195     alpha14 = x[16*i+13];
2196     alpha15 = x[16*i+14];
2197     alpha16 = x[16*i+15];
2198     while (n-->0) {
2199       y[16*(*idx)]    += alpha1*(*v);
2200       y[16*(*idx)+1]  += alpha2*(*v);
2201       y[16*(*idx)+2]  += alpha3*(*v);
2202       y[16*(*idx)+3]  += alpha4*(*v);
2203       y[16*(*idx)+4]  += alpha5*(*v);
2204       y[16*(*idx)+5]  += alpha6*(*v);
2205       y[16*(*idx)+6]  += alpha7*(*v);
2206       y[16*(*idx)+7]  += alpha8*(*v);
2207       y[16*(*idx)+8]  += alpha9*(*v);
2208       y[16*(*idx)+9]  += alpha10*(*v);
2209       y[16*(*idx)+10] += alpha11*(*v);
2210       y[16*(*idx)+11] += alpha12*(*v);
2211       y[16*(*idx)+12] += alpha13*(*v);
2212       y[16*(*idx)+13] += alpha14*(*v);
2213       y[16*(*idx)+14] += alpha15*(*v);
2214       y[16*(*idx)+15] += alpha16*(*v);
2215       idx++; v++;
2216     }
2217   }
2218   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2219   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2220   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2225 {
2226   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2227   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2228   const PetscScalar *x,*v;
2229   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2230   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2231   PetscErrorCode    ierr;
2232   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2233   PetscInt          n,i,jrow,j;
2234 
2235   PetscFunctionBegin;
2236   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2237   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2238   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2239   idx  = a->j;
2240   v    = a->a;
2241   ii   = a->i;
2242 
2243   for (i=0; i<m; i++) {
2244     jrow  = ii[i];
2245     n     = ii[i+1] - jrow;
2246     sum1  = 0.0;
2247     sum2  = 0.0;
2248     sum3  = 0.0;
2249     sum4  = 0.0;
2250     sum5  = 0.0;
2251     sum6  = 0.0;
2252     sum7  = 0.0;
2253     sum8  = 0.0;
2254     sum9  = 0.0;
2255     sum10 = 0.0;
2256     sum11 = 0.0;
2257     sum12 = 0.0;
2258     sum13 = 0.0;
2259     sum14 = 0.0;
2260     sum15 = 0.0;
2261     sum16 = 0.0;
2262     for (j=0; j<n; j++) {
2263       sum1  += v[jrow]*x[16*idx[jrow]];
2264       sum2  += v[jrow]*x[16*idx[jrow]+1];
2265       sum3  += v[jrow]*x[16*idx[jrow]+2];
2266       sum4  += v[jrow]*x[16*idx[jrow]+3];
2267       sum5  += v[jrow]*x[16*idx[jrow]+4];
2268       sum6  += v[jrow]*x[16*idx[jrow]+5];
2269       sum7  += v[jrow]*x[16*idx[jrow]+6];
2270       sum8  += v[jrow]*x[16*idx[jrow]+7];
2271       sum9  += v[jrow]*x[16*idx[jrow]+8];
2272       sum10 += v[jrow]*x[16*idx[jrow]+9];
2273       sum11 += v[jrow]*x[16*idx[jrow]+10];
2274       sum12 += v[jrow]*x[16*idx[jrow]+11];
2275       sum13 += v[jrow]*x[16*idx[jrow]+12];
2276       sum14 += v[jrow]*x[16*idx[jrow]+13];
2277       sum15 += v[jrow]*x[16*idx[jrow]+14];
2278       sum16 += v[jrow]*x[16*idx[jrow]+15];
2279       jrow++;
2280     }
2281     y[16*i]    += sum1;
2282     y[16*i+1]  += sum2;
2283     y[16*i+2]  += sum3;
2284     y[16*i+3]  += sum4;
2285     y[16*i+4]  += sum5;
2286     y[16*i+5]  += sum6;
2287     y[16*i+6]  += sum7;
2288     y[16*i+7]  += sum8;
2289     y[16*i+8]  += sum9;
2290     y[16*i+9]  += sum10;
2291     y[16*i+10] += sum11;
2292     y[16*i+11] += sum12;
2293     y[16*i+12] += sum13;
2294     y[16*i+13] += sum14;
2295     y[16*i+14] += sum15;
2296     y[16*i+15] += sum16;
2297   }
2298 
2299   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2300   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2301   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2306 {
2307   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2308   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2309   const PetscScalar *x,*v;
2310   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2311   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2312   PetscErrorCode    ierr;
2313   const PetscInt    m = b->AIJ->rmap->n,*idx;
2314   PetscInt          n,i;
2315 
2316   PetscFunctionBegin;
2317   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2318   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2319   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2320   for (i=0; i<m; i++) {
2321     idx     = a->j + a->i[i];
2322     v       = a->a + a->i[i];
2323     n       = a->i[i+1] - a->i[i];
2324     alpha1  = x[16*i];
2325     alpha2  = x[16*i+1];
2326     alpha3  = x[16*i+2];
2327     alpha4  = x[16*i+3];
2328     alpha5  = x[16*i+4];
2329     alpha6  = x[16*i+5];
2330     alpha7  = x[16*i+6];
2331     alpha8  = x[16*i+7];
2332     alpha9  = x[16*i+8];
2333     alpha10 = x[16*i+9];
2334     alpha11 = x[16*i+10];
2335     alpha12 = x[16*i+11];
2336     alpha13 = x[16*i+12];
2337     alpha14 = x[16*i+13];
2338     alpha15 = x[16*i+14];
2339     alpha16 = x[16*i+15];
2340     while (n-->0) {
2341       y[16*(*idx)]    += alpha1*(*v);
2342       y[16*(*idx)+1]  += alpha2*(*v);
2343       y[16*(*idx)+2]  += alpha3*(*v);
2344       y[16*(*idx)+3]  += alpha4*(*v);
2345       y[16*(*idx)+4]  += alpha5*(*v);
2346       y[16*(*idx)+5]  += alpha6*(*v);
2347       y[16*(*idx)+6]  += alpha7*(*v);
2348       y[16*(*idx)+7]  += alpha8*(*v);
2349       y[16*(*idx)+8]  += alpha9*(*v);
2350       y[16*(*idx)+9]  += alpha10*(*v);
2351       y[16*(*idx)+10] += alpha11*(*v);
2352       y[16*(*idx)+11] += alpha12*(*v);
2353       y[16*(*idx)+12] += alpha13*(*v);
2354       y[16*(*idx)+13] += alpha14*(*v);
2355       y[16*(*idx)+14] += alpha15*(*v);
2356       y[16*(*idx)+15] += alpha16*(*v);
2357       idx++; v++;
2358     }
2359   }
2360   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2361   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2362   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 /*--------------------------------------------------------------------------------------------*/
2367 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2368 {
2369   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2370   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2371   const PetscScalar *x,*v;
2372   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2373   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2374   PetscErrorCode    ierr;
2375   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2376   PetscInt          nonzerorow=0,n,i,jrow,j;
2377 
2378   PetscFunctionBegin;
2379   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2380   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2381   idx  = a->j;
2382   v    = a->a;
2383   ii   = a->i;
2384 
2385   for (i=0; i<m; i++) {
2386     jrow  = ii[i];
2387     n     = ii[i+1] - jrow;
2388     sum1  = 0.0;
2389     sum2  = 0.0;
2390     sum3  = 0.0;
2391     sum4  = 0.0;
2392     sum5  = 0.0;
2393     sum6  = 0.0;
2394     sum7  = 0.0;
2395     sum8  = 0.0;
2396     sum9  = 0.0;
2397     sum10 = 0.0;
2398     sum11 = 0.0;
2399     sum12 = 0.0;
2400     sum13 = 0.0;
2401     sum14 = 0.0;
2402     sum15 = 0.0;
2403     sum16 = 0.0;
2404     sum17 = 0.0;
2405     sum18 = 0.0;
2406 
2407     nonzerorow += (n>0);
2408     for (j=0; j<n; j++) {
2409       sum1  += v[jrow]*x[18*idx[jrow]];
2410       sum2  += v[jrow]*x[18*idx[jrow]+1];
2411       sum3  += v[jrow]*x[18*idx[jrow]+2];
2412       sum4  += v[jrow]*x[18*idx[jrow]+3];
2413       sum5  += v[jrow]*x[18*idx[jrow]+4];
2414       sum6  += v[jrow]*x[18*idx[jrow]+5];
2415       sum7  += v[jrow]*x[18*idx[jrow]+6];
2416       sum8  += v[jrow]*x[18*idx[jrow]+7];
2417       sum9  += v[jrow]*x[18*idx[jrow]+8];
2418       sum10 += v[jrow]*x[18*idx[jrow]+9];
2419       sum11 += v[jrow]*x[18*idx[jrow]+10];
2420       sum12 += v[jrow]*x[18*idx[jrow]+11];
2421       sum13 += v[jrow]*x[18*idx[jrow]+12];
2422       sum14 += v[jrow]*x[18*idx[jrow]+13];
2423       sum15 += v[jrow]*x[18*idx[jrow]+14];
2424       sum16 += v[jrow]*x[18*idx[jrow]+15];
2425       sum17 += v[jrow]*x[18*idx[jrow]+16];
2426       sum18 += v[jrow]*x[18*idx[jrow]+17];
2427       jrow++;
2428     }
2429     y[18*i]    = sum1;
2430     y[18*i+1]  = sum2;
2431     y[18*i+2]  = sum3;
2432     y[18*i+3]  = sum4;
2433     y[18*i+4]  = sum5;
2434     y[18*i+5]  = sum6;
2435     y[18*i+6]  = sum7;
2436     y[18*i+7]  = sum8;
2437     y[18*i+8]  = sum9;
2438     y[18*i+9]  = sum10;
2439     y[18*i+10] = sum11;
2440     y[18*i+11] = sum12;
2441     y[18*i+12] = sum13;
2442     y[18*i+13] = sum14;
2443     y[18*i+14] = sum15;
2444     y[18*i+15] = sum16;
2445     y[18*i+16] = sum17;
2446     y[18*i+17] = sum18;
2447   }
2448 
2449   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2450   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2451   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2456 {
2457   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2458   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2459   const PetscScalar *x,*v;
2460   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2461   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2462   PetscErrorCode    ierr;
2463   const PetscInt    m = b->AIJ->rmap->n,*idx;
2464   PetscInt          n,i;
2465 
2466   PetscFunctionBegin;
2467   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2468   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2469   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2470 
2471   for (i=0; i<m; i++) {
2472     idx     = a->j + a->i[i];
2473     v       = a->a + a->i[i];
2474     n       = a->i[i+1] - a->i[i];
2475     alpha1  = x[18*i];
2476     alpha2  = x[18*i+1];
2477     alpha3  = x[18*i+2];
2478     alpha4  = x[18*i+3];
2479     alpha5  = x[18*i+4];
2480     alpha6  = x[18*i+5];
2481     alpha7  = x[18*i+6];
2482     alpha8  = x[18*i+7];
2483     alpha9  = x[18*i+8];
2484     alpha10 = x[18*i+9];
2485     alpha11 = x[18*i+10];
2486     alpha12 = x[18*i+11];
2487     alpha13 = x[18*i+12];
2488     alpha14 = x[18*i+13];
2489     alpha15 = x[18*i+14];
2490     alpha16 = x[18*i+15];
2491     alpha17 = x[18*i+16];
2492     alpha18 = x[18*i+17];
2493     while (n-->0) {
2494       y[18*(*idx)]    += alpha1*(*v);
2495       y[18*(*idx)+1]  += alpha2*(*v);
2496       y[18*(*idx)+2]  += alpha3*(*v);
2497       y[18*(*idx)+3]  += alpha4*(*v);
2498       y[18*(*idx)+4]  += alpha5*(*v);
2499       y[18*(*idx)+5]  += alpha6*(*v);
2500       y[18*(*idx)+6]  += alpha7*(*v);
2501       y[18*(*idx)+7]  += alpha8*(*v);
2502       y[18*(*idx)+8]  += alpha9*(*v);
2503       y[18*(*idx)+9]  += alpha10*(*v);
2504       y[18*(*idx)+10] += alpha11*(*v);
2505       y[18*(*idx)+11] += alpha12*(*v);
2506       y[18*(*idx)+12] += alpha13*(*v);
2507       y[18*(*idx)+13] += alpha14*(*v);
2508       y[18*(*idx)+14] += alpha15*(*v);
2509       y[18*(*idx)+15] += alpha16*(*v);
2510       y[18*(*idx)+16] += alpha17*(*v);
2511       y[18*(*idx)+17] += alpha18*(*v);
2512       idx++; v++;
2513     }
2514   }
2515   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2516   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2517   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2522 {
2523   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2524   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2525   const PetscScalar *x,*v;
2526   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2527   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2528   PetscErrorCode    ierr;
2529   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2530   PetscInt          n,i,jrow,j;
2531 
2532   PetscFunctionBegin;
2533   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2534   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2535   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2536   idx  = a->j;
2537   v    = a->a;
2538   ii   = a->i;
2539 
2540   for (i=0; i<m; i++) {
2541     jrow  = ii[i];
2542     n     = ii[i+1] - jrow;
2543     sum1  = 0.0;
2544     sum2  = 0.0;
2545     sum3  = 0.0;
2546     sum4  = 0.0;
2547     sum5  = 0.0;
2548     sum6  = 0.0;
2549     sum7  = 0.0;
2550     sum8  = 0.0;
2551     sum9  = 0.0;
2552     sum10 = 0.0;
2553     sum11 = 0.0;
2554     sum12 = 0.0;
2555     sum13 = 0.0;
2556     sum14 = 0.0;
2557     sum15 = 0.0;
2558     sum16 = 0.0;
2559     sum17 = 0.0;
2560     sum18 = 0.0;
2561     for (j=0; j<n; j++) {
2562       sum1  += v[jrow]*x[18*idx[jrow]];
2563       sum2  += v[jrow]*x[18*idx[jrow]+1];
2564       sum3  += v[jrow]*x[18*idx[jrow]+2];
2565       sum4  += v[jrow]*x[18*idx[jrow]+3];
2566       sum5  += v[jrow]*x[18*idx[jrow]+4];
2567       sum6  += v[jrow]*x[18*idx[jrow]+5];
2568       sum7  += v[jrow]*x[18*idx[jrow]+6];
2569       sum8  += v[jrow]*x[18*idx[jrow]+7];
2570       sum9  += v[jrow]*x[18*idx[jrow]+8];
2571       sum10 += v[jrow]*x[18*idx[jrow]+9];
2572       sum11 += v[jrow]*x[18*idx[jrow]+10];
2573       sum12 += v[jrow]*x[18*idx[jrow]+11];
2574       sum13 += v[jrow]*x[18*idx[jrow]+12];
2575       sum14 += v[jrow]*x[18*idx[jrow]+13];
2576       sum15 += v[jrow]*x[18*idx[jrow]+14];
2577       sum16 += v[jrow]*x[18*idx[jrow]+15];
2578       sum17 += v[jrow]*x[18*idx[jrow]+16];
2579       sum18 += v[jrow]*x[18*idx[jrow]+17];
2580       jrow++;
2581     }
2582     y[18*i]    += sum1;
2583     y[18*i+1]  += sum2;
2584     y[18*i+2]  += sum3;
2585     y[18*i+3]  += sum4;
2586     y[18*i+4]  += sum5;
2587     y[18*i+5]  += sum6;
2588     y[18*i+6]  += sum7;
2589     y[18*i+7]  += sum8;
2590     y[18*i+8]  += sum9;
2591     y[18*i+9]  += sum10;
2592     y[18*i+10] += sum11;
2593     y[18*i+11] += sum12;
2594     y[18*i+12] += sum13;
2595     y[18*i+13] += sum14;
2596     y[18*i+14] += sum15;
2597     y[18*i+15] += sum16;
2598     y[18*i+16] += sum17;
2599     y[18*i+17] += sum18;
2600   }
2601 
2602   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2603   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2604   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2609 {
2610   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2611   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2612   const PetscScalar *x,*v;
2613   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2614   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2615   PetscErrorCode    ierr;
2616   const PetscInt    m = b->AIJ->rmap->n,*idx;
2617   PetscInt          n,i;
2618 
2619   PetscFunctionBegin;
2620   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2621   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2622   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2623   for (i=0; i<m; i++) {
2624     idx     = a->j + a->i[i];
2625     v       = a->a + a->i[i];
2626     n       = a->i[i+1] - a->i[i];
2627     alpha1  = x[18*i];
2628     alpha2  = x[18*i+1];
2629     alpha3  = x[18*i+2];
2630     alpha4  = x[18*i+3];
2631     alpha5  = x[18*i+4];
2632     alpha6  = x[18*i+5];
2633     alpha7  = x[18*i+6];
2634     alpha8  = x[18*i+7];
2635     alpha9  = x[18*i+8];
2636     alpha10 = x[18*i+9];
2637     alpha11 = x[18*i+10];
2638     alpha12 = x[18*i+11];
2639     alpha13 = x[18*i+12];
2640     alpha14 = x[18*i+13];
2641     alpha15 = x[18*i+14];
2642     alpha16 = x[18*i+15];
2643     alpha17 = x[18*i+16];
2644     alpha18 = x[18*i+17];
2645     while (n-->0) {
2646       y[18*(*idx)]    += alpha1*(*v);
2647       y[18*(*idx)+1]  += alpha2*(*v);
2648       y[18*(*idx)+2]  += alpha3*(*v);
2649       y[18*(*idx)+3]  += alpha4*(*v);
2650       y[18*(*idx)+4]  += alpha5*(*v);
2651       y[18*(*idx)+5]  += alpha6*(*v);
2652       y[18*(*idx)+6]  += alpha7*(*v);
2653       y[18*(*idx)+7]  += alpha8*(*v);
2654       y[18*(*idx)+8]  += alpha9*(*v);
2655       y[18*(*idx)+9]  += alpha10*(*v);
2656       y[18*(*idx)+10] += alpha11*(*v);
2657       y[18*(*idx)+11] += alpha12*(*v);
2658       y[18*(*idx)+12] += alpha13*(*v);
2659       y[18*(*idx)+13] += alpha14*(*v);
2660       y[18*(*idx)+14] += alpha15*(*v);
2661       y[18*(*idx)+15] += alpha16*(*v);
2662       y[18*(*idx)+16] += alpha17*(*v);
2663       y[18*(*idx)+17] += alpha18*(*v);
2664       idx++; v++;
2665     }
2666   }
2667   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2668   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2669   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2674 {
2675   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2676   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2677   const PetscScalar *x,*v;
2678   PetscScalar       *y,*sums;
2679   PetscErrorCode    ierr;
2680   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2681   PetscInt          n,i,jrow,j,dof = b->dof,k;
2682 
2683   PetscFunctionBegin;
2684   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2685   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2686   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2687   idx  = a->j;
2688   v    = a->a;
2689   ii   = a->i;
2690 
2691   for (i=0; i<m; i++) {
2692     jrow = ii[i];
2693     n    = ii[i+1] - jrow;
2694     sums = y + dof*i;
2695     for (j=0; j<n; j++) {
2696       for (k=0; k<dof; k++) {
2697         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2698       }
2699       jrow++;
2700     }
2701   }
2702 
2703   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2704   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2705   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2706   PetscFunctionReturn(0);
2707 }
2708 
2709 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2710 {
2711   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2712   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2713   const PetscScalar *x,*v;
2714   PetscScalar       *y,*sums;
2715   PetscErrorCode    ierr;
2716   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2717   PetscInt          n,i,jrow,j,dof = b->dof,k;
2718 
2719   PetscFunctionBegin;
2720   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2721   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2722   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2723   idx  = a->j;
2724   v    = a->a;
2725   ii   = a->i;
2726 
2727   for (i=0; i<m; i++) {
2728     jrow = ii[i];
2729     n    = ii[i+1] - jrow;
2730     sums = y + dof*i;
2731     for (j=0; j<n; j++) {
2732       for (k=0; k<dof; k++) {
2733         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2734       }
2735       jrow++;
2736     }
2737   }
2738 
2739   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2740   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2741   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2742   PetscFunctionReturn(0);
2743 }
2744 
2745 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2746 {
2747   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2748   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2749   const PetscScalar *x,*v,*alpha;
2750   PetscScalar       *y;
2751   PetscErrorCode    ierr;
2752   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2753   PetscInt          n,i,k;
2754 
2755   PetscFunctionBegin;
2756   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2757   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2758   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2759   for (i=0; i<m; i++) {
2760     idx   = a->j + a->i[i];
2761     v     = a->a + a->i[i];
2762     n     = a->i[i+1] - a->i[i];
2763     alpha = x + dof*i;
2764     while (n-->0) {
2765       for (k=0; k<dof; k++) {
2766         y[dof*(*idx)+k] += alpha[k]*(*v);
2767       }
2768       idx++; v++;
2769     }
2770   }
2771   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2772   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2773   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2778 {
2779   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2780   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2781   const PetscScalar *x,*v,*alpha;
2782   PetscScalar       *y;
2783   PetscErrorCode    ierr;
2784   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2785   PetscInt          n,i,k;
2786 
2787   PetscFunctionBegin;
2788   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2789   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2790   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2791   for (i=0; i<m; i++) {
2792     idx   = a->j + a->i[i];
2793     v     = a->a + a->i[i];
2794     n     = a->i[i+1] - a->i[i];
2795     alpha = x + dof*i;
2796     while (n-->0) {
2797       for (k=0; k<dof; k++) {
2798         y[dof*(*idx)+k] += alpha[k]*(*v);
2799       }
2800       idx++; v++;
2801     }
2802   }
2803   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2804   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2805   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 /*===================================================================================*/
2810 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2811 {
2812   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2813   PetscErrorCode ierr;
2814 
2815   PetscFunctionBegin;
2816   /* start the scatter */
2817   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2818   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2819   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2820   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2821   PetscFunctionReturn(0);
2822 }
2823 
2824 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2825 {
2826   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2827   PetscErrorCode ierr;
2828 
2829   PetscFunctionBegin;
2830   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2831   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2832   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2833   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2838 {
2839   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2840   PetscErrorCode ierr;
2841 
2842   PetscFunctionBegin;
2843   /* start the scatter */
2844   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2845   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2846   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2847   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2852 {
2853   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2854   PetscErrorCode ierr;
2855 
2856   PetscFunctionBegin;
2857   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2858   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2859   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2860   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 /* ----------------------------------------------------------------*/
2865 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2866 {
2867   PetscErrorCode     ierr;
2868   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2869   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2870   Mat                P         =pp->AIJ;
2871   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2872   PetscInt           *pti,*ptj,*ptJ;
2873   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2874   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2875   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2876   MatScalar          *ca;
2877   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2878 
2879   PetscFunctionBegin;
2880   /* Get ij structure of P^T */
2881   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2882 
2883   cn = pn*ppdof;
2884   /* Allocate ci array, arrays for fill computation and */
2885   /* free space for accumulating nonzero column info */
2886   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
2887   ci[0] = 0;
2888 
2889   /* Work arrays for rows of P^T*A */
2890   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2891   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2892   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
2893 
2894   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2895   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2896   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2897   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
2898   current_space = free_space;
2899 
2900   /* Determine symbolic info for each row of C: */
2901   for (i=0; i<pn; i++) {
2902     ptnzi = pti[i+1] - pti[i];
2903     ptJ   = ptj + pti[i];
2904     for (dof=0; dof<ppdof; dof++) {
2905       ptanzi = 0;
2906       /* Determine symbolic row of PtA: */
2907       for (j=0; j<ptnzi; j++) {
2908         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2909         arow = ptJ[j]*ppdof + dof;
2910         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2911         anzj = ai[arow+1] - ai[arow];
2912         ajj  = aj + ai[arow];
2913         for (k=0; k<anzj; k++) {
2914           if (!ptadenserow[ajj[k]]) {
2915             ptadenserow[ajj[k]]    = -1;
2916             ptasparserow[ptanzi++] = ajj[k];
2917           }
2918         }
2919       }
2920       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2921       ptaj = ptasparserow;
2922       cnzi = 0;
2923       for (j=0; j<ptanzi; j++) {
2924         /* Get offset within block of P */
2925         pshift = *ptaj%ppdof;
2926         /* Get block row of P */
2927         prow = (*ptaj++)/ppdof; /* integer division */
2928         /* P has same number of nonzeros per row as the compressed form */
2929         pnzj = pi[prow+1] - pi[prow];
2930         pjj  = pj + pi[prow];
2931         for (k=0;k<pnzj;k++) {
2932           /* Locations in C are shifted by the offset within the block */
2933           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2934           if (!denserow[pjj[k]*ppdof+pshift]) {
2935             denserow[pjj[k]*ppdof+pshift] = -1;
2936             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2937           }
2938         }
2939       }
2940 
2941       /* sort sparserow */
2942       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2943 
2944       /* If free space is not available, make more free space */
2945       /* Double the amount of total space in the list */
2946       if (current_space->local_remaining<cnzi) {
2947         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
2948       }
2949 
2950       /* Copy data into free space, and zero out denserows */
2951       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2952 
2953       current_space->array           += cnzi;
2954       current_space->local_used      += cnzi;
2955       current_space->local_remaining -= cnzi;
2956 
2957       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2958       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
2959 
2960       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2961       /*        For now, we will recompute what is needed. */
2962       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2963     }
2964   }
2965   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2966   /* Allocate space for cj, initialize cj, and */
2967   /* destroy list of free space and other temporary array(s) */
2968   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
2969   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2970   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
2971 
2972   /* Allocate space for ca */
2973   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
2974 
2975   /* put together the new matrix */
2976   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2977   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
2978 
2979   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2980   /* Since these are PETSc arrays, change flags to free them as necessary. */
2981   c          = (Mat_SeqAIJ*)((*C)->data);
2982   c->free_a  = PETSC_TRUE;
2983   c->free_ij = PETSC_TRUE;
2984   c->nonew   = 0;
2985 
2986   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2987 
2988   /* Clean up. */
2989   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2990   PetscFunctionReturn(0);
2991 }
2992 
2993 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2994 {
2995   /* This routine requires testing -- first draft only */
2996   PetscErrorCode  ierr;
2997   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
2998   Mat             P  =pp->AIJ;
2999   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3000   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3001   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3002   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3003   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3004   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3005   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3006   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3007   MatScalar       *ca=c->a,*caj,*apa;
3008 
3009   PetscFunctionBegin;
3010   /* Allocate temporary array for storage of one row of A*P */
3011   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3012 
3013   /* Clear old values in C */
3014   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3015 
3016   for (i=0; i<am; i++) {
3017     /* Form sparse row of A*P */
3018     anzi  = ai[i+1] - ai[i];
3019     apnzj = 0;
3020     for (j=0; j<anzi; j++) {
3021       /* Get offset within block of P */
3022       pshift = *aj%ppdof;
3023       /* Get block row of P */
3024       prow = *aj++/ppdof;   /* integer division */
3025       pnzj = pi[prow+1] - pi[prow];
3026       pjj  = pj + pi[prow];
3027       paj  = pa + pi[prow];
3028       for (k=0; k<pnzj; k++) {
3029         poffset = pjj[k]*ppdof+pshift;
3030         if (!apjdense[poffset]) {
3031           apjdense[poffset] = -1;
3032           apj[apnzj++]      = poffset;
3033         }
3034         apa[poffset] += (*aa)*paj[k];
3035       }
3036       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3037       aa++;
3038     }
3039 
3040     /* Sort the j index array for quick sparse axpy. */
3041     /* Note: a array does not need sorting as it is in dense storage locations. */
3042     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3043 
3044     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3045     prow    = i/ppdof; /* integer division */
3046     pshift  = i%ppdof;
3047     poffset = pi[prow];
3048     pnzi    = pi[prow+1] - poffset;
3049     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3050     pJ = pj+poffset;
3051     pA = pa+poffset;
3052     for (j=0; j<pnzi; j++) {
3053       crow = (*pJ)*ppdof+pshift;
3054       cjj  = cj + ci[crow];
3055       caj  = ca + ci[crow];
3056       pJ++;
3057       /* Perform sparse axpy operation.  Note cjj includes apj. */
3058       for (k=0,nextap=0; nextap<apnzj; k++) {
3059         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3060       }
3061       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3062       pA++;
3063     }
3064 
3065     /* Zero the current row info for A*P */
3066     for (j=0; j<apnzj; j++) {
3067       apa[apj[j]]      = 0.;
3068       apjdense[apj[j]] = 0;
3069     }
3070   }
3071 
3072   /* Assemble the final matrix and clean up */
3073   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3074   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3075   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3080 {
3081   PetscErrorCode ierr;
3082 
3083   PetscFunctionBegin;
3084   if (scall == MAT_INITIAL_MATRIX) {
3085     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3086     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
3087     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3088   }
3089   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3090   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
3091   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3096 {
3097   PetscErrorCode ierr;
3098 
3099   PetscFunctionBegin;
3100   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3101   ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr);
3102   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3103   PetscFunctionReturn(0);
3104 }
3105 
3106 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3107 {
3108   PetscFunctionBegin;
3109   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3110   PetscFunctionReturn(0);
3111 }
3112 
3113 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3114 {
3115   PetscErrorCode ierr;
3116 
3117   PetscFunctionBegin;
3118   if (scall == MAT_INITIAL_MATRIX) {
3119     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3120     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
3121     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3122   }
3123   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3124   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
3125   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3126   PetscFunctionReturn(0);
3127 }
3128 
3129 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3130 {
3131   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3132   Mat            a    = b->AIJ,B;
3133   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3134   PetscErrorCode ierr;
3135   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3136   PetscInt       *cols;
3137   PetscScalar    *vals;
3138 
3139   PetscFunctionBegin;
3140   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3141   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3142   for (i=0; i<m; i++) {
3143     nmax = PetscMax(nmax,aij->ilen[i]);
3144     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3145   }
3146   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3147   ierr = PetscFree(ilen);CHKERRQ(ierr);
3148   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3149   ii   = 0;
3150   for (i=0; i<m; i++) {
3151     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3152     for (j=0; j<dof; j++) {
3153       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3154       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3155       ii++;
3156     }
3157     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3158   }
3159   ierr = PetscFree(icols);CHKERRQ(ierr);
3160   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162 
3163   if (reuse == MAT_INPLACE_MATRIX) {
3164     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3165   } else {
3166     *newmat = B;
3167   }
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3172 
3173 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3174 {
3175   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3176   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3177   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3178   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3179   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3180   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3181   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3182   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3183   PetscInt       rstart,cstart,*garray,ii,k;
3184   PetscErrorCode ierr;
3185   PetscScalar    *vals,*ovals;
3186 
3187   PetscFunctionBegin;
3188   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3189   for (i=0; i<A->rmap->n/dof; i++) {
3190     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3191     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3192     for (j=0; j<dof; j++) {
3193       dnz[dof*i+j] = AIJ->ilen[i];
3194       onz[dof*i+j] = OAIJ->ilen[i];
3195     }
3196   }
3197   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3198   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3199   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3200 
3201   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3202   rstart = dof*maij->A->rmap->rstart;
3203   cstart = dof*maij->A->cmap->rstart;
3204   garray = mpiaij->garray;
3205 
3206   ii = rstart;
3207   for (i=0; i<A->rmap->n/dof; i++) {
3208     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3209     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3210     for (j=0; j<dof; j++) {
3211       for (k=0; k<ncols; k++) {
3212         icols[k] = cstart + dof*cols[k]+j;
3213       }
3214       for (k=0; k<oncols; k++) {
3215         oicols[k] = dof*garray[ocols[k]]+j;
3216       }
3217       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3218       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3219       ii++;
3220     }
3221     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3222     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3223   }
3224   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3225 
3226   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3227   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3228 
3229   if (reuse == MAT_INPLACE_MATRIX) {
3230     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3231     ((PetscObject)A)->refct = 1;
3232 
3233     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3234 
3235     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3236   } else {
3237     *newmat = B;
3238   }
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3243 {
3244   PetscErrorCode ierr;
3245   Mat            A;
3246 
3247   PetscFunctionBegin;
3248   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3249   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3250   ierr = MatDestroy(&A);CHKERRQ(ierr);
3251   PetscFunctionReturn(0);
3252 }
3253 
3254 /* ---------------------------------------------------------------------------------- */
3255 /*@
3256   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3257   operations for multicomponent problems.  It interpolates each component the same
3258   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3259   and MATMPIAIJ for distributed matrices.
3260 
3261   Collective
3262 
3263   Input Parameters:
3264 + A - the AIJ matrix describing the action on blocks
3265 - dof - the block size (number of components per node)
3266 
3267   Output Parameter:
3268 . maij - the new MAIJ matrix
3269 
3270   Operations provided:
3271 + MatMult
3272 . MatMultTranspose
3273 . MatMultAdd
3274 . MatMultTransposeAdd
3275 - MatView
3276 
3277   Level: advanced
3278 
3279 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3280 @*/
3281 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3282 {
3283   PetscErrorCode ierr;
3284   PetscMPIInt    size;
3285   PetscInt       n;
3286   Mat            B;
3287 
3288   PetscFunctionBegin;
3289   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3290 
3291   if (dof == 1) *maij = A;
3292   else {
3293     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3294     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3295     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3296     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3297     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3298     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3299 
3300     B->assembled = PETSC_TRUE;
3301 
3302     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3303     if (size == 1) {
3304       Mat_SeqMAIJ *b;
3305 
3306       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3307 
3308       B->ops->setup   = NULL;
3309       B->ops->destroy = MatDestroy_SeqMAIJ;
3310       B->ops->view    = MatView_SeqMAIJ;
3311       b               = (Mat_SeqMAIJ*)B->data;
3312       b->dof          = dof;
3313       b->AIJ          = A;
3314 
3315       if (dof == 2) {
3316         B->ops->mult             = MatMult_SeqMAIJ_2;
3317         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3318         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3319         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3320       } else if (dof == 3) {
3321         B->ops->mult             = MatMult_SeqMAIJ_3;
3322         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3323         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3324         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3325       } else if (dof == 4) {
3326         B->ops->mult             = MatMult_SeqMAIJ_4;
3327         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3328         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3329         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3330       } else if (dof == 5) {
3331         B->ops->mult             = MatMult_SeqMAIJ_5;
3332         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3333         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3334         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3335       } else if (dof == 6) {
3336         B->ops->mult             = MatMult_SeqMAIJ_6;
3337         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3338         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3339         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3340       } else if (dof == 7) {
3341         B->ops->mult             = MatMult_SeqMAIJ_7;
3342         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3343         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3344         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3345       } else if (dof == 8) {
3346         B->ops->mult             = MatMult_SeqMAIJ_8;
3347         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3348         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3349         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3350       } else if (dof == 9) {
3351         B->ops->mult             = MatMult_SeqMAIJ_9;
3352         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3353         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3354         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3355       } else if (dof == 10) {
3356         B->ops->mult             = MatMult_SeqMAIJ_10;
3357         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3358         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3359         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3360       } else if (dof == 11) {
3361         B->ops->mult             = MatMult_SeqMAIJ_11;
3362         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3363         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3364         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3365       } else if (dof == 16) {
3366         B->ops->mult             = MatMult_SeqMAIJ_16;
3367         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3368         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3369         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3370       } else if (dof == 18) {
3371         B->ops->mult             = MatMult_SeqMAIJ_18;
3372         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3373         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3374         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3375       } else {
3376         B->ops->mult             = MatMult_SeqMAIJ_N;
3377         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3378         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3379         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3380       }
3381       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3382       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3383       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3384       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3385     } else {
3386       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3387       Mat_MPIMAIJ *b;
3388       IS          from,to;
3389       Vec         gvec;
3390 
3391       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3392 
3393       B->ops->setup   = NULL;
3394       B->ops->destroy = MatDestroy_MPIMAIJ;
3395       B->ops->view    = MatView_MPIMAIJ;
3396 
3397       b      = (Mat_MPIMAIJ*)B->data;
3398       b->dof = dof;
3399       b->A   = A;
3400 
3401       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3402       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3403 
3404       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3405       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3406       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3407       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3408       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3409 
3410       /* create two temporary Index sets for build scatter gather */
3411       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3412       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3413 
3414       /* create temporary global vector to generate scatter context */
3415       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3416 
3417       /* generate the scatter context */
3418       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3419 
3420       ierr = ISDestroy(&from);CHKERRQ(ierr);
3421       ierr = ISDestroy(&to);CHKERRQ(ierr);
3422       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3423 
3424       B->ops->mult             = MatMult_MPIMAIJ_dof;
3425       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3426       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3427       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3428 
3429       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3430       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3431     }
3432     B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3433     ierr  = MatSetUp(B);CHKERRQ(ierr);
3434     *maij = B;
3435     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3436   }
3437   PetscFunctionReturn(0);
3438 }
3439