xref: /petsc/src/mat/impls/maij/maij.c (revision 864c62dfd577300df05c8a4e4992489f187e8c79)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
36 
37 .seealso: MatCreateMAIJ()
38 @*/
39 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
40 {
41   PetscErrorCode ierr;
42   PetscBool      ismpimaij,isseqmaij;
43 
44   PetscFunctionBegin;
45   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
46   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
47   if (ismpimaij) {
48     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
49 
50     *B = b->A;
51   } else if (isseqmaij) {
52     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
53 
54     *B = b->AIJ;
55   } else {
56     *B = A;
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 /*@
62    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
63 
64    Logically Collective
65 
66    Input Parameter:
67 +  A - the MAIJ matrix
68 -  dof - the block size for the new matrix
69 
70    Output Parameter:
71 .  B - the new MAIJ matrix
72 
73    Level: advanced
74 
75 .seealso: MatCreateMAIJ()
76 @*/
77 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
78 {
79   PetscErrorCode ierr;
80   Mat            Aij = NULL;
81 
82   PetscFunctionBegin;
83   PetscValidLogicalCollectiveInt(A,dof,2);
84   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
85   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
90 {
91   PetscErrorCode ierr;
92   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
93 
94   PetscFunctionBegin;
95   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
96   ierr = PetscFree(A->data);CHKERRQ(ierr);
97   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr);
98   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr);
99   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr);
100   PetscFunctionReturn(0);
101 }
102 
103 PetscErrorCode MatSetUp_MAIJ(Mat A)
104 {
105   PetscFunctionBegin;
106   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107   PetscFunctionReturn(0);
108 }
109 
110 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
111 {
112   PetscErrorCode ierr;
113   Mat            B;
114 
115   PetscFunctionBegin;
116   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
117   ierr = MatView(B,viewer);CHKERRQ(ierr);
118   ierr = MatDestroy(&B);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
123 {
124   PetscErrorCode ierr;
125   Mat            B;
126 
127   PetscFunctionBegin;
128   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
129   ierr = MatView(B,viewer);CHKERRQ(ierr);
130   ierr = MatDestroy(&B);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
135 {
136   PetscErrorCode ierr;
137   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
138 
139   PetscFunctionBegin;
140   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
141   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
142   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
143   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
144   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
145   ierr = PetscFree(A->data);CHKERRQ(ierr);
146   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr);
147   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr);
148   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*MC
153   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
154   multicomponent problems, interpolating or restricting each component the same way independently.
155   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
156 
157   Operations provided:
158 . MatMult
159 . MatMultTranspose
160 . MatMultAdd
161 . MatMultTransposeAdd
162 
163   Level: advanced
164 
165 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
166 M*/
167 
168 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
169 {
170   PetscErrorCode ierr;
171   Mat_MPIMAIJ    *b;
172   PetscMPIInt    size;
173 
174   PetscFunctionBegin;
175   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
176   A->data  = (void*)b;
177 
178   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
179 
180   A->ops->setup = MatSetUp_MAIJ;
181 
182   b->AIJ  = 0;
183   b->dof  = 0;
184   b->OAIJ = 0;
185   b->ctx  = 0;
186   b->w    = 0;
187   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
188   if (size == 1) {
189     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
190   } else {
191     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
192   }
193   A->preallocated  = PETSC_TRUE;
194   A->assembled     = PETSC_TRUE;
195   PetscFunctionReturn(0);
196 }
197 
198 /* --------------------------------------------------------------------------------------*/
199 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
200 {
201   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
202   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
203   const PetscScalar *x,*v;
204   PetscScalar       *y, sum1, sum2;
205   PetscErrorCode    ierr;
206   PetscInt          nonzerorow=0,n,i,jrow,j;
207   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
208 
209   PetscFunctionBegin;
210   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
211   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
212   idx  = a->j;
213   v    = a->a;
214   ii   = a->i;
215 
216   for (i=0; i<m; i++) {
217     jrow  = ii[i];
218     n     = ii[i+1] - jrow;
219     sum1  = 0.0;
220     sum2  = 0.0;
221 
222     nonzerorow += (n>0);
223     for (j=0; j<n; j++) {
224       sum1 += v[jrow]*x[2*idx[jrow]];
225       sum2 += v[jrow]*x[2*idx[jrow]+1];
226       jrow++;
227     }
228     y[2*i]   = sum1;
229     y[2*i+1] = sum2;
230   }
231 
232   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
233   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
234   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 
238 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
239 {
240   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
241   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
242   const PetscScalar *x,*v;
243   PetscScalar       *y,alpha1,alpha2;
244   PetscErrorCode    ierr;
245   PetscInt          n,i;
246   const PetscInt    m = b->AIJ->rmap->n,*idx;
247 
248   PetscFunctionBegin;
249   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
250   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
251   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
252 
253   for (i=0; i<m; i++) {
254     idx    = a->j + a->i[i];
255     v      = a->a + a->i[i];
256     n      = a->i[i+1] - a->i[i];
257     alpha1 = x[2*i];
258     alpha2 = x[2*i+1];
259     while (n-->0) {
260       y[2*(*idx)]   += alpha1*(*v);
261       y[2*(*idx)+1] += alpha2*(*v);
262       idx++; v++;
263     }
264   }
265   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
266   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
267   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
272 {
273   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
274   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
275   const PetscScalar *x,*v;
276   PetscScalar       *y,sum1, sum2;
277   PetscErrorCode    ierr;
278   PetscInt          n,i,jrow,j;
279   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
280 
281   PetscFunctionBegin;
282   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
283   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
284   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
285   idx  = a->j;
286   v    = a->a;
287   ii   = a->i;
288 
289   for (i=0; i<m; i++) {
290     jrow = ii[i];
291     n    = ii[i+1] - jrow;
292     sum1 = 0.0;
293     sum2 = 0.0;
294     for (j=0; j<n; j++) {
295       sum1 += v[jrow]*x[2*idx[jrow]];
296       sum2 += v[jrow]*x[2*idx[jrow]+1];
297       jrow++;
298     }
299     y[2*i]   += sum1;
300     y[2*i+1] += sum2;
301   }
302 
303   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
304   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
305   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
309 {
310   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
311   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
312   const PetscScalar *x,*v;
313   PetscScalar       *y,alpha1,alpha2;
314   PetscErrorCode    ierr;
315   PetscInt          n,i;
316   const PetscInt    m = b->AIJ->rmap->n,*idx;
317 
318   PetscFunctionBegin;
319   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
320   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
321   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
322 
323   for (i=0; i<m; i++) {
324     idx    = a->j + a->i[i];
325     v      = a->a + a->i[i];
326     n      = a->i[i+1] - a->i[i];
327     alpha1 = x[2*i];
328     alpha2 = x[2*i+1];
329     while (n-->0) {
330       y[2*(*idx)]   += alpha1*(*v);
331       y[2*(*idx)+1] += alpha2*(*v);
332       idx++; v++;
333     }
334   }
335   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
336   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
337   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
338   PetscFunctionReturn(0);
339 }
340 /* --------------------------------------------------------------------------------------*/
341 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
342 {
343   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
344   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
345   const PetscScalar *x,*v;
346   PetscScalar       *y,sum1, sum2, sum3;
347   PetscErrorCode    ierr;
348   PetscInt          nonzerorow=0,n,i,jrow,j;
349   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
350 
351   PetscFunctionBegin;
352   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
353   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
354   idx  = a->j;
355   v    = a->a;
356   ii   = a->i;
357 
358   for (i=0; i<m; i++) {
359     jrow  = ii[i];
360     n     = ii[i+1] - jrow;
361     sum1  = 0.0;
362     sum2  = 0.0;
363     sum3  = 0.0;
364 
365     nonzerorow += (n>0);
366     for (j=0; j<n; j++) {
367       sum1 += v[jrow]*x[3*idx[jrow]];
368       sum2 += v[jrow]*x[3*idx[jrow]+1];
369       sum3 += v[jrow]*x[3*idx[jrow]+2];
370       jrow++;
371      }
372     y[3*i]   = sum1;
373     y[3*i+1] = sum2;
374     y[3*i+2] = sum3;
375   }
376 
377   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
378   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
379   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
380   PetscFunctionReturn(0);
381 }
382 
383 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
384 {
385   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
386   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
387   const PetscScalar *x,*v;
388   PetscScalar       *y,alpha1,alpha2,alpha3;
389   PetscErrorCode    ierr;
390   PetscInt          n,i;
391   const PetscInt    m = b->AIJ->rmap->n,*idx;
392 
393   PetscFunctionBegin;
394   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
395   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
396   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
397 
398   for (i=0; i<m; i++) {
399     idx    = a->j + a->i[i];
400     v      = a->a + a->i[i];
401     n      = a->i[i+1] - a->i[i];
402     alpha1 = x[3*i];
403     alpha2 = x[3*i+1];
404     alpha3 = x[3*i+2];
405     while (n-->0) {
406       y[3*(*idx)]   += alpha1*(*v);
407       y[3*(*idx)+1] += alpha2*(*v);
408       y[3*(*idx)+2] += alpha3*(*v);
409       idx++; v++;
410     }
411   }
412   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
413   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
414   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
419 {
420   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
421   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
422   const PetscScalar *x,*v;
423   PetscScalar       *y,sum1, sum2, sum3;
424   PetscErrorCode    ierr;
425   PetscInt          n,i,jrow,j;
426   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
427 
428   PetscFunctionBegin;
429   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
430   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
431   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
432   idx  = a->j;
433   v    = a->a;
434   ii   = a->i;
435 
436   for (i=0; i<m; i++) {
437     jrow = ii[i];
438     n    = ii[i+1] - jrow;
439     sum1 = 0.0;
440     sum2 = 0.0;
441     sum3 = 0.0;
442     for (j=0; j<n; j++) {
443       sum1 += v[jrow]*x[3*idx[jrow]];
444       sum2 += v[jrow]*x[3*idx[jrow]+1];
445       sum3 += v[jrow]*x[3*idx[jrow]+2];
446       jrow++;
447     }
448     y[3*i]   += sum1;
449     y[3*i+1] += sum2;
450     y[3*i+2] += sum3;
451   }
452 
453   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
454   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
455   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
459 {
460   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
461   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
462   const PetscScalar *x,*v;
463   PetscScalar       *y,alpha1,alpha2,alpha3;
464   PetscErrorCode    ierr;
465   PetscInt          n,i;
466   const PetscInt    m = b->AIJ->rmap->n,*idx;
467 
468   PetscFunctionBegin;
469   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
470   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
471   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
472   for (i=0; i<m; i++) {
473     idx    = a->j + a->i[i];
474     v      = a->a + a->i[i];
475     n      = a->i[i+1] - a->i[i];
476     alpha1 = x[3*i];
477     alpha2 = x[3*i+1];
478     alpha3 = x[3*i+2];
479     while (n-->0) {
480       y[3*(*idx)]   += alpha1*(*v);
481       y[3*(*idx)+1] += alpha2*(*v);
482       y[3*(*idx)+2] += alpha3*(*v);
483       idx++; v++;
484     }
485   }
486   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
487   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
488   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 /* ------------------------------------------------------------------------------*/
493 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
494 {
495   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
496   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
497   const PetscScalar *x,*v;
498   PetscScalar       *y,sum1, sum2, sum3, sum4;
499   PetscErrorCode    ierr;
500   PetscInt          nonzerorow=0,n,i,jrow,j;
501   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
502 
503   PetscFunctionBegin;
504   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
505   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
506   idx  = a->j;
507   v    = a->a;
508   ii   = a->i;
509 
510   for (i=0; i<m; i++) {
511     jrow        = ii[i];
512     n           = ii[i+1] - jrow;
513     sum1        = 0.0;
514     sum2        = 0.0;
515     sum3        = 0.0;
516     sum4        = 0.0;
517     nonzerorow += (n>0);
518     for (j=0; j<n; j++) {
519       sum1 += v[jrow]*x[4*idx[jrow]];
520       sum2 += v[jrow]*x[4*idx[jrow]+1];
521       sum3 += v[jrow]*x[4*idx[jrow]+2];
522       sum4 += v[jrow]*x[4*idx[jrow]+3];
523       jrow++;
524     }
525     y[4*i]   = sum1;
526     y[4*i+1] = sum2;
527     y[4*i+2] = sum3;
528     y[4*i+3] = sum4;
529   }
530 
531   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
532   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
533   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
536 
537 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
538 {
539   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
540   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
541   const PetscScalar *x,*v;
542   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
543   PetscErrorCode    ierr;
544   PetscInt          n,i;
545   const PetscInt    m = b->AIJ->rmap->n,*idx;
546 
547   PetscFunctionBegin;
548   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
549   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
550   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
551   for (i=0; i<m; i++) {
552     idx    = a->j + a->i[i];
553     v      = a->a + a->i[i];
554     n      = a->i[i+1] - a->i[i];
555     alpha1 = x[4*i];
556     alpha2 = x[4*i+1];
557     alpha3 = x[4*i+2];
558     alpha4 = x[4*i+3];
559     while (n-->0) {
560       y[4*(*idx)]   += alpha1*(*v);
561       y[4*(*idx)+1] += alpha2*(*v);
562       y[4*(*idx)+2] += alpha3*(*v);
563       y[4*(*idx)+3] += alpha4*(*v);
564       idx++; v++;
565     }
566   }
567   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
568   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
569   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 
573 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
574 {
575   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
576   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
577   const PetscScalar *x,*v;
578   PetscScalar       *y,sum1, sum2, sum3, sum4;
579   PetscErrorCode    ierr;
580   PetscInt          n,i,jrow,j;
581   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
582 
583   PetscFunctionBegin;
584   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
585   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
586   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
587   idx  = a->j;
588   v    = a->a;
589   ii   = a->i;
590 
591   for (i=0; i<m; i++) {
592     jrow = ii[i];
593     n    = ii[i+1] - jrow;
594     sum1 = 0.0;
595     sum2 = 0.0;
596     sum3 = 0.0;
597     sum4 = 0.0;
598     for (j=0; j<n; j++) {
599       sum1 += v[jrow]*x[4*idx[jrow]];
600       sum2 += v[jrow]*x[4*idx[jrow]+1];
601       sum3 += v[jrow]*x[4*idx[jrow]+2];
602       sum4 += v[jrow]*x[4*idx[jrow]+3];
603       jrow++;
604     }
605     y[4*i]   += sum1;
606     y[4*i+1] += sum2;
607     y[4*i+2] += sum3;
608     y[4*i+3] += sum4;
609   }
610 
611   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
612   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
613   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
617 {
618   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
619   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
620   const PetscScalar *x,*v;
621   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
622   PetscErrorCode    ierr;
623   PetscInt          n,i;
624   const PetscInt    m = b->AIJ->rmap->n,*idx;
625 
626   PetscFunctionBegin;
627   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
628   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
629   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
630 
631   for (i=0; i<m; i++) {
632     idx    = a->j + a->i[i];
633     v      = a->a + a->i[i];
634     n      = a->i[i+1] - a->i[i];
635     alpha1 = x[4*i];
636     alpha2 = x[4*i+1];
637     alpha3 = x[4*i+2];
638     alpha4 = x[4*i+3];
639     while (n-->0) {
640       y[4*(*idx)]   += alpha1*(*v);
641       y[4*(*idx)+1] += alpha2*(*v);
642       y[4*(*idx)+2] += alpha3*(*v);
643       y[4*(*idx)+3] += alpha4*(*v);
644       idx++; v++;
645     }
646   }
647   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
648   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
649   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 /* ------------------------------------------------------------------------------*/
653 
654 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
655 {
656   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
657   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
658   const PetscScalar *x,*v;
659   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
660   PetscErrorCode    ierr;
661   PetscInt          nonzerorow=0,n,i,jrow,j;
662   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
663 
664   PetscFunctionBegin;
665   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
666   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
667   idx  = a->j;
668   v    = a->a;
669   ii   = a->i;
670 
671   for (i=0; i<m; i++) {
672     jrow  = ii[i];
673     n     = ii[i+1] - jrow;
674     sum1  = 0.0;
675     sum2  = 0.0;
676     sum3  = 0.0;
677     sum4  = 0.0;
678     sum5  = 0.0;
679 
680     nonzerorow += (n>0);
681     for (j=0; j<n; j++) {
682       sum1 += v[jrow]*x[5*idx[jrow]];
683       sum2 += v[jrow]*x[5*idx[jrow]+1];
684       sum3 += v[jrow]*x[5*idx[jrow]+2];
685       sum4 += v[jrow]*x[5*idx[jrow]+3];
686       sum5 += v[jrow]*x[5*idx[jrow]+4];
687       jrow++;
688     }
689     y[5*i]   = sum1;
690     y[5*i+1] = sum2;
691     y[5*i+2] = sum3;
692     y[5*i+3] = sum4;
693     y[5*i+4] = sum5;
694   }
695 
696   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
697   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
698   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
699   PetscFunctionReturn(0);
700 }
701 
702 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
703 {
704   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
705   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
706   const PetscScalar *x,*v;
707   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
708   PetscErrorCode    ierr;
709   PetscInt          n,i;
710   const PetscInt    m = b->AIJ->rmap->n,*idx;
711 
712   PetscFunctionBegin;
713   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
714   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
715   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
716 
717   for (i=0; i<m; i++) {
718     idx    = a->j + a->i[i];
719     v      = a->a + a->i[i];
720     n      = a->i[i+1] - a->i[i];
721     alpha1 = x[5*i];
722     alpha2 = x[5*i+1];
723     alpha3 = x[5*i+2];
724     alpha4 = x[5*i+3];
725     alpha5 = x[5*i+4];
726     while (n-->0) {
727       y[5*(*idx)]   += alpha1*(*v);
728       y[5*(*idx)+1] += alpha2*(*v);
729       y[5*(*idx)+2] += alpha3*(*v);
730       y[5*(*idx)+3] += alpha4*(*v);
731       y[5*(*idx)+4] += alpha5*(*v);
732       idx++; v++;
733     }
734   }
735   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
736   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
737   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
742 {
743   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
744   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
745   const PetscScalar *x,*v;
746   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
747   PetscErrorCode    ierr;
748   PetscInt          n,i,jrow,j;
749   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
750 
751   PetscFunctionBegin;
752   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
753   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
754   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
755   idx  = a->j;
756   v    = a->a;
757   ii   = a->i;
758 
759   for (i=0; i<m; i++) {
760     jrow = ii[i];
761     n    = ii[i+1] - jrow;
762     sum1 = 0.0;
763     sum2 = 0.0;
764     sum3 = 0.0;
765     sum4 = 0.0;
766     sum5 = 0.0;
767     for (j=0; j<n; j++) {
768       sum1 += v[jrow]*x[5*idx[jrow]];
769       sum2 += v[jrow]*x[5*idx[jrow]+1];
770       sum3 += v[jrow]*x[5*idx[jrow]+2];
771       sum4 += v[jrow]*x[5*idx[jrow]+3];
772       sum5 += v[jrow]*x[5*idx[jrow]+4];
773       jrow++;
774     }
775     y[5*i]   += sum1;
776     y[5*i+1] += sum2;
777     y[5*i+2] += sum3;
778     y[5*i+3] += sum4;
779     y[5*i+4] += sum5;
780   }
781 
782   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
783   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
784   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
789 {
790   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
791   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
792   const PetscScalar *x,*v;
793   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
794   PetscErrorCode    ierr;
795   PetscInt          n,i;
796   const PetscInt    m = b->AIJ->rmap->n,*idx;
797 
798   PetscFunctionBegin;
799   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
800   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
801   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
802 
803   for (i=0; i<m; i++) {
804     idx    = a->j + a->i[i];
805     v      = a->a + a->i[i];
806     n      = a->i[i+1] - a->i[i];
807     alpha1 = x[5*i];
808     alpha2 = x[5*i+1];
809     alpha3 = x[5*i+2];
810     alpha4 = x[5*i+3];
811     alpha5 = x[5*i+4];
812     while (n-->0) {
813       y[5*(*idx)]   += alpha1*(*v);
814       y[5*(*idx)+1] += alpha2*(*v);
815       y[5*(*idx)+2] += alpha3*(*v);
816       y[5*(*idx)+3] += alpha4*(*v);
817       y[5*(*idx)+4] += alpha5*(*v);
818       idx++; v++;
819     }
820   }
821   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
822   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
823   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 /* ------------------------------------------------------------------------------*/
828 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
829 {
830   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
831   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
832   const PetscScalar *x,*v;
833   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
834   PetscErrorCode    ierr;
835   PetscInt          nonzerorow=0,n,i,jrow,j;
836   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
837 
838   PetscFunctionBegin;
839   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
840   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
841   idx  = a->j;
842   v    = a->a;
843   ii   = a->i;
844 
845   for (i=0; i<m; i++) {
846     jrow  = ii[i];
847     n     = ii[i+1] - jrow;
848     sum1  = 0.0;
849     sum2  = 0.0;
850     sum3  = 0.0;
851     sum4  = 0.0;
852     sum5  = 0.0;
853     sum6  = 0.0;
854 
855     nonzerorow += (n>0);
856     for (j=0; j<n; j++) {
857       sum1 += v[jrow]*x[6*idx[jrow]];
858       sum2 += v[jrow]*x[6*idx[jrow]+1];
859       sum3 += v[jrow]*x[6*idx[jrow]+2];
860       sum4 += v[jrow]*x[6*idx[jrow]+3];
861       sum5 += v[jrow]*x[6*idx[jrow]+4];
862       sum6 += v[jrow]*x[6*idx[jrow]+5];
863       jrow++;
864     }
865     y[6*i]   = sum1;
866     y[6*i+1] = sum2;
867     y[6*i+2] = sum3;
868     y[6*i+3] = sum4;
869     y[6*i+4] = sum5;
870     y[6*i+5] = sum6;
871   }
872 
873   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
874   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
875   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
880 {
881   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
882   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
883   const PetscScalar *x,*v;
884   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
885   PetscErrorCode    ierr;
886   PetscInt          n,i;
887   const PetscInt    m = b->AIJ->rmap->n,*idx;
888 
889   PetscFunctionBegin;
890   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
891   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
892   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
893 
894   for (i=0; i<m; i++) {
895     idx    = a->j + a->i[i];
896     v      = a->a + a->i[i];
897     n      = a->i[i+1] - a->i[i];
898     alpha1 = x[6*i];
899     alpha2 = x[6*i+1];
900     alpha3 = x[6*i+2];
901     alpha4 = x[6*i+3];
902     alpha5 = x[6*i+4];
903     alpha6 = x[6*i+5];
904     while (n-->0) {
905       y[6*(*idx)]   += alpha1*(*v);
906       y[6*(*idx)+1] += alpha2*(*v);
907       y[6*(*idx)+2] += alpha3*(*v);
908       y[6*(*idx)+3] += alpha4*(*v);
909       y[6*(*idx)+4] += alpha5*(*v);
910       y[6*(*idx)+5] += alpha6*(*v);
911       idx++; v++;
912     }
913   }
914   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
915   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
916   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
921 {
922   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
923   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
924   const PetscScalar *x,*v;
925   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
926   PetscErrorCode    ierr;
927   PetscInt          n,i,jrow,j;
928   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
929 
930   PetscFunctionBegin;
931   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
932   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
933   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
934   idx  = a->j;
935   v    = a->a;
936   ii   = a->i;
937 
938   for (i=0; i<m; i++) {
939     jrow = ii[i];
940     n    = ii[i+1] - jrow;
941     sum1 = 0.0;
942     sum2 = 0.0;
943     sum3 = 0.0;
944     sum4 = 0.0;
945     sum5 = 0.0;
946     sum6 = 0.0;
947     for (j=0; j<n; j++) {
948       sum1 += v[jrow]*x[6*idx[jrow]];
949       sum2 += v[jrow]*x[6*idx[jrow]+1];
950       sum3 += v[jrow]*x[6*idx[jrow]+2];
951       sum4 += v[jrow]*x[6*idx[jrow]+3];
952       sum5 += v[jrow]*x[6*idx[jrow]+4];
953       sum6 += v[jrow]*x[6*idx[jrow]+5];
954       jrow++;
955     }
956     y[6*i]   += sum1;
957     y[6*i+1] += sum2;
958     y[6*i+2] += sum3;
959     y[6*i+3] += sum4;
960     y[6*i+4] += sum5;
961     y[6*i+5] += sum6;
962   }
963 
964   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
965   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
966   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
971 {
972   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
973   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
974   const PetscScalar *x,*v;
975   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
976   PetscErrorCode    ierr;
977   PetscInt          n,i;
978   const PetscInt    m = b->AIJ->rmap->n,*idx;
979 
980   PetscFunctionBegin;
981   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
982   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
984 
985   for (i=0; i<m; i++) {
986     idx    = a->j + a->i[i];
987     v      = a->a + a->i[i];
988     n      = a->i[i+1] - a->i[i];
989     alpha1 = x[6*i];
990     alpha2 = x[6*i+1];
991     alpha3 = x[6*i+2];
992     alpha4 = x[6*i+3];
993     alpha5 = x[6*i+4];
994     alpha6 = x[6*i+5];
995     while (n-->0) {
996       y[6*(*idx)]   += alpha1*(*v);
997       y[6*(*idx)+1] += alpha2*(*v);
998       y[6*(*idx)+2] += alpha3*(*v);
999       y[6*(*idx)+3] += alpha4*(*v);
1000       y[6*(*idx)+4] += alpha5*(*v);
1001       y[6*(*idx)+5] += alpha6*(*v);
1002       idx++; v++;
1003     }
1004   }
1005   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1006   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1007   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 /* ------------------------------------------------------------------------------*/
1012 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1013 {
1014   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1015   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1016   const PetscScalar *x,*v;
1017   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1018   PetscErrorCode    ierr;
1019   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1020   PetscInt          nonzerorow=0,n,i,jrow,j;
1021 
1022   PetscFunctionBegin;
1023   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1024   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1025   idx  = a->j;
1026   v    = a->a;
1027   ii   = a->i;
1028 
1029   for (i=0; i<m; i++) {
1030     jrow  = ii[i];
1031     n     = ii[i+1] - jrow;
1032     sum1  = 0.0;
1033     sum2  = 0.0;
1034     sum3  = 0.0;
1035     sum4  = 0.0;
1036     sum5  = 0.0;
1037     sum6  = 0.0;
1038     sum7  = 0.0;
1039 
1040     nonzerorow += (n>0);
1041     for (j=0; j<n; j++) {
1042       sum1 += v[jrow]*x[7*idx[jrow]];
1043       sum2 += v[jrow]*x[7*idx[jrow]+1];
1044       sum3 += v[jrow]*x[7*idx[jrow]+2];
1045       sum4 += v[jrow]*x[7*idx[jrow]+3];
1046       sum5 += v[jrow]*x[7*idx[jrow]+4];
1047       sum6 += v[jrow]*x[7*idx[jrow]+5];
1048       sum7 += v[jrow]*x[7*idx[jrow]+6];
1049       jrow++;
1050     }
1051     y[7*i]   = sum1;
1052     y[7*i+1] = sum2;
1053     y[7*i+2] = sum3;
1054     y[7*i+3] = sum4;
1055     y[7*i+4] = sum5;
1056     y[7*i+5] = sum6;
1057     y[7*i+6] = sum7;
1058   }
1059 
1060   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1061   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1062   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1067 {
1068   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1069   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1070   const PetscScalar *x,*v;
1071   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1072   PetscErrorCode    ierr;
1073   const PetscInt    m = b->AIJ->rmap->n,*idx;
1074   PetscInt          n,i;
1075 
1076   PetscFunctionBegin;
1077   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1078   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1079   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1080 
1081   for (i=0; i<m; i++) {
1082     idx    = a->j + a->i[i];
1083     v      = a->a + a->i[i];
1084     n      = a->i[i+1] - a->i[i];
1085     alpha1 = x[7*i];
1086     alpha2 = x[7*i+1];
1087     alpha3 = x[7*i+2];
1088     alpha4 = x[7*i+3];
1089     alpha5 = x[7*i+4];
1090     alpha6 = x[7*i+5];
1091     alpha7 = x[7*i+6];
1092     while (n-->0) {
1093       y[7*(*idx)]   += alpha1*(*v);
1094       y[7*(*idx)+1] += alpha2*(*v);
1095       y[7*(*idx)+2] += alpha3*(*v);
1096       y[7*(*idx)+3] += alpha4*(*v);
1097       y[7*(*idx)+4] += alpha5*(*v);
1098       y[7*(*idx)+5] += alpha6*(*v);
1099       y[7*(*idx)+6] += alpha7*(*v);
1100       idx++; v++;
1101     }
1102   }
1103   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1104   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1105   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1112   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1113   const PetscScalar *x,*v;
1114   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1115   PetscErrorCode    ierr;
1116   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1117   PetscInt          n,i,jrow,j;
1118 
1119   PetscFunctionBegin;
1120   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1121   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1122   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1123   idx  = a->j;
1124   v    = a->a;
1125   ii   = a->i;
1126 
1127   for (i=0; i<m; i++) {
1128     jrow = ii[i];
1129     n    = ii[i+1] - jrow;
1130     sum1 = 0.0;
1131     sum2 = 0.0;
1132     sum3 = 0.0;
1133     sum4 = 0.0;
1134     sum5 = 0.0;
1135     sum6 = 0.0;
1136     sum7 = 0.0;
1137     for (j=0; j<n; j++) {
1138       sum1 += v[jrow]*x[7*idx[jrow]];
1139       sum2 += v[jrow]*x[7*idx[jrow]+1];
1140       sum3 += v[jrow]*x[7*idx[jrow]+2];
1141       sum4 += v[jrow]*x[7*idx[jrow]+3];
1142       sum5 += v[jrow]*x[7*idx[jrow]+4];
1143       sum6 += v[jrow]*x[7*idx[jrow]+5];
1144       sum7 += v[jrow]*x[7*idx[jrow]+6];
1145       jrow++;
1146     }
1147     y[7*i]   += sum1;
1148     y[7*i+1] += sum2;
1149     y[7*i+2] += sum3;
1150     y[7*i+3] += sum4;
1151     y[7*i+4] += sum5;
1152     y[7*i+5] += sum6;
1153     y[7*i+6] += sum7;
1154   }
1155 
1156   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1157   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1158   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1163 {
1164   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1165   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1166   const PetscScalar *x,*v;
1167   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1168   PetscErrorCode    ierr;
1169   const PetscInt    m = b->AIJ->rmap->n,*idx;
1170   PetscInt          n,i;
1171 
1172   PetscFunctionBegin;
1173   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1174   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1175   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1176   for (i=0; i<m; i++) {
1177     idx    = a->j + a->i[i];
1178     v      = a->a + a->i[i];
1179     n      = a->i[i+1] - a->i[i];
1180     alpha1 = x[7*i];
1181     alpha2 = x[7*i+1];
1182     alpha3 = x[7*i+2];
1183     alpha4 = x[7*i+3];
1184     alpha5 = x[7*i+4];
1185     alpha6 = x[7*i+5];
1186     alpha7 = x[7*i+6];
1187     while (n-->0) {
1188       y[7*(*idx)]   += alpha1*(*v);
1189       y[7*(*idx)+1] += alpha2*(*v);
1190       y[7*(*idx)+2] += alpha3*(*v);
1191       y[7*(*idx)+3] += alpha4*(*v);
1192       y[7*(*idx)+4] += alpha5*(*v);
1193       y[7*(*idx)+5] += alpha6*(*v);
1194       y[7*(*idx)+6] += alpha7*(*v);
1195       idx++; v++;
1196     }
1197   }
1198   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1199   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1200   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1205 {
1206   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1207   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1208   const PetscScalar *x,*v;
1209   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1210   PetscErrorCode    ierr;
1211   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1212   PetscInt          nonzerorow=0,n,i,jrow,j;
1213 
1214   PetscFunctionBegin;
1215   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1216   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1217   idx  = a->j;
1218   v    = a->a;
1219   ii   = a->i;
1220 
1221   for (i=0; i<m; i++) {
1222     jrow  = ii[i];
1223     n     = ii[i+1] - jrow;
1224     sum1  = 0.0;
1225     sum2  = 0.0;
1226     sum3  = 0.0;
1227     sum4  = 0.0;
1228     sum5  = 0.0;
1229     sum6  = 0.0;
1230     sum7  = 0.0;
1231     sum8  = 0.0;
1232 
1233     nonzerorow += (n>0);
1234     for (j=0; j<n; j++) {
1235       sum1 += v[jrow]*x[8*idx[jrow]];
1236       sum2 += v[jrow]*x[8*idx[jrow]+1];
1237       sum3 += v[jrow]*x[8*idx[jrow]+2];
1238       sum4 += v[jrow]*x[8*idx[jrow]+3];
1239       sum5 += v[jrow]*x[8*idx[jrow]+4];
1240       sum6 += v[jrow]*x[8*idx[jrow]+5];
1241       sum7 += v[jrow]*x[8*idx[jrow]+6];
1242       sum8 += v[jrow]*x[8*idx[jrow]+7];
1243       jrow++;
1244     }
1245     y[8*i]   = sum1;
1246     y[8*i+1] = sum2;
1247     y[8*i+2] = sum3;
1248     y[8*i+3] = sum4;
1249     y[8*i+4] = sum5;
1250     y[8*i+5] = sum6;
1251     y[8*i+6] = sum7;
1252     y[8*i+7] = sum8;
1253   }
1254 
1255   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1256   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1257   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1262 {
1263   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1264   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1265   const PetscScalar *x,*v;
1266   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1267   PetscErrorCode    ierr;
1268   const PetscInt    m = b->AIJ->rmap->n,*idx;
1269   PetscInt          n,i;
1270 
1271   PetscFunctionBegin;
1272   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1273   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1274   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1275 
1276   for (i=0; i<m; i++) {
1277     idx    = a->j + a->i[i];
1278     v      = a->a + a->i[i];
1279     n      = a->i[i+1] - a->i[i];
1280     alpha1 = x[8*i];
1281     alpha2 = x[8*i+1];
1282     alpha3 = x[8*i+2];
1283     alpha4 = x[8*i+3];
1284     alpha5 = x[8*i+4];
1285     alpha6 = x[8*i+5];
1286     alpha7 = x[8*i+6];
1287     alpha8 = x[8*i+7];
1288     while (n-->0) {
1289       y[8*(*idx)]   += alpha1*(*v);
1290       y[8*(*idx)+1] += alpha2*(*v);
1291       y[8*(*idx)+2] += alpha3*(*v);
1292       y[8*(*idx)+3] += alpha4*(*v);
1293       y[8*(*idx)+4] += alpha5*(*v);
1294       y[8*(*idx)+5] += alpha6*(*v);
1295       y[8*(*idx)+6] += alpha7*(*v);
1296       y[8*(*idx)+7] += alpha8*(*v);
1297       idx++; v++;
1298     }
1299   }
1300   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1301   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1302   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1307 {
1308   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1309   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1310   const PetscScalar *x,*v;
1311   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1312   PetscErrorCode    ierr;
1313   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1314   PetscInt          n,i,jrow,j;
1315 
1316   PetscFunctionBegin;
1317   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1318   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1319   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1320   idx  = a->j;
1321   v    = a->a;
1322   ii   = a->i;
1323 
1324   for (i=0; i<m; i++) {
1325     jrow = ii[i];
1326     n    = ii[i+1] - jrow;
1327     sum1 = 0.0;
1328     sum2 = 0.0;
1329     sum3 = 0.0;
1330     sum4 = 0.0;
1331     sum5 = 0.0;
1332     sum6 = 0.0;
1333     sum7 = 0.0;
1334     sum8 = 0.0;
1335     for (j=0; j<n; j++) {
1336       sum1 += v[jrow]*x[8*idx[jrow]];
1337       sum2 += v[jrow]*x[8*idx[jrow]+1];
1338       sum3 += v[jrow]*x[8*idx[jrow]+2];
1339       sum4 += v[jrow]*x[8*idx[jrow]+3];
1340       sum5 += v[jrow]*x[8*idx[jrow]+4];
1341       sum6 += v[jrow]*x[8*idx[jrow]+5];
1342       sum7 += v[jrow]*x[8*idx[jrow]+6];
1343       sum8 += v[jrow]*x[8*idx[jrow]+7];
1344       jrow++;
1345     }
1346     y[8*i]   += sum1;
1347     y[8*i+1] += sum2;
1348     y[8*i+2] += sum3;
1349     y[8*i+3] += sum4;
1350     y[8*i+4] += sum5;
1351     y[8*i+5] += sum6;
1352     y[8*i+6] += sum7;
1353     y[8*i+7] += sum8;
1354   }
1355 
1356   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1357   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1358   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1363 {
1364   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1365   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1366   const PetscScalar *x,*v;
1367   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1368   PetscErrorCode    ierr;
1369   const PetscInt    m = b->AIJ->rmap->n,*idx;
1370   PetscInt          n,i;
1371 
1372   PetscFunctionBegin;
1373   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1374   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1375   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1376   for (i=0; i<m; i++) {
1377     idx    = a->j + a->i[i];
1378     v      = a->a + a->i[i];
1379     n      = a->i[i+1] - a->i[i];
1380     alpha1 = x[8*i];
1381     alpha2 = x[8*i+1];
1382     alpha3 = x[8*i+2];
1383     alpha4 = x[8*i+3];
1384     alpha5 = x[8*i+4];
1385     alpha6 = x[8*i+5];
1386     alpha7 = x[8*i+6];
1387     alpha8 = x[8*i+7];
1388     while (n-->0) {
1389       y[8*(*idx)]   += alpha1*(*v);
1390       y[8*(*idx)+1] += alpha2*(*v);
1391       y[8*(*idx)+2] += alpha3*(*v);
1392       y[8*(*idx)+3] += alpha4*(*v);
1393       y[8*(*idx)+4] += alpha5*(*v);
1394       y[8*(*idx)+5] += alpha6*(*v);
1395       y[8*(*idx)+6] += alpha7*(*v);
1396       y[8*(*idx)+7] += alpha8*(*v);
1397       idx++; v++;
1398     }
1399   }
1400   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1401   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1402   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /* ------------------------------------------------------------------------------*/
1407 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1408 {
1409   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1410   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1411   const PetscScalar *x,*v;
1412   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1413   PetscErrorCode    ierr;
1414   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1415   PetscInt          nonzerorow=0,n,i,jrow,j;
1416 
1417   PetscFunctionBegin;
1418   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1419   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1420   idx  = a->j;
1421   v    = a->a;
1422   ii   = a->i;
1423 
1424   for (i=0; i<m; i++) {
1425     jrow  = ii[i];
1426     n     = ii[i+1] - jrow;
1427     sum1  = 0.0;
1428     sum2  = 0.0;
1429     sum3  = 0.0;
1430     sum4  = 0.0;
1431     sum5  = 0.0;
1432     sum6  = 0.0;
1433     sum7  = 0.0;
1434     sum8  = 0.0;
1435     sum9  = 0.0;
1436 
1437     nonzerorow += (n>0);
1438     for (j=0; j<n; j++) {
1439       sum1 += v[jrow]*x[9*idx[jrow]];
1440       sum2 += v[jrow]*x[9*idx[jrow]+1];
1441       sum3 += v[jrow]*x[9*idx[jrow]+2];
1442       sum4 += v[jrow]*x[9*idx[jrow]+3];
1443       sum5 += v[jrow]*x[9*idx[jrow]+4];
1444       sum6 += v[jrow]*x[9*idx[jrow]+5];
1445       sum7 += v[jrow]*x[9*idx[jrow]+6];
1446       sum8 += v[jrow]*x[9*idx[jrow]+7];
1447       sum9 += v[jrow]*x[9*idx[jrow]+8];
1448       jrow++;
1449     }
1450     y[9*i]   = sum1;
1451     y[9*i+1] = sum2;
1452     y[9*i+2] = sum3;
1453     y[9*i+3] = sum4;
1454     y[9*i+4] = sum5;
1455     y[9*i+5] = sum6;
1456     y[9*i+6] = sum7;
1457     y[9*i+7] = sum8;
1458     y[9*i+8] = sum9;
1459   }
1460 
1461   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1462   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1463   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 /* ------------------------------------------------------------------------------*/
1468 
1469 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1470 {
1471   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1472   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1473   const PetscScalar *x,*v;
1474   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1475   PetscErrorCode    ierr;
1476   const PetscInt    m = b->AIJ->rmap->n,*idx;
1477   PetscInt          n,i;
1478 
1479   PetscFunctionBegin;
1480   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1481   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1482   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1483 
1484   for (i=0; i<m; i++) {
1485     idx    = a->j + a->i[i];
1486     v      = a->a + a->i[i];
1487     n      = a->i[i+1] - a->i[i];
1488     alpha1 = x[9*i];
1489     alpha2 = x[9*i+1];
1490     alpha3 = x[9*i+2];
1491     alpha4 = x[9*i+3];
1492     alpha5 = x[9*i+4];
1493     alpha6 = x[9*i+5];
1494     alpha7 = x[9*i+6];
1495     alpha8 = x[9*i+7];
1496     alpha9 = x[9*i+8];
1497     while (n-->0) {
1498       y[9*(*idx)]   += alpha1*(*v);
1499       y[9*(*idx)+1] += alpha2*(*v);
1500       y[9*(*idx)+2] += alpha3*(*v);
1501       y[9*(*idx)+3] += alpha4*(*v);
1502       y[9*(*idx)+4] += alpha5*(*v);
1503       y[9*(*idx)+5] += alpha6*(*v);
1504       y[9*(*idx)+6] += alpha7*(*v);
1505       y[9*(*idx)+7] += alpha8*(*v);
1506       y[9*(*idx)+8] += alpha9*(*v);
1507       idx++; v++;
1508     }
1509   }
1510   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1511   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1512   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1517 {
1518   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1519   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1520   const PetscScalar *x,*v;
1521   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1522   PetscErrorCode    ierr;
1523   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1524   PetscInt          n,i,jrow,j;
1525 
1526   PetscFunctionBegin;
1527   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1528   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1529   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1530   idx  = a->j;
1531   v    = a->a;
1532   ii   = a->i;
1533 
1534   for (i=0; i<m; i++) {
1535     jrow = ii[i];
1536     n    = ii[i+1] - jrow;
1537     sum1 = 0.0;
1538     sum2 = 0.0;
1539     sum3 = 0.0;
1540     sum4 = 0.0;
1541     sum5 = 0.0;
1542     sum6 = 0.0;
1543     sum7 = 0.0;
1544     sum8 = 0.0;
1545     sum9 = 0.0;
1546     for (j=0; j<n; j++) {
1547       sum1 += v[jrow]*x[9*idx[jrow]];
1548       sum2 += v[jrow]*x[9*idx[jrow]+1];
1549       sum3 += v[jrow]*x[9*idx[jrow]+2];
1550       sum4 += v[jrow]*x[9*idx[jrow]+3];
1551       sum5 += v[jrow]*x[9*idx[jrow]+4];
1552       sum6 += v[jrow]*x[9*idx[jrow]+5];
1553       sum7 += v[jrow]*x[9*idx[jrow]+6];
1554       sum8 += v[jrow]*x[9*idx[jrow]+7];
1555       sum9 += v[jrow]*x[9*idx[jrow]+8];
1556       jrow++;
1557     }
1558     y[9*i]   += sum1;
1559     y[9*i+1] += sum2;
1560     y[9*i+2] += sum3;
1561     y[9*i+3] += sum4;
1562     y[9*i+4] += sum5;
1563     y[9*i+5] += sum6;
1564     y[9*i+6] += sum7;
1565     y[9*i+7] += sum8;
1566     y[9*i+8] += sum9;
1567   }
1568 
1569   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1570   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1571   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1576 {
1577   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1578   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1579   const PetscScalar *x,*v;
1580   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1581   PetscErrorCode    ierr;
1582   const PetscInt    m = b->AIJ->rmap->n,*idx;
1583   PetscInt          n,i;
1584 
1585   PetscFunctionBegin;
1586   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1587   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1588   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1589   for (i=0; i<m; i++) {
1590     idx    = a->j + a->i[i];
1591     v      = a->a + a->i[i];
1592     n      = a->i[i+1] - a->i[i];
1593     alpha1 = x[9*i];
1594     alpha2 = x[9*i+1];
1595     alpha3 = x[9*i+2];
1596     alpha4 = x[9*i+3];
1597     alpha5 = x[9*i+4];
1598     alpha6 = x[9*i+5];
1599     alpha7 = x[9*i+6];
1600     alpha8 = x[9*i+7];
1601     alpha9 = x[9*i+8];
1602     while (n-->0) {
1603       y[9*(*idx)]   += alpha1*(*v);
1604       y[9*(*idx)+1] += alpha2*(*v);
1605       y[9*(*idx)+2] += alpha3*(*v);
1606       y[9*(*idx)+3] += alpha4*(*v);
1607       y[9*(*idx)+4] += alpha5*(*v);
1608       y[9*(*idx)+5] += alpha6*(*v);
1609       y[9*(*idx)+6] += alpha7*(*v);
1610       y[9*(*idx)+7] += alpha8*(*v);
1611       y[9*(*idx)+8] += alpha9*(*v);
1612       idx++; v++;
1613     }
1614   }
1615   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1616   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1617   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1618   PetscFunctionReturn(0);
1619 }
1620 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1621 {
1622   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1623   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1624   const PetscScalar *x,*v;
1625   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1626   PetscErrorCode    ierr;
1627   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1628   PetscInt          nonzerorow=0,n,i,jrow,j;
1629 
1630   PetscFunctionBegin;
1631   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1632   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1633   idx  = a->j;
1634   v    = a->a;
1635   ii   = a->i;
1636 
1637   for (i=0; i<m; i++) {
1638     jrow  = ii[i];
1639     n     = ii[i+1] - jrow;
1640     sum1  = 0.0;
1641     sum2  = 0.0;
1642     sum3  = 0.0;
1643     sum4  = 0.0;
1644     sum5  = 0.0;
1645     sum6  = 0.0;
1646     sum7  = 0.0;
1647     sum8  = 0.0;
1648     sum9  = 0.0;
1649     sum10 = 0.0;
1650 
1651     nonzerorow += (n>0);
1652     for (j=0; j<n; j++) {
1653       sum1  += v[jrow]*x[10*idx[jrow]];
1654       sum2  += v[jrow]*x[10*idx[jrow]+1];
1655       sum3  += v[jrow]*x[10*idx[jrow]+2];
1656       sum4  += v[jrow]*x[10*idx[jrow]+3];
1657       sum5  += v[jrow]*x[10*idx[jrow]+4];
1658       sum6  += v[jrow]*x[10*idx[jrow]+5];
1659       sum7  += v[jrow]*x[10*idx[jrow]+6];
1660       sum8  += v[jrow]*x[10*idx[jrow]+7];
1661       sum9  += v[jrow]*x[10*idx[jrow]+8];
1662       sum10 += v[jrow]*x[10*idx[jrow]+9];
1663       jrow++;
1664     }
1665     y[10*i]   = sum1;
1666     y[10*i+1] = sum2;
1667     y[10*i+2] = sum3;
1668     y[10*i+3] = sum4;
1669     y[10*i+4] = sum5;
1670     y[10*i+5] = sum6;
1671     y[10*i+6] = sum7;
1672     y[10*i+7] = sum8;
1673     y[10*i+8] = sum9;
1674     y[10*i+9] = sum10;
1675   }
1676 
1677   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1678   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1679   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1684 {
1685   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1686   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1687   const PetscScalar *x,*v;
1688   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1689   PetscErrorCode    ierr;
1690   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1691   PetscInt          n,i,jrow,j;
1692 
1693   PetscFunctionBegin;
1694   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1695   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1696   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1697   idx  = a->j;
1698   v    = a->a;
1699   ii   = a->i;
1700 
1701   for (i=0; i<m; i++) {
1702     jrow  = ii[i];
1703     n     = ii[i+1] - jrow;
1704     sum1  = 0.0;
1705     sum2  = 0.0;
1706     sum3  = 0.0;
1707     sum4  = 0.0;
1708     sum5  = 0.0;
1709     sum6  = 0.0;
1710     sum7  = 0.0;
1711     sum8  = 0.0;
1712     sum9  = 0.0;
1713     sum10 = 0.0;
1714     for (j=0; j<n; j++) {
1715       sum1  += v[jrow]*x[10*idx[jrow]];
1716       sum2  += v[jrow]*x[10*idx[jrow]+1];
1717       sum3  += v[jrow]*x[10*idx[jrow]+2];
1718       sum4  += v[jrow]*x[10*idx[jrow]+3];
1719       sum5  += v[jrow]*x[10*idx[jrow]+4];
1720       sum6  += v[jrow]*x[10*idx[jrow]+5];
1721       sum7  += v[jrow]*x[10*idx[jrow]+6];
1722       sum8  += v[jrow]*x[10*idx[jrow]+7];
1723       sum9  += v[jrow]*x[10*idx[jrow]+8];
1724       sum10 += v[jrow]*x[10*idx[jrow]+9];
1725       jrow++;
1726     }
1727     y[10*i]   += sum1;
1728     y[10*i+1] += sum2;
1729     y[10*i+2] += sum3;
1730     y[10*i+3] += sum4;
1731     y[10*i+4] += sum5;
1732     y[10*i+5] += sum6;
1733     y[10*i+6] += sum7;
1734     y[10*i+7] += sum8;
1735     y[10*i+8] += sum9;
1736     y[10*i+9] += sum10;
1737   }
1738 
1739   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1740   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1741   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1746 {
1747   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1748   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1749   const PetscScalar *x,*v;
1750   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1751   PetscErrorCode    ierr;
1752   const PetscInt    m = b->AIJ->rmap->n,*idx;
1753   PetscInt          n,i;
1754 
1755   PetscFunctionBegin;
1756   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1757   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1758   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1759 
1760   for (i=0; i<m; i++) {
1761     idx     = a->j + a->i[i];
1762     v       = a->a + a->i[i];
1763     n       = a->i[i+1] - a->i[i];
1764     alpha1  = x[10*i];
1765     alpha2  = x[10*i+1];
1766     alpha3  = x[10*i+2];
1767     alpha4  = x[10*i+3];
1768     alpha5  = x[10*i+4];
1769     alpha6  = x[10*i+5];
1770     alpha7  = x[10*i+6];
1771     alpha8  = x[10*i+7];
1772     alpha9  = x[10*i+8];
1773     alpha10 = x[10*i+9];
1774     while (n-->0) {
1775       y[10*(*idx)]   += alpha1*(*v);
1776       y[10*(*idx)+1] += alpha2*(*v);
1777       y[10*(*idx)+2] += alpha3*(*v);
1778       y[10*(*idx)+3] += alpha4*(*v);
1779       y[10*(*idx)+4] += alpha5*(*v);
1780       y[10*(*idx)+5] += alpha6*(*v);
1781       y[10*(*idx)+6] += alpha7*(*v);
1782       y[10*(*idx)+7] += alpha8*(*v);
1783       y[10*(*idx)+8] += alpha9*(*v);
1784       y[10*(*idx)+9] += alpha10*(*v);
1785       idx++; v++;
1786     }
1787   }
1788   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1789   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1790   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1795 {
1796   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1797   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1798   const PetscScalar *x,*v;
1799   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1800   PetscErrorCode    ierr;
1801   const PetscInt    m = b->AIJ->rmap->n,*idx;
1802   PetscInt          n,i;
1803 
1804   PetscFunctionBegin;
1805   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1806   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1807   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1808   for (i=0; i<m; i++) {
1809     idx     = a->j + a->i[i];
1810     v       = a->a + a->i[i];
1811     n       = a->i[i+1] - a->i[i];
1812     alpha1  = x[10*i];
1813     alpha2  = x[10*i+1];
1814     alpha3  = x[10*i+2];
1815     alpha4  = x[10*i+3];
1816     alpha5  = x[10*i+4];
1817     alpha6  = x[10*i+5];
1818     alpha7  = x[10*i+6];
1819     alpha8  = x[10*i+7];
1820     alpha9  = x[10*i+8];
1821     alpha10 = x[10*i+9];
1822     while (n-->0) {
1823       y[10*(*idx)]   += alpha1*(*v);
1824       y[10*(*idx)+1] += alpha2*(*v);
1825       y[10*(*idx)+2] += alpha3*(*v);
1826       y[10*(*idx)+3] += alpha4*(*v);
1827       y[10*(*idx)+4] += alpha5*(*v);
1828       y[10*(*idx)+5] += alpha6*(*v);
1829       y[10*(*idx)+6] += alpha7*(*v);
1830       y[10*(*idx)+7] += alpha8*(*v);
1831       y[10*(*idx)+8] += alpha9*(*v);
1832       y[10*(*idx)+9] += alpha10*(*v);
1833       idx++; v++;
1834     }
1835   }
1836   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1837   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1838   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 
1843 /*--------------------------------------------------------------------------------------------*/
1844 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1845 {
1846   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1847   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1848   const PetscScalar *x,*v;
1849   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1850   PetscErrorCode    ierr;
1851   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1852   PetscInt          nonzerorow=0,n,i,jrow,j;
1853 
1854   PetscFunctionBegin;
1855   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1856   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1857   idx  = a->j;
1858   v    = a->a;
1859   ii   = a->i;
1860 
1861   for (i=0; i<m; i++) {
1862     jrow  = ii[i];
1863     n     = ii[i+1] - jrow;
1864     sum1  = 0.0;
1865     sum2  = 0.0;
1866     sum3  = 0.0;
1867     sum4  = 0.0;
1868     sum5  = 0.0;
1869     sum6  = 0.0;
1870     sum7  = 0.0;
1871     sum8  = 0.0;
1872     sum9  = 0.0;
1873     sum10 = 0.0;
1874     sum11 = 0.0;
1875 
1876     nonzerorow += (n>0);
1877     for (j=0; j<n; j++) {
1878       sum1  += v[jrow]*x[11*idx[jrow]];
1879       sum2  += v[jrow]*x[11*idx[jrow]+1];
1880       sum3  += v[jrow]*x[11*idx[jrow]+2];
1881       sum4  += v[jrow]*x[11*idx[jrow]+3];
1882       sum5  += v[jrow]*x[11*idx[jrow]+4];
1883       sum6  += v[jrow]*x[11*idx[jrow]+5];
1884       sum7  += v[jrow]*x[11*idx[jrow]+6];
1885       sum8  += v[jrow]*x[11*idx[jrow]+7];
1886       sum9  += v[jrow]*x[11*idx[jrow]+8];
1887       sum10 += v[jrow]*x[11*idx[jrow]+9];
1888       sum11 += v[jrow]*x[11*idx[jrow]+10];
1889       jrow++;
1890     }
1891     y[11*i]    = sum1;
1892     y[11*i+1]  = sum2;
1893     y[11*i+2]  = sum3;
1894     y[11*i+3]  = sum4;
1895     y[11*i+4]  = sum5;
1896     y[11*i+5]  = sum6;
1897     y[11*i+6]  = sum7;
1898     y[11*i+7]  = sum8;
1899     y[11*i+8]  = sum9;
1900     y[11*i+9]  = sum10;
1901     y[11*i+10] = sum11;
1902   }
1903 
1904   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1905   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1906   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1911 {
1912   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1913   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1914   const PetscScalar *x,*v;
1915   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1916   PetscErrorCode    ierr;
1917   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1918   PetscInt          n,i,jrow,j;
1919 
1920   PetscFunctionBegin;
1921   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1922   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1923   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1924   idx  = a->j;
1925   v    = a->a;
1926   ii   = a->i;
1927 
1928   for (i=0; i<m; i++) {
1929     jrow  = ii[i];
1930     n     = ii[i+1] - jrow;
1931     sum1  = 0.0;
1932     sum2  = 0.0;
1933     sum3  = 0.0;
1934     sum4  = 0.0;
1935     sum5  = 0.0;
1936     sum6  = 0.0;
1937     sum7  = 0.0;
1938     sum8  = 0.0;
1939     sum9  = 0.0;
1940     sum10 = 0.0;
1941     sum11 = 0.0;
1942     for (j=0; j<n; j++) {
1943       sum1  += v[jrow]*x[11*idx[jrow]];
1944       sum2  += v[jrow]*x[11*idx[jrow]+1];
1945       sum3  += v[jrow]*x[11*idx[jrow]+2];
1946       sum4  += v[jrow]*x[11*idx[jrow]+3];
1947       sum5  += v[jrow]*x[11*idx[jrow]+4];
1948       sum6  += v[jrow]*x[11*idx[jrow]+5];
1949       sum7  += v[jrow]*x[11*idx[jrow]+6];
1950       sum8  += v[jrow]*x[11*idx[jrow]+7];
1951       sum9  += v[jrow]*x[11*idx[jrow]+8];
1952       sum10 += v[jrow]*x[11*idx[jrow]+9];
1953       sum11 += v[jrow]*x[11*idx[jrow]+10];
1954       jrow++;
1955     }
1956     y[11*i]    += sum1;
1957     y[11*i+1]  += sum2;
1958     y[11*i+2]  += sum3;
1959     y[11*i+3]  += sum4;
1960     y[11*i+4]  += sum5;
1961     y[11*i+5]  += sum6;
1962     y[11*i+6]  += sum7;
1963     y[11*i+7]  += sum8;
1964     y[11*i+8]  += sum9;
1965     y[11*i+9]  += sum10;
1966     y[11*i+10] += sum11;
1967   }
1968 
1969   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
1970   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1971   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1976 {
1977   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1978   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1979   const PetscScalar *x,*v;
1980   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1981   PetscErrorCode    ierr;
1982   const PetscInt    m = b->AIJ->rmap->n,*idx;
1983   PetscInt          n,i;
1984 
1985   PetscFunctionBegin;
1986   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1987   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1988   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1989 
1990   for (i=0; i<m; i++) {
1991     idx     = a->j + a->i[i];
1992     v       = a->a + a->i[i];
1993     n       = a->i[i+1] - a->i[i];
1994     alpha1  = x[11*i];
1995     alpha2  = x[11*i+1];
1996     alpha3  = x[11*i+2];
1997     alpha4  = x[11*i+3];
1998     alpha5  = x[11*i+4];
1999     alpha6  = x[11*i+5];
2000     alpha7  = x[11*i+6];
2001     alpha8  = x[11*i+7];
2002     alpha9  = x[11*i+8];
2003     alpha10 = x[11*i+9];
2004     alpha11 = x[11*i+10];
2005     while (n-->0) {
2006       y[11*(*idx)]    += alpha1*(*v);
2007       y[11*(*idx)+1]  += alpha2*(*v);
2008       y[11*(*idx)+2]  += alpha3*(*v);
2009       y[11*(*idx)+3]  += alpha4*(*v);
2010       y[11*(*idx)+4]  += alpha5*(*v);
2011       y[11*(*idx)+5]  += alpha6*(*v);
2012       y[11*(*idx)+6]  += alpha7*(*v);
2013       y[11*(*idx)+7]  += alpha8*(*v);
2014       y[11*(*idx)+8]  += alpha9*(*v);
2015       y[11*(*idx)+9]  += alpha10*(*v);
2016       y[11*(*idx)+10] += alpha11*(*v);
2017       idx++; v++;
2018     }
2019   }
2020   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2021   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2022   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2027 {
2028   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2029   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2030   const PetscScalar *x,*v;
2031   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2032   PetscErrorCode    ierr;
2033   const PetscInt    m = b->AIJ->rmap->n,*idx;
2034   PetscInt          n,i;
2035 
2036   PetscFunctionBegin;
2037   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2038   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2039   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2040   for (i=0; i<m; i++) {
2041     idx     = a->j + a->i[i];
2042     v       = a->a + a->i[i];
2043     n       = a->i[i+1] - a->i[i];
2044     alpha1  = x[11*i];
2045     alpha2  = x[11*i+1];
2046     alpha3  = x[11*i+2];
2047     alpha4  = x[11*i+3];
2048     alpha5  = x[11*i+4];
2049     alpha6  = x[11*i+5];
2050     alpha7  = x[11*i+6];
2051     alpha8  = x[11*i+7];
2052     alpha9  = x[11*i+8];
2053     alpha10 = x[11*i+9];
2054     alpha11 = x[11*i+10];
2055     while (n-->0) {
2056       y[11*(*idx)]    += alpha1*(*v);
2057       y[11*(*idx)+1]  += alpha2*(*v);
2058       y[11*(*idx)+2]  += alpha3*(*v);
2059       y[11*(*idx)+3]  += alpha4*(*v);
2060       y[11*(*idx)+4]  += alpha5*(*v);
2061       y[11*(*idx)+5]  += alpha6*(*v);
2062       y[11*(*idx)+6]  += alpha7*(*v);
2063       y[11*(*idx)+7]  += alpha8*(*v);
2064       y[11*(*idx)+8]  += alpha9*(*v);
2065       y[11*(*idx)+9]  += alpha10*(*v);
2066       y[11*(*idx)+10] += alpha11*(*v);
2067       idx++; v++;
2068     }
2069   }
2070   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2071   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2072   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 
2077 /*--------------------------------------------------------------------------------------------*/
2078 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2079 {
2080   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2081   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2082   const PetscScalar *x,*v;
2083   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2084   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2085   PetscErrorCode    ierr;
2086   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2087   PetscInt          nonzerorow=0,n,i,jrow,j;
2088 
2089   PetscFunctionBegin;
2090   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2091   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2092   idx  = a->j;
2093   v    = a->a;
2094   ii   = a->i;
2095 
2096   for (i=0; i<m; i++) {
2097     jrow  = ii[i];
2098     n     = ii[i+1] - jrow;
2099     sum1  = 0.0;
2100     sum2  = 0.0;
2101     sum3  = 0.0;
2102     sum4  = 0.0;
2103     sum5  = 0.0;
2104     sum6  = 0.0;
2105     sum7  = 0.0;
2106     sum8  = 0.0;
2107     sum9  = 0.0;
2108     sum10 = 0.0;
2109     sum11 = 0.0;
2110     sum12 = 0.0;
2111     sum13 = 0.0;
2112     sum14 = 0.0;
2113     sum15 = 0.0;
2114     sum16 = 0.0;
2115 
2116     nonzerorow += (n>0);
2117     for (j=0; j<n; j++) {
2118       sum1  += v[jrow]*x[16*idx[jrow]];
2119       sum2  += v[jrow]*x[16*idx[jrow]+1];
2120       sum3  += v[jrow]*x[16*idx[jrow]+2];
2121       sum4  += v[jrow]*x[16*idx[jrow]+3];
2122       sum5  += v[jrow]*x[16*idx[jrow]+4];
2123       sum6  += v[jrow]*x[16*idx[jrow]+5];
2124       sum7  += v[jrow]*x[16*idx[jrow]+6];
2125       sum8  += v[jrow]*x[16*idx[jrow]+7];
2126       sum9  += v[jrow]*x[16*idx[jrow]+8];
2127       sum10 += v[jrow]*x[16*idx[jrow]+9];
2128       sum11 += v[jrow]*x[16*idx[jrow]+10];
2129       sum12 += v[jrow]*x[16*idx[jrow]+11];
2130       sum13 += v[jrow]*x[16*idx[jrow]+12];
2131       sum14 += v[jrow]*x[16*idx[jrow]+13];
2132       sum15 += v[jrow]*x[16*idx[jrow]+14];
2133       sum16 += v[jrow]*x[16*idx[jrow]+15];
2134       jrow++;
2135     }
2136     y[16*i]    = sum1;
2137     y[16*i+1]  = sum2;
2138     y[16*i+2]  = sum3;
2139     y[16*i+3]  = sum4;
2140     y[16*i+4]  = sum5;
2141     y[16*i+5]  = sum6;
2142     y[16*i+6]  = sum7;
2143     y[16*i+7]  = sum8;
2144     y[16*i+8]  = sum9;
2145     y[16*i+9]  = sum10;
2146     y[16*i+10] = sum11;
2147     y[16*i+11] = sum12;
2148     y[16*i+12] = sum13;
2149     y[16*i+13] = sum14;
2150     y[16*i+14] = sum15;
2151     y[16*i+15] = sum16;
2152   }
2153 
2154   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2155   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2156   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2161 {
2162   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2163   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2164   const PetscScalar *x,*v;
2165   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2166   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2167   PetscErrorCode    ierr;
2168   const PetscInt    m = b->AIJ->rmap->n,*idx;
2169   PetscInt          n,i;
2170 
2171   PetscFunctionBegin;
2172   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2173   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2174   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2175 
2176   for (i=0; i<m; i++) {
2177     idx     = a->j + a->i[i];
2178     v       = a->a + a->i[i];
2179     n       = a->i[i+1] - a->i[i];
2180     alpha1  = x[16*i];
2181     alpha2  = x[16*i+1];
2182     alpha3  = x[16*i+2];
2183     alpha4  = x[16*i+3];
2184     alpha5  = x[16*i+4];
2185     alpha6  = x[16*i+5];
2186     alpha7  = x[16*i+6];
2187     alpha8  = x[16*i+7];
2188     alpha9  = x[16*i+8];
2189     alpha10 = x[16*i+9];
2190     alpha11 = x[16*i+10];
2191     alpha12 = x[16*i+11];
2192     alpha13 = x[16*i+12];
2193     alpha14 = x[16*i+13];
2194     alpha15 = x[16*i+14];
2195     alpha16 = x[16*i+15];
2196     while (n-->0) {
2197       y[16*(*idx)]    += alpha1*(*v);
2198       y[16*(*idx)+1]  += alpha2*(*v);
2199       y[16*(*idx)+2]  += alpha3*(*v);
2200       y[16*(*idx)+3]  += alpha4*(*v);
2201       y[16*(*idx)+4]  += alpha5*(*v);
2202       y[16*(*idx)+5]  += alpha6*(*v);
2203       y[16*(*idx)+6]  += alpha7*(*v);
2204       y[16*(*idx)+7]  += alpha8*(*v);
2205       y[16*(*idx)+8]  += alpha9*(*v);
2206       y[16*(*idx)+9]  += alpha10*(*v);
2207       y[16*(*idx)+10] += alpha11*(*v);
2208       y[16*(*idx)+11] += alpha12*(*v);
2209       y[16*(*idx)+12] += alpha13*(*v);
2210       y[16*(*idx)+13] += alpha14*(*v);
2211       y[16*(*idx)+14] += alpha15*(*v);
2212       y[16*(*idx)+15] += alpha16*(*v);
2213       idx++; v++;
2214     }
2215   }
2216   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2217   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2218   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2223 {
2224   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2225   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2226   const PetscScalar *x,*v;
2227   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2228   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2229   PetscErrorCode    ierr;
2230   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2231   PetscInt          n,i,jrow,j;
2232 
2233   PetscFunctionBegin;
2234   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2235   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2236   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2237   idx  = a->j;
2238   v    = a->a;
2239   ii   = a->i;
2240 
2241   for (i=0; i<m; i++) {
2242     jrow  = ii[i];
2243     n     = ii[i+1] - jrow;
2244     sum1  = 0.0;
2245     sum2  = 0.0;
2246     sum3  = 0.0;
2247     sum4  = 0.0;
2248     sum5  = 0.0;
2249     sum6  = 0.0;
2250     sum7  = 0.0;
2251     sum8  = 0.0;
2252     sum9  = 0.0;
2253     sum10 = 0.0;
2254     sum11 = 0.0;
2255     sum12 = 0.0;
2256     sum13 = 0.0;
2257     sum14 = 0.0;
2258     sum15 = 0.0;
2259     sum16 = 0.0;
2260     for (j=0; j<n; j++) {
2261       sum1  += v[jrow]*x[16*idx[jrow]];
2262       sum2  += v[jrow]*x[16*idx[jrow]+1];
2263       sum3  += v[jrow]*x[16*idx[jrow]+2];
2264       sum4  += v[jrow]*x[16*idx[jrow]+3];
2265       sum5  += v[jrow]*x[16*idx[jrow]+4];
2266       sum6  += v[jrow]*x[16*idx[jrow]+5];
2267       sum7  += v[jrow]*x[16*idx[jrow]+6];
2268       sum8  += v[jrow]*x[16*idx[jrow]+7];
2269       sum9  += v[jrow]*x[16*idx[jrow]+8];
2270       sum10 += v[jrow]*x[16*idx[jrow]+9];
2271       sum11 += v[jrow]*x[16*idx[jrow]+10];
2272       sum12 += v[jrow]*x[16*idx[jrow]+11];
2273       sum13 += v[jrow]*x[16*idx[jrow]+12];
2274       sum14 += v[jrow]*x[16*idx[jrow]+13];
2275       sum15 += v[jrow]*x[16*idx[jrow]+14];
2276       sum16 += v[jrow]*x[16*idx[jrow]+15];
2277       jrow++;
2278     }
2279     y[16*i]    += sum1;
2280     y[16*i+1]  += sum2;
2281     y[16*i+2]  += sum3;
2282     y[16*i+3]  += sum4;
2283     y[16*i+4]  += sum5;
2284     y[16*i+5]  += sum6;
2285     y[16*i+6]  += sum7;
2286     y[16*i+7]  += sum8;
2287     y[16*i+8]  += sum9;
2288     y[16*i+9]  += sum10;
2289     y[16*i+10] += sum11;
2290     y[16*i+11] += sum12;
2291     y[16*i+12] += sum13;
2292     y[16*i+13] += sum14;
2293     y[16*i+14] += sum15;
2294     y[16*i+15] += sum16;
2295   }
2296 
2297   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2298   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2299   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2304 {
2305   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2306   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2307   const PetscScalar *x,*v;
2308   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2309   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2310   PetscErrorCode    ierr;
2311   const PetscInt    m = b->AIJ->rmap->n,*idx;
2312   PetscInt          n,i;
2313 
2314   PetscFunctionBegin;
2315   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2316   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2317   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2318   for (i=0; i<m; i++) {
2319     idx     = a->j + a->i[i];
2320     v       = a->a + a->i[i];
2321     n       = a->i[i+1] - a->i[i];
2322     alpha1  = x[16*i];
2323     alpha2  = x[16*i+1];
2324     alpha3  = x[16*i+2];
2325     alpha4  = x[16*i+3];
2326     alpha5  = x[16*i+4];
2327     alpha6  = x[16*i+5];
2328     alpha7  = x[16*i+6];
2329     alpha8  = x[16*i+7];
2330     alpha9  = x[16*i+8];
2331     alpha10 = x[16*i+9];
2332     alpha11 = x[16*i+10];
2333     alpha12 = x[16*i+11];
2334     alpha13 = x[16*i+12];
2335     alpha14 = x[16*i+13];
2336     alpha15 = x[16*i+14];
2337     alpha16 = x[16*i+15];
2338     while (n-->0) {
2339       y[16*(*idx)]    += alpha1*(*v);
2340       y[16*(*idx)+1]  += alpha2*(*v);
2341       y[16*(*idx)+2]  += alpha3*(*v);
2342       y[16*(*idx)+3]  += alpha4*(*v);
2343       y[16*(*idx)+4]  += alpha5*(*v);
2344       y[16*(*idx)+5]  += alpha6*(*v);
2345       y[16*(*idx)+6]  += alpha7*(*v);
2346       y[16*(*idx)+7]  += alpha8*(*v);
2347       y[16*(*idx)+8]  += alpha9*(*v);
2348       y[16*(*idx)+9]  += alpha10*(*v);
2349       y[16*(*idx)+10] += alpha11*(*v);
2350       y[16*(*idx)+11] += alpha12*(*v);
2351       y[16*(*idx)+12] += alpha13*(*v);
2352       y[16*(*idx)+13] += alpha14*(*v);
2353       y[16*(*idx)+14] += alpha15*(*v);
2354       y[16*(*idx)+15] += alpha16*(*v);
2355       idx++; v++;
2356     }
2357   }
2358   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2359   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2360   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 /*--------------------------------------------------------------------------------------------*/
2365 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2366 {
2367   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2368   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2369   const PetscScalar *x,*v;
2370   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2371   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2372   PetscErrorCode    ierr;
2373   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2374   PetscInt          nonzerorow=0,n,i,jrow,j;
2375 
2376   PetscFunctionBegin;
2377   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2378   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2379   idx  = a->j;
2380   v    = a->a;
2381   ii   = a->i;
2382 
2383   for (i=0; i<m; i++) {
2384     jrow  = ii[i];
2385     n     = ii[i+1] - jrow;
2386     sum1  = 0.0;
2387     sum2  = 0.0;
2388     sum3  = 0.0;
2389     sum4  = 0.0;
2390     sum5  = 0.0;
2391     sum6  = 0.0;
2392     sum7  = 0.0;
2393     sum8  = 0.0;
2394     sum9  = 0.0;
2395     sum10 = 0.0;
2396     sum11 = 0.0;
2397     sum12 = 0.0;
2398     sum13 = 0.0;
2399     sum14 = 0.0;
2400     sum15 = 0.0;
2401     sum16 = 0.0;
2402     sum17 = 0.0;
2403     sum18 = 0.0;
2404 
2405     nonzerorow += (n>0);
2406     for (j=0; j<n; j++) {
2407       sum1  += v[jrow]*x[18*idx[jrow]];
2408       sum2  += v[jrow]*x[18*idx[jrow]+1];
2409       sum3  += v[jrow]*x[18*idx[jrow]+2];
2410       sum4  += v[jrow]*x[18*idx[jrow]+3];
2411       sum5  += v[jrow]*x[18*idx[jrow]+4];
2412       sum6  += v[jrow]*x[18*idx[jrow]+5];
2413       sum7  += v[jrow]*x[18*idx[jrow]+6];
2414       sum8  += v[jrow]*x[18*idx[jrow]+7];
2415       sum9  += v[jrow]*x[18*idx[jrow]+8];
2416       sum10 += v[jrow]*x[18*idx[jrow]+9];
2417       sum11 += v[jrow]*x[18*idx[jrow]+10];
2418       sum12 += v[jrow]*x[18*idx[jrow]+11];
2419       sum13 += v[jrow]*x[18*idx[jrow]+12];
2420       sum14 += v[jrow]*x[18*idx[jrow]+13];
2421       sum15 += v[jrow]*x[18*idx[jrow]+14];
2422       sum16 += v[jrow]*x[18*idx[jrow]+15];
2423       sum17 += v[jrow]*x[18*idx[jrow]+16];
2424       sum18 += v[jrow]*x[18*idx[jrow]+17];
2425       jrow++;
2426     }
2427     y[18*i]    = sum1;
2428     y[18*i+1]  = sum2;
2429     y[18*i+2]  = sum3;
2430     y[18*i+3]  = sum4;
2431     y[18*i+4]  = sum5;
2432     y[18*i+5]  = sum6;
2433     y[18*i+6]  = sum7;
2434     y[18*i+7]  = sum8;
2435     y[18*i+8]  = sum9;
2436     y[18*i+9]  = sum10;
2437     y[18*i+10] = sum11;
2438     y[18*i+11] = sum12;
2439     y[18*i+12] = sum13;
2440     y[18*i+13] = sum14;
2441     y[18*i+14] = sum15;
2442     y[18*i+15] = sum16;
2443     y[18*i+16] = sum17;
2444     y[18*i+17] = sum18;
2445   }
2446 
2447   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2448   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2454 {
2455   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2456   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2457   const PetscScalar *x,*v;
2458   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2459   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2460   PetscErrorCode    ierr;
2461   const PetscInt    m = b->AIJ->rmap->n,*idx;
2462   PetscInt          n,i;
2463 
2464   PetscFunctionBegin;
2465   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2466   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2467   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2468 
2469   for (i=0; i<m; i++) {
2470     idx     = a->j + a->i[i];
2471     v       = a->a + a->i[i];
2472     n       = a->i[i+1] - a->i[i];
2473     alpha1  = x[18*i];
2474     alpha2  = x[18*i+1];
2475     alpha3  = x[18*i+2];
2476     alpha4  = x[18*i+3];
2477     alpha5  = x[18*i+4];
2478     alpha6  = x[18*i+5];
2479     alpha7  = x[18*i+6];
2480     alpha8  = x[18*i+7];
2481     alpha9  = x[18*i+8];
2482     alpha10 = x[18*i+9];
2483     alpha11 = x[18*i+10];
2484     alpha12 = x[18*i+11];
2485     alpha13 = x[18*i+12];
2486     alpha14 = x[18*i+13];
2487     alpha15 = x[18*i+14];
2488     alpha16 = x[18*i+15];
2489     alpha17 = x[18*i+16];
2490     alpha18 = x[18*i+17];
2491     while (n-->0) {
2492       y[18*(*idx)]    += alpha1*(*v);
2493       y[18*(*idx)+1]  += alpha2*(*v);
2494       y[18*(*idx)+2]  += alpha3*(*v);
2495       y[18*(*idx)+3]  += alpha4*(*v);
2496       y[18*(*idx)+4]  += alpha5*(*v);
2497       y[18*(*idx)+5]  += alpha6*(*v);
2498       y[18*(*idx)+6]  += alpha7*(*v);
2499       y[18*(*idx)+7]  += alpha8*(*v);
2500       y[18*(*idx)+8]  += alpha9*(*v);
2501       y[18*(*idx)+9]  += alpha10*(*v);
2502       y[18*(*idx)+10] += alpha11*(*v);
2503       y[18*(*idx)+11] += alpha12*(*v);
2504       y[18*(*idx)+12] += alpha13*(*v);
2505       y[18*(*idx)+13] += alpha14*(*v);
2506       y[18*(*idx)+14] += alpha15*(*v);
2507       y[18*(*idx)+15] += alpha16*(*v);
2508       y[18*(*idx)+16] += alpha17*(*v);
2509       y[18*(*idx)+17] += alpha18*(*v);
2510       idx++; v++;
2511     }
2512   }
2513   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2514   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2515   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2520 {
2521   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2522   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2523   const PetscScalar *x,*v;
2524   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2525   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2526   PetscErrorCode    ierr;
2527   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2528   PetscInt          n,i,jrow,j;
2529 
2530   PetscFunctionBegin;
2531   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2532   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2533   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2534   idx  = a->j;
2535   v    = a->a;
2536   ii   = a->i;
2537 
2538   for (i=0; i<m; i++) {
2539     jrow  = ii[i];
2540     n     = ii[i+1] - jrow;
2541     sum1  = 0.0;
2542     sum2  = 0.0;
2543     sum3  = 0.0;
2544     sum4  = 0.0;
2545     sum5  = 0.0;
2546     sum6  = 0.0;
2547     sum7  = 0.0;
2548     sum8  = 0.0;
2549     sum9  = 0.0;
2550     sum10 = 0.0;
2551     sum11 = 0.0;
2552     sum12 = 0.0;
2553     sum13 = 0.0;
2554     sum14 = 0.0;
2555     sum15 = 0.0;
2556     sum16 = 0.0;
2557     sum17 = 0.0;
2558     sum18 = 0.0;
2559     for (j=0; j<n; j++) {
2560       sum1  += v[jrow]*x[18*idx[jrow]];
2561       sum2  += v[jrow]*x[18*idx[jrow]+1];
2562       sum3  += v[jrow]*x[18*idx[jrow]+2];
2563       sum4  += v[jrow]*x[18*idx[jrow]+3];
2564       sum5  += v[jrow]*x[18*idx[jrow]+4];
2565       sum6  += v[jrow]*x[18*idx[jrow]+5];
2566       sum7  += v[jrow]*x[18*idx[jrow]+6];
2567       sum8  += v[jrow]*x[18*idx[jrow]+7];
2568       sum9  += v[jrow]*x[18*idx[jrow]+8];
2569       sum10 += v[jrow]*x[18*idx[jrow]+9];
2570       sum11 += v[jrow]*x[18*idx[jrow]+10];
2571       sum12 += v[jrow]*x[18*idx[jrow]+11];
2572       sum13 += v[jrow]*x[18*idx[jrow]+12];
2573       sum14 += v[jrow]*x[18*idx[jrow]+13];
2574       sum15 += v[jrow]*x[18*idx[jrow]+14];
2575       sum16 += v[jrow]*x[18*idx[jrow]+15];
2576       sum17 += v[jrow]*x[18*idx[jrow]+16];
2577       sum18 += v[jrow]*x[18*idx[jrow]+17];
2578       jrow++;
2579     }
2580     y[18*i]    += sum1;
2581     y[18*i+1]  += sum2;
2582     y[18*i+2]  += sum3;
2583     y[18*i+3]  += sum4;
2584     y[18*i+4]  += sum5;
2585     y[18*i+5]  += sum6;
2586     y[18*i+6]  += sum7;
2587     y[18*i+7]  += sum8;
2588     y[18*i+8]  += sum9;
2589     y[18*i+9]  += sum10;
2590     y[18*i+10] += sum11;
2591     y[18*i+11] += sum12;
2592     y[18*i+12] += sum13;
2593     y[18*i+13] += sum14;
2594     y[18*i+14] += sum15;
2595     y[18*i+15] += sum16;
2596     y[18*i+16] += sum17;
2597     y[18*i+17] += sum18;
2598   }
2599 
2600   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2601   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2602   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2607 {
2608   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2609   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2610   const PetscScalar *x,*v;
2611   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2612   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2613   PetscErrorCode    ierr;
2614   const PetscInt    m = b->AIJ->rmap->n,*idx;
2615   PetscInt          n,i;
2616 
2617   PetscFunctionBegin;
2618   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2619   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2620   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2621   for (i=0; i<m; i++) {
2622     idx     = a->j + a->i[i];
2623     v       = a->a + a->i[i];
2624     n       = a->i[i+1] - a->i[i];
2625     alpha1  = x[18*i];
2626     alpha2  = x[18*i+1];
2627     alpha3  = x[18*i+2];
2628     alpha4  = x[18*i+3];
2629     alpha5  = x[18*i+4];
2630     alpha6  = x[18*i+5];
2631     alpha7  = x[18*i+6];
2632     alpha8  = x[18*i+7];
2633     alpha9  = x[18*i+8];
2634     alpha10 = x[18*i+9];
2635     alpha11 = x[18*i+10];
2636     alpha12 = x[18*i+11];
2637     alpha13 = x[18*i+12];
2638     alpha14 = x[18*i+13];
2639     alpha15 = x[18*i+14];
2640     alpha16 = x[18*i+15];
2641     alpha17 = x[18*i+16];
2642     alpha18 = x[18*i+17];
2643     while (n-->0) {
2644       y[18*(*idx)]    += alpha1*(*v);
2645       y[18*(*idx)+1]  += alpha2*(*v);
2646       y[18*(*idx)+2]  += alpha3*(*v);
2647       y[18*(*idx)+3]  += alpha4*(*v);
2648       y[18*(*idx)+4]  += alpha5*(*v);
2649       y[18*(*idx)+5]  += alpha6*(*v);
2650       y[18*(*idx)+6]  += alpha7*(*v);
2651       y[18*(*idx)+7]  += alpha8*(*v);
2652       y[18*(*idx)+8]  += alpha9*(*v);
2653       y[18*(*idx)+9]  += alpha10*(*v);
2654       y[18*(*idx)+10] += alpha11*(*v);
2655       y[18*(*idx)+11] += alpha12*(*v);
2656       y[18*(*idx)+12] += alpha13*(*v);
2657       y[18*(*idx)+13] += alpha14*(*v);
2658       y[18*(*idx)+14] += alpha15*(*v);
2659       y[18*(*idx)+15] += alpha16*(*v);
2660       y[18*(*idx)+16] += alpha17*(*v);
2661       y[18*(*idx)+17] += alpha18*(*v);
2662       idx++; v++;
2663     }
2664   }
2665   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2666   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2667   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2672 {
2673   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2674   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2675   const PetscScalar *x,*v;
2676   PetscScalar       *y,*sums;
2677   PetscErrorCode    ierr;
2678   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2679   PetscInt          n,i,jrow,j,dof = b->dof,k;
2680 
2681   PetscFunctionBegin;
2682   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2683   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2684   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2685   idx  = a->j;
2686   v    = a->a;
2687   ii   = a->i;
2688 
2689   for (i=0; i<m; i++) {
2690     jrow = ii[i];
2691     n    = ii[i+1] - jrow;
2692     sums = y + dof*i;
2693     for (j=0; j<n; j++) {
2694       for (k=0; k<dof; k++) {
2695         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2696       }
2697       jrow++;
2698     }
2699   }
2700 
2701   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2702   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2703   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2708 {
2709   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2710   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2711   const PetscScalar *x,*v;
2712   PetscScalar       *y,*sums;
2713   PetscErrorCode    ierr;
2714   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2715   PetscInt          n,i,jrow,j,dof = b->dof,k;
2716 
2717   PetscFunctionBegin;
2718   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2719   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2720   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2721   idx  = a->j;
2722   v    = a->a;
2723   ii   = a->i;
2724 
2725   for (i=0; i<m; i++) {
2726     jrow = ii[i];
2727     n    = ii[i+1] - jrow;
2728     sums = y + dof*i;
2729     for (j=0; j<n; j++) {
2730       for (k=0; k<dof; k++) {
2731         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2732       }
2733       jrow++;
2734     }
2735   }
2736 
2737   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2738   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2739   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2744 {
2745   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2746   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2747   const PetscScalar *x,*v,*alpha;
2748   PetscScalar       *y;
2749   PetscErrorCode    ierr;
2750   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2751   PetscInt          n,i,k;
2752 
2753   PetscFunctionBegin;
2754   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2755   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2756   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2757   for (i=0; i<m; i++) {
2758     idx   = a->j + a->i[i];
2759     v     = a->a + a->i[i];
2760     n     = a->i[i+1] - a->i[i];
2761     alpha = x + dof*i;
2762     while (n-->0) {
2763       for (k=0; k<dof; k++) {
2764         y[dof*(*idx)+k] += alpha[k]*(*v);
2765       }
2766       idx++; v++;
2767     }
2768   }
2769   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2770   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2771   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2776 {
2777   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2778   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2779   const PetscScalar *x,*v,*alpha;
2780   PetscScalar       *y;
2781   PetscErrorCode    ierr;
2782   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2783   PetscInt          n,i,k;
2784 
2785   PetscFunctionBegin;
2786   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2787   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2788   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2789   for (i=0; i<m; i++) {
2790     idx   = a->j + a->i[i];
2791     v     = a->a + a->i[i];
2792     n     = a->i[i+1] - a->i[i];
2793     alpha = x + dof*i;
2794     while (n-->0) {
2795       for (k=0; k<dof; k++) {
2796         y[dof*(*idx)+k] += alpha[k]*(*v);
2797       }
2798       idx++; v++;
2799     }
2800   }
2801   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2802   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2803   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 /*===================================================================================*/
2808 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2809 {
2810   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2811   PetscErrorCode ierr;
2812 
2813   PetscFunctionBegin;
2814   /* start the scatter */
2815   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2816   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2817   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2818   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2823 {
2824   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2829   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2830   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2831   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2836 {
2837   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2838   PetscErrorCode ierr;
2839 
2840   PetscFunctionBegin;
2841   /* start the scatter */
2842   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2843   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2844   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2845   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2850 {
2851   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2852   PetscErrorCode ierr;
2853 
2854   PetscFunctionBegin;
2855   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2856   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2857   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2858   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 /* ----------------------------------------------------------------*/
2863 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2864 {
2865   PetscErrorCode     ierr;
2866   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2867   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2868   Mat                P         =pp->AIJ;
2869   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2870   PetscInt           *pti,*ptj,*ptJ;
2871   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2872   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2873   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2874   MatScalar          *ca;
2875   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2876 
2877   PetscFunctionBegin;
2878   /* Get ij structure of P^T */
2879   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2880 
2881   cn = pn*ppdof;
2882   /* Allocate ci array, arrays for fill computation and */
2883   /* free space for accumulating nonzero column info */
2884   ierr  = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr);
2885   ci[0] = 0;
2886 
2887   /* Work arrays for rows of P^T*A */
2888   ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr);
2889   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
2890   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
2891 
2892   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2893   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2894   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2895   ierr          = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr);
2896   current_space = free_space;
2897 
2898   /* Determine symbolic info for each row of C: */
2899   for (i=0; i<pn; i++) {
2900     ptnzi = pti[i+1] - pti[i];
2901     ptJ   = ptj + pti[i];
2902     for (dof=0; dof<ppdof; dof++) {
2903       ptanzi = 0;
2904       /* Determine symbolic row of PtA: */
2905       for (j=0; j<ptnzi; j++) {
2906         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2907         arow = ptJ[j]*ppdof + dof;
2908         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2909         anzj = ai[arow+1] - ai[arow];
2910         ajj  = aj + ai[arow];
2911         for (k=0; k<anzj; k++) {
2912           if (!ptadenserow[ajj[k]]) {
2913             ptadenserow[ajj[k]]    = -1;
2914             ptasparserow[ptanzi++] = ajj[k];
2915           }
2916         }
2917       }
2918       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2919       ptaj = ptasparserow;
2920       cnzi = 0;
2921       for (j=0; j<ptanzi; j++) {
2922         /* Get offset within block of P */
2923         pshift = *ptaj%ppdof;
2924         /* Get block row of P */
2925         prow = (*ptaj++)/ppdof; /* integer division */
2926         /* P has same number of nonzeros per row as the compressed form */
2927         pnzj = pi[prow+1] - pi[prow];
2928         pjj  = pj + pi[prow];
2929         for (k=0;k<pnzj;k++) {
2930           /* Locations in C are shifted by the offset within the block */
2931           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2932           if (!denserow[pjj[k]*ppdof+pshift]) {
2933             denserow[pjj[k]*ppdof+pshift] = -1;
2934             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2935           }
2936         }
2937       }
2938 
2939       /* sort sparserow */
2940       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2941 
2942       /* If free space is not available, make more free space */
2943       /* Double the amount of total space in the list */
2944       if (current_space->local_remaining<cnzi) {
2945         ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
2946       }
2947 
2948       /* Copy data into free space, and zero out denserows */
2949       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2950 
2951       current_space->array           += cnzi;
2952       current_space->local_used      += cnzi;
2953       current_space->local_remaining -= cnzi;
2954 
2955       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2956       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
2957 
2958       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2959       /*        For now, we will recompute what is needed. */
2960       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2961     }
2962   }
2963   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2964   /* Allocate space for cj, initialize cj, and */
2965   /* destroy list of free space and other temporary array(s) */
2966   ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr);
2967   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2968   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
2969 
2970   /* Allocate space for ca */
2971   ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr);
2972 
2973   /* put together the new matrix */
2974   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2975   ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr);
2976 
2977   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2978   /* Since these are PETSc arrays, change flags to free them as necessary. */
2979   c          = (Mat_SeqAIJ*)((*C)->data);
2980   c->free_a  = PETSC_TRUE;
2981   c->free_ij = PETSC_TRUE;
2982   c->nonew   = 0;
2983 
2984   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2985 
2986   /* Clean up. */
2987   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2992 {
2993   /* This routine requires testing -- first draft only */
2994   PetscErrorCode  ierr;
2995   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
2996   Mat             P  =pp->AIJ;
2997   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
2998   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
2999   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3000   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3001   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3002   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3003   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3004   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3005   MatScalar       *ca=c->a,*caj,*apa;
3006 
3007   PetscFunctionBegin;
3008   /* Allocate temporary array for storage of one row of A*P */
3009   ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr);
3010 
3011   /* Clear old values in C */
3012   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3013 
3014   for (i=0; i<am; i++) {
3015     /* Form sparse row of A*P */
3016     anzi  = ai[i+1] - ai[i];
3017     apnzj = 0;
3018     for (j=0; j<anzi; j++) {
3019       /* Get offset within block of P */
3020       pshift = *aj%ppdof;
3021       /* Get block row of P */
3022       prow = *aj++/ppdof;   /* integer division */
3023       pnzj = pi[prow+1] - pi[prow];
3024       pjj  = pj + pi[prow];
3025       paj  = pa + pi[prow];
3026       for (k=0; k<pnzj; k++) {
3027         poffset = pjj[k]*ppdof+pshift;
3028         if (!apjdense[poffset]) {
3029           apjdense[poffset] = -1;
3030           apj[apnzj++]      = poffset;
3031         }
3032         apa[poffset] += (*aa)*paj[k];
3033       }
3034       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3035       aa++;
3036     }
3037 
3038     /* Sort the j index array for quick sparse axpy. */
3039     /* Note: a array does not need sorting as it is in dense storage locations. */
3040     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3041 
3042     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3043     prow    = i/ppdof; /* integer division */
3044     pshift  = i%ppdof;
3045     poffset = pi[prow];
3046     pnzi    = pi[prow+1] - poffset;
3047     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3048     pJ = pj+poffset;
3049     pA = pa+poffset;
3050     for (j=0; j<pnzi; j++) {
3051       crow = (*pJ)*ppdof+pshift;
3052       cjj  = cj + ci[crow];
3053       caj  = ca + ci[crow];
3054       pJ++;
3055       /* Perform sparse axpy operation.  Note cjj includes apj. */
3056       for (k=0,nextap=0; nextap<apnzj; k++) {
3057         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3058       }
3059       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3060       pA++;
3061     }
3062 
3063     /* Zero the current row info for A*P */
3064     for (j=0; j<apnzj; j++) {
3065       apa[apj[j]]      = 0.;
3066       apjdense[apj[j]] = 0;
3067     }
3068   }
3069 
3070   /* Assemble the final matrix and clean up */
3071   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3072   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3073   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3074   PetscFunctionReturn(0);
3075 }
3076 
3077 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3078 {
3079   PetscErrorCode ierr;
3080 
3081   PetscFunctionBegin;
3082   if (scall == MAT_INITIAL_MATRIX) {
3083     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3084     ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr);
3085     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3086   }
3087   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3088   ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr);
3089   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3090   PetscFunctionReturn(0);
3091 }
3092 
3093 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3094 {
3095   PetscErrorCode ierr;
3096 
3097   PetscFunctionBegin;
3098   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3099   ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr);
3100   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3105 {
3106   PetscFunctionBegin;
3107   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3108   PetscFunctionReturn(0);
3109 }
3110 
3111 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3112 {
3113   PetscErrorCode ierr;
3114 
3115   PetscFunctionBegin;
3116   if (scall == MAT_INITIAL_MATRIX) {
3117     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3118     ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr);
3119     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
3120   }
3121   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3122   ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr);
3123   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
3124   PetscFunctionReturn(0);
3125 }
3126 
3127 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3128 {
3129   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3130   Mat            a    = b->AIJ,B;
3131   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3132   PetscErrorCode ierr;
3133   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3134   PetscInt       *cols;
3135   PetscScalar    *vals;
3136 
3137   PetscFunctionBegin;
3138   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3139   ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr);
3140   for (i=0; i<m; i++) {
3141     nmax = PetscMax(nmax,aij->ilen[i]);
3142     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3143   }
3144   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3145   ierr = PetscFree(ilen);CHKERRQ(ierr);
3146   ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr);
3147   ii   = 0;
3148   for (i=0; i<m; i++) {
3149     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3150     for (j=0; j<dof; j++) {
3151       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3152       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3153       ii++;
3154     }
3155     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3156   }
3157   ierr = PetscFree(icols);CHKERRQ(ierr);
3158   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3159   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160 
3161   if (reuse == MAT_INPLACE_MATRIX) {
3162     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3163   } else {
3164     *newmat = B;
3165   }
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3170 
3171 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3172 {
3173   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3174   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3175   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3176   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3177   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3178   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3179   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3180   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3181   PetscInt       rstart,cstart,*garray,ii,k;
3182   PetscErrorCode ierr;
3183   PetscScalar    *vals,*ovals;
3184 
3185   PetscFunctionBegin;
3186   ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr);
3187   for (i=0; i<A->rmap->n/dof; i++) {
3188     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3189     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3190     for (j=0; j<dof; j++) {
3191       dnz[dof*i+j] = AIJ->ilen[i];
3192       onz[dof*i+j] = OAIJ->ilen[i];
3193     }
3194   }
3195   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3196   ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr);
3197   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3198 
3199   ierr   = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr);
3200   rstart = dof*maij->A->rmap->rstart;
3201   cstart = dof*maij->A->cmap->rstart;
3202   garray = mpiaij->garray;
3203 
3204   ii = rstart;
3205   for (i=0; i<A->rmap->n/dof; i++) {
3206     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3207     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3208     for (j=0; j<dof; j++) {
3209       for (k=0; k<ncols; k++) {
3210         icols[k] = cstart + dof*cols[k]+j;
3211       }
3212       for (k=0; k<oncols; k++) {
3213         oicols[k] = dof*garray[ocols[k]]+j;
3214       }
3215       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3216       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3217       ii++;
3218     }
3219     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3220     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3221   }
3222   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3223 
3224   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3225   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3226 
3227   if (reuse == MAT_INPLACE_MATRIX) {
3228     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3229     ((PetscObject)A)->refct = 1;
3230 
3231     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
3232 
3233     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3234   } else {
3235     *newmat = B;
3236   }
3237   PetscFunctionReturn(0);
3238 }
3239 
3240 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3241 {
3242   PetscErrorCode ierr;
3243   Mat            A;
3244 
3245   PetscFunctionBegin;
3246   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
3247   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
3248   ierr = MatDestroy(&A);CHKERRQ(ierr);
3249   PetscFunctionReturn(0);
3250 }
3251 
3252 /* ---------------------------------------------------------------------------------- */
3253 /*@
3254   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3255   operations for multicomponent problems.  It interpolates each component the same
3256   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3257   and MATMPIAIJ for distributed matrices.
3258 
3259   Collective
3260 
3261   Input Parameters:
3262 + A - the AIJ matrix describing the action on blocks
3263 - dof - the block size (number of components per node)
3264 
3265   Output Parameter:
3266 . maij - the new MAIJ matrix
3267 
3268   Operations provided:
3269 + MatMult
3270 . MatMultTranspose
3271 . MatMultAdd
3272 . MatMultTransposeAdd
3273 - MatView
3274 
3275   Level: advanced
3276 
3277 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3278 @*/
3279 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3280 {
3281   PetscErrorCode ierr;
3282   PetscMPIInt    size;
3283   PetscInt       n;
3284   Mat            B;
3285 
3286   PetscFunctionBegin;
3287   ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3288 
3289   if (dof == 1) *maij = A;
3290   else {
3291     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3292     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3293     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3294     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3295     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3296     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3297 
3298     B->assembled = PETSC_TRUE;
3299 
3300     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3301     if (size == 1) {
3302       Mat_SeqMAIJ *b;
3303 
3304       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3305 
3306       B->ops->setup   = NULL;
3307       B->ops->destroy = MatDestroy_SeqMAIJ;
3308       B->ops->view    = MatView_SeqMAIJ;
3309       b               = (Mat_SeqMAIJ*)B->data;
3310       b->dof          = dof;
3311       b->AIJ          = A;
3312 
3313       if (dof == 2) {
3314         B->ops->mult             = MatMult_SeqMAIJ_2;
3315         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3316         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3317         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3318       } else if (dof == 3) {
3319         B->ops->mult             = MatMult_SeqMAIJ_3;
3320         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3321         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3322         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3323       } else if (dof == 4) {
3324         B->ops->mult             = MatMult_SeqMAIJ_4;
3325         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3326         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3327         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3328       } else if (dof == 5) {
3329         B->ops->mult             = MatMult_SeqMAIJ_5;
3330         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3331         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3332         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3333       } else if (dof == 6) {
3334         B->ops->mult             = MatMult_SeqMAIJ_6;
3335         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3336         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3337         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3338       } else if (dof == 7) {
3339         B->ops->mult             = MatMult_SeqMAIJ_7;
3340         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3341         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3342         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3343       } else if (dof == 8) {
3344         B->ops->mult             = MatMult_SeqMAIJ_8;
3345         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3346         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3347         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3348       } else if (dof == 9) {
3349         B->ops->mult             = MatMult_SeqMAIJ_9;
3350         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3351         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3352         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3353       } else if (dof == 10) {
3354         B->ops->mult             = MatMult_SeqMAIJ_10;
3355         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3356         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3357         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3358       } else if (dof == 11) {
3359         B->ops->mult             = MatMult_SeqMAIJ_11;
3360         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3361         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3362         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3363       } else if (dof == 16) {
3364         B->ops->mult             = MatMult_SeqMAIJ_16;
3365         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3366         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3367         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3368       } else if (dof == 18) {
3369         B->ops->mult             = MatMult_SeqMAIJ_18;
3370         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3371         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3372         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3373       } else {
3374         B->ops->mult             = MatMult_SeqMAIJ_N;
3375         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3376         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3377         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3378       }
3379       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3380       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3381       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr);
3382     } else {
3383       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3384       Mat_MPIMAIJ *b;
3385       IS          from,to;
3386       Vec         gvec;
3387 
3388       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3389 
3390       B->ops->setup   = NULL;
3391       B->ops->destroy = MatDestroy_MPIMAIJ;
3392       B->ops->view    = MatView_MPIMAIJ;
3393 
3394       b      = (Mat_MPIMAIJ*)B->data;
3395       b->dof = dof;
3396       b->A   = A;
3397 
3398       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3399       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3400 
3401       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3402       ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr);
3403       ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr);
3404       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3405       ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr);
3406 
3407       /* create two temporary Index sets for build scatter gather */
3408       ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3409       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3410 
3411       /* create temporary global vector to generate scatter context */
3412       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
3413 
3414       /* generate the scatter context */
3415       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3416 
3417       ierr = ISDestroy(&from);CHKERRQ(ierr);
3418       ierr = ISDestroy(&to);CHKERRQ(ierr);
3419       ierr = VecDestroy(&gvec);CHKERRQ(ierr);
3420 
3421       B->ops->mult             = MatMult_MPIMAIJ_dof;
3422       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3423       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3424       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3425 
3426       ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3427       ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr);
3428     }
3429     B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3430     ierr  = MatSetUp(B);CHKERRQ(ierr);
3431     *maij = B;
3432     ierr  = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr);
3433   }
3434   PetscFunctionReturn(0);
3435 }
3436