xref: /petsc/src/mat/impls/maij/maij.c (revision ef46b1a67e276116c83b5d4ce8efc2932ea4fc0a)
1 
2 /*
3     Defines the basic matrix operations for the MAIJ  matrix storage format.
4   This format is used for restriction and interpolation operations for
5   multicomponent problems. It interpolates each component the same way
6   independently.
7 
8      We provide:
9          MatMult()
10          MatMultTranspose()
11          MatMultTransposeAdd()
12          MatMultAdd()
13           and
14          MatCreateMAIJ(Mat,dof,Mat*)
15 
16      This single directory handles both the sequential and parallel codes
17 */
18 
19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20 #include <../src/mat/utils/freespace.h>
21 
22 /*@
23    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
24 
25    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
26 
27    Input Parameter:
28 .  A - the MAIJ matrix
29 
30    Output Parameter:
31 .  B - the AIJ matrix
32 
33    Level: advanced
34 
35    Notes:
36     The reference count on the AIJ matrix is not increased so you should not destroy it.
37 
38 .seealso: MatCreateMAIJ()
39 @*/
40 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
41 {
42   PetscBool      ismpimaij,isseqmaij;
43 
44   PetscFunctionBegin;
45   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij));
46   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij));
47   if (ismpimaij) {
48     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
49 
50     *B = b->A;
51   } else if (isseqmaij) {
52     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
53 
54     *B = b->AIJ;
55   } else {
56     *B = A;
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 /*@
62    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
63 
64    Logically Collective
65 
66    Input Parameters:
67 +  A - the MAIJ matrix
68 -  dof - the block size for the new matrix
69 
70    Output Parameter:
71 .  B - the new MAIJ matrix
72 
73    Level: advanced
74 
75 .seealso: MatCreateMAIJ()
76 @*/
77 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
78 {
79   Mat            Aij = NULL;
80 
81   PetscFunctionBegin;
82   PetscValidLogicalCollectiveInt(A,dof,2);
83   PetscCall(MatMAIJGetAIJ(A,&Aij));
84   PetscCall(MatCreateMAIJ(Aij,dof,B));
85   PetscFunctionReturn(0);
86 }
87 
88 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
89 {
90   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
91 
92   PetscFunctionBegin;
93   PetscCall(MatDestroy(&b->AIJ));
94   PetscCall(PetscFree(A->data));
95   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL));
97   PetscFunctionReturn(0);
98 }
99 
100 PetscErrorCode MatSetUp_MAIJ(Mat A)
101 {
102   PetscFunctionBegin;
103   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
104 }
105 
106 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
107 {
108   Mat            B;
109 
110   PetscFunctionBegin;
111   PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
112   PetscCall(MatView(B,viewer));
113   PetscCall(MatDestroy(&B));
114   PetscFunctionReturn(0);
115 }
116 
117 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
118 {
119   Mat            B;
120 
121   PetscFunctionBegin;
122   PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B));
123   PetscCall(MatView(B,viewer));
124   PetscCall(MatDestroy(&B));
125   PetscFunctionReturn(0);
126 }
127 
128 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
129 {
130   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
131 
132   PetscFunctionBegin;
133   PetscCall(MatDestroy(&b->AIJ));
134   PetscCall(MatDestroy(&b->OAIJ));
135   PetscCall(MatDestroy(&b->A));
136   PetscCall(VecScatterDestroy(&b->ctx));
137   PetscCall(VecDestroy(&b->w));
138   PetscCall(PetscFree(A->data));
139   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL));
140   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL));
141   PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL));
142   PetscFunctionReturn(0);
143 }
144 
145 /*MC
146   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
147   multicomponent problems, interpolating or restricting each component the same way independently.
148   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
149 
150   Operations provided:
151 .vb
152     MatMult
153     MatMultTranspose
154     MatMultAdd
155     MatMultTransposeAdd
156 .ve
157 
158   Level: advanced
159 
160 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
161 M*/
162 
163 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
164 {
165   Mat_MPIMAIJ    *b;
166   PetscMPIInt    size;
167 
168   PetscFunctionBegin;
169   PetscCall(PetscNewLog(A,&b));
170   A->data  = (void*)b;
171 
172   PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));
173 
174   A->ops->setup = MatSetUp_MAIJ;
175 
176   b->AIJ  = NULL;
177   b->dof  = 0;
178   b->OAIJ = NULL;
179   b->ctx  = NULL;
180   b->w    = NULL;
181   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
182   if (size == 1) {
183     PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ));
184   } else {
185     PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ));
186   }
187   A->preallocated  = PETSC_TRUE;
188   A->assembled     = PETSC_TRUE;
189   PetscFunctionReturn(0);
190 }
191 
192 /* --------------------------------------------------------------------------------------*/
193 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
194 {
195   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
196   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
197   const PetscScalar *x,*v;
198   PetscScalar       *y, sum1, sum2;
199   PetscInt          nonzerorow=0,n,i,jrow,j;
200   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
201 
202   PetscFunctionBegin;
203   PetscCall(VecGetArrayRead(xx,&x));
204   PetscCall(VecGetArray(yy,&y));
205   idx  = a->j;
206   v    = a->a;
207   ii   = a->i;
208 
209   for (i=0; i<m; i++) {
210     jrow  = ii[i];
211     n     = ii[i+1] - jrow;
212     sum1  = 0.0;
213     sum2  = 0.0;
214 
215     nonzerorow += (n>0);
216     for (j=0; j<n; j++) {
217       sum1 += v[jrow]*x[2*idx[jrow]];
218       sum2 += v[jrow]*x[2*idx[jrow]+1];
219       jrow++;
220     }
221     y[2*i]   = sum1;
222     y[2*i+1] = sum2;
223   }
224 
225   PetscCall(PetscLogFlops(4.0*a->nz - 2.0*nonzerorow));
226   PetscCall(VecRestoreArrayRead(xx,&x));
227   PetscCall(VecRestoreArray(yy,&y));
228   PetscFunctionReturn(0);
229 }
230 
231 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
232 {
233   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
234   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
235   const PetscScalar *x,*v;
236   PetscScalar       *y,alpha1,alpha2;
237   PetscInt          n,i;
238   const PetscInt    m = b->AIJ->rmap->n,*idx;
239 
240   PetscFunctionBegin;
241   PetscCall(VecSet(yy,0.0));
242   PetscCall(VecGetArrayRead(xx,&x));
243   PetscCall(VecGetArray(yy,&y));
244 
245   for (i=0; i<m; i++) {
246     idx    = a->j + a->i[i];
247     v      = a->a + a->i[i];
248     n      = a->i[i+1] - a->i[i];
249     alpha1 = x[2*i];
250     alpha2 = x[2*i+1];
251     while (n-->0) {
252       y[2*(*idx)]   += alpha1*(*v);
253       y[2*(*idx)+1] += alpha2*(*v);
254       idx++; v++;
255     }
256   }
257   PetscCall(PetscLogFlops(4.0*a->nz));
258   PetscCall(VecRestoreArrayRead(xx,&x));
259   PetscCall(VecRestoreArray(yy,&y));
260   PetscFunctionReturn(0);
261 }
262 
263 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
264 {
265   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
266   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
267   const PetscScalar *x,*v;
268   PetscScalar       *y,sum1, sum2;
269   PetscInt          n,i,jrow,j;
270   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
271 
272   PetscFunctionBegin;
273   if (yy != zz) PetscCall(VecCopy(yy,zz));
274   PetscCall(VecGetArrayRead(xx,&x));
275   PetscCall(VecGetArray(zz,&y));
276   idx  = a->j;
277   v    = a->a;
278   ii   = a->i;
279 
280   for (i=0; i<m; i++) {
281     jrow = ii[i];
282     n    = ii[i+1] - jrow;
283     sum1 = 0.0;
284     sum2 = 0.0;
285     for (j=0; j<n; j++) {
286       sum1 += v[jrow]*x[2*idx[jrow]];
287       sum2 += v[jrow]*x[2*idx[jrow]+1];
288       jrow++;
289     }
290     y[2*i]   += sum1;
291     y[2*i+1] += sum2;
292   }
293 
294   PetscCall(PetscLogFlops(4.0*a->nz));
295   PetscCall(VecRestoreArrayRead(xx,&x));
296   PetscCall(VecRestoreArray(zz,&y));
297   PetscFunctionReturn(0);
298 }
299 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
300 {
301   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
302   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
303   const PetscScalar *x,*v;
304   PetscScalar       *y,alpha1,alpha2;
305   PetscInt          n,i;
306   const PetscInt    m = b->AIJ->rmap->n,*idx;
307 
308   PetscFunctionBegin;
309   if (yy != zz) PetscCall(VecCopy(yy,zz));
310   PetscCall(VecGetArrayRead(xx,&x));
311   PetscCall(VecGetArray(zz,&y));
312 
313   for (i=0; i<m; i++) {
314     idx    = a->j + a->i[i];
315     v      = a->a + a->i[i];
316     n      = a->i[i+1] - a->i[i];
317     alpha1 = x[2*i];
318     alpha2 = x[2*i+1];
319     while (n-->0) {
320       y[2*(*idx)]   += alpha1*(*v);
321       y[2*(*idx)+1] += alpha2*(*v);
322       idx++; v++;
323     }
324   }
325   PetscCall(PetscLogFlops(4.0*a->nz));
326   PetscCall(VecRestoreArrayRead(xx,&x));
327   PetscCall(VecRestoreArray(zz,&y));
328   PetscFunctionReturn(0);
329 }
330 /* --------------------------------------------------------------------------------------*/
331 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
332 {
333   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
334   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
335   const PetscScalar *x,*v;
336   PetscScalar       *y,sum1, sum2, sum3;
337   PetscInt          nonzerorow=0,n,i,jrow,j;
338   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
339 
340   PetscFunctionBegin;
341   PetscCall(VecGetArrayRead(xx,&x));
342   PetscCall(VecGetArray(yy,&y));
343   idx  = a->j;
344   v    = a->a;
345   ii   = a->i;
346 
347   for (i=0; i<m; i++) {
348     jrow  = ii[i];
349     n     = ii[i+1] - jrow;
350     sum1  = 0.0;
351     sum2  = 0.0;
352     sum3  = 0.0;
353 
354     nonzerorow += (n>0);
355     for (j=0; j<n; j++) {
356       sum1 += v[jrow]*x[3*idx[jrow]];
357       sum2 += v[jrow]*x[3*idx[jrow]+1];
358       sum3 += v[jrow]*x[3*idx[jrow]+2];
359       jrow++;
360      }
361     y[3*i]   = sum1;
362     y[3*i+1] = sum2;
363     y[3*i+2] = sum3;
364   }
365 
366   PetscCall(PetscLogFlops(6.0*a->nz - 3.0*nonzerorow));
367   PetscCall(VecRestoreArrayRead(xx,&x));
368   PetscCall(VecRestoreArray(yy,&y));
369   PetscFunctionReturn(0);
370 }
371 
372 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
373 {
374   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
375   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
376   const PetscScalar *x,*v;
377   PetscScalar       *y,alpha1,alpha2,alpha3;
378   PetscInt          n,i;
379   const PetscInt    m = b->AIJ->rmap->n,*idx;
380 
381   PetscFunctionBegin;
382   PetscCall(VecSet(yy,0.0));
383   PetscCall(VecGetArrayRead(xx,&x));
384   PetscCall(VecGetArray(yy,&y));
385 
386   for (i=0; i<m; i++) {
387     idx    = a->j + a->i[i];
388     v      = a->a + a->i[i];
389     n      = a->i[i+1] - a->i[i];
390     alpha1 = x[3*i];
391     alpha2 = x[3*i+1];
392     alpha3 = x[3*i+2];
393     while (n-->0) {
394       y[3*(*idx)]   += alpha1*(*v);
395       y[3*(*idx)+1] += alpha2*(*v);
396       y[3*(*idx)+2] += alpha3*(*v);
397       idx++; v++;
398     }
399   }
400   PetscCall(PetscLogFlops(6.0*a->nz));
401   PetscCall(VecRestoreArrayRead(xx,&x));
402   PetscCall(VecRestoreArray(yy,&y));
403   PetscFunctionReturn(0);
404 }
405 
406 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
409   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
410   const PetscScalar *x,*v;
411   PetscScalar       *y,sum1, sum2, sum3;
412   PetscInt          n,i,jrow,j;
413   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
414 
415   PetscFunctionBegin;
416   if (yy != zz) PetscCall(VecCopy(yy,zz));
417   PetscCall(VecGetArrayRead(xx,&x));
418   PetscCall(VecGetArray(zz,&y));
419   idx  = a->j;
420   v    = a->a;
421   ii   = a->i;
422 
423   for (i=0; i<m; i++) {
424     jrow = ii[i];
425     n    = ii[i+1] - jrow;
426     sum1 = 0.0;
427     sum2 = 0.0;
428     sum3 = 0.0;
429     for (j=0; j<n; j++) {
430       sum1 += v[jrow]*x[3*idx[jrow]];
431       sum2 += v[jrow]*x[3*idx[jrow]+1];
432       sum3 += v[jrow]*x[3*idx[jrow]+2];
433       jrow++;
434     }
435     y[3*i]   += sum1;
436     y[3*i+1] += sum2;
437     y[3*i+2] += sum3;
438   }
439 
440   PetscCall(PetscLogFlops(6.0*a->nz));
441   PetscCall(VecRestoreArrayRead(xx,&x));
442   PetscCall(VecRestoreArray(zz,&y));
443   PetscFunctionReturn(0);
444 }
445 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
446 {
447   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
448   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
449   const PetscScalar *x,*v;
450   PetscScalar       *y,alpha1,alpha2,alpha3;
451   PetscInt          n,i;
452   const PetscInt    m = b->AIJ->rmap->n,*idx;
453 
454   PetscFunctionBegin;
455   if (yy != zz) PetscCall(VecCopy(yy,zz));
456   PetscCall(VecGetArrayRead(xx,&x));
457   PetscCall(VecGetArray(zz,&y));
458   for (i=0; i<m; i++) {
459     idx    = a->j + a->i[i];
460     v      = a->a + a->i[i];
461     n      = a->i[i+1] - a->i[i];
462     alpha1 = x[3*i];
463     alpha2 = x[3*i+1];
464     alpha3 = x[3*i+2];
465     while (n-->0) {
466       y[3*(*idx)]   += alpha1*(*v);
467       y[3*(*idx)+1] += alpha2*(*v);
468       y[3*(*idx)+2] += alpha3*(*v);
469       idx++; v++;
470     }
471   }
472   PetscCall(PetscLogFlops(6.0*a->nz));
473   PetscCall(VecRestoreArrayRead(xx,&x));
474   PetscCall(VecRestoreArray(zz,&y));
475   PetscFunctionReturn(0);
476 }
477 
478 /* ------------------------------------------------------------------------------*/
479 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
480 {
481   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
482   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
483   const PetscScalar *x,*v;
484   PetscScalar       *y,sum1, sum2, sum3, sum4;
485   PetscInt          nonzerorow=0,n,i,jrow,j;
486   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
487 
488   PetscFunctionBegin;
489   PetscCall(VecGetArrayRead(xx,&x));
490   PetscCall(VecGetArray(yy,&y));
491   idx  = a->j;
492   v    = a->a;
493   ii   = a->i;
494 
495   for (i=0; i<m; i++) {
496     jrow        = ii[i];
497     n           = ii[i+1] - jrow;
498     sum1        = 0.0;
499     sum2        = 0.0;
500     sum3        = 0.0;
501     sum4        = 0.0;
502     nonzerorow += (n>0);
503     for (j=0; j<n; j++) {
504       sum1 += v[jrow]*x[4*idx[jrow]];
505       sum2 += v[jrow]*x[4*idx[jrow]+1];
506       sum3 += v[jrow]*x[4*idx[jrow]+2];
507       sum4 += v[jrow]*x[4*idx[jrow]+3];
508       jrow++;
509     }
510     y[4*i]   = sum1;
511     y[4*i+1] = sum2;
512     y[4*i+2] = sum3;
513     y[4*i+3] = sum4;
514   }
515 
516   PetscCall(PetscLogFlops(8.0*a->nz - 4.0*nonzerorow));
517   PetscCall(VecRestoreArrayRead(xx,&x));
518   PetscCall(VecRestoreArray(yy,&y));
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
523 {
524   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
525   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
526   const PetscScalar *x,*v;
527   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
528   PetscInt          n,i;
529   const PetscInt    m = b->AIJ->rmap->n,*idx;
530 
531   PetscFunctionBegin;
532   PetscCall(VecSet(yy,0.0));
533   PetscCall(VecGetArrayRead(xx,&x));
534   PetscCall(VecGetArray(yy,&y));
535   for (i=0; i<m; i++) {
536     idx    = a->j + a->i[i];
537     v      = a->a + a->i[i];
538     n      = a->i[i+1] - a->i[i];
539     alpha1 = x[4*i];
540     alpha2 = x[4*i+1];
541     alpha3 = x[4*i+2];
542     alpha4 = x[4*i+3];
543     while (n-->0) {
544       y[4*(*idx)]   += alpha1*(*v);
545       y[4*(*idx)+1] += alpha2*(*v);
546       y[4*(*idx)+2] += alpha3*(*v);
547       y[4*(*idx)+3] += alpha4*(*v);
548       idx++; v++;
549     }
550   }
551   PetscCall(PetscLogFlops(8.0*a->nz));
552   PetscCall(VecRestoreArrayRead(xx,&x));
553   PetscCall(VecRestoreArray(yy,&y));
554   PetscFunctionReturn(0);
555 }
556 
557 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
558 {
559   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
560   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
561   const PetscScalar *x,*v;
562   PetscScalar       *y,sum1, sum2, sum3, sum4;
563   PetscInt          n,i,jrow,j;
564   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
565 
566   PetscFunctionBegin;
567   if (yy != zz) PetscCall(VecCopy(yy,zz));
568   PetscCall(VecGetArrayRead(xx,&x));
569   PetscCall(VecGetArray(zz,&y));
570   idx  = a->j;
571   v    = a->a;
572   ii   = a->i;
573 
574   for (i=0; i<m; i++) {
575     jrow = ii[i];
576     n    = ii[i+1] - jrow;
577     sum1 = 0.0;
578     sum2 = 0.0;
579     sum3 = 0.0;
580     sum4 = 0.0;
581     for (j=0; j<n; j++) {
582       sum1 += v[jrow]*x[4*idx[jrow]];
583       sum2 += v[jrow]*x[4*idx[jrow]+1];
584       sum3 += v[jrow]*x[4*idx[jrow]+2];
585       sum4 += v[jrow]*x[4*idx[jrow]+3];
586       jrow++;
587     }
588     y[4*i]   += sum1;
589     y[4*i+1] += sum2;
590     y[4*i+2] += sum3;
591     y[4*i+3] += sum4;
592   }
593 
594   PetscCall(PetscLogFlops(8.0*a->nz));
595   PetscCall(VecRestoreArrayRead(xx,&x));
596   PetscCall(VecRestoreArray(zz,&y));
597   PetscFunctionReturn(0);
598 }
599 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
602   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
603   const PetscScalar *x,*v;
604   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
605   PetscInt          n,i;
606   const PetscInt    m = b->AIJ->rmap->n,*idx;
607 
608   PetscFunctionBegin;
609   if (yy != zz) PetscCall(VecCopy(yy,zz));
610   PetscCall(VecGetArrayRead(xx,&x));
611   PetscCall(VecGetArray(zz,&y));
612 
613   for (i=0; i<m; i++) {
614     idx    = a->j + a->i[i];
615     v      = a->a + a->i[i];
616     n      = a->i[i+1] - a->i[i];
617     alpha1 = x[4*i];
618     alpha2 = x[4*i+1];
619     alpha3 = x[4*i+2];
620     alpha4 = x[4*i+3];
621     while (n-->0) {
622       y[4*(*idx)]   += alpha1*(*v);
623       y[4*(*idx)+1] += alpha2*(*v);
624       y[4*(*idx)+2] += alpha3*(*v);
625       y[4*(*idx)+3] += alpha4*(*v);
626       idx++; v++;
627     }
628   }
629   PetscCall(PetscLogFlops(8.0*a->nz));
630   PetscCall(VecRestoreArrayRead(xx,&x));
631   PetscCall(VecRestoreArray(zz,&y));
632   PetscFunctionReturn(0);
633 }
634 /* ------------------------------------------------------------------------------*/
635 
636 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
637 {
638   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
639   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
640   const PetscScalar *x,*v;
641   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
642   PetscInt          nonzerorow=0,n,i,jrow,j;
643   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
644 
645   PetscFunctionBegin;
646   PetscCall(VecGetArrayRead(xx,&x));
647   PetscCall(VecGetArray(yy,&y));
648   idx  = a->j;
649   v    = a->a;
650   ii   = a->i;
651 
652   for (i=0; i<m; i++) {
653     jrow  = ii[i];
654     n     = ii[i+1] - jrow;
655     sum1  = 0.0;
656     sum2  = 0.0;
657     sum3  = 0.0;
658     sum4  = 0.0;
659     sum5  = 0.0;
660 
661     nonzerorow += (n>0);
662     for (j=0; j<n; j++) {
663       sum1 += v[jrow]*x[5*idx[jrow]];
664       sum2 += v[jrow]*x[5*idx[jrow]+1];
665       sum3 += v[jrow]*x[5*idx[jrow]+2];
666       sum4 += v[jrow]*x[5*idx[jrow]+3];
667       sum5 += v[jrow]*x[5*idx[jrow]+4];
668       jrow++;
669     }
670     y[5*i]   = sum1;
671     y[5*i+1] = sum2;
672     y[5*i+2] = sum3;
673     y[5*i+3] = sum4;
674     y[5*i+4] = sum5;
675   }
676 
677   PetscCall(PetscLogFlops(10.0*a->nz - 5.0*nonzerorow));
678   PetscCall(VecRestoreArrayRead(xx,&x));
679   PetscCall(VecRestoreArray(yy,&y));
680   PetscFunctionReturn(0);
681 }
682 
683 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
684 {
685   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
686   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
687   const PetscScalar *x,*v;
688   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
689   PetscInt          n,i;
690   const PetscInt    m = b->AIJ->rmap->n,*idx;
691 
692   PetscFunctionBegin;
693   PetscCall(VecSet(yy,0.0));
694   PetscCall(VecGetArrayRead(xx,&x));
695   PetscCall(VecGetArray(yy,&y));
696 
697   for (i=0; i<m; i++) {
698     idx    = a->j + a->i[i];
699     v      = a->a + a->i[i];
700     n      = a->i[i+1] - a->i[i];
701     alpha1 = x[5*i];
702     alpha2 = x[5*i+1];
703     alpha3 = x[5*i+2];
704     alpha4 = x[5*i+3];
705     alpha5 = x[5*i+4];
706     while (n-->0) {
707       y[5*(*idx)]   += alpha1*(*v);
708       y[5*(*idx)+1] += alpha2*(*v);
709       y[5*(*idx)+2] += alpha3*(*v);
710       y[5*(*idx)+3] += alpha4*(*v);
711       y[5*(*idx)+4] += alpha5*(*v);
712       idx++; v++;
713     }
714   }
715   PetscCall(PetscLogFlops(10.0*a->nz));
716   PetscCall(VecRestoreArrayRead(xx,&x));
717   PetscCall(VecRestoreArray(yy,&y));
718   PetscFunctionReturn(0);
719 }
720 
721 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
722 {
723   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
724   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
725   const PetscScalar *x,*v;
726   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
727   PetscInt          n,i,jrow,j;
728   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
729 
730   PetscFunctionBegin;
731   if (yy != zz) PetscCall(VecCopy(yy,zz));
732   PetscCall(VecGetArrayRead(xx,&x));
733   PetscCall(VecGetArray(zz,&y));
734   idx  = a->j;
735   v    = a->a;
736   ii   = a->i;
737 
738   for (i=0; i<m; i++) {
739     jrow = ii[i];
740     n    = ii[i+1] - jrow;
741     sum1 = 0.0;
742     sum2 = 0.0;
743     sum3 = 0.0;
744     sum4 = 0.0;
745     sum5 = 0.0;
746     for (j=0; j<n; j++) {
747       sum1 += v[jrow]*x[5*idx[jrow]];
748       sum2 += v[jrow]*x[5*idx[jrow]+1];
749       sum3 += v[jrow]*x[5*idx[jrow]+2];
750       sum4 += v[jrow]*x[5*idx[jrow]+3];
751       sum5 += v[jrow]*x[5*idx[jrow]+4];
752       jrow++;
753     }
754     y[5*i]   += sum1;
755     y[5*i+1] += sum2;
756     y[5*i+2] += sum3;
757     y[5*i+3] += sum4;
758     y[5*i+4] += sum5;
759   }
760 
761   PetscCall(PetscLogFlops(10.0*a->nz));
762   PetscCall(VecRestoreArrayRead(xx,&x));
763   PetscCall(VecRestoreArray(zz,&y));
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
768 {
769   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
770   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
771   const PetscScalar *x,*v;
772   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
773   PetscInt          n,i;
774   const PetscInt    m = b->AIJ->rmap->n,*idx;
775 
776   PetscFunctionBegin;
777   if (yy != zz) PetscCall(VecCopy(yy,zz));
778   PetscCall(VecGetArrayRead(xx,&x));
779   PetscCall(VecGetArray(zz,&y));
780 
781   for (i=0; i<m; i++) {
782     idx    = a->j + a->i[i];
783     v      = a->a + a->i[i];
784     n      = a->i[i+1] - a->i[i];
785     alpha1 = x[5*i];
786     alpha2 = x[5*i+1];
787     alpha3 = x[5*i+2];
788     alpha4 = x[5*i+3];
789     alpha5 = x[5*i+4];
790     while (n-->0) {
791       y[5*(*idx)]   += alpha1*(*v);
792       y[5*(*idx)+1] += alpha2*(*v);
793       y[5*(*idx)+2] += alpha3*(*v);
794       y[5*(*idx)+3] += alpha4*(*v);
795       y[5*(*idx)+4] += alpha5*(*v);
796       idx++; v++;
797     }
798   }
799   PetscCall(PetscLogFlops(10.0*a->nz));
800   PetscCall(VecRestoreArrayRead(xx,&x));
801   PetscCall(VecRestoreArray(zz,&y));
802   PetscFunctionReturn(0);
803 }
804 
805 /* ------------------------------------------------------------------------------*/
806 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
807 {
808   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
809   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
810   const PetscScalar *x,*v;
811   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
812   PetscInt          nonzerorow=0,n,i,jrow,j;
813   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
814 
815   PetscFunctionBegin;
816   PetscCall(VecGetArrayRead(xx,&x));
817   PetscCall(VecGetArray(yy,&y));
818   idx  = a->j;
819   v    = a->a;
820   ii   = a->i;
821 
822   for (i=0; i<m; i++) {
823     jrow  = ii[i];
824     n     = ii[i+1] - jrow;
825     sum1  = 0.0;
826     sum2  = 0.0;
827     sum3  = 0.0;
828     sum4  = 0.0;
829     sum5  = 0.0;
830     sum6  = 0.0;
831 
832     nonzerorow += (n>0);
833     for (j=0; j<n; j++) {
834       sum1 += v[jrow]*x[6*idx[jrow]];
835       sum2 += v[jrow]*x[6*idx[jrow]+1];
836       sum3 += v[jrow]*x[6*idx[jrow]+2];
837       sum4 += v[jrow]*x[6*idx[jrow]+3];
838       sum5 += v[jrow]*x[6*idx[jrow]+4];
839       sum6 += v[jrow]*x[6*idx[jrow]+5];
840       jrow++;
841     }
842     y[6*i]   = sum1;
843     y[6*i+1] = sum2;
844     y[6*i+2] = sum3;
845     y[6*i+3] = sum4;
846     y[6*i+4] = sum5;
847     y[6*i+5] = sum6;
848   }
849 
850   PetscCall(PetscLogFlops(12.0*a->nz - 6.0*nonzerorow));
851   PetscCall(VecRestoreArrayRead(xx,&x));
852   PetscCall(VecRestoreArray(yy,&y));
853   PetscFunctionReturn(0);
854 }
855 
856 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
857 {
858   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
859   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
860   const PetscScalar *x,*v;
861   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
862   PetscInt          n,i;
863   const PetscInt    m = b->AIJ->rmap->n,*idx;
864 
865   PetscFunctionBegin;
866   PetscCall(VecSet(yy,0.0));
867   PetscCall(VecGetArrayRead(xx,&x));
868   PetscCall(VecGetArray(yy,&y));
869 
870   for (i=0; i<m; i++) {
871     idx    = a->j + a->i[i];
872     v      = a->a + a->i[i];
873     n      = a->i[i+1] - a->i[i];
874     alpha1 = x[6*i];
875     alpha2 = x[6*i+1];
876     alpha3 = x[6*i+2];
877     alpha4 = x[6*i+3];
878     alpha5 = x[6*i+4];
879     alpha6 = x[6*i+5];
880     while (n-->0) {
881       y[6*(*idx)]   += alpha1*(*v);
882       y[6*(*idx)+1] += alpha2*(*v);
883       y[6*(*idx)+2] += alpha3*(*v);
884       y[6*(*idx)+3] += alpha4*(*v);
885       y[6*(*idx)+4] += alpha5*(*v);
886       y[6*(*idx)+5] += alpha6*(*v);
887       idx++; v++;
888     }
889   }
890   PetscCall(PetscLogFlops(12.0*a->nz));
891   PetscCall(VecRestoreArrayRead(xx,&x));
892   PetscCall(VecRestoreArray(yy,&y));
893   PetscFunctionReturn(0);
894 }
895 
896 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
897 {
898   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
899   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
900   const PetscScalar *x,*v;
901   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
902   PetscInt          n,i,jrow,j;
903   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
904 
905   PetscFunctionBegin;
906   if (yy != zz) PetscCall(VecCopy(yy,zz));
907   PetscCall(VecGetArrayRead(xx,&x));
908   PetscCall(VecGetArray(zz,&y));
909   idx  = a->j;
910   v    = a->a;
911   ii   = a->i;
912 
913   for (i=0; i<m; i++) {
914     jrow = ii[i];
915     n    = ii[i+1] - jrow;
916     sum1 = 0.0;
917     sum2 = 0.0;
918     sum3 = 0.0;
919     sum4 = 0.0;
920     sum5 = 0.0;
921     sum6 = 0.0;
922     for (j=0; j<n; j++) {
923       sum1 += v[jrow]*x[6*idx[jrow]];
924       sum2 += v[jrow]*x[6*idx[jrow]+1];
925       sum3 += v[jrow]*x[6*idx[jrow]+2];
926       sum4 += v[jrow]*x[6*idx[jrow]+3];
927       sum5 += v[jrow]*x[6*idx[jrow]+4];
928       sum6 += v[jrow]*x[6*idx[jrow]+5];
929       jrow++;
930     }
931     y[6*i]   += sum1;
932     y[6*i+1] += sum2;
933     y[6*i+2] += sum3;
934     y[6*i+3] += sum4;
935     y[6*i+4] += sum5;
936     y[6*i+5] += sum6;
937   }
938 
939   PetscCall(PetscLogFlops(12.0*a->nz));
940   PetscCall(VecRestoreArrayRead(xx,&x));
941   PetscCall(VecRestoreArray(zz,&y));
942   PetscFunctionReturn(0);
943 }
944 
945 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
946 {
947   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
948   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
949   const PetscScalar *x,*v;
950   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
951   PetscInt          n,i;
952   const PetscInt    m = b->AIJ->rmap->n,*idx;
953 
954   PetscFunctionBegin;
955   if (yy != zz) PetscCall(VecCopy(yy,zz));
956   PetscCall(VecGetArrayRead(xx,&x));
957   PetscCall(VecGetArray(zz,&y));
958 
959   for (i=0; i<m; i++) {
960     idx    = a->j + a->i[i];
961     v      = a->a + a->i[i];
962     n      = a->i[i+1] - a->i[i];
963     alpha1 = x[6*i];
964     alpha2 = x[6*i+1];
965     alpha3 = x[6*i+2];
966     alpha4 = x[6*i+3];
967     alpha5 = x[6*i+4];
968     alpha6 = x[6*i+5];
969     while (n-->0) {
970       y[6*(*idx)]   += alpha1*(*v);
971       y[6*(*idx)+1] += alpha2*(*v);
972       y[6*(*idx)+2] += alpha3*(*v);
973       y[6*(*idx)+3] += alpha4*(*v);
974       y[6*(*idx)+4] += alpha5*(*v);
975       y[6*(*idx)+5] += alpha6*(*v);
976       idx++; v++;
977     }
978   }
979   PetscCall(PetscLogFlops(12.0*a->nz));
980   PetscCall(VecRestoreArrayRead(xx,&x));
981   PetscCall(VecRestoreArray(zz,&y));
982   PetscFunctionReturn(0);
983 }
984 
985 /* ------------------------------------------------------------------------------*/
986 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
987 {
988   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
989   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
990   const PetscScalar *x,*v;
991   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
992   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
993   PetscInt          nonzerorow=0,n,i,jrow,j;
994 
995   PetscFunctionBegin;
996   PetscCall(VecGetArrayRead(xx,&x));
997   PetscCall(VecGetArray(yy,&y));
998   idx  = a->j;
999   v    = a->a;
1000   ii   = a->i;
1001 
1002   for (i=0; i<m; i++) {
1003     jrow  = ii[i];
1004     n     = ii[i+1] - jrow;
1005     sum1  = 0.0;
1006     sum2  = 0.0;
1007     sum3  = 0.0;
1008     sum4  = 0.0;
1009     sum5  = 0.0;
1010     sum6  = 0.0;
1011     sum7  = 0.0;
1012 
1013     nonzerorow += (n>0);
1014     for (j=0; j<n; j++) {
1015       sum1 += v[jrow]*x[7*idx[jrow]];
1016       sum2 += v[jrow]*x[7*idx[jrow]+1];
1017       sum3 += v[jrow]*x[7*idx[jrow]+2];
1018       sum4 += v[jrow]*x[7*idx[jrow]+3];
1019       sum5 += v[jrow]*x[7*idx[jrow]+4];
1020       sum6 += v[jrow]*x[7*idx[jrow]+5];
1021       sum7 += v[jrow]*x[7*idx[jrow]+6];
1022       jrow++;
1023     }
1024     y[7*i]   = sum1;
1025     y[7*i+1] = sum2;
1026     y[7*i+2] = sum3;
1027     y[7*i+3] = sum4;
1028     y[7*i+4] = sum5;
1029     y[7*i+5] = sum6;
1030     y[7*i+6] = sum7;
1031   }
1032 
1033   PetscCall(PetscLogFlops(14.0*a->nz - 7.0*nonzerorow));
1034   PetscCall(VecRestoreArrayRead(xx,&x));
1035   PetscCall(VecRestoreArray(yy,&y));
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1040 {
1041   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1042   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1043   const PetscScalar *x,*v;
1044   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1045   const PetscInt    m = b->AIJ->rmap->n,*idx;
1046   PetscInt          n,i;
1047 
1048   PetscFunctionBegin;
1049   PetscCall(VecSet(yy,0.0));
1050   PetscCall(VecGetArrayRead(xx,&x));
1051   PetscCall(VecGetArray(yy,&y));
1052 
1053   for (i=0; i<m; i++) {
1054     idx    = a->j + a->i[i];
1055     v      = a->a + a->i[i];
1056     n      = a->i[i+1] - a->i[i];
1057     alpha1 = x[7*i];
1058     alpha2 = x[7*i+1];
1059     alpha3 = x[7*i+2];
1060     alpha4 = x[7*i+3];
1061     alpha5 = x[7*i+4];
1062     alpha6 = x[7*i+5];
1063     alpha7 = x[7*i+6];
1064     while (n-->0) {
1065       y[7*(*idx)]   += alpha1*(*v);
1066       y[7*(*idx)+1] += alpha2*(*v);
1067       y[7*(*idx)+2] += alpha3*(*v);
1068       y[7*(*idx)+3] += alpha4*(*v);
1069       y[7*(*idx)+4] += alpha5*(*v);
1070       y[7*(*idx)+5] += alpha6*(*v);
1071       y[7*(*idx)+6] += alpha7*(*v);
1072       idx++; v++;
1073     }
1074   }
1075   PetscCall(PetscLogFlops(14.0*a->nz));
1076   PetscCall(VecRestoreArrayRead(xx,&x));
1077   PetscCall(VecRestoreArray(yy,&y));
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1082 {
1083   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1084   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1085   const PetscScalar *x,*v;
1086   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1087   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1088   PetscInt          n,i,jrow,j;
1089 
1090   PetscFunctionBegin;
1091   if (yy != zz) PetscCall(VecCopy(yy,zz));
1092   PetscCall(VecGetArrayRead(xx,&x));
1093   PetscCall(VecGetArray(zz,&y));
1094   idx  = a->j;
1095   v    = a->a;
1096   ii   = a->i;
1097 
1098   for (i=0; i<m; i++) {
1099     jrow = ii[i];
1100     n    = ii[i+1] - jrow;
1101     sum1 = 0.0;
1102     sum2 = 0.0;
1103     sum3 = 0.0;
1104     sum4 = 0.0;
1105     sum5 = 0.0;
1106     sum6 = 0.0;
1107     sum7 = 0.0;
1108     for (j=0; j<n; j++) {
1109       sum1 += v[jrow]*x[7*idx[jrow]];
1110       sum2 += v[jrow]*x[7*idx[jrow]+1];
1111       sum3 += v[jrow]*x[7*idx[jrow]+2];
1112       sum4 += v[jrow]*x[7*idx[jrow]+3];
1113       sum5 += v[jrow]*x[7*idx[jrow]+4];
1114       sum6 += v[jrow]*x[7*idx[jrow]+5];
1115       sum7 += v[jrow]*x[7*idx[jrow]+6];
1116       jrow++;
1117     }
1118     y[7*i]   += sum1;
1119     y[7*i+1] += sum2;
1120     y[7*i+2] += sum3;
1121     y[7*i+3] += sum4;
1122     y[7*i+4] += sum5;
1123     y[7*i+5] += sum6;
1124     y[7*i+6] += sum7;
1125   }
1126 
1127   PetscCall(PetscLogFlops(14.0*a->nz));
1128   PetscCall(VecRestoreArrayRead(xx,&x));
1129   PetscCall(VecRestoreArray(zz,&y));
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1134 {
1135   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1136   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1137   const PetscScalar *x,*v;
1138   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1139   const PetscInt    m = b->AIJ->rmap->n,*idx;
1140   PetscInt          n,i;
1141 
1142   PetscFunctionBegin;
1143   if (yy != zz) PetscCall(VecCopy(yy,zz));
1144   PetscCall(VecGetArrayRead(xx,&x));
1145   PetscCall(VecGetArray(zz,&y));
1146   for (i=0; i<m; i++) {
1147     idx    = a->j + a->i[i];
1148     v      = a->a + a->i[i];
1149     n      = a->i[i+1] - a->i[i];
1150     alpha1 = x[7*i];
1151     alpha2 = x[7*i+1];
1152     alpha3 = x[7*i+2];
1153     alpha4 = x[7*i+3];
1154     alpha5 = x[7*i+4];
1155     alpha6 = x[7*i+5];
1156     alpha7 = x[7*i+6];
1157     while (n-->0) {
1158       y[7*(*idx)]   += alpha1*(*v);
1159       y[7*(*idx)+1] += alpha2*(*v);
1160       y[7*(*idx)+2] += alpha3*(*v);
1161       y[7*(*idx)+3] += alpha4*(*v);
1162       y[7*(*idx)+4] += alpha5*(*v);
1163       y[7*(*idx)+5] += alpha6*(*v);
1164       y[7*(*idx)+6] += alpha7*(*v);
1165       idx++; v++;
1166     }
1167   }
1168   PetscCall(PetscLogFlops(14.0*a->nz));
1169   PetscCall(VecRestoreArrayRead(xx,&x));
1170   PetscCall(VecRestoreArray(zz,&y));
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1175 {
1176   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1177   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1178   const PetscScalar *x,*v;
1179   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1180   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1181   PetscInt          nonzerorow=0,n,i,jrow,j;
1182 
1183   PetscFunctionBegin;
1184   PetscCall(VecGetArrayRead(xx,&x));
1185   PetscCall(VecGetArray(yy,&y));
1186   idx  = a->j;
1187   v    = a->a;
1188   ii   = a->i;
1189 
1190   for (i=0; i<m; i++) {
1191     jrow  = ii[i];
1192     n     = ii[i+1] - jrow;
1193     sum1  = 0.0;
1194     sum2  = 0.0;
1195     sum3  = 0.0;
1196     sum4  = 0.0;
1197     sum5  = 0.0;
1198     sum6  = 0.0;
1199     sum7  = 0.0;
1200     sum8  = 0.0;
1201 
1202     nonzerorow += (n>0);
1203     for (j=0; j<n; j++) {
1204       sum1 += v[jrow]*x[8*idx[jrow]];
1205       sum2 += v[jrow]*x[8*idx[jrow]+1];
1206       sum3 += v[jrow]*x[8*idx[jrow]+2];
1207       sum4 += v[jrow]*x[8*idx[jrow]+3];
1208       sum5 += v[jrow]*x[8*idx[jrow]+4];
1209       sum6 += v[jrow]*x[8*idx[jrow]+5];
1210       sum7 += v[jrow]*x[8*idx[jrow]+6];
1211       sum8 += v[jrow]*x[8*idx[jrow]+7];
1212       jrow++;
1213     }
1214     y[8*i]   = sum1;
1215     y[8*i+1] = sum2;
1216     y[8*i+2] = sum3;
1217     y[8*i+3] = sum4;
1218     y[8*i+4] = sum5;
1219     y[8*i+5] = sum6;
1220     y[8*i+6] = sum7;
1221     y[8*i+7] = sum8;
1222   }
1223 
1224   PetscCall(PetscLogFlops(16.0*a->nz - 8.0*nonzerorow));
1225   PetscCall(VecRestoreArrayRead(xx,&x));
1226   PetscCall(VecRestoreArray(yy,&y));
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1231 {
1232   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1233   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1234   const PetscScalar *x,*v;
1235   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1236   const PetscInt    m = b->AIJ->rmap->n,*idx;
1237   PetscInt          n,i;
1238 
1239   PetscFunctionBegin;
1240   PetscCall(VecSet(yy,0.0));
1241   PetscCall(VecGetArrayRead(xx,&x));
1242   PetscCall(VecGetArray(yy,&y));
1243 
1244   for (i=0; i<m; i++) {
1245     idx    = a->j + a->i[i];
1246     v      = a->a + a->i[i];
1247     n      = a->i[i+1] - a->i[i];
1248     alpha1 = x[8*i];
1249     alpha2 = x[8*i+1];
1250     alpha3 = x[8*i+2];
1251     alpha4 = x[8*i+3];
1252     alpha5 = x[8*i+4];
1253     alpha6 = x[8*i+5];
1254     alpha7 = x[8*i+6];
1255     alpha8 = x[8*i+7];
1256     while (n-->0) {
1257       y[8*(*idx)]   += alpha1*(*v);
1258       y[8*(*idx)+1] += alpha2*(*v);
1259       y[8*(*idx)+2] += alpha3*(*v);
1260       y[8*(*idx)+3] += alpha4*(*v);
1261       y[8*(*idx)+4] += alpha5*(*v);
1262       y[8*(*idx)+5] += alpha6*(*v);
1263       y[8*(*idx)+6] += alpha7*(*v);
1264       y[8*(*idx)+7] += alpha8*(*v);
1265       idx++; v++;
1266     }
1267   }
1268   PetscCall(PetscLogFlops(16.0*a->nz));
1269   PetscCall(VecRestoreArrayRead(xx,&x));
1270   PetscCall(VecRestoreArray(yy,&y));
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1275 {
1276   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1277   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1278   const PetscScalar *x,*v;
1279   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1280   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1281   PetscInt          n,i,jrow,j;
1282 
1283   PetscFunctionBegin;
1284   if (yy != zz) PetscCall(VecCopy(yy,zz));
1285   PetscCall(VecGetArrayRead(xx,&x));
1286   PetscCall(VecGetArray(zz,&y));
1287   idx  = a->j;
1288   v    = a->a;
1289   ii   = a->i;
1290 
1291   for (i=0; i<m; i++) {
1292     jrow = ii[i];
1293     n    = ii[i+1] - jrow;
1294     sum1 = 0.0;
1295     sum2 = 0.0;
1296     sum3 = 0.0;
1297     sum4 = 0.0;
1298     sum5 = 0.0;
1299     sum6 = 0.0;
1300     sum7 = 0.0;
1301     sum8 = 0.0;
1302     for (j=0; j<n; j++) {
1303       sum1 += v[jrow]*x[8*idx[jrow]];
1304       sum2 += v[jrow]*x[8*idx[jrow]+1];
1305       sum3 += v[jrow]*x[8*idx[jrow]+2];
1306       sum4 += v[jrow]*x[8*idx[jrow]+3];
1307       sum5 += v[jrow]*x[8*idx[jrow]+4];
1308       sum6 += v[jrow]*x[8*idx[jrow]+5];
1309       sum7 += v[jrow]*x[8*idx[jrow]+6];
1310       sum8 += v[jrow]*x[8*idx[jrow]+7];
1311       jrow++;
1312     }
1313     y[8*i]   += sum1;
1314     y[8*i+1] += sum2;
1315     y[8*i+2] += sum3;
1316     y[8*i+3] += sum4;
1317     y[8*i+4] += sum5;
1318     y[8*i+5] += sum6;
1319     y[8*i+6] += sum7;
1320     y[8*i+7] += sum8;
1321   }
1322 
1323   PetscCall(PetscLogFlops(16.0*a->nz));
1324   PetscCall(VecRestoreArrayRead(xx,&x));
1325   PetscCall(VecRestoreArray(zz,&y));
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1330 {
1331   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1332   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1333   const PetscScalar *x,*v;
1334   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1335   const PetscInt    m = b->AIJ->rmap->n,*idx;
1336   PetscInt          n,i;
1337 
1338   PetscFunctionBegin;
1339   if (yy != zz) PetscCall(VecCopy(yy,zz));
1340   PetscCall(VecGetArrayRead(xx,&x));
1341   PetscCall(VecGetArray(zz,&y));
1342   for (i=0; i<m; i++) {
1343     idx    = a->j + a->i[i];
1344     v      = a->a + a->i[i];
1345     n      = a->i[i+1] - a->i[i];
1346     alpha1 = x[8*i];
1347     alpha2 = x[8*i+1];
1348     alpha3 = x[8*i+2];
1349     alpha4 = x[8*i+3];
1350     alpha5 = x[8*i+4];
1351     alpha6 = x[8*i+5];
1352     alpha7 = x[8*i+6];
1353     alpha8 = x[8*i+7];
1354     while (n-->0) {
1355       y[8*(*idx)]   += alpha1*(*v);
1356       y[8*(*idx)+1] += alpha2*(*v);
1357       y[8*(*idx)+2] += alpha3*(*v);
1358       y[8*(*idx)+3] += alpha4*(*v);
1359       y[8*(*idx)+4] += alpha5*(*v);
1360       y[8*(*idx)+5] += alpha6*(*v);
1361       y[8*(*idx)+6] += alpha7*(*v);
1362       y[8*(*idx)+7] += alpha8*(*v);
1363       idx++; v++;
1364     }
1365   }
1366   PetscCall(PetscLogFlops(16.0*a->nz));
1367   PetscCall(VecRestoreArrayRead(xx,&x));
1368   PetscCall(VecRestoreArray(zz,&y));
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 /* ------------------------------------------------------------------------------*/
1373 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1374 {
1375   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1376   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1377   const PetscScalar *x,*v;
1378   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1379   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1380   PetscInt          nonzerorow=0,n,i,jrow,j;
1381 
1382   PetscFunctionBegin;
1383   PetscCall(VecGetArrayRead(xx,&x));
1384   PetscCall(VecGetArray(yy,&y));
1385   idx  = a->j;
1386   v    = a->a;
1387   ii   = a->i;
1388 
1389   for (i=0; i<m; i++) {
1390     jrow  = ii[i];
1391     n     = ii[i+1] - jrow;
1392     sum1  = 0.0;
1393     sum2  = 0.0;
1394     sum3  = 0.0;
1395     sum4  = 0.0;
1396     sum5  = 0.0;
1397     sum6  = 0.0;
1398     sum7  = 0.0;
1399     sum8  = 0.0;
1400     sum9  = 0.0;
1401 
1402     nonzerorow += (n>0);
1403     for (j=0; j<n; j++) {
1404       sum1 += v[jrow]*x[9*idx[jrow]];
1405       sum2 += v[jrow]*x[9*idx[jrow]+1];
1406       sum3 += v[jrow]*x[9*idx[jrow]+2];
1407       sum4 += v[jrow]*x[9*idx[jrow]+3];
1408       sum5 += v[jrow]*x[9*idx[jrow]+4];
1409       sum6 += v[jrow]*x[9*idx[jrow]+5];
1410       sum7 += v[jrow]*x[9*idx[jrow]+6];
1411       sum8 += v[jrow]*x[9*idx[jrow]+7];
1412       sum9 += v[jrow]*x[9*idx[jrow]+8];
1413       jrow++;
1414     }
1415     y[9*i]   = sum1;
1416     y[9*i+1] = sum2;
1417     y[9*i+2] = sum3;
1418     y[9*i+3] = sum4;
1419     y[9*i+4] = sum5;
1420     y[9*i+5] = sum6;
1421     y[9*i+6] = sum7;
1422     y[9*i+7] = sum8;
1423     y[9*i+8] = sum9;
1424   }
1425 
1426   PetscCall(PetscLogFlops(18.0*a->nz - 9*nonzerorow));
1427   PetscCall(VecRestoreArrayRead(xx,&x));
1428   PetscCall(VecRestoreArray(yy,&y));
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 /* ------------------------------------------------------------------------------*/
1433 
1434 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1435 {
1436   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1437   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1438   const PetscScalar *x,*v;
1439   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1440   const PetscInt    m = b->AIJ->rmap->n,*idx;
1441   PetscInt          n,i;
1442 
1443   PetscFunctionBegin;
1444   PetscCall(VecSet(yy,0.0));
1445   PetscCall(VecGetArrayRead(xx,&x));
1446   PetscCall(VecGetArray(yy,&y));
1447 
1448   for (i=0; i<m; i++) {
1449     idx    = a->j + a->i[i];
1450     v      = a->a + a->i[i];
1451     n      = a->i[i+1] - a->i[i];
1452     alpha1 = x[9*i];
1453     alpha2 = x[9*i+1];
1454     alpha3 = x[9*i+2];
1455     alpha4 = x[9*i+3];
1456     alpha5 = x[9*i+4];
1457     alpha6 = x[9*i+5];
1458     alpha7 = x[9*i+6];
1459     alpha8 = x[9*i+7];
1460     alpha9 = x[9*i+8];
1461     while (n-->0) {
1462       y[9*(*idx)]   += alpha1*(*v);
1463       y[9*(*idx)+1] += alpha2*(*v);
1464       y[9*(*idx)+2] += alpha3*(*v);
1465       y[9*(*idx)+3] += alpha4*(*v);
1466       y[9*(*idx)+4] += alpha5*(*v);
1467       y[9*(*idx)+5] += alpha6*(*v);
1468       y[9*(*idx)+6] += alpha7*(*v);
1469       y[9*(*idx)+7] += alpha8*(*v);
1470       y[9*(*idx)+8] += alpha9*(*v);
1471       idx++; v++;
1472     }
1473   }
1474   PetscCall(PetscLogFlops(18.0*a->nz));
1475   PetscCall(VecRestoreArrayRead(xx,&x));
1476   PetscCall(VecRestoreArray(yy,&y));
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1481 {
1482   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1483   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1484   const PetscScalar *x,*v;
1485   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1486   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1487   PetscInt          n,i,jrow,j;
1488 
1489   PetscFunctionBegin;
1490   if (yy != zz) PetscCall(VecCopy(yy,zz));
1491   PetscCall(VecGetArrayRead(xx,&x));
1492   PetscCall(VecGetArray(zz,&y));
1493   idx  = a->j;
1494   v    = a->a;
1495   ii   = a->i;
1496 
1497   for (i=0; i<m; i++) {
1498     jrow = ii[i];
1499     n    = ii[i+1] - jrow;
1500     sum1 = 0.0;
1501     sum2 = 0.0;
1502     sum3 = 0.0;
1503     sum4 = 0.0;
1504     sum5 = 0.0;
1505     sum6 = 0.0;
1506     sum7 = 0.0;
1507     sum8 = 0.0;
1508     sum9 = 0.0;
1509     for (j=0; j<n; j++) {
1510       sum1 += v[jrow]*x[9*idx[jrow]];
1511       sum2 += v[jrow]*x[9*idx[jrow]+1];
1512       sum3 += v[jrow]*x[9*idx[jrow]+2];
1513       sum4 += v[jrow]*x[9*idx[jrow]+3];
1514       sum5 += v[jrow]*x[9*idx[jrow]+4];
1515       sum6 += v[jrow]*x[9*idx[jrow]+5];
1516       sum7 += v[jrow]*x[9*idx[jrow]+6];
1517       sum8 += v[jrow]*x[9*idx[jrow]+7];
1518       sum9 += v[jrow]*x[9*idx[jrow]+8];
1519       jrow++;
1520     }
1521     y[9*i]   += sum1;
1522     y[9*i+1] += sum2;
1523     y[9*i+2] += sum3;
1524     y[9*i+3] += sum4;
1525     y[9*i+4] += sum5;
1526     y[9*i+5] += sum6;
1527     y[9*i+6] += sum7;
1528     y[9*i+7] += sum8;
1529     y[9*i+8] += sum9;
1530   }
1531 
1532   PetscCall(PetscLogFlops(18.0*a->nz));
1533   PetscCall(VecRestoreArrayRead(xx,&x));
1534   PetscCall(VecRestoreArray(zz,&y));
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1539 {
1540   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1541   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1542   const PetscScalar *x,*v;
1543   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1544   const PetscInt    m = b->AIJ->rmap->n,*idx;
1545   PetscInt          n,i;
1546 
1547   PetscFunctionBegin;
1548   if (yy != zz) PetscCall(VecCopy(yy,zz));
1549   PetscCall(VecGetArrayRead(xx,&x));
1550   PetscCall(VecGetArray(zz,&y));
1551   for (i=0; i<m; i++) {
1552     idx    = a->j + a->i[i];
1553     v      = a->a + a->i[i];
1554     n      = a->i[i+1] - a->i[i];
1555     alpha1 = x[9*i];
1556     alpha2 = x[9*i+1];
1557     alpha3 = x[9*i+2];
1558     alpha4 = x[9*i+3];
1559     alpha5 = x[9*i+4];
1560     alpha6 = x[9*i+5];
1561     alpha7 = x[9*i+6];
1562     alpha8 = x[9*i+7];
1563     alpha9 = x[9*i+8];
1564     while (n-->0) {
1565       y[9*(*idx)]   += alpha1*(*v);
1566       y[9*(*idx)+1] += alpha2*(*v);
1567       y[9*(*idx)+2] += alpha3*(*v);
1568       y[9*(*idx)+3] += alpha4*(*v);
1569       y[9*(*idx)+4] += alpha5*(*v);
1570       y[9*(*idx)+5] += alpha6*(*v);
1571       y[9*(*idx)+6] += alpha7*(*v);
1572       y[9*(*idx)+7] += alpha8*(*v);
1573       y[9*(*idx)+8] += alpha9*(*v);
1574       idx++; v++;
1575     }
1576   }
1577   PetscCall(PetscLogFlops(18.0*a->nz));
1578   PetscCall(VecRestoreArrayRead(xx,&x));
1579   PetscCall(VecRestoreArray(zz,&y));
1580   PetscFunctionReturn(0);
1581 }
1582 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1583 {
1584   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1585   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1586   const PetscScalar *x,*v;
1587   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1588   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1589   PetscInt          nonzerorow=0,n,i,jrow,j;
1590 
1591   PetscFunctionBegin;
1592   PetscCall(VecGetArrayRead(xx,&x));
1593   PetscCall(VecGetArray(yy,&y));
1594   idx  = a->j;
1595   v    = a->a;
1596   ii   = a->i;
1597 
1598   for (i=0; i<m; i++) {
1599     jrow  = ii[i];
1600     n     = ii[i+1] - jrow;
1601     sum1  = 0.0;
1602     sum2  = 0.0;
1603     sum3  = 0.0;
1604     sum4  = 0.0;
1605     sum5  = 0.0;
1606     sum6  = 0.0;
1607     sum7  = 0.0;
1608     sum8  = 0.0;
1609     sum9  = 0.0;
1610     sum10 = 0.0;
1611 
1612     nonzerorow += (n>0);
1613     for (j=0; j<n; j++) {
1614       sum1  += v[jrow]*x[10*idx[jrow]];
1615       sum2  += v[jrow]*x[10*idx[jrow]+1];
1616       sum3  += v[jrow]*x[10*idx[jrow]+2];
1617       sum4  += v[jrow]*x[10*idx[jrow]+3];
1618       sum5  += v[jrow]*x[10*idx[jrow]+4];
1619       sum6  += v[jrow]*x[10*idx[jrow]+5];
1620       sum7  += v[jrow]*x[10*idx[jrow]+6];
1621       sum8  += v[jrow]*x[10*idx[jrow]+7];
1622       sum9  += v[jrow]*x[10*idx[jrow]+8];
1623       sum10 += v[jrow]*x[10*idx[jrow]+9];
1624       jrow++;
1625     }
1626     y[10*i]   = sum1;
1627     y[10*i+1] = sum2;
1628     y[10*i+2] = sum3;
1629     y[10*i+3] = sum4;
1630     y[10*i+4] = sum5;
1631     y[10*i+5] = sum6;
1632     y[10*i+6] = sum7;
1633     y[10*i+7] = sum8;
1634     y[10*i+8] = sum9;
1635     y[10*i+9] = sum10;
1636   }
1637 
1638   PetscCall(PetscLogFlops(20.0*a->nz - 10.0*nonzerorow));
1639   PetscCall(VecRestoreArrayRead(xx,&x));
1640   PetscCall(VecRestoreArray(yy,&y));
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1645 {
1646   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1647   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1648   const PetscScalar *x,*v;
1649   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1650   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1651   PetscInt          n,i,jrow,j;
1652 
1653   PetscFunctionBegin;
1654   if (yy != zz) PetscCall(VecCopy(yy,zz));
1655   PetscCall(VecGetArrayRead(xx,&x));
1656   PetscCall(VecGetArray(zz,&y));
1657   idx  = a->j;
1658   v    = a->a;
1659   ii   = a->i;
1660 
1661   for (i=0; i<m; i++) {
1662     jrow  = ii[i];
1663     n     = ii[i+1] - jrow;
1664     sum1  = 0.0;
1665     sum2  = 0.0;
1666     sum3  = 0.0;
1667     sum4  = 0.0;
1668     sum5  = 0.0;
1669     sum6  = 0.0;
1670     sum7  = 0.0;
1671     sum8  = 0.0;
1672     sum9  = 0.0;
1673     sum10 = 0.0;
1674     for (j=0; j<n; j++) {
1675       sum1  += v[jrow]*x[10*idx[jrow]];
1676       sum2  += v[jrow]*x[10*idx[jrow]+1];
1677       sum3  += v[jrow]*x[10*idx[jrow]+2];
1678       sum4  += v[jrow]*x[10*idx[jrow]+3];
1679       sum5  += v[jrow]*x[10*idx[jrow]+4];
1680       sum6  += v[jrow]*x[10*idx[jrow]+5];
1681       sum7  += v[jrow]*x[10*idx[jrow]+6];
1682       sum8  += v[jrow]*x[10*idx[jrow]+7];
1683       sum9  += v[jrow]*x[10*idx[jrow]+8];
1684       sum10 += v[jrow]*x[10*idx[jrow]+9];
1685       jrow++;
1686     }
1687     y[10*i]   += sum1;
1688     y[10*i+1] += sum2;
1689     y[10*i+2] += sum3;
1690     y[10*i+3] += sum4;
1691     y[10*i+4] += sum5;
1692     y[10*i+5] += sum6;
1693     y[10*i+6] += sum7;
1694     y[10*i+7] += sum8;
1695     y[10*i+8] += sum9;
1696     y[10*i+9] += sum10;
1697   }
1698 
1699   PetscCall(PetscLogFlops(20.0*a->nz));
1700   PetscCall(VecRestoreArrayRead(xx,&x));
1701   PetscCall(VecRestoreArray(yy,&y));
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1706 {
1707   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1708   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1709   const PetscScalar *x,*v;
1710   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1711   const PetscInt    m = b->AIJ->rmap->n,*idx;
1712   PetscInt          n,i;
1713 
1714   PetscFunctionBegin;
1715   PetscCall(VecSet(yy,0.0));
1716   PetscCall(VecGetArrayRead(xx,&x));
1717   PetscCall(VecGetArray(yy,&y));
1718 
1719   for (i=0; i<m; i++) {
1720     idx     = a->j + a->i[i];
1721     v       = a->a + a->i[i];
1722     n       = a->i[i+1] - a->i[i];
1723     alpha1  = x[10*i];
1724     alpha2  = x[10*i+1];
1725     alpha3  = x[10*i+2];
1726     alpha4  = x[10*i+3];
1727     alpha5  = x[10*i+4];
1728     alpha6  = x[10*i+5];
1729     alpha7  = x[10*i+6];
1730     alpha8  = x[10*i+7];
1731     alpha9  = x[10*i+8];
1732     alpha10 = x[10*i+9];
1733     while (n-->0) {
1734       y[10*(*idx)]   += alpha1*(*v);
1735       y[10*(*idx)+1] += alpha2*(*v);
1736       y[10*(*idx)+2] += alpha3*(*v);
1737       y[10*(*idx)+3] += alpha4*(*v);
1738       y[10*(*idx)+4] += alpha5*(*v);
1739       y[10*(*idx)+5] += alpha6*(*v);
1740       y[10*(*idx)+6] += alpha7*(*v);
1741       y[10*(*idx)+7] += alpha8*(*v);
1742       y[10*(*idx)+8] += alpha9*(*v);
1743       y[10*(*idx)+9] += alpha10*(*v);
1744       idx++; v++;
1745     }
1746   }
1747   PetscCall(PetscLogFlops(20.0*a->nz));
1748   PetscCall(VecRestoreArrayRead(xx,&x));
1749   PetscCall(VecRestoreArray(yy,&y));
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1754 {
1755   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1756   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1757   const PetscScalar *x,*v;
1758   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1759   const PetscInt    m = b->AIJ->rmap->n,*idx;
1760   PetscInt          n,i;
1761 
1762   PetscFunctionBegin;
1763   if (yy != zz) PetscCall(VecCopy(yy,zz));
1764   PetscCall(VecGetArrayRead(xx,&x));
1765   PetscCall(VecGetArray(zz,&y));
1766   for (i=0; i<m; i++) {
1767     idx     = a->j + a->i[i];
1768     v       = a->a + a->i[i];
1769     n       = a->i[i+1] - a->i[i];
1770     alpha1  = x[10*i];
1771     alpha2  = x[10*i+1];
1772     alpha3  = x[10*i+2];
1773     alpha4  = x[10*i+3];
1774     alpha5  = x[10*i+4];
1775     alpha6  = x[10*i+5];
1776     alpha7  = x[10*i+6];
1777     alpha8  = x[10*i+7];
1778     alpha9  = x[10*i+8];
1779     alpha10 = x[10*i+9];
1780     while (n-->0) {
1781       y[10*(*idx)]   += alpha1*(*v);
1782       y[10*(*idx)+1] += alpha2*(*v);
1783       y[10*(*idx)+2] += alpha3*(*v);
1784       y[10*(*idx)+3] += alpha4*(*v);
1785       y[10*(*idx)+4] += alpha5*(*v);
1786       y[10*(*idx)+5] += alpha6*(*v);
1787       y[10*(*idx)+6] += alpha7*(*v);
1788       y[10*(*idx)+7] += alpha8*(*v);
1789       y[10*(*idx)+8] += alpha9*(*v);
1790       y[10*(*idx)+9] += alpha10*(*v);
1791       idx++; v++;
1792     }
1793   }
1794   PetscCall(PetscLogFlops(20.0*a->nz));
1795   PetscCall(VecRestoreArrayRead(xx,&x));
1796   PetscCall(VecRestoreArray(zz,&y));
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 /*--------------------------------------------------------------------------------------------*/
1801 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1802 {
1803   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1804   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1805   const PetscScalar *x,*v;
1806   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1807   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1808   PetscInt          nonzerorow=0,n,i,jrow,j;
1809 
1810   PetscFunctionBegin;
1811   PetscCall(VecGetArrayRead(xx,&x));
1812   PetscCall(VecGetArray(yy,&y));
1813   idx  = a->j;
1814   v    = a->a;
1815   ii   = a->i;
1816 
1817   for (i=0; i<m; i++) {
1818     jrow  = ii[i];
1819     n     = ii[i+1] - jrow;
1820     sum1  = 0.0;
1821     sum2  = 0.0;
1822     sum3  = 0.0;
1823     sum4  = 0.0;
1824     sum5  = 0.0;
1825     sum6  = 0.0;
1826     sum7  = 0.0;
1827     sum8  = 0.0;
1828     sum9  = 0.0;
1829     sum10 = 0.0;
1830     sum11 = 0.0;
1831 
1832     nonzerorow += (n>0);
1833     for (j=0; j<n; j++) {
1834       sum1  += v[jrow]*x[11*idx[jrow]];
1835       sum2  += v[jrow]*x[11*idx[jrow]+1];
1836       sum3  += v[jrow]*x[11*idx[jrow]+2];
1837       sum4  += v[jrow]*x[11*idx[jrow]+3];
1838       sum5  += v[jrow]*x[11*idx[jrow]+4];
1839       sum6  += v[jrow]*x[11*idx[jrow]+5];
1840       sum7  += v[jrow]*x[11*idx[jrow]+6];
1841       sum8  += v[jrow]*x[11*idx[jrow]+7];
1842       sum9  += v[jrow]*x[11*idx[jrow]+8];
1843       sum10 += v[jrow]*x[11*idx[jrow]+9];
1844       sum11 += v[jrow]*x[11*idx[jrow]+10];
1845       jrow++;
1846     }
1847     y[11*i]    = sum1;
1848     y[11*i+1]  = sum2;
1849     y[11*i+2]  = sum3;
1850     y[11*i+3]  = sum4;
1851     y[11*i+4]  = sum5;
1852     y[11*i+5]  = sum6;
1853     y[11*i+6]  = sum7;
1854     y[11*i+7]  = sum8;
1855     y[11*i+8]  = sum9;
1856     y[11*i+9]  = sum10;
1857     y[11*i+10] = sum11;
1858   }
1859 
1860   PetscCall(PetscLogFlops(22.0*a->nz - 11*nonzerorow));
1861   PetscCall(VecRestoreArrayRead(xx,&x));
1862   PetscCall(VecRestoreArray(yy,&y));
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1867 {
1868   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1869   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1870   const PetscScalar *x,*v;
1871   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1872   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1873   PetscInt          n,i,jrow,j;
1874 
1875   PetscFunctionBegin;
1876   if (yy != zz) PetscCall(VecCopy(yy,zz));
1877   PetscCall(VecGetArrayRead(xx,&x));
1878   PetscCall(VecGetArray(zz,&y));
1879   idx  = a->j;
1880   v    = a->a;
1881   ii   = a->i;
1882 
1883   for (i=0; i<m; i++) {
1884     jrow  = ii[i];
1885     n     = ii[i+1] - jrow;
1886     sum1  = 0.0;
1887     sum2  = 0.0;
1888     sum3  = 0.0;
1889     sum4  = 0.0;
1890     sum5  = 0.0;
1891     sum6  = 0.0;
1892     sum7  = 0.0;
1893     sum8  = 0.0;
1894     sum9  = 0.0;
1895     sum10 = 0.0;
1896     sum11 = 0.0;
1897     for (j=0; j<n; j++) {
1898       sum1  += v[jrow]*x[11*idx[jrow]];
1899       sum2  += v[jrow]*x[11*idx[jrow]+1];
1900       sum3  += v[jrow]*x[11*idx[jrow]+2];
1901       sum4  += v[jrow]*x[11*idx[jrow]+3];
1902       sum5  += v[jrow]*x[11*idx[jrow]+4];
1903       sum6  += v[jrow]*x[11*idx[jrow]+5];
1904       sum7  += v[jrow]*x[11*idx[jrow]+6];
1905       sum8  += v[jrow]*x[11*idx[jrow]+7];
1906       sum9  += v[jrow]*x[11*idx[jrow]+8];
1907       sum10 += v[jrow]*x[11*idx[jrow]+9];
1908       sum11 += v[jrow]*x[11*idx[jrow]+10];
1909       jrow++;
1910     }
1911     y[11*i]    += sum1;
1912     y[11*i+1]  += sum2;
1913     y[11*i+2]  += sum3;
1914     y[11*i+3]  += sum4;
1915     y[11*i+4]  += sum5;
1916     y[11*i+5]  += sum6;
1917     y[11*i+6]  += sum7;
1918     y[11*i+7]  += sum8;
1919     y[11*i+8]  += sum9;
1920     y[11*i+9]  += sum10;
1921     y[11*i+10] += sum11;
1922   }
1923 
1924   PetscCall(PetscLogFlops(22.0*a->nz));
1925   PetscCall(VecRestoreArrayRead(xx,&x));
1926   PetscCall(VecRestoreArray(yy,&y));
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1931 {
1932   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1933   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1934   const PetscScalar *x,*v;
1935   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1936   const PetscInt    m = b->AIJ->rmap->n,*idx;
1937   PetscInt          n,i;
1938 
1939   PetscFunctionBegin;
1940   PetscCall(VecSet(yy,0.0));
1941   PetscCall(VecGetArrayRead(xx,&x));
1942   PetscCall(VecGetArray(yy,&y));
1943 
1944   for (i=0; i<m; i++) {
1945     idx     = a->j + a->i[i];
1946     v       = a->a + a->i[i];
1947     n       = a->i[i+1] - a->i[i];
1948     alpha1  = x[11*i];
1949     alpha2  = x[11*i+1];
1950     alpha3  = x[11*i+2];
1951     alpha4  = x[11*i+3];
1952     alpha5  = x[11*i+4];
1953     alpha6  = x[11*i+5];
1954     alpha7  = x[11*i+6];
1955     alpha8  = x[11*i+7];
1956     alpha9  = x[11*i+8];
1957     alpha10 = x[11*i+9];
1958     alpha11 = x[11*i+10];
1959     while (n-->0) {
1960       y[11*(*idx)]    += alpha1*(*v);
1961       y[11*(*idx)+1]  += alpha2*(*v);
1962       y[11*(*idx)+2]  += alpha3*(*v);
1963       y[11*(*idx)+3]  += alpha4*(*v);
1964       y[11*(*idx)+4]  += alpha5*(*v);
1965       y[11*(*idx)+5]  += alpha6*(*v);
1966       y[11*(*idx)+6]  += alpha7*(*v);
1967       y[11*(*idx)+7]  += alpha8*(*v);
1968       y[11*(*idx)+8]  += alpha9*(*v);
1969       y[11*(*idx)+9]  += alpha10*(*v);
1970       y[11*(*idx)+10] += alpha11*(*v);
1971       idx++; v++;
1972     }
1973   }
1974   PetscCall(PetscLogFlops(22.0*a->nz));
1975   PetscCall(VecRestoreArrayRead(xx,&x));
1976   PetscCall(VecRestoreArray(yy,&y));
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1981 {
1982   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1983   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1984   const PetscScalar *x,*v;
1985   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1986   const PetscInt    m = b->AIJ->rmap->n,*idx;
1987   PetscInt          n,i;
1988 
1989   PetscFunctionBegin;
1990   if (yy != zz) PetscCall(VecCopy(yy,zz));
1991   PetscCall(VecGetArrayRead(xx,&x));
1992   PetscCall(VecGetArray(zz,&y));
1993   for (i=0; i<m; i++) {
1994     idx     = a->j + a->i[i];
1995     v       = a->a + a->i[i];
1996     n       = a->i[i+1] - a->i[i];
1997     alpha1  = x[11*i];
1998     alpha2  = x[11*i+1];
1999     alpha3  = x[11*i+2];
2000     alpha4  = x[11*i+3];
2001     alpha5  = x[11*i+4];
2002     alpha6  = x[11*i+5];
2003     alpha7  = x[11*i+6];
2004     alpha8  = x[11*i+7];
2005     alpha9  = x[11*i+8];
2006     alpha10 = x[11*i+9];
2007     alpha11 = x[11*i+10];
2008     while (n-->0) {
2009       y[11*(*idx)]    += alpha1*(*v);
2010       y[11*(*idx)+1]  += alpha2*(*v);
2011       y[11*(*idx)+2]  += alpha3*(*v);
2012       y[11*(*idx)+3]  += alpha4*(*v);
2013       y[11*(*idx)+4]  += alpha5*(*v);
2014       y[11*(*idx)+5]  += alpha6*(*v);
2015       y[11*(*idx)+6]  += alpha7*(*v);
2016       y[11*(*idx)+7]  += alpha8*(*v);
2017       y[11*(*idx)+8]  += alpha9*(*v);
2018       y[11*(*idx)+9]  += alpha10*(*v);
2019       y[11*(*idx)+10] += alpha11*(*v);
2020       idx++; v++;
2021     }
2022   }
2023   PetscCall(PetscLogFlops(22.0*a->nz));
2024   PetscCall(VecRestoreArrayRead(xx,&x));
2025   PetscCall(VecRestoreArray(zz,&y));
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 /*--------------------------------------------------------------------------------------------*/
2030 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2031 {
2032   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2033   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2034   const PetscScalar *x,*v;
2035   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2036   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2037   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2038   PetscInt          nonzerorow=0,n,i,jrow,j;
2039 
2040   PetscFunctionBegin;
2041   PetscCall(VecGetArrayRead(xx,&x));
2042   PetscCall(VecGetArray(yy,&y));
2043   idx  = a->j;
2044   v    = a->a;
2045   ii   = a->i;
2046 
2047   for (i=0; i<m; i++) {
2048     jrow  = ii[i];
2049     n     = ii[i+1] - jrow;
2050     sum1  = 0.0;
2051     sum2  = 0.0;
2052     sum3  = 0.0;
2053     sum4  = 0.0;
2054     sum5  = 0.0;
2055     sum6  = 0.0;
2056     sum7  = 0.0;
2057     sum8  = 0.0;
2058     sum9  = 0.0;
2059     sum10 = 0.0;
2060     sum11 = 0.0;
2061     sum12 = 0.0;
2062     sum13 = 0.0;
2063     sum14 = 0.0;
2064     sum15 = 0.0;
2065     sum16 = 0.0;
2066 
2067     nonzerorow += (n>0);
2068     for (j=0; j<n; j++) {
2069       sum1  += v[jrow]*x[16*idx[jrow]];
2070       sum2  += v[jrow]*x[16*idx[jrow]+1];
2071       sum3  += v[jrow]*x[16*idx[jrow]+2];
2072       sum4  += v[jrow]*x[16*idx[jrow]+3];
2073       sum5  += v[jrow]*x[16*idx[jrow]+4];
2074       sum6  += v[jrow]*x[16*idx[jrow]+5];
2075       sum7  += v[jrow]*x[16*idx[jrow]+6];
2076       sum8  += v[jrow]*x[16*idx[jrow]+7];
2077       sum9  += v[jrow]*x[16*idx[jrow]+8];
2078       sum10 += v[jrow]*x[16*idx[jrow]+9];
2079       sum11 += v[jrow]*x[16*idx[jrow]+10];
2080       sum12 += v[jrow]*x[16*idx[jrow]+11];
2081       sum13 += v[jrow]*x[16*idx[jrow]+12];
2082       sum14 += v[jrow]*x[16*idx[jrow]+13];
2083       sum15 += v[jrow]*x[16*idx[jrow]+14];
2084       sum16 += v[jrow]*x[16*idx[jrow]+15];
2085       jrow++;
2086     }
2087     y[16*i]    = sum1;
2088     y[16*i+1]  = sum2;
2089     y[16*i+2]  = sum3;
2090     y[16*i+3]  = sum4;
2091     y[16*i+4]  = sum5;
2092     y[16*i+5]  = sum6;
2093     y[16*i+6]  = sum7;
2094     y[16*i+7]  = sum8;
2095     y[16*i+8]  = sum9;
2096     y[16*i+9]  = sum10;
2097     y[16*i+10] = sum11;
2098     y[16*i+11] = sum12;
2099     y[16*i+12] = sum13;
2100     y[16*i+13] = sum14;
2101     y[16*i+14] = sum15;
2102     y[16*i+15] = sum16;
2103   }
2104 
2105   PetscCall(PetscLogFlops(32.0*a->nz - 16.0*nonzerorow));
2106   PetscCall(VecRestoreArrayRead(xx,&x));
2107   PetscCall(VecRestoreArray(yy,&y));
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2112 {
2113   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2114   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2115   const PetscScalar *x,*v;
2116   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2117   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2118   const PetscInt    m = b->AIJ->rmap->n,*idx;
2119   PetscInt          n,i;
2120 
2121   PetscFunctionBegin;
2122   PetscCall(VecSet(yy,0.0));
2123   PetscCall(VecGetArrayRead(xx,&x));
2124   PetscCall(VecGetArray(yy,&y));
2125 
2126   for (i=0; i<m; i++) {
2127     idx     = a->j + a->i[i];
2128     v       = a->a + a->i[i];
2129     n       = a->i[i+1] - a->i[i];
2130     alpha1  = x[16*i];
2131     alpha2  = x[16*i+1];
2132     alpha3  = x[16*i+2];
2133     alpha4  = x[16*i+3];
2134     alpha5  = x[16*i+4];
2135     alpha6  = x[16*i+5];
2136     alpha7  = x[16*i+6];
2137     alpha8  = x[16*i+7];
2138     alpha9  = x[16*i+8];
2139     alpha10 = x[16*i+9];
2140     alpha11 = x[16*i+10];
2141     alpha12 = x[16*i+11];
2142     alpha13 = x[16*i+12];
2143     alpha14 = x[16*i+13];
2144     alpha15 = x[16*i+14];
2145     alpha16 = x[16*i+15];
2146     while (n-->0) {
2147       y[16*(*idx)]    += alpha1*(*v);
2148       y[16*(*idx)+1]  += alpha2*(*v);
2149       y[16*(*idx)+2]  += alpha3*(*v);
2150       y[16*(*idx)+3]  += alpha4*(*v);
2151       y[16*(*idx)+4]  += alpha5*(*v);
2152       y[16*(*idx)+5]  += alpha6*(*v);
2153       y[16*(*idx)+6]  += alpha7*(*v);
2154       y[16*(*idx)+7]  += alpha8*(*v);
2155       y[16*(*idx)+8]  += alpha9*(*v);
2156       y[16*(*idx)+9]  += alpha10*(*v);
2157       y[16*(*idx)+10] += alpha11*(*v);
2158       y[16*(*idx)+11] += alpha12*(*v);
2159       y[16*(*idx)+12] += alpha13*(*v);
2160       y[16*(*idx)+13] += alpha14*(*v);
2161       y[16*(*idx)+14] += alpha15*(*v);
2162       y[16*(*idx)+15] += alpha16*(*v);
2163       idx++; v++;
2164     }
2165   }
2166   PetscCall(PetscLogFlops(32.0*a->nz));
2167   PetscCall(VecRestoreArrayRead(xx,&x));
2168   PetscCall(VecRestoreArray(yy,&y));
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2173 {
2174   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2175   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2176   const PetscScalar *x,*v;
2177   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2178   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2179   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2180   PetscInt          n,i,jrow,j;
2181 
2182   PetscFunctionBegin;
2183   if (yy != zz) PetscCall(VecCopy(yy,zz));
2184   PetscCall(VecGetArrayRead(xx,&x));
2185   PetscCall(VecGetArray(zz,&y));
2186   idx  = a->j;
2187   v    = a->a;
2188   ii   = a->i;
2189 
2190   for (i=0; i<m; i++) {
2191     jrow  = ii[i];
2192     n     = ii[i+1] - jrow;
2193     sum1  = 0.0;
2194     sum2  = 0.0;
2195     sum3  = 0.0;
2196     sum4  = 0.0;
2197     sum5  = 0.0;
2198     sum6  = 0.0;
2199     sum7  = 0.0;
2200     sum8  = 0.0;
2201     sum9  = 0.0;
2202     sum10 = 0.0;
2203     sum11 = 0.0;
2204     sum12 = 0.0;
2205     sum13 = 0.0;
2206     sum14 = 0.0;
2207     sum15 = 0.0;
2208     sum16 = 0.0;
2209     for (j=0; j<n; j++) {
2210       sum1  += v[jrow]*x[16*idx[jrow]];
2211       sum2  += v[jrow]*x[16*idx[jrow]+1];
2212       sum3  += v[jrow]*x[16*idx[jrow]+2];
2213       sum4  += v[jrow]*x[16*idx[jrow]+3];
2214       sum5  += v[jrow]*x[16*idx[jrow]+4];
2215       sum6  += v[jrow]*x[16*idx[jrow]+5];
2216       sum7  += v[jrow]*x[16*idx[jrow]+6];
2217       sum8  += v[jrow]*x[16*idx[jrow]+7];
2218       sum9  += v[jrow]*x[16*idx[jrow]+8];
2219       sum10 += v[jrow]*x[16*idx[jrow]+9];
2220       sum11 += v[jrow]*x[16*idx[jrow]+10];
2221       sum12 += v[jrow]*x[16*idx[jrow]+11];
2222       sum13 += v[jrow]*x[16*idx[jrow]+12];
2223       sum14 += v[jrow]*x[16*idx[jrow]+13];
2224       sum15 += v[jrow]*x[16*idx[jrow]+14];
2225       sum16 += v[jrow]*x[16*idx[jrow]+15];
2226       jrow++;
2227     }
2228     y[16*i]    += sum1;
2229     y[16*i+1]  += sum2;
2230     y[16*i+2]  += sum3;
2231     y[16*i+3]  += sum4;
2232     y[16*i+4]  += sum5;
2233     y[16*i+5]  += sum6;
2234     y[16*i+6]  += sum7;
2235     y[16*i+7]  += sum8;
2236     y[16*i+8]  += sum9;
2237     y[16*i+9]  += sum10;
2238     y[16*i+10] += sum11;
2239     y[16*i+11] += sum12;
2240     y[16*i+12] += sum13;
2241     y[16*i+13] += sum14;
2242     y[16*i+14] += sum15;
2243     y[16*i+15] += sum16;
2244   }
2245 
2246   PetscCall(PetscLogFlops(32.0*a->nz));
2247   PetscCall(VecRestoreArrayRead(xx,&x));
2248   PetscCall(VecRestoreArray(zz,&y));
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2253 {
2254   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2255   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2256   const PetscScalar *x,*v;
2257   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2258   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2259   const PetscInt    m = b->AIJ->rmap->n,*idx;
2260   PetscInt          n,i;
2261 
2262   PetscFunctionBegin;
2263   if (yy != zz) PetscCall(VecCopy(yy,zz));
2264   PetscCall(VecGetArrayRead(xx,&x));
2265   PetscCall(VecGetArray(zz,&y));
2266   for (i=0; i<m; i++) {
2267     idx     = a->j + a->i[i];
2268     v       = a->a + a->i[i];
2269     n       = a->i[i+1] - a->i[i];
2270     alpha1  = x[16*i];
2271     alpha2  = x[16*i+1];
2272     alpha3  = x[16*i+2];
2273     alpha4  = x[16*i+3];
2274     alpha5  = x[16*i+4];
2275     alpha6  = x[16*i+5];
2276     alpha7  = x[16*i+6];
2277     alpha8  = x[16*i+7];
2278     alpha9  = x[16*i+8];
2279     alpha10 = x[16*i+9];
2280     alpha11 = x[16*i+10];
2281     alpha12 = x[16*i+11];
2282     alpha13 = x[16*i+12];
2283     alpha14 = x[16*i+13];
2284     alpha15 = x[16*i+14];
2285     alpha16 = x[16*i+15];
2286     while (n-->0) {
2287       y[16*(*idx)]    += alpha1*(*v);
2288       y[16*(*idx)+1]  += alpha2*(*v);
2289       y[16*(*idx)+2]  += alpha3*(*v);
2290       y[16*(*idx)+3]  += alpha4*(*v);
2291       y[16*(*idx)+4]  += alpha5*(*v);
2292       y[16*(*idx)+5]  += alpha6*(*v);
2293       y[16*(*idx)+6]  += alpha7*(*v);
2294       y[16*(*idx)+7]  += alpha8*(*v);
2295       y[16*(*idx)+8]  += alpha9*(*v);
2296       y[16*(*idx)+9]  += alpha10*(*v);
2297       y[16*(*idx)+10] += alpha11*(*v);
2298       y[16*(*idx)+11] += alpha12*(*v);
2299       y[16*(*idx)+12] += alpha13*(*v);
2300       y[16*(*idx)+13] += alpha14*(*v);
2301       y[16*(*idx)+14] += alpha15*(*v);
2302       y[16*(*idx)+15] += alpha16*(*v);
2303       idx++; v++;
2304     }
2305   }
2306   PetscCall(PetscLogFlops(32.0*a->nz));
2307   PetscCall(VecRestoreArrayRead(xx,&x));
2308   PetscCall(VecRestoreArray(zz,&y));
2309   PetscFunctionReturn(0);
2310 }
2311 
2312 /*--------------------------------------------------------------------------------------------*/
2313 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2314 {
2315   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2316   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2317   const PetscScalar *x,*v;
2318   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2319   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2320   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2321   PetscInt          nonzerorow=0,n,i,jrow,j;
2322 
2323   PetscFunctionBegin;
2324   PetscCall(VecGetArrayRead(xx,&x));
2325   PetscCall(VecGetArray(yy,&y));
2326   idx  = a->j;
2327   v    = a->a;
2328   ii   = a->i;
2329 
2330   for (i=0; i<m; i++) {
2331     jrow  = ii[i];
2332     n     = ii[i+1] - jrow;
2333     sum1  = 0.0;
2334     sum2  = 0.0;
2335     sum3  = 0.0;
2336     sum4  = 0.0;
2337     sum5  = 0.0;
2338     sum6  = 0.0;
2339     sum7  = 0.0;
2340     sum8  = 0.0;
2341     sum9  = 0.0;
2342     sum10 = 0.0;
2343     sum11 = 0.0;
2344     sum12 = 0.0;
2345     sum13 = 0.0;
2346     sum14 = 0.0;
2347     sum15 = 0.0;
2348     sum16 = 0.0;
2349     sum17 = 0.0;
2350     sum18 = 0.0;
2351 
2352     nonzerorow += (n>0);
2353     for (j=0; j<n; j++) {
2354       sum1  += v[jrow]*x[18*idx[jrow]];
2355       sum2  += v[jrow]*x[18*idx[jrow]+1];
2356       sum3  += v[jrow]*x[18*idx[jrow]+2];
2357       sum4  += v[jrow]*x[18*idx[jrow]+3];
2358       sum5  += v[jrow]*x[18*idx[jrow]+4];
2359       sum6  += v[jrow]*x[18*idx[jrow]+5];
2360       sum7  += v[jrow]*x[18*idx[jrow]+6];
2361       sum8  += v[jrow]*x[18*idx[jrow]+7];
2362       sum9  += v[jrow]*x[18*idx[jrow]+8];
2363       sum10 += v[jrow]*x[18*idx[jrow]+9];
2364       sum11 += v[jrow]*x[18*idx[jrow]+10];
2365       sum12 += v[jrow]*x[18*idx[jrow]+11];
2366       sum13 += v[jrow]*x[18*idx[jrow]+12];
2367       sum14 += v[jrow]*x[18*idx[jrow]+13];
2368       sum15 += v[jrow]*x[18*idx[jrow]+14];
2369       sum16 += v[jrow]*x[18*idx[jrow]+15];
2370       sum17 += v[jrow]*x[18*idx[jrow]+16];
2371       sum18 += v[jrow]*x[18*idx[jrow]+17];
2372       jrow++;
2373     }
2374     y[18*i]    = sum1;
2375     y[18*i+1]  = sum2;
2376     y[18*i+2]  = sum3;
2377     y[18*i+3]  = sum4;
2378     y[18*i+4]  = sum5;
2379     y[18*i+5]  = sum6;
2380     y[18*i+6]  = sum7;
2381     y[18*i+7]  = sum8;
2382     y[18*i+8]  = sum9;
2383     y[18*i+9]  = sum10;
2384     y[18*i+10] = sum11;
2385     y[18*i+11] = sum12;
2386     y[18*i+12] = sum13;
2387     y[18*i+13] = sum14;
2388     y[18*i+14] = sum15;
2389     y[18*i+15] = sum16;
2390     y[18*i+16] = sum17;
2391     y[18*i+17] = sum18;
2392   }
2393 
2394   PetscCall(PetscLogFlops(36.0*a->nz - 18.0*nonzerorow));
2395   PetscCall(VecRestoreArrayRead(xx,&x));
2396   PetscCall(VecRestoreArray(yy,&y));
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2401 {
2402   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2403   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2404   const PetscScalar *x,*v;
2405   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2406   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2407   const PetscInt    m = b->AIJ->rmap->n,*idx;
2408   PetscInt          n,i;
2409 
2410   PetscFunctionBegin;
2411   PetscCall(VecSet(yy,0.0));
2412   PetscCall(VecGetArrayRead(xx,&x));
2413   PetscCall(VecGetArray(yy,&y));
2414 
2415   for (i=0; i<m; i++) {
2416     idx     = a->j + a->i[i];
2417     v       = a->a + a->i[i];
2418     n       = a->i[i+1] - a->i[i];
2419     alpha1  = x[18*i];
2420     alpha2  = x[18*i+1];
2421     alpha3  = x[18*i+2];
2422     alpha4  = x[18*i+3];
2423     alpha5  = x[18*i+4];
2424     alpha6  = x[18*i+5];
2425     alpha7  = x[18*i+6];
2426     alpha8  = x[18*i+7];
2427     alpha9  = x[18*i+8];
2428     alpha10 = x[18*i+9];
2429     alpha11 = x[18*i+10];
2430     alpha12 = x[18*i+11];
2431     alpha13 = x[18*i+12];
2432     alpha14 = x[18*i+13];
2433     alpha15 = x[18*i+14];
2434     alpha16 = x[18*i+15];
2435     alpha17 = x[18*i+16];
2436     alpha18 = x[18*i+17];
2437     while (n-->0) {
2438       y[18*(*idx)]    += alpha1*(*v);
2439       y[18*(*idx)+1]  += alpha2*(*v);
2440       y[18*(*idx)+2]  += alpha3*(*v);
2441       y[18*(*idx)+3]  += alpha4*(*v);
2442       y[18*(*idx)+4]  += alpha5*(*v);
2443       y[18*(*idx)+5]  += alpha6*(*v);
2444       y[18*(*idx)+6]  += alpha7*(*v);
2445       y[18*(*idx)+7]  += alpha8*(*v);
2446       y[18*(*idx)+8]  += alpha9*(*v);
2447       y[18*(*idx)+9]  += alpha10*(*v);
2448       y[18*(*idx)+10] += alpha11*(*v);
2449       y[18*(*idx)+11] += alpha12*(*v);
2450       y[18*(*idx)+12] += alpha13*(*v);
2451       y[18*(*idx)+13] += alpha14*(*v);
2452       y[18*(*idx)+14] += alpha15*(*v);
2453       y[18*(*idx)+15] += alpha16*(*v);
2454       y[18*(*idx)+16] += alpha17*(*v);
2455       y[18*(*idx)+17] += alpha18*(*v);
2456       idx++; v++;
2457     }
2458   }
2459   PetscCall(PetscLogFlops(36.0*a->nz));
2460   PetscCall(VecRestoreArrayRead(xx,&x));
2461   PetscCall(VecRestoreArray(yy,&y));
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2466 {
2467   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2468   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2469   const PetscScalar *x,*v;
2470   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2471   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2472   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2473   PetscInt          n,i,jrow,j;
2474 
2475   PetscFunctionBegin;
2476   if (yy != zz) PetscCall(VecCopy(yy,zz));
2477   PetscCall(VecGetArrayRead(xx,&x));
2478   PetscCall(VecGetArray(zz,&y));
2479   idx  = a->j;
2480   v    = a->a;
2481   ii   = a->i;
2482 
2483   for (i=0; i<m; i++) {
2484     jrow  = ii[i];
2485     n     = ii[i+1] - jrow;
2486     sum1  = 0.0;
2487     sum2  = 0.0;
2488     sum3  = 0.0;
2489     sum4  = 0.0;
2490     sum5  = 0.0;
2491     sum6  = 0.0;
2492     sum7  = 0.0;
2493     sum8  = 0.0;
2494     sum9  = 0.0;
2495     sum10 = 0.0;
2496     sum11 = 0.0;
2497     sum12 = 0.0;
2498     sum13 = 0.0;
2499     sum14 = 0.0;
2500     sum15 = 0.0;
2501     sum16 = 0.0;
2502     sum17 = 0.0;
2503     sum18 = 0.0;
2504     for (j=0; j<n; j++) {
2505       sum1  += v[jrow]*x[18*idx[jrow]];
2506       sum2  += v[jrow]*x[18*idx[jrow]+1];
2507       sum3  += v[jrow]*x[18*idx[jrow]+2];
2508       sum4  += v[jrow]*x[18*idx[jrow]+3];
2509       sum5  += v[jrow]*x[18*idx[jrow]+4];
2510       sum6  += v[jrow]*x[18*idx[jrow]+5];
2511       sum7  += v[jrow]*x[18*idx[jrow]+6];
2512       sum8  += v[jrow]*x[18*idx[jrow]+7];
2513       sum9  += v[jrow]*x[18*idx[jrow]+8];
2514       sum10 += v[jrow]*x[18*idx[jrow]+9];
2515       sum11 += v[jrow]*x[18*idx[jrow]+10];
2516       sum12 += v[jrow]*x[18*idx[jrow]+11];
2517       sum13 += v[jrow]*x[18*idx[jrow]+12];
2518       sum14 += v[jrow]*x[18*idx[jrow]+13];
2519       sum15 += v[jrow]*x[18*idx[jrow]+14];
2520       sum16 += v[jrow]*x[18*idx[jrow]+15];
2521       sum17 += v[jrow]*x[18*idx[jrow]+16];
2522       sum18 += v[jrow]*x[18*idx[jrow]+17];
2523       jrow++;
2524     }
2525     y[18*i]    += sum1;
2526     y[18*i+1]  += sum2;
2527     y[18*i+2]  += sum3;
2528     y[18*i+3]  += sum4;
2529     y[18*i+4]  += sum5;
2530     y[18*i+5]  += sum6;
2531     y[18*i+6]  += sum7;
2532     y[18*i+7]  += sum8;
2533     y[18*i+8]  += sum9;
2534     y[18*i+9]  += sum10;
2535     y[18*i+10] += sum11;
2536     y[18*i+11] += sum12;
2537     y[18*i+12] += sum13;
2538     y[18*i+13] += sum14;
2539     y[18*i+14] += sum15;
2540     y[18*i+15] += sum16;
2541     y[18*i+16] += sum17;
2542     y[18*i+17] += sum18;
2543   }
2544 
2545   PetscCall(PetscLogFlops(36.0*a->nz));
2546   PetscCall(VecRestoreArrayRead(xx,&x));
2547   PetscCall(VecRestoreArray(zz,&y));
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2552 {
2553   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2554   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2555   const PetscScalar *x,*v;
2556   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2557   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2558   const PetscInt    m = b->AIJ->rmap->n,*idx;
2559   PetscInt          n,i;
2560 
2561   PetscFunctionBegin;
2562   if (yy != zz) PetscCall(VecCopy(yy,zz));
2563   PetscCall(VecGetArrayRead(xx,&x));
2564   PetscCall(VecGetArray(zz,&y));
2565   for (i=0; i<m; i++) {
2566     idx     = a->j + a->i[i];
2567     v       = a->a + a->i[i];
2568     n       = a->i[i+1] - a->i[i];
2569     alpha1  = x[18*i];
2570     alpha2  = x[18*i+1];
2571     alpha3  = x[18*i+2];
2572     alpha4  = x[18*i+3];
2573     alpha5  = x[18*i+4];
2574     alpha6  = x[18*i+5];
2575     alpha7  = x[18*i+6];
2576     alpha8  = x[18*i+7];
2577     alpha9  = x[18*i+8];
2578     alpha10 = x[18*i+9];
2579     alpha11 = x[18*i+10];
2580     alpha12 = x[18*i+11];
2581     alpha13 = x[18*i+12];
2582     alpha14 = x[18*i+13];
2583     alpha15 = x[18*i+14];
2584     alpha16 = x[18*i+15];
2585     alpha17 = x[18*i+16];
2586     alpha18 = x[18*i+17];
2587     while (n-->0) {
2588       y[18*(*idx)]    += alpha1*(*v);
2589       y[18*(*idx)+1]  += alpha2*(*v);
2590       y[18*(*idx)+2]  += alpha3*(*v);
2591       y[18*(*idx)+3]  += alpha4*(*v);
2592       y[18*(*idx)+4]  += alpha5*(*v);
2593       y[18*(*idx)+5]  += alpha6*(*v);
2594       y[18*(*idx)+6]  += alpha7*(*v);
2595       y[18*(*idx)+7]  += alpha8*(*v);
2596       y[18*(*idx)+8]  += alpha9*(*v);
2597       y[18*(*idx)+9]  += alpha10*(*v);
2598       y[18*(*idx)+10] += alpha11*(*v);
2599       y[18*(*idx)+11] += alpha12*(*v);
2600       y[18*(*idx)+12] += alpha13*(*v);
2601       y[18*(*idx)+13] += alpha14*(*v);
2602       y[18*(*idx)+14] += alpha15*(*v);
2603       y[18*(*idx)+15] += alpha16*(*v);
2604       y[18*(*idx)+16] += alpha17*(*v);
2605       y[18*(*idx)+17] += alpha18*(*v);
2606       idx++; v++;
2607     }
2608   }
2609   PetscCall(PetscLogFlops(36.0*a->nz));
2610   PetscCall(VecRestoreArrayRead(xx,&x));
2611   PetscCall(VecRestoreArray(zz,&y));
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2616 {
2617   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2618   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2619   const PetscScalar *x,*v;
2620   PetscScalar       *y,*sums;
2621   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2622   PetscInt          n,i,jrow,j,dof = b->dof,k;
2623 
2624   PetscFunctionBegin;
2625   PetscCall(VecGetArrayRead(xx,&x));
2626   PetscCall(VecSet(yy,0.0));
2627   PetscCall(VecGetArray(yy,&y));
2628   idx  = a->j;
2629   v    = a->a;
2630   ii   = a->i;
2631 
2632   for (i=0; i<m; i++) {
2633     jrow = ii[i];
2634     n    = ii[i+1] - jrow;
2635     sums = y + dof*i;
2636     for (j=0; j<n; j++) {
2637       for (k=0; k<dof; k++) {
2638         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2639       }
2640       jrow++;
2641     }
2642   }
2643 
2644   PetscCall(PetscLogFlops(2.0*dof*a->nz));
2645   PetscCall(VecRestoreArrayRead(xx,&x));
2646   PetscCall(VecRestoreArray(yy,&y));
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2651 {
2652   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2653   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2654   const PetscScalar *x,*v;
2655   PetscScalar       *y,*sums;
2656   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2657   PetscInt          n,i,jrow,j,dof = b->dof,k;
2658 
2659   PetscFunctionBegin;
2660   if (yy != zz) PetscCall(VecCopy(yy,zz));
2661   PetscCall(VecGetArrayRead(xx,&x));
2662   PetscCall(VecGetArray(zz,&y));
2663   idx  = a->j;
2664   v    = a->a;
2665   ii   = a->i;
2666 
2667   for (i=0; i<m; i++) {
2668     jrow = ii[i];
2669     n    = ii[i+1] - jrow;
2670     sums = y + dof*i;
2671     for (j=0; j<n; j++) {
2672       for (k=0; k<dof; k++) {
2673         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2674       }
2675       jrow++;
2676     }
2677   }
2678 
2679   PetscCall(PetscLogFlops(2.0*dof*a->nz));
2680   PetscCall(VecRestoreArrayRead(xx,&x));
2681   PetscCall(VecRestoreArray(zz,&y));
2682   PetscFunctionReturn(0);
2683 }
2684 
2685 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2686 {
2687   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2688   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2689   const PetscScalar *x,*v,*alpha;
2690   PetscScalar       *y;
2691   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2692   PetscInt          n,i,k;
2693 
2694   PetscFunctionBegin;
2695   PetscCall(VecGetArrayRead(xx,&x));
2696   PetscCall(VecSet(yy,0.0));
2697   PetscCall(VecGetArray(yy,&y));
2698   for (i=0; i<m; i++) {
2699     idx   = a->j + a->i[i];
2700     v     = a->a + a->i[i];
2701     n     = a->i[i+1] - a->i[i];
2702     alpha = x + dof*i;
2703     while (n-->0) {
2704       for (k=0; k<dof; k++) {
2705         y[dof*(*idx)+k] += alpha[k]*(*v);
2706       }
2707       idx++; v++;
2708     }
2709   }
2710   PetscCall(PetscLogFlops(2.0*dof*a->nz));
2711   PetscCall(VecRestoreArrayRead(xx,&x));
2712   PetscCall(VecRestoreArray(yy,&y));
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2717 {
2718   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2719   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2720   const PetscScalar *x,*v,*alpha;
2721   PetscScalar       *y;
2722   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2723   PetscInt          n,i,k;
2724 
2725   PetscFunctionBegin;
2726   if (yy != zz) PetscCall(VecCopy(yy,zz));
2727   PetscCall(VecGetArrayRead(xx,&x));
2728   PetscCall(VecGetArray(zz,&y));
2729   for (i=0; i<m; i++) {
2730     idx   = a->j + a->i[i];
2731     v     = a->a + a->i[i];
2732     n     = a->i[i+1] - a->i[i];
2733     alpha = x + dof*i;
2734     while (n-->0) {
2735       for (k=0; k<dof; k++) {
2736         y[dof*(*idx)+k] += alpha[k]*(*v);
2737       }
2738       idx++; v++;
2739     }
2740   }
2741   PetscCall(PetscLogFlops(2.0*dof*a->nz));
2742   PetscCall(VecRestoreArrayRead(xx,&x));
2743   PetscCall(VecRestoreArray(zz,&y));
2744   PetscFunctionReturn(0);
2745 }
2746 
2747 /*===================================================================================*/
2748 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2749 {
2750   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2751 
2752   PetscFunctionBegin;
2753   /* start the scatter */
2754   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
2755   PetscCall((*b->AIJ->ops->mult)(b->AIJ,xx,yy));
2756   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
2757   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy));
2758   PetscFunctionReturn(0);
2759 }
2760 
2761 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2762 {
2763   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2764 
2765   PetscFunctionBegin;
2766   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w));
2767   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy));
2768   PetscCall(VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE));
2769   PetscCall(VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE));
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2774 {
2775   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2776 
2777   PetscFunctionBegin;
2778   /* start the scatter */
2779   PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
2780   PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz));
2781   PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
2782   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz));
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2787 {
2788   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2789 
2790   PetscFunctionBegin;
2791   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w));
2792   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz));
2793   PetscCall(VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE));
2794   PetscCall(VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE));
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 /* ----------------------------------------------------------------*/
2799 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2800 {
2801   Mat_Product    *product = C->product;
2802 
2803   PetscFunctionBegin;
2804   if (product->type == MATPRODUCT_PtAP) {
2805     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2806   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2811 {
2812   Mat_Product    *product = C->product;
2813   PetscBool      flg = PETSC_FALSE;
2814   Mat            A=product->A,P=product->B;
2815   PetscInt       alg=1; /* set default algorithm */
2816 #if !defined(PETSC_HAVE_HYPRE)
2817   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2818   PetscInt       nalg=4;
2819 #else
2820   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2821   PetscInt       nalg=5;
2822 #endif
2823 
2824   PetscFunctionBegin;
2825   PetscCheck(product->type == MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
2826 
2827   /* PtAP */
2828   /* Check matrix local sizes */
2829   PetscCheckFalse(A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2830   PetscCheckFalse(A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
2831 
2832   /* Set the default algorithm */
2833   PetscCall(PetscStrcmp(C->product->alg,"default",&flg));
2834   if (flg) {
2835     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2836   }
2837 
2838   /* Get runtime option */
2839   PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2840   PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg));
2841   if (flg) {
2842     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2843   }
2844   PetscOptionsEnd();
2845 
2846   PetscCall(PetscStrcmp(C->product->alg,"allatonce",&flg));
2847   if (flg) {
2848     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2849     PetscFunctionReturn(0);
2850   }
2851 
2852   PetscCall(PetscStrcmp(C->product->alg,"allatonce_merged",&flg));
2853   if (flg) {
2854     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2855     PetscFunctionReturn(0);
2856   }
2857 
2858   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2859   PetscCall(PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
2860   PetscCall(MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P));
2861   PetscCall(MatProductSetFromOptions(C));
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 /* ----------------------------------------------------------------*/
2866 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2867 {
2868   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2869   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2870   Mat                P         =pp->AIJ;
2871   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2872   PetscInt           *pti,*ptj,*ptJ;
2873   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2874   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2875   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2876   MatScalar          *ca;
2877   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2878 
2879   PetscFunctionBegin;
2880   /* Get ij structure of P^T */
2881   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj));
2882 
2883   cn = pn*ppdof;
2884   /* Allocate ci array, arrays for fill computation and */
2885   /* free space for accumulating nonzero column info */
2886   PetscCall(PetscMalloc1(cn+1,&ci));
2887   ci[0] = 0;
2888 
2889   /* Work arrays for rows of P^T*A */
2890   PetscCall(PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow));
2891   PetscCall(PetscArrayzero(ptadenserow,an));
2892   PetscCall(PetscArrayzero(denserow,cn));
2893 
2894   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2895   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2896   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2897   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space));
2898   current_space = free_space;
2899 
2900   /* Determine symbolic info for each row of C: */
2901   for (i=0; i<pn; i++) {
2902     ptnzi = pti[i+1] - pti[i];
2903     ptJ   = ptj + pti[i];
2904     for (dof=0; dof<ppdof; dof++) {
2905       ptanzi = 0;
2906       /* Determine symbolic row of PtA: */
2907       for (j=0; j<ptnzi; j++) {
2908         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2909         arow = ptJ[j]*ppdof + dof;
2910         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2911         anzj = ai[arow+1] - ai[arow];
2912         ajj  = aj + ai[arow];
2913         for (k=0; k<anzj; k++) {
2914           if (!ptadenserow[ajj[k]]) {
2915             ptadenserow[ajj[k]]    = -1;
2916             ptasparserow[ptanzi++] = ajj[k];
2917           }
2918         }
2919       }
2920       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2921       ptaj = ptasparserow;
2922       cnzi = 0;
2923       for (j=0; j<ptanzi; j++) {
2924         /* Get offset within block of P */
2925         pshift = *ptaj%ppdof;
2926         /* Get block row of P */
2927         prow = (*ptaj++)/ppdof; /* integer division */
2928         /* P has same number of nonzeros per row as the compressed form */
2929         pnzj = pi[prow+1] - pi[prow];
2930         pjj  = pj + pi[prow];
2931         for (k=0;k<pnzj;k++) {
2932           /* Locations in C are shifted by the offset within the block */
2933           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2934           if (!denserow[pjj[k]*ppdof+pshift]) {
2935             denserow[pjj[k]*ppdof+pshift] = -1;
2936             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2937           }
2938         }
2939       }
2940 
2941       /* sort sparserow */
2942       PetscCall(PetscSortInt(cnzi,sparserow));
2943 
2944       /* If free space is not available, make more free space */
2945       /* Double the amount of total space in the list */
2946       if (current_space->local_remaining<cnzi) {
2947         PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space));
2948       }
2949 
2950       /* Copy data into free space, and zero out denserows */
2951       PetscCall(PetscArraycpy(current_space->array,sparserow,cnzi));
2952 
2953       current_space->array           += cnzi;
2954       current_space->local_used      += cnzi;
2955       current_space->local_remaining -= cnzi;
2956 
2957       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2958       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
2959 
2960       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2961       /*        For now, we will recompute what is needed. */
2962       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2963     }
2964   }
2965   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2966   /* Allocate space for cj, initialize cj, and */
2967   /* destroy list of free space and other temporary array(s) */
2968   PetscCall(PetscMalloc1(ci[cn]+1,&cj));
2969   PetscCall(PetscFreeSpaceContiguous(&free_space,cj));
2970   PetscCall(PetscFree4(ptadenserow,ptasparserow,denserow,sparserow));
2971 
2972   /* Allocate space for ca */
2973   PetscCall(PetscCalloc1(ci[cn]+1,&ca));
2974 
2975   /* put together the new matrix */
2976   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C));
2977   PetscCall(MatSetBlockSize(C,pp->dof));
2978 
2979   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2980   /* Since these are PETSc arrays, change flags to free them as necessary. */
2981   c          = (Mat_SeqAIJ*)(C->data);
2982   c->free_a  = PETSC_TRUE;
2983   c->free_ij = PETSC_TRUE;
2984   c->nonew   = 0;
2985 
2986   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2987   C->ops->productnumeric = MatProductNumeric_PtAP;
2988 
2989   /* Clean up. */
2990   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj));
2991   PetscFunctionReturn(0);
2992 }
2993 
2994 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2995 {
2996   /* This routine requires testing -- first draft only */
2997   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
2998   Mat             P  =pp->AIJ;
2999   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3000   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3001   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3002   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3003   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3004   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3005   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3006   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3007   MatScalar       *ca=c->a,*caj,*apa;
3008 
3009   PetscFunctionBegin;
3010   /* Allocate temporary array for storage of one row of A*P */
3011   PetscCall(PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense));
3012 
3013   /* Clear old values in C */
3014   PetscCall(PetscArrayzero(ca,ci[cm]));
3015 
3016   for (i=0; i<am; i++) {
3017     /* Form sparse row of A*P */
3018     anzi  = ai[i+1] - ai[i];
3019     apnzj = 0;
3020     for (j=0; j<anzi; j++) {
3021       /* Get offset within block of P */
3022       pshift = *aj%ppdof;
3023       /* Get block row of P */
3024       prow = *aj++/ppdof;   /* integer division */
3025       pnzj = pi[prow+1] - pi[prow];
3026       pjj  = pj + pi[prow];
3027       paj  = pa + pi[prow];
3028       for (k=0; k<pnzj; k++) {
3029         poffset = pjj[k]*ppdof+pshift;
3030         if (!apjdense[poffset]) {
3031           apjdense[poffset] = -1;
3032           apj[apnzj++]      = poffset;
3033         }
3034         apa[poffset] += (*aa)*paj[k];
3035       }
3036       PetscCall(PetscLogFlops(2.0*pnzj));
3037       aa++;
3038     }
3039 
3040     /* Sort the j index array for quick sparse axpy. */
3041     /* Note: a array does not need sorting as it is in dense storage locations. */
3042     PetscCall(PetscSortInt(apnzj,apj));
3043 
3044     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3045     prow    = i/ppdof; /* integer division */
3046     pshift  = i%ppdof;
3047     poffset = pi[prow];
3048     pnzi    = pi[prow+1] - poffset;
3049     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3050     pJ = pj+poffset;
3051     pA = pa+poffset;
3052     for (j=0; j<pnzi; j++) {
3053       crow = (*pJ)*ppdof+pshift;
3054       cjj  = cj + ci[crow];
3055       caj  = ca + ci[crow];
3056       pJ++;
3057       /* Perform sparse axpy operation.  Note cjj includes apj. */
3058       for (k=0,nextap=0; nextap<apnzj; k++) {
3059         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3060       }
3061       PetscCall(PetscLogFlops(2.0*apnzj));
3062       pA++;
3063     }
3064 
3065     /* Zero the current row info for A*P */
3066     for (j=0; j<apnzj; j++) {
3067       apa[apj[j]]      = 0.;
3068       apjdense[apj[j]] = 0;
3069     }
3070   }
3071 
3072   /* Assemble the final matrix and clean up */
3073   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
3074   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
3075   PetscCall(PetscFree3(apa,apj,apjdense));
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3080 {
3081   Mat_Product         *product = C->product;
3082   Mat                 A=product->A,P=product->B;
3083 
3084   PetscFunctionBegin;
3085   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C));
3086   PetscFunctionReturn(0);
3087 }
3088 
3089 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3090 {
3091   PetscFunctionBegin;
3092   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3093 }
3094 
3095 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3096 {
3097   PetscFunctionBegin;
3098   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3099 }
3100 
3101 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3102 
3103 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3104 {
3105   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3106 
3107   PetscFunctionBegin;
3108 
3109   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C));
3110   PetscFunctionReturn(0);
3111 }
3112 
3113 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3114 
3115 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3116 {
3117   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3118 
3119   PetscFunctionBegin;
3120   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C));
3121   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3122   PetscFunctionReturn(0);
3123 }
3124 
3125 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3126 
3127 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3128 {
3129   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3130 
3131   PetscFunctionBegin;
3132 
3133   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C));
3134   PetscFunctionReturn(0);
3135 }
3136 
3137 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3138 
3139 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3140 {
3141   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3142 
3143   PetscFunctionBegin;
3144 
3145   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C));
3146   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3147   PetscFunctionReturn(0);
3148 }
3149 
3150 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3151 {
3152   Mat_Product         *product = C->product;
3153   Mat                 A=product->A,P=product->B;
3154   PetscBool           flg;
3155 
3156   PetscFunctionBegin;
3157   PetscCall(PetscStrcmp(product->alg,"allatonce",&flg));
3158   if (flg) {
3159     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C));
3160     C->ops->productnumeric = MatProductNumeric_PtAP;
3161     PetscFunctionReturn(0);
3162   }
3163 
3164   PetscCall(PetscStrcmp(product->alg,"allatonce_merged",&flg));
3165   if (flg) {
3166     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C));
3167     C->ops->productnumeric = MatProductNumeric_PtAP;
3168     PetscFunctionReturn(0);
3169   }
3170 
3171   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3172 }
3173 
3174 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3175 {
3176   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3177   Mat            a    = b->AIJ,B;
3178   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3179   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3180   PetscInt       *cols;
3181   PetscScalar    *vals;
3182 
3183   PetscFunctionBegin;
3184   PetscCall(MatGetSize(a,&m,&n));
3185   PetscCall(PetscMalloc1(dof*m,&ilen));
3186   for (i=0; i<m; i++) {
3187     nmax = PetscMax(nmax,aij->ilen[i]);
3188     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3189   }
3190   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
3191   PetscCall(MatSetSizes(B,dof*m,dof*n,dof*m,dof*n));
3192   PetscCall(MatSetType(B,newtype));
3193   PetscCall(MatSeqAIJSetPreallocation(B,0,ilen));
3194   PetscCall(PetscFree(ilen));
3195   PetscCall(PetscMalloc1(nmax,&icols));
3196   ii   = 0;
3197   for (i=0; i<m; i++) {
3198     PetscCall(MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals));
3199     for (j=0; j<dof; j++) {
3200       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3201       PetscCall(MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES));
3202       ii++;
3203     }
3204     PetscCall(MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals));
3205   }
3206   PetscCall(PetscFree(icols));
3207   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3208   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3209 
3210   if (reuse == MAT_INPLACE_MATRIX) {
3211     PetscCall(MatHeaderReplace(A,&B));
3212   } else {
3213     *newmat = B;
3214   }
3215   PetscFunctionReturn(0);
3216 }
3217 
3218 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3219 
3220 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3221 {
3222   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3223   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3224   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3225   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3226   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3227   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3228   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3229   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3230   PetscInt       rstart,cstart,*garray,ii,k;
3231   PetscScalar    *vals,*ovals;
3232 
3233   PetscFunctionBegin;
3234   PetscCall(PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz));
3235   for (i=0; i<A->rmap->n/dof; i++) {
3236     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3237     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3238     for (j=0; j<dof; j++) {
3239       dnz[dof*i+j] = AIJ->ilen[i];
3240       onz[dof*i+j] = OAIJ->ilen[i];
3241     }
3242   }
3243   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3244   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3245   PetscCall(MatSetType(B,newtype));
3246   PetscCall(MatMPIAIJSetPreallocation(B,0,dnz,0,onz));
3247   PetscCall(MatSetBlockSize(B,dof));
3248   PetscCall(PetscFree2(dnz,onz));
3249 
3250   PetscCall(PetscMalloc2(nmax,&icols,onmax,&oicols));
3251   rstart = dof*maij->A->rmap->rstart;
3252   cstart = dof*maij->A->cmap->rstart;
3253   garray = mpiaij->garray;
3254 
3255   ii = rstart;
3256   for (i=0; i<A->rmap->n/dof; i++) {
3257     PetscCall(MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals));
3258     PetscCall(MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals));
3259     for (j=0; j<dof; j++) {
3260       for (k=0; k<ncols; k++) {
3261         icols[k] = cstart + dof*cols[k]+j;
3262       }
3263       for (k=0; k<oncols; k++) {
3264         oicols[k] = dof*garray[ocols[k]]+j;
3265       }
3266       PetscCall(MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES));
3267       PetscCall(MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES));
3268       ii++;
3269     }
3270     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals));
3271     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals));
3272   }
3273   PetscCall(PetscFree2(icols,oicols));
3274 
3275   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3276   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3277 
3278   if (reuse == MAT_INPLACE_MATRIX) {
3279     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3280     ((PetscObject)A)->refct = 1;
3281 
3282     PetscCall(MatHeaderReplace(A,&B));
3283 
3284     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3285   } else {
3286     *newmat = B;
3287   }
3288   PetscFunctionReturn(0);
3289 }
3290 
3291 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3292 {
3293   Mat            A;
3294 
3295   PetscFunctionBegin;
3296   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
3297   PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat));
3298   PetscCall(MatDestroy(&A));
3299   PetscFunctionReturn(0);
3300 }
3301 
3302 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3303 {
3304   Mat            A;
3305 
3306   PetscFunctionBegin;
3307   PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
3308   PetscCall(MatCreateSubMatrices(A,n,irow,icol,scall,submat));
3309   PetscCall(MatDestroy(&A));
3310   PetscFunctionReturn(0);
3311 }
3312 
3313 /* ---------------------------------------------------------------------------------- */
3314 /*@
3315   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3316   operations for multicomponent problems.  It interpolates each component the same
3317   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3318   and MATMPIAIJ for distributed matrices.
3319 
3320   Collective
3321 
3322   Input Parameters:
3323 + A - the AIJ matrix describing the action on blocks
3324 - dof - the block size (number of components per node)
3325 
3326   Output Parameter:
3327 . maij - the new MAIJ matrix
3328 
3329   Operations provided:
3330 .vb
3331     MatMult
3332     MatMultTranspose
3333     MatMultAdd
3334     MatMultTransposeAdd
3335     MatView
3336 .ve
3337 
3338   Level: advanced
3339 
3340 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3341 @*/
3342 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3343 {
3344   PetscMPIInt    size;
3345   PetscInt       n;
3346   Mat            B;
3347 #if defined(PETSC_HAVE_CUDA)
3348   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3349   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3350 #endif
3351 
3352   PetscFunctionBegin;
3353   dof  = PetscAbs(dof);
3354   PetscCall(PetscObjectReference((PetscObject)A));
3355 
3356   if (dof == 1) *maij = A;
3357   else {
3358     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3359     /* propagate vec type */
3360     PetscCall(MatSetVecType(B,A->defaultvectype));
3361     PetscCall(MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N));
3362     PetscCall(PetscLayoutSetBlockSize(B->rmap,dof));
3363     PetscCall(PetscLayoutSetBlockSize(B->cmap,dof));
3364     PetscCall(PetscLayoutSetUp(B->rmap));
3365     PetscCall(PetscLayoutSetUp(B->cmap));
3366 
3367     B->assembled = PETSC_TRUE;
3368 
3369     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3370     if (size == 1) {
3371       Mat_SeqMAIJ *b;
3372 
3373       PetscCall(MatSetType(B,MATSEQMAIJ));
3374 
3375       B->ops->setup   = NULL;
3376       B->ops->destroy = MatDestroy_SeqMAIJ;
3377       B->ops->view    = MatView_SeqMAIJ;
3378 
3379       b               = (Mat_SeqMAIJ*)B->data;
3380       b->dof          = dof;
3381       b->AIJ          = A;
3382 
3383       if (dof == 2) {
3384         B->ops->mult             = MatMult_SeqMAIJ_2;
3385         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3386         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3387         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3388       } else if (dof == 3) {
3389         B->ops->mult             = MatMult_SeqMAIJ_3;
3390         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3391         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3392         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3393       } else if (dof == 4) {
3394         B->ops->mult             = MatMult_SeqMAIJ_4;
3395         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3396         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3397         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3398       } else if (dof == 5) {
3399         B->ops->mult             = MatMult_SeqMAIJ_5;
3400         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3401         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3402         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3403       } else if (dof == 6) {
3404         B->ops->mult             = MatMult_SeqMAIJ_6;
3405         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3406         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3407         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3408       } else if (dof == 7) {
3409         B->ops->mult             = MatMult_SeqMAIJ_7;
3410         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3411         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3412         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3413       } else if (dof == 8) {
3414         B->ops->mult             = MatMult_SeqMAIJ_8;
3415         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3416         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3417         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3418       } else if (dof == 9) {
3419         B->ops->mult             = MatMult_SeqMAIJ_9;
3420         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3421         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3422         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3423       } else if (dof == 10) {
3424         B->ops->mult             = MatMult_SeqMAIJ_10;
3425         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3426         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3427         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3428       } else if (dof == 11) {
3429         B->ops->mult             = MatMult_SeqMAIJ_11;
3430         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3431         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3432         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3433       } else if (dof == 16) {
3434         B->ops->mult             = MatMult_SeqMAIJ_16;
3435         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3436         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3437         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3438       } else if (dof == 18) {
3439         B->ops->mult             = MatMult_SeqMAIJ_18;
3440         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3441         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3442         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3443       } else {
3444         B->ops->mult             = MatMult_SeqMAIJ_N;
3445         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3446         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3447         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3448       }
3449 #if defined(PETSC_HAVE_CUDA)
3450       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ));
3451 #endif
3452       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ));
3453       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
3454     } else {
3455       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3456       Mat_MPIMAIJ *b;
3457       IS          from,to;
3458       Vec         gvec;
3459 
3460       PetscCall(MatSetType(B,MATMPIMAIJ));
3461 
3462       B->ops->setup            = NULL;
3463       B->ops->destroy          = MatDestroy_MPIMAIJ;
3464       B->ops->view             = MatView_MPIMAIJ;
3465 
3466       b      = (Mat_MPIMAIJ*)B->data;
3467       b->dof = dof;
3468       b->A   = A;
3469 
3470       PetscCall(MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ));
3471       PetscCall(MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ));
3472 
3473       PetscCall(VecGetSize(mpiaij->lvec,&n));
3474       PetscCall(VecCreate(PETSC_COMM_SELF,&b->w));
3475       PetscCall(VecSetSizes(b->w,n*dof,n*dof));
3476       PetscCall(VecSetBlockSize(b->w,dof));
3477       PetscCall(VecSetType(b->w,VECSEQ));
3478 
3479       /* create two temporary Index sets for build scatter gather */
3480       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
3481       PetscCall(ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to));
3482 
3483       /* create temporary global vector to generate scatter context */
3484       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec));
3485 
3486       /* generate the scatter context */
3487       PetscCall(VecScatterCreate(gvec,from,b->w,to,&b->ctx));
3488 
3489       PetscCall(ISDestroy(&from));
3490       PetscCall(ISDestroy(&to));
3491       PetscCall(VecDestroy(&gvec));
3492 
3493       B->ops->mult             = MatMult_MPIMAIJ_dof;
3494       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3495       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3496       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3497 
3498 #if defined(PETSC_HAVE_CUDA)
3499       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ));
3500 #endif
3501       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ));
3502       PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
3503     }
3504     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3505     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3506     PetscCall(MatSetUp(B));
3507 #if defined(PETSC_HAVE_CUDA)
3508     /* temporary until we have CUDA implementation of MAIJ */
3509     {
3510       PetscBool flg;
3511       if (convert) {
3512         PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,""));
3513         if (flg) {
3514           PetscCall(MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B));
3515         }
3516       }
3517     }
3518 #endif
3519     *maij = B;
3520   }
3521   PetscFunctionReturn(0);
3522 }
3523