xref: /petsc/src/mat/impls/maij/maij.c (revision 01a79839fc82a7dabb7a87cd2a8bb532c6bfa88d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the MAIJ  matrix storage format.
5   This format is used for restriction and interpolation operations for
6   multicomponent problems. It interpolates each component the same way
7   independently.
8 
9      We provide:
10          MatMult()
11          MatMultTranspose()
12          MatMultTransposeAdd()
13          MatMultAdd()
14           and
15          MatCreateMAIJ(Mat,dof,Mat*)
16 
17      This single directory handles both the sequential and parallel codes
18 */
19 
20 #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/
21 #include "../src/mat/utils/freespace.h"
22 #include "private/vecimpl.h"
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatMAIJGetAIJ"
26 /*@C
27    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
28 
29    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
30 
31    Input Parameter:
32 .  A - the MAIJ matrix
33 
34    Output Parameter:
35 .  B - the AIJ matrix
36 
37    Level: advanced
38 
39    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
40 
41 .seealso: MatCreateMAIJ()
42 @*/
43 PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
44 {
45   PetscErrorCode ierr;
46   PetscBool      ismpimaij,isseqmaij;
47 
48   PetscFunctionBegin;
49   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
50   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
51   if (ismpimaij) {
52     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
53 
54     *B = b->A;
55   } else if (isseqmaij) {
56     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
57 
58     *B = b->AIJ;
59   } else {
60     *B = A;
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatMAIJRedimension"
67 /*@C
68    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
69 
70    Logically Collective
71 
72    Input Parameter:
73 +  A - the MAIJ matrix
74 -  dof - the block size for the new matrix
75 
76    Output Parameter:
77 .  B - the new MAIJ matrix
78 
79    Level: advanced
80 
81 .seealso: MatCreateMAIJ()
82 @*/
83 PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
84 {
85   PetscErrorCode ierr;
86   Mat            Aij = PETSC_NULL;
87 
88   PetscFunctionBegin;
89   PetscValidLogicalCollectiveInt(A,dof,2);
90   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
91   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatDestroy_SeqMAIJ"
97 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
98 {
99   PetscErrorCode ierr;
100   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
101 
102   PetscFunctionBegin;
103   if (b->AIJ) {
104     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
105   }
106   ierr = PetscFree(b);CHKERRQ(ierr);
107   PetscFunctionReturn(0);
108 }
109 
110 #undef __FUNCT__
111 #define __FUNCT__ "MatView_SeqMAIJ"
112 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
113 {
114   PetscErrorCode ierr;
115   Mat            B;
116 
117   PetscFunctionBegin;
118   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
119   ierr = MatView(B,viewer);CHKERRQ(ierr);
120   ierr = MatDestroy(B);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatView_MPIMAIJ"
126 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
127 {
128   PetscErrorCode ierr;
129   Mat            B;
130 
131   PetscFunctionBegin;
132   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
133   ierr = MatView(B,viewer);CHKERRQ(ierr);
134   ierr = MatDestroy(B);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatDestroy_MPIMAIJ"
140 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
141 {
142   PetscErrorCode ierr;
143   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
144 
145   PetscFunctionBegin;
146   if (b->AIJ) {
147     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
148   }
149   if (b->OAIJ) {
150     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
151   }
152   if (b->A) {
153     ierr = MatDestroy(b->A);CHKERRQ(ierr);
154   }
155   if (b->ctx) {
156     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
157   }
158   if (b->w) {
159     ierr = VecDestroy(b->w);CHKERRQ(ierr);
160   }
161   ierr = PetscFree(b);CHKERRQ(ierr);
162   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 /*MC
167   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
168   multicomponent problems, interpolating or restricting each component the same way independently.
169   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
170 
171   Operations provided:
172 . MatMult
173 . MatMultTranspose
174 . MatMultAdd
175 . MatMultTransposeAdd
176 
177   Level: advanced
178 
179 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
180 M*/
181 
182 EXTERN_C_BEGIN
183 #undef __FUNCT__
184 #define __FUNCT__ "MatCreate_MAIJ"
185 PetscErrorCode  MatCreate_MAIJ(Mat A)
186 {
187   PetscErrorCode ierr;
188   Mat_MPIMAIJ    *b;
189   PetscMPIInt    size;
190 
191   PetscFunctionBegin;
192   ierr     = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr);
193   A->data  = (void*)b;
194   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
195 
196   b->AIJ  = 0;
197   b->dof  = 0;
198   b->OAIJ = 0;
199   b->ctx  = 0;
200   b->w    = 0;
201   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
202   if (size == 1){
203     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
204   } else {
205     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
206   }
207   PetscFunctionReturn(0);
208 }
209 EXTERN_C_END
210 
211 /* --------------------------------------------------------------------------------------*/
212 #undef __FUNCT__
213 #define __FUNCT__ "MatMult_SeqMAIJ_2"
214 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
215 {
216   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
217   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
218   const PetscScalar *x,*v;
219   PetscScalar       *y, sum1, sum2;
220   PetscErrorCode    ierr;
221   PetscInt          nonzerorow=0,n,i,jrow,j;
222   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
223 
224   PetscFunctionBegin;
225   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
226   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
227   idx  = a->j;
228   v    = a->a;
229   ii   = a->i;
230 
231   for (i=0; i<m; i++) {
232     jrow = ii[i];
233     n    = ii[i+1] - jrow;
234     sum1  = 0.0;
235     sum2  = 0.0;
236     nonzerorow += (n>0);
237     for (j=0; j<n; j++) {
238       sum1 += v[jrow]*x[2*idx[jrow]];
239       sum2 += v[jrow]*x[2*idx[jrow]+1];
240       jrow++;
241      }
242     y[2*i]   = sum1;
243     y[2*i+1] = sum2;
244   }
245 
246   ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
247   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
248   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
249   PetscFunctionReturn(0);
250 }
251 
252 #undef __FUNCT__
253 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
254 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
255 {
256   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
257   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
258   const PetscScalar *x,*v;
259   PetscScalar       *y,alpha1,alpha2;
260   PetscErrorCode    ierr;
261   PetscInt          n,i;
262   const PetscInt    m = b->AIJ->rmap->n,*idx;
263 
264   PetscFunctionBegin;
265   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
266   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
267   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
268 
269   for (i=0; i<m; i++) {
270     idx    = a->j + a->i[i] ;
271     v      = a->a + a->i[i] ;
272     n      = a->i[i+1] - a->i[i];
273     alpha1 = x[2*i];
274     alpha2 = x[2*i+1];
275     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
276   }
277   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
278   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
279   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
285 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
286 {
287   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
288   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
289   const PetscScalar *x,*v;
290   PetscScalar       *y,sum1, sum2;
291   PetscErrorCode    ierr;
292   PetscInt          n,i,jrow,j;
293   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
294 
295   PetscFunctionBegin;
296   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
297   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
298   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
299   idx  = a->j;
300   v    = a->a;
301   ii   = a->i;
302 
303   for (i=0; i<m; i++) {
304     jrow = ii[i];
305     n    = ii[i+1] - jrow;
306     sum1  = 0.0;
307     sum2  = 0.0;
308     for (j=0; j<n; j++) {
309       sum1 += v[jrow]*x[2*idx[jrow]];
310       sum2 += v[jrow]*x[2*idx[jrow]+1];
311       jrow++;
312      }
313     y[2*i]   += sum1;
314     y[2*i+1] += sum2;
315   }
316 
317   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
319   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 #undef __FUNCT__
323 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
324 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
325 {
326   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
327   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
328   const PetscScalar *x,*v;
329   PetscScalar       *y,alpha1,alpha2;
330   PetscErrorCode    ierr;
331   PetscInt          n,i;
332   const PetscInt    m = b->AIJ->rmap->n,*idx;
333 
334   PetscFunctionBegin;
335   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
336   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
337   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
338 
339   for (i=0; i<m; i++) {
340     idx   = a->j + a->i[i] ;
341     v     = a->a + a->i[i] ;
342     n     = a->i[i+1] - a->i[i];
343     alpha1 = x[2*i];
344     alpha2 = x[2*i+1];
345     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
346   }
347   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
348   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
349   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 /* --------------------------------------------------------------------------------------*/
353 #undef __FUNCT__
354 #define __FUNCT__ "MatMult_SeqMAIJ_3"
355 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
356 {
357   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
358   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
359   const PetscScalar *x,*v;
360   PetscScalar       *y,sum1, sum2, sum3;
361   PetscErrorCode    ierr;
362   PetscInt          nonzerorow=0,n,i,jrow,j;
363   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
364 
365   PetscFunctionBegin;
366   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
367   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
368   idx  = a->j;
369   v    = a->a;
370   ii   = a->i;
371 
372   for (i=0; i<m; i++) {
373     jrow = ii[i];
374     n    = ii[i+1] - jrow;
375     sum1  = 0.0;
376     sum2  = 0.0;
377     sum3  = 0.0;
378     nonzerorow += (n>0);
379     for (j=0; j<n; j++) {
380       sum1 += v[jrow]*x[3*idx[jrow]];
381       sum2 += v[jrow]*x[3*idx[jrow]+1];
382       sum3 += v[jrow]*x[3*idx[jrow]+2];
383       jrow++;
384      }
385     y[3*i]   = sum1;
386     y[3*i+1] = sum2;
387     y[3*i+2] = sum3;
388   }
389 
390   ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
391   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
392   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
393   PetscFunctionReturn(0);
394 }
395 
396 #undef __FUNCT__
397 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
398 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
399 {
400   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
401   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
402   const PetscScalar *x,*v;
403   PetscScalar       *y,alpha1,alpha2,alpha3;
404   PetscErrorCode    ierr;
405   PetscInt          n,i;
406   const PetscInt    m = b->AIJ->rmap->n,*idx;
407 
408   PetscFunctionBegin;
409   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
410   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
411   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
412 
413   for (i=0; i<m; i++) {
414     idx    = a->j + a->i[i];
415     v      = a->a + a->i[i];
416     n      = a->i[i+1] - a->i[i];
417     alpha1 = x[3*i];
418     alpha2 = x[3*i+1];
419     alpha3 = x[3*i+2];
420     while (n-->0) {
421       y[3*(*idx)]   += alpha1*(*v);
422       y[3*(*idx)+1] += alpha2*(*v);
423       y[3*(*idx)+2] += alpha3*(*v);
424       idx++; v++;
425     }
426   }
427   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
428   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
435 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436 {
437   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
438   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
439   const PetscScalar *x,*v;
440   PetscScalar       *y,sum1, sum2, sum3;
441   PetscErrorCode    ierr;
442   PetscInt          n,i,jrow,j;
443   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
444 
445   PetscFunctionBegin;
446   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
447   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
448   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
449   idx  = a->j;
450   v    = a->a;
451   ii   = a->i;
452 
453   for (i=0; i<m; i++) {
454     jrow = ii[i];
455     n    = ii[i+1] - jrow;
456     sum1  = 0.0;
457     sum2  = 0.0;
458     sum3  = 0.0;
459     for (j=0; j<n; j++) {
460       sum1 += v[jrow]*x[3*idx[jrow]];
461       sum2 += v[jrow]*x[3*idx[jrow]+1];
462       sum3 += v[jrow]*x[3*idx[jrow]+2];
463       jrow++;
464      }
465     y[3*i]   += sum1;
466     y[3*i+1] += sum2;
467     y[3*i+2] += sum3;
468   }
469 
470   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
471   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
472   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 #undef __FUNCT__
476 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
477 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
478 {
479   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
480   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
481   const PetscScalar *x,*v;
482   PetscScalar       *y,alpha1,alpha2,alpha3;
483   PetscErrorCode    ierr;
484   PetscInt          n,i;
485   const PetscInt    m = b->AIJ->rmap->n,*idx;
486 
487   PetscFunctionBegin;
488   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
489   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
490   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
491   for (i=0; i<m; i++) {
492     idx    = a->j + a->i[i] ;
493     v      = a->a + a->i[i] ;
494     n      = a->i[i+1] - a->i[i];
495     alpha1 = x[3*i];
496     alpha2 = x[3*i+1];
497     alpha3 = x[3*i+2];
498     while (n-->0) {
499       y[3*(*idx)]   += alpha1*(*v);
500       y[3*(*idx)+1] += alpha2*(*v);
501       y[3*(*idx)+2] += alpha3*(*v);
502       idx++; v++;
503     }
504   }
505   ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr);
506   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
507   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 /* ------------------------------------------------------------------------------*/
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMult_SeqMAIJ_4"
514 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
515 {
516   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
517   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
518   const PetscScalar *x,*v;
519   PetscScalar       *y,sum1, sum2, sum3, sum4;
520   PetscErrorCode    ierr;
521   PetscInt          nonzerorow=0,n,i,jrow,j;
522   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
523 
524   PetscFunctionBegin;
525   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
526   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
527   idx  = a->j;
528   v    = a->a;
529   ii   = a->i;
530 
531   for (i=0; i<m; i++) {
532     jrow = ii[i];
533     n    = ii[i+1] - jrow;
534     sum1  = 0.0;
535     sum2  = 0.0;
536     sum3  = 0.0;
537     sum4  = 0.0;
538     nonzerorow += (n>0);
539     for (j=0; j<n; j++) {
540       sum1 += v[jrow]*x[4*idx[jrow]];
541       sum2 += v[jrow]*x[4*idx[jrow]+1];
542       sum3 += v[jrow]*x[4*idx[jrow]+2];
543       sum4 += v[jrow]*x[4*idx[jrow]+3];
544       jrow++;
545      }
546     y[4*i]   = sum1;
547     y[4*i+1] = sum2;
548     y[4*i+2] = sum3;
549     y[4*i+3] = sum4;
550   }
551 
552   ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
553   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
554   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
560 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
561 {
562   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
563   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
564   const PetscScalar *x,*v;
565   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
566   PetscErrorCode    ierr;
567   PetscInt          n,i;
568   const PetscInt    m = b->AIJ->rmap->n,*idx;
569 
570   PetscFunctionBegin;
571   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
572   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
574   for (i=0; i<m; i++) {
575     idx    = a->j + a->i[i] ;
576     v      = a->a + a->i[i] ;
577     n      = a->i[i+1] - a->i[i];
578     alpha1 = x[4*i];
579     alpha2 = x[4*i+1];
580     alpha3 = x[4*i+2];
581     alpha4 = x[4*i+3];
582     while (n-->0) {
583       y[4*(*idx)]   += alpha1*(*v);
584       y[4*(*idx)+1] += alpha2*(*v);
585       y[4*(*idx)+2] += alpha3*(*v);
586       y[4*(*idx)+3] += alpha4*(*v);
587       idx++; v++;
588     }
589   }
590   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
591   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
592   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
593   PetscFunctionReturn(0);
594 }
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
598 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
599 {
600   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
601   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
602   const PetscScalar *x,*v;
603   PetscScalar       *y,sum1, sum2, sum3, sum4;
604   PetscErrorCode    ierr;
605   PetscInt          n,i,jrow,j;
606   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
607 
608   PetscFunctionBegin;
609   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
610   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
611   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
612   idx  = a->j;
613   v    = a->a;
614   ii   = a->i;
615 
616   for (i=0; i<m; i++) {
617     jrow = ii[i];
618     n    = ii[i+1] - jrow;
619     sum1  = 0.0;
620     sum2  = 0.0;
621     sum3  = 0.0;
622     sum4  = 0.0;
623     for (j=0; j<n; j++) {
624       sum1 += v[jrow]*x[4*idx[jrow]];
625       sum2 += v[jrow]*x[4*idx[jrow]+1];
626       sum3 += v[jrow]*x[4*idx[jrow]+2];
627       sum4 += v[jrow]*x[4*idx[jrow]+3];
628       jrow++;
629      }
630     y[4*i]   += sum1;
631     y[4*i+1] += sum2;
632     y[4*i+2] += sum3;
633     y[4*i+3] += sum4;
634   }
635 
636   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
637   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
638   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
639   PetscFunctionReturn(0);
640 }
641 #undef __FUNCT__
642 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
643 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
644 {
645   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
646   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
647   const PetscScalar *x,*v;
648   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
649   PetscErrorCode    ierr;
650   PetscInt          n,i;
651   const PetscInt    m = b->AIJ->rmap->n,*idx;
652 
653   PetscFunctionBegin;
654   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
655   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
656   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
657 
658   for (i=0; i<m; i++) {
659     idx    = a->j + a->i[i] ;
660     v      = a->a + a->i[i] ;
661     n      = a->i[i+1] - a->i[i];
662     alpha1 = x[4*i];
663     alpha2 = x[4*i+1];
664     alpha3 = x[4*i+2];
665     alpha4 = x[4*i+3];
666     while (n-->0) {
667       y[4*(*idx)]   += alpha1*(*v);
668       y[4*(*idx)+1] += alpha2*(*v);
669       y[4*(*idx)+2] += alpha3*(*v);
670       y[4*(*idx)+3] += alpha4*(*v);
671       idx++; v++;
672     }
673   }
674   ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr);
675   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
676   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 /* ------------------------------------------------------------------------------*/
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatMult_SeqMAIJ_5"
683 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
684 {
685   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
686   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
687   const PetscScalar *x,*v;
688   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
689   PetscErrorCode    ierr;
690   PetscInt          nonzerorow=0,n,i,jrow,j;
691   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
692 
693   PetscFunctionBegin;
694   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
695   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
696   idx  = a->j;
697   v    = a->a;
698   ii   = a->i;
699 
700   for (i=0; i<m; i++) {
701     jrow = ii[i];
702     n    = ii[i+1] - jrow;
703     sum1  = 0.0;
704     sum2  = 0.0;
705     sum3  = 0.0;
706     sum4  = 0.0;
707     sum5  = 0.0;
708     nonzerorow += (n>0);
709     for (j=0; j<n; j++) {
710       sum1 += v[jrow]*x[5*idx[jrow]];
711       sum2 += v[jrow]*x[5*idx[jrow]+1];
712       sum3 += v[jrow]*x[5*idx[jrow]+2];
713       sum4 += v[jrow]*x[5*idx[jrow]+3];
714       sum5 += v[jrow]*x[5*idx[jrow]+4];
715       jrow++;
716      }
717     y[5*i]   = sum1;
718     y[5*i+1] = sum2;
719     y[5*i+2] = sum3;
720     y[5*i+3] = sum4;
721     y[5*i+4] = sum5;
722   }
723 
724   ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
725   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
726   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
732 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
733 {
734   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
735   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
736   const PetscScalar *x,*v;
737   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
738   PetscErrorCode    ierr;
739   PetscInt          n,i;
740   const PetscInt    m = b->AIJ->rmap->n,*idx;
741 
742   PetscFunctionBegin;
743   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
744   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
745   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
746 
747   for (i=0; i<m; i++) {
748     idx    = a->j + a->i[i] ;
749     v      = a->a + a->i[i] ;
750     n      = a->i[i+1] - a->i[i];
751     alpha1 = x[5*i];
752     alpha2 = x[5*i+1];
753     alpha3 = x[5*i+2];
754     alpha4 = x[5*i+3];
755     alpha5 = x[5*i+4];
756     while (n-->0) {
757       y[5*(*idx)]   += alpha1*(*v);
758       y[5*(*idx)+1] += alpha2*(*v);
759       y[5*(*idx)+2] += alpha3*(*v);
760       y[5*(*idx)+3] += alpha4*(*v);
761       y[5*(*idx)+4] += alpha5*(*v);
762       idx++; v++;
763     }
764   }
765   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
766   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
767   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 #undef __FUNCT__
772 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
773 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
774 {
775   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
776   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
777   const PetscScalar *x,*v;
778   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
779   PetscErrorCode    ierr;
780   PetscInt          n,i,jrow,j;
781   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
782 
783   PetscFunctionBegin;
784   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
785   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
786   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
787   idx  = a->j;
788   v    = a->a;
789   ii   = a->i;
790 
791   for (i=0; i<m; i++) {
792     jrow = ii[i];
793     n    = ii[i+1] - jrow;
794     sum1  = 0.0;
795     sum2  = 0.0;
796     sum3  = 0.0;
797     sum4  = 0.0;
798     sum5  = 0.0;
799     for (j=0; j<n; j++) {
800       sum1 += v[jrow]*x[5*idx[jrow]];
801       sum2 += v[jrow]*x[5*idx[jrow]+1];
802       sum3 += v[jrow]*x[5*idx[jrow]+2];
803       sum4 += v[jrow]*x[5*idx[jrow]+3];
804       sum5 += v[jrow]*x[5*idx[jrow]+4];
805       jrow++;
806      }
807     y[5*i]   += sum1;
808     y[5*i+1] += sum2;
809     y[5*i+2] += sum3;
810     y[5*i+3] += sum4;
811     y[5*i+4] += sum5;
812   }
813 
814   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
815   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
816   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
822 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
823 {
824   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
825   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
826   const PetscScalar *x,*v;
827   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
828   PetscErrorCode    ierr;
829   PetscInt          n,i;
830   const PetscInt    m = b->AIJ->rmap->n,*idx;
831 
832   PetscFunctionBegin;
833   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
834   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
835   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
836 
837   for (i=0; i<m; i++) {
838     idx    = a->j + a->i[i] ;
839     v      = a->a + a->i[i] ;
840     n      = a->i[i+1] - a->i[i];
841     alpha1 = x[5*i];
842     alpha2 = x[5*i+1];
843     alpha3 = x[5*i+2];
844     alpha4 = x[5*i+3];
845     alpha5 = x[5*i+4];
846     while (n-->0) {
847       y[5*(*idx)]   += alpha1*(*v);
848       y[5*(*idx)+1] += alpha2*(*v);
849       y[5*(*idx)+2] += alpha3*(*v);
850       y[5*(*idx)+3] += alpha4*(*v);
851       y[5*(*idx)+4] += alpha5*(*v);
852       idx++; v++;
853     }
854   }
855   ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr);
856   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
857   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 /* ------------------------------------------------------------------------------*/
862 #undef __FUNCT__
863 #define __FUNCT__ "MatMult_SeqMAIJ_6"
864 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
865 {
866   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
867   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
868   const PetscScalar *x,*v;
869   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
870   PetscErrorCode    ierr;
871   PetscInt          nonzerorow=0,n,i,jrow,j;
872   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
873 
874   PetscFunctionBegin;
875   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
876   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
877   idx  = a->j;
878   v    = a->a;
879   ii   = a->i;
880 
881   for (i=0; i<m; i++) {
882     jrow = ii[i];
883     n    = ii[i+1] - jrow;
884     sum1  = 0.0;
885     sum2  = 0.0;
886     sum3  = 0.0;
887     sum4  = 0.0;
888     sum5  = 0.0;
889     sum6  = 0.0;
890     nonzerorow += (n>0);
891     for (j=0; j<n; j++) {
892       sum1 += v[jrow]*x[6*idx[jrow]];
893       sum2 += v[jrow]*x[6*idx[jrow]+1];
894       sum3 += v[jrow]*x[6*idx[jrow]+2];
895       sum4 += v[jrow]*x[6*idx[jrow]+3];
896       sum5 += v[jrow]*x[6*idx[jrow]+4];
897       sum6 += v[jrow]*x[6*idx[jrow]+5];
898       jrow++;
899      }
900     y[6*i]   = sum1;
901     y[6*i+1] = sum2;
902     y[6*i+2] = sum3;
903     y[6*i+3] = sum4;
904     y[6*i+4] = sum5;
905     y[6*i+5] = sum6;
906   }
907 
908   ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
909   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
910   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
911   PetscFunctionReturn(0);
912 }
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
916 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
917 {
918   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
919   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
920   const PetscScalar *x,*v;
921   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
922   PetscErrorCode    ierr;
923   PetscInt          n,i;
924   const PetscInt    m = b->AIJ->rmap->n,*idx;
925 
926   PetscFunctionBegin;
927   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
928   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
929   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
930 
931   for (i=0; i<m; i++) {
932     idx    = a->j + a->i[i] ;
933     v      = a->a + a->i[i] ;
934     n      = a->i[i+1] - a->i[i];
935     alpha1 = x[6*i];
936     alpha2 = x[6*i+1];
937     alpha3 = x[6*i+2];
938     alpha4 = x[6*i+3];
939     alpha5 = x[6*i+4];
940     alpha6 = x[6*i+5];
941     while (n-->0) {
942       y[6*(*idx)]   += alpha1*(*v);
943       y[6*(*idx)+1] += alpha2*(*v);
944       y[6*(*idx)+2] += alpha3*(*v);
945       y[6*(*idx)+3] += alpha4*(*v);
946       y[6*(*idx)+4] += alpha5*(*v);
947       y[6*(*idx)+5] += alpha6*(*v);
948       idx++; v++;
949     }
950   }
951   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
952   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
953   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
954   PetscFunctionReturn(0);
955 }
956 
957 #undef __FUNCT__
958 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
959 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
962   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
963   const PetscScalar *x,*v;
964   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
965   PetscErrorCode    ierr;
966   PetscInt          n,i,jrow,j;
967   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
968 
969   PetscFunctionBegin;
970   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
971   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
972   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
973   idx  = a->j;
974   v    = a->a;
975   ii   = a->i;
976 
977   for (i=0; i<m; i++) {
978     jrow = ii[i];
979     n    = ii[i+1] - jrow;
980     sum1  = 0.0;
981     sum2  = 0.0;
982     sum3  = 0.0;
983     sum4  = 0.0;
984     sum5  = 0.0;
985     sum6  = 0.0;
986     for (j=0; j<n; j++) {
987       sum1 += v[jrow]*x[6*idx[jrow]];
988       sum2 += v[jrow]*x[6*idx[jrow]+1];
989       sum3 += v[jrow]*x[6*idx[jrow]+2];
990       sum4 += v[jrow]*x[6*idx[jrow]+3];
991       sum5 += v[jrow]*x[6*idx[jrow]+4];
992       sum6 += v[jrow]*x[6*idx[jrow]+5];
993       jrow++;
994      }
995     y[6*i]   += sum1;
996     y[6*i+1] += sum2;
997     y[6*i+2] += sum3;
998     y[6*i+3] += sum4;
999     y[6*i+4] += sum5;
1000     y[6*i+5] += sum6;
1001   }
1002 
1003   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1004   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1005   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
1011 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1012 {
1013   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1014   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1015   const PetscScalar *x,*v;
1016   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1017   PetscErrorCode    ierr;
1018   PetscInt          n,i;
1019   const PetscInt    m = b->AIJ->rmap->n,*idx;
1020 
1021   PetscFunctionBegin;
1022   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1023   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1024   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1025 
1026   for (i=0; i<m; i++) {
1027     idx    = a->j + a->i[i] ;
1028     v      = a->a + a->i[i] ;
1029     n      = a->i[i+1] - a->i[i];
1030     alpha1 = x[6*i];
1031     alpha2 = x[6*i+1];
1032     alpha3 = x[6*i+2];
1033     alpha4 = x[6*i+3];
1034     alpha5 = x[6*i+4];
1035     alpha6 = x[6*i+5];
1036     while (n-->0) {
1037       y[6*(*idx)]   += alpha1*(*v);
1038       y[6*(*idx)+1] += alpha2*(*v);
1039       y[6*(*idx)+2] += alpha3*(*v);
1040       y[6*(*idx)+3] += alpha4*(*v);
1041       y[6*(*idx)+4] += alpha5*(*v);
1042       y[6*(*idx)+5] += alpha6*(*v);
1043       idx++; v++;
1044     }
1045   }
1046   ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr);
1047   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1048   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 /* ------------------------------------------------------------------------------*/
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatMult_SeqMAIJ_7"
1055 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1056 {
1057   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1058   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1059   const PetscScalar *x,*v;
1060   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1061   PetscErrorCode    ierr;
1062   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1063   PetscInt          nonzerorow=0,n,i,jrow,j;
1064 
1065   PetscFunctionBegin;
1066   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1067   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1068   idx  = a->j;
1069   v    = a->a;
1070   ii   = a->i;
1071 
1072   for (i=0; i<m; i++) {
1073     jrow = ii[i];
1074     n    = ii[i+1] - jrow;
1075     sum1  = 0.0;
1076     sum2  = 0.0;
1077     sum3  = 0.0;
1078     sum4  = 0.0;
1079     sum5  = 0.0;
1080     sum6  = 0.0;
1081     sum7  = 0.0;
1082     nonzerorow += (n>0);
1083     for (j=0; j<n; j++) {
1084       sum1 += v[jrow]*x[7*idx[jrow]];
1085       sum2 += v[jrow]*x[7*idx[jrow]+1];
1086       sum3 += v[jrow]*x[7*idx[jrow]+2];
1087       sum4 += v[jrow]*x[7*idx[jrow]+3];
1088       sum5 += v[jrow]*x[7*idx[jrow]+4];
1089       sum6 += v[jrow]*x[7*idx[jrow]+5];
1090       sum7 += v[jrow]*x[7*idx[jrow]+6];
1091       jrow++;
1092      }
1093     y[7*i]   = sum1;
1094     y[7*i+1] = sum2;
1095     y[7*i+2] = sum3;
1096     y[7*i+3] = sum4;
1097     y[7*i+4] = sum5;
1098     y[7*i+5] = sum6;
1099     y[7*i+6] = sum7;
1100   }
1101 
1102   ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
1103   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1104   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1110 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1111 {
1112   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1113   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1114   const PetscScalar *x,*v;
1115   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1116   PetscErrorCode    ierr;
1117   const PetscInt    m = b->AIJ->rmap->n,*idx;
1118   PetscInt          n,i;
1119 
1120   PetscFunctionBegin;
1121   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1122   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1123   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1124 
1125   for (i=0; i<m; i++) {
1126     idx    = a->j + a->i[i] ;
1127     v      = a->a + a->i[i] ;
1128     n      = a->i[i+1] - a->i[i];
1129     alpha1 = x[7*i];
1130     alpha2 = x[7*i+1];
1131     alpha3 = x[7*i+2];
1132     alpha4 = x[7*i+3];
1133     alpha5 = x[7*i+4];
1134     alpha6 = x[7*i+5];
1135     alpha7 = x[7*i+6];
1136     while (n-->0) {
1137       y[7*(*idx)]   += alpha1*(*v);
1138       y[7*(*idx)+1] += alpha2*(*v);
1139       y[7*(*idx)+2] += alpha3*(*v);
1140       y[7*(*idx)+3] += alpha4*(*v);
1141       y[7*(*idx)+4] += alpha5*(*v);
1142       y[7*(*idx)+5] += alpha6*(*v);
1143       y[7*(*idx)+6] += alpha7*(*v);
1144       idx++; v++;
1145     }
1146   }
1147   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1148   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1149   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1155 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1156 {
1157   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1158   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1159   const PetscScalar *x,*v;
1160   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1161   PetscErrorCode    ierr;
1162   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1163   PetscInt          n,i,jrow,j;
1164 
1165   PetscFunctionBegin;
1166   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1167   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1168   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1169   idx  = a->j;
1170   v    = a->a;
1171   ii   = a->i;
1172 
1173   for (i=0; i<m; i++) {
1174     jrow = ii[i];
1175     n    = ii[i+1] - jrow;
1176     sum1  = 0.0;
1177     sum2  = 0.0;
1178     sum3  = 0.0;
1179     sum4  = 0.0;
1180     sum5  = 0.0;
1181     sum6  = 0.0;
1182     sum7  = 0.0;
1183     for (j=0; j<n; j++) {
1184       sum1 += v[jrow]*x[7*idx[jrow]];
1185       sum2 += v[jrow]*x[7*idx[jrow]+1];
1186       sum3 += v[jrow]*x[7*idx[jrow]+2];
1187       sum4 += v[jrow]*x[7*idx[jrow]+3];
1188       sum5 += v[jrow]*x[7*idx[jrow]+4];
1189       sum6 += v[jrow]*x[7*idx[jrow]+5];
1190       sum7 += v[jrow]*x[7*idx[jrow]+6];
1191       jrow++;
1192      }
1193     y[7*i]   += sum1;
1194     y[7*i+1] += sum2;
1195     y[7*i+2] += sum3;
1196     y[7*i+3] += sum4;
1197     y[7*i+4] += sum5;
1198     y[7*i+5] += sum6;
1199     y[7*i+6] += sum7;
1200   }
1201 
1202   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1203   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1204   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1210 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1211 {
1212   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1213   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1214   const PetscScalar *x,*v;
1215   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1216   PetscErrorCode    ierr;
1217   const PetscInt    m = b->AIJ->rmap->n,*idx;
1218   PetscInt          n,i;
1219 
1220   PetscFunctionBegin;
1221   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1222   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1223   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1224   for (i=0; i<m; i++) {
1225     idx    = a->j + a->i[i] ;
1226     v      = a->a + a->i[i] ;
1227     n      = a->i[i+1] - a->i[i];
1228     alpha1 = x[7*i];
1229     alpha2 = x[7*i+1];
1230     alpha3 = x[7*i+2];
1231     alpha4 = x[7*i+3];
1232     alpha5 = x[7*i+4];
1233     alpha6 = x[7*i+5];
1234     alpha7 = x[7*i+6];
1235     while (n-->0) {
1236       y[7*(*idx)]   += alpha1*(*v);
1237       y[7*(*idx)+1] += alpha2*(*v);
1238       y[7*(*idx)+2] += alpha3*(*v);
1239       y[7*(*idx)+3] += alpha4*(*v);
1240       y[7*(*idx)+4] += alpha5*(*v);
1241       y[7*(*idx)+5] += alpha6*(*v);
1242       y[7*(*idx)+6] += alpha7*(*v);
1243       idx++; v++;
1244     }
1245   }
1246   ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr);
1247   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1248   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1254 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1255 {
1256   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1257   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1258   const PetscScalar *x,*v;
1259   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1260   PetscErrorCode    ierr;
1261   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1262   PetscInt          nonzerorow=0,n,i,jrow,j;
1263 
1264   PetscFunctionBegin;
1265   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1266   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1267   idx  = a->j;
1268   v    = a->a;
1269   ii   = a->i;
1270 
1271   for (i=0; i<m; i++) {
1272     jrow = ii[i];
1273     n    = ii[i+1] - jrow;
1274     sum1  = 0.0;
1275     sum2  = 0.0;
1276     sum3  = 0.0;
1277     sum4  = 0.0;
1278     sum5  = 0.0;
1279     sum6  = 0.0;
1280     sum7  = 0.0;
1281     sum8  = 0.0;
1282     nonzerorow += (n>0);
1283     for (j=0; j<n; j++) {
1284       sum1 += v[jrow]*x[8*idx[jrow]];
1285       sum2 += v[jrow]*x[8*idx[jrow]+1];
1286       sum3 += v[jrow]*x[8*idx[jrow]+2];
1287       sum4 += v[jrow]*x[8*idx[jrow]+3];
1288       sum5 += v[jrow]*x[8*idx[jrow]+4];
1289       sum6 += v[jrow]*x[8*idx[jrow]+5];
1290       sum7 += v[jrow]*x[8*idx[jrow]+6];
1291       sum8 += v[jrow]*x[8*idx[jrow]+7];
1292       jrow++;
1293      }
1294     y[8*i]   = sum1;
1295     y[8*i+1] = sum2;
1296     y[8*i+2] = sum3;
1297     y[8*i+3] = sum4;
1298     y[8*i+4] = sum5;
1299     y[8*i+5] = sum6;
1300     y[8*i+6] = sum7;
1301     y[8*i+7] = sum8;
1302   }
1303 
1304   ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr);
1305   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1306   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1312 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1313 {
1314   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1315   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1316   const PetscScalar *x,*v;
1317   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1318   PetscErrorCode    ierr;
1319   const PetscInt    m = b->AIJ->rmap->n,*idx;
1320   PetscInt          n,i;
1321 
1322   PetscFunctionBegin;
1323   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1324   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1325   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1326 
1327   for (i=0; i<m; i++) {
1328     idx    = a->j + a->i[i] ;
1329     v      = a->a + a->i[i] ;
1330     n      = a->i[i+1] - a->i[i];
1331     alpha1 = x[8*i];
1332     alpha2 = x[8*i+1];
1333     alpha3 = x[8*i+2];
1334     alpha4 = x[8*i+3];
1335     alpha5 = x[8*i+4];
1336     alpha6 = x[8*i+5];
1337     alpha7 = x[8*i+6];
1338     alpha8 = x[8*i+7];
1339     while (n-->0) {
1340       y[8*(*idx)]   += alpha1*(*v);
1341       y[8*(*idx)+1] += alpha2*(*v);
1342       y[8*(*idx)+2] += alpha3*(*v);
1343       y[8*(*idx)+3] += alpha4*(*v);
1344       y[8*(*idx)+4] += alpha5*(*v);
1345       y[8*(*idx)+5] += alpha6*(*v);
1346       y[8*(*idx)+6] += alpha7*(*v);
1347       y[8*(*idx)+7] += alpha8*(*v);
1348       idx++; v++;
1349     }
1350   }
1351   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1352   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1353   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1359 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1360 {
1361   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1362   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1363   const PetscScalar *x,*v;
1364   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1365   PetscErrorCode    ierr;
1366   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1367   PetscInt          n,i,jrow,j;
1368 
1369   PetscFunctionBegin;
1370   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1371   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1372   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1373   idx  = a->j;
1374   v    = a->a;
1375   ii   = a->i;
1376 
1377   for (i=0; i<m; i++) {
1378     jrow = ii[i];
1379     n    = ii[i+1] - jrow;
1380     sum1  = 0.0;
1381     sum2  = 0.0;
1382     sum3  = 0.0;
1383     sum4  = 0.0;
1384     sum5  = 0.0;
1385     sum6  = 0.0;
1386     sum7  = 0.0;
1387     sum8  = 0.0;
1388     for (j=0; j<n; j++) {
1389       sum1 += v[jrow]*x[8*idx[jrow]];
1390       sum2 += v[jrow]*x[8*idx[jrow]+1];
1391       sum3 += v[jrow]*x[8*idx[jrow]+2];
1392       sum4 += v[jrow]*x[8*idx[jrow]+3];
1393       sum5 += v[jrow]*x[8*idx[jrow]+4];
1394       sum6 += v[jrow]*x[8*idx[jrow]+5];
1395       sum7 += v[jrow]*x[8*idx[jrow]+6];
1396       sum8 += v[jrow]*x[8*idx[jrow]+7];
1397       jrow++;
1398      }
1399     y[8*i]   += sum1;
1400     y[8*i+1] += sum2;
1401     y[8*i+2] += sum3;
1402     y[8*i+3] += sum4;
1403     y[8*i+4] += sum5;
1404     y[8*i+5] += sum6;
1405     y[8*i+6] += sum7;
1406     y[8*i+7] += sum8;
1407   }
1408 
1409   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1410   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1411   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1417 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1418 {
1419   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1420   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1421   const PetscScalar *x,*v;
1422   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1423   PetscErrorCode    ierr;
1424   const PetscInt    m = b->AIJ->rmap->n,*idx;
1425   PetscInt          n,i;
1426 
1427   PetscFunctionBegin;
1428   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1429   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1430   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1431   for (i=0; i<m; i++) {
1432     idx    = a->j + a->i[i] ;
1433     v      = a->a + a->i[i] ;
1434     n      = a->i[i+1] - a->i[i];
1435     alpha1 = x[8*i];
1436     alpha2 = x[8*i+1];
1437     alpha3 = x[8*i+2];
1438     alpha4 = x[8*i+3];
1439     alpha5 = x[8*i+4];
1440     alpha6 = x[8*i+5];
1441     alpha7 = x[8*i+6];
1442     alpha8 = x[8*i+7];
1443     while (n-->0) {
1444       y[8*(*idx)]   += alpha1*(*v);
1445       y[8*(*idx)+1] += alpha2*(*v);
1446       y[8*(*idx)+2] += alpha3*(*v);
1447       y[8*(*idx)+3] += alpha4*(*v);
1448       y[8*(*idx)+4] += alpha5*(*v);
1449       y[8*(*idx)+5] += alpha6*(*v);
1450       y[8*(*idx)+6] += alpha7*(*v);
1451       y[8*(*idx)+7] += alpha8*(*v);
1452       idx++; v++;
1453     }
1454   }
1455   ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr);
1456   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1457   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 /* ------------------------------------------------------------------------------*/
1462 #undef __FUNCT__
1463 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1464 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1465 {
1466   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1467   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1468   const PetscScalar *x,*v;
1469   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1470   PetscErrorCode    ierr;
1471   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1472   PetscInt          nonzerorow=0,n,i,jrow,j;
1473 
1474   PetscFunctionBegin;
1475   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1476   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1477   idx  = a->j;
1478   v    = a->a;
1479   ii   = a->i;
1480 
1481   for (i=0; i<m; i++) {
1482     jrow = ii[i];
1483     n    = ii[i+1] - jrow;
1484     sum1  = 0.0;
1485     sum2  = 0.0;
1486     sum3  = 0.0;
1487     sum4  = 0.0;
1488     sum5  = 0.0;
1489     sum6  = 0.0;
1490     sum7  = 0.0;
1491     sum8  = 0.0;
1492     sum9  = 0.0;
1493     nonzerorow += (n>0);
1494     for (j=0; j<n; j++) {
1495       sum1 += v[jrow]*x[9*idx[jrow]];
1496       sum2 += v[jrow]*x[9*idx[jrow]+1];
1497       sum3 += v[jrow]*x[9*idx[jrow]+2];
1498       sum4 += v[jrow]*x[9*idx[jrow]+3];
1499       sum5 += v[jrow]*x[9*idx[jrow]+4];
1500       sum6 += v[jrow]*x[9*idx[jrow]+5];
1501       sum7 += v[jrow]*x[9*idx[jrow]+6];
1502       sum8 += v[jrow]*x[9*idx[jrow]+7];
1503       sum9 += v[jrow]*x[9*idx[jrow]+8];
1504       jrow++;
1505      }
1506     y[9*i]   = sum1;
1507     y[9*i+1] = sum2;
1508     y[9*i+2] = sum3;
1509     y[9*i+3] = sum4;
1510     y[9*i+4] = sum5;
1511     y[9*i+5] = sum6;
1512     y[9*i+6] = sum7;
1513     y[9*i+7] = sum8;
1514     y[9*i+8] = sum9;
1515   }
1516 
1517   ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr);
1518   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1519   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 /* ------------------------------------------------------------------------------*/
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1527 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1528 {
1529   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1530   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1531   const PetscScalar *x,*v;
1532   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1533   PetscErrorCode    ierr;
1534   const PetscInt    m = b->AIJ->rmap->n,*idx;
1535   PetscInt          n,i;
1536 
1537   PetscFunctionBegin;
1538   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1539   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1540   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1541 
1542   for (i=0; i<m; i++) {
1543     idx    = a->j + a->i[i] ;
1544     v      = a->a + a->i[i] ;
1545     n      = a->i[i+1] - a->i[i];
1546     alpha1 = x[9*i];
1547     alpha2 = x[9*i+1];
1548     alpha3 = x[9*i+2];
1549     alpha4 = x[9*i+3];
1550     alpha5 = x[9*i+4];
1551     alpha6 = x[9*i+5];
1552     alpha7 = x[9*i+6];
1553     alpha8 = x[9*i+7];
1554     alpha9 = x[9*i+8];
1555     while (n-->0) {
1556       y[9*(*idx)]   += alpha1*(*v);
1557       y[9*(*idx)+1] += alpha2*(*v);
1558       y[9*(*idx)+2] += alpha3*(*v);
1559       y[9*(*idx)+3] += alpha4*(*v);
1560       y[9*(*idx)+4] += alpha5*(*v);
1561       y[9*(*idx)+5] += alpha6*(*v);
1562       y[9*(*idx)+6] += alpha7*(*v);
1563       y[9*(*idx)+7] += alpha8*(*v);
1564       y[9*(*idx)+8] += alpha9*(*v);
1565       idx++; v++;
1566     }
1567   }
1568   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1569   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1570   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1576 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1577 {
1578   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1579   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1580   const PetscScalar *x,*v;
1581   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1582   PetscErrorCode    ierr;
1583   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1584   PetscInt          n,i,jrow,j;
1585 
1586   PetscFunctionBegin;
1587   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1588   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1589   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1590   idx  = a->j;
1591   v    = a->a;
1592   ii   = a->i;
1593 
1594   for (i=0; i<m; i++) {
1595     jrow = ii[i];
1596     n    = ii[i+1] - jrow;
1597     sum1  = 0.0;
1598     sum2  = 0.0;
1599     sum3  = 0.0;
1600     sum4  = 0.0;
1601     sum5  = 0.0;
1602     sum6  = 0.0;
1603     sum7  = 0.0;
1604     sum8  = 0.0;
1605     sum9  = 0.0;
1606     for (j=0; j<n; j++) {
1607       sum1 += v[jrow]*x[9*idx[jrow]];
1608       sum2 += v[jrow]*x[9*idx[jrow]+1];
1609       sum3 += v[jrow]*x[9*idx[jrow]+2];
1610       sum4 += v[jrow]*x[9*idx[jrow]+3];
1611       sum5 += v[jrow]*x[9*idx[jrow]+4];
1612       sum6 += v[jrow]*x[9*idx[jrow]+5];
1613       sum7 += v[jrow]*x[9*idx[jrow]+6];
1614       sum8 += v[jrow]*x[9*idx[jrow]+7];
1615       sum9 += v[jrow]*x[9*idx[jrow]+8];
1616       jrow++;
1617      }
1618     y[9*i]   += sum1;
1619     y[9*i+1] += sum2;
1620     y[9*i+2] += sum3;
1621     y[9*i+3] += sum4;
1622     y[9*i+4] += sum5;
1623     y[9*i+5] += sum6;
1624     y[9*i+6] += sum7;
1625     y[9*i+7] += sum8;
1626     y[9*i+8] += sum9;
1627   }
1628 
1629   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1630   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1631   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1637 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1638 {
1639   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1640   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1641   const PetscScalar *x,*v;
1642   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1643   PetscErrorCode    ierr;
1644   const PetscInt    m = b->AIJ->rmap->n,*idx;
1645   PetscInt          n,i;
1646 
1647   PetscFunctionBegin;
1648   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1649   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1650   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1651   for (i=0; i<m; i++) {
1652     idx    = a->j + a->i[i] ;
1653     v      = a->a + a->i[i] ;
1654     n      = a->i[i+1] - a->i[i];
1655     alpha1 = x[9*i];
1656     alpha2 = x[9*i+1];
1657     alpha3 = x[9*i+2];
1658     alpha4 = x[9*i+3];
1659     alpha5 = x[9*i+4];
1660     alpha6 = x[9*i+5];
1661     alpha7 = x[9*i+6];
1662     alpha8 = x[9*i+7];
1663     alpha9 = x[9*i+8];
1664     while (n-->0) {
1665       y[9*(*idx)]   += alpha1*(*v);
1666       y[9*(*idx)+1] += alpha2*(*v);
1667       y[9*(*idx)+2] += alpha3*(*v);
1668       y[9*(*idx)+3] += alpha4*(*v);
1669       y[9*(*idx)+4] += alpha5*(*v);
1670       y[9*(*idx)+5] += alpha6*(*v);
1671       y[9*(*idx)+6] += alpha7*(*v);
1672       y[9*(*idx)+7] += alpha8*(*v);
1673       y[9*(*idx)+8] += alpha9*(*v);
1674       idx++; v++;
1675     }
1676   }
1677   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1678   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1679   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1684 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1685 {
1686   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1687   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1688   const PetscScalar *x,*v;
1689   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1690   PetscErrorCode    ierr;
1691   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1692   PetscInt          nonzerorow=0,n,i,jrow,j;
1693 
1694   PetscFunctionBegin;
1695   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1696   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1697   idx  = a->j;
1698   v    = a->a;
1699   ii   = a->i;
1700 
1701   for (i=0; i<m; i++) {
1702     jrow = ii[i];
1703     n    = ii[i+1] - jrow;
1704     sum1  = 0.0;
1705     sum2  = 0.0;
1706     sum3  = 0.0;
1707     sum4  = 0.0;
1708     sum5  = 0.0;
1709     sum6  = 0.0;
1710     sum7  = 0.0;
1711     sum8  = 0.0;
1712     sum9  = 0.0;
1713     sum10 = 0.0;
1714     nonzerorow += (n>0);
1715     for (j=0; j<n; j++) {
1716       sum1  += v[jrow]*x[10*idx[jrow]];
1717       sum2  += v[jrow]*x[10*idx[jrow]+1];
1718       sum3  += v[jrow]*x[10*idx[jrow]+2];
1719       sum4  += v[jrow]*x[10*idx[jrow]+3];
1720       sum5  += v[jrow]*x[10*idx[jrow]+4];
1721       sum6  += v[jrow]*x[10*idx[jrow]+5];
1722       sum7  += v[jrow]*x[10*idx[jrow]+6];
1723       sum8  += v[jrow]*x[10*idx[jrow]+7];
1724       sum9  += v[jrow]*x[10*idx[jrow]+8];
1725       sum10 += v[jrow]*x[10*idx[jrow]+9];
1726       jrow++;
1727      }
1728     y[10*i]   = sum1;
1729     y[10*i+1] = sum2;
1730     y[10*i+2] = sum3;
1731     y[10*i+3] = sum4;
1732     y[10*i+4] = sum5;
1733     y[10*i+5] = sum6;
1734     y[10*i+6] = sum7;
1735     y[10*i+7] = sum8;
1736     y[10*i+8] = sum9;
1737     y[10*i+9] = sum10;
1738   }
1739 
1740   ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr);
1741   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1742   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10"
1748 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1749 {
1750   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1751   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1752   const PetscScalar *x,*v;
1753   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1754   PetscErrorCode    ierr;
1755   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1756   PetscInt          n,i,jrow,j;
1757 
1758   PetscFunctionBegin;
1759   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1760   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1761   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1762   idx  = a->j;
1763   v    = a->a;
1764   ii   = a->i;
1765 
1766   for (i=0; i<m; i++) {
1767     jrow = ii[i];
1768     n    = ii[i+1] - jrow;
1769     sum1  = 0.0;
1770     sum2  = 0.0;
1771     sum3  = 0.0;
1772     sum4  = 0.0;
1773     sum5  = 0.0;
1774     sum6  = 0.0;
1775     sum7  = 0.0;
1776     sum8  = 0.0;
1777     sum9  = 0.0;
1778     sum10 = 0.0;
1779     for (j=0; j<n; j++) {
1780       sum1  += v[jrow]*x[10*idx[jrow]];
1781       sum2  += v[jrow]*x[10*idx[jrow]+1];
1782       sum3  += v[jrow]*x[10*idx[jrow]+2];
1783       sum4  += v[jrow]*x[10*idx[jrow]+3];
1784       sum5  += v[jrow]*x[10*idx[jrow]+4];
1785       sum6  += v[jrow]*x[10*idx[jrow]+5];
1786       sum7  += v[jrow]*x[10*idx[jrow]+6];
1787       sum8  += v[jrow]*x[10*idx[jrow]+7];
1788       sum9  += v[jrow]*x[10*idx[jrow]+8];
1789       sum10 += v[jrow]*x[10*idx[jrow]+9];
1790       jrow++;
1791      }
1792     y[10*i]   += sum1;
1793     y[10*i+1] += sum2;
1794     y[10*i+2] += sum3;
1795     y[10*i+3] += sum4;
1796     y[10*i+4] += sum5;
1797     y[10*i+5] += sum6;
1798     y[10*i+6] += sum7;
1799     y[10*i+7] += sum8;
1800     y[10*i+8] += sum9;
1801     y[10*i+9] += sum10;
1802   }
1803 
1804   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1805   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1806   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1812 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1813 {
1814   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1815   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1816   const PetscScalar *x,*v;
1817   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1818   PetscErrorCode    ierr;
1819   const PetscInt    m = b->AIJ->rmap->n,*idx;
1820   PetscInt          n,i;
1821 
1822   PetscFunctionBegin;
1823   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
1824   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1825   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1826 
1827   for (i=0; i<m; i++) {
1828     idx    = a->j + a->i[i] ;
1829     v      = a->a + a->i[i] ;
1830     n      = a->i[i+1] - a->i[i];
1831     alpha1 = x[10*i];
1832     alpha2 = x[10*i+1];
1833     alpha3 = x[10*i+2];
1834     alpha4 = x[10*i+3];
1835     alpha5 = x[10*i+4];
1836     alpha6 = x[10*i+5];
1837     alpha7 = x[10*i+6];
1838     alpha8 = x[10*i+7];
1839     alpha9 = x[10*i+8];
1840     alpha10 = x[10*i+9];
1841     while (n-->0) {
1842       y[10*(*idx)]   += alpha1*(*v);
1843       y[10*(*idx)+1] += alpha2*(*v);
1844       y[10*(*idx)+2] += alpha3*(*v);
1845       y[10*(*idx)+3] += alpha4*(*v);
1846       y[10*(*idx)+4] += alpha5*(*v);
1847       y[10*(*idx)+5] += alpha6*(*v);
1848       y[10*(*idx)+6] += alpha7*(*v);
1849       y[10*(*idx)+7] += alpha8*(*v);
1850       y[10*(*idx)+8] += alpha9*(*v);
1851       y[10*(*idx)+9] += alpha10*(*v);
1852       idx++; v++;
1853     }
1854   }
1855   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1856   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1857   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1863 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1864 {
1865   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1866   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1867   const PetscScalar *x,*v;
1868   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1869   PetscErrorCode    ierr;
1870   const PetscInt    m = b->AIJ->rmap->n,*idx;
1871   PetscInt          n,i;
1872 
1873   PetscFunctionBegin;
1874   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1875   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1876   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1877   for (i=0; i<m; i++) {
1878     idx    = a->j + a->i[i] ;
1879     v      = a->a + a->i[i] ;
1880     n      = a->i[i+1] - a->i[i];
1881     alpha1 = x[10*i];
1882     alpha2 = x[10*i+1];
1883     alpha3 = x[10*i+2];
1884     alpha4 = x[10*i+3];
1885     alpha5 = x[10*i+4];
1886     alpha6 = x[10*i+5];
1887     alpha7 = x[10*i+6];
1888     alpha8 = x[10*i+7];
1889     alpha9 = x[10*i+8];
1890     alpha10 = x[10*i+9];
1891     while (n-->0) {
1892       y[10*(*idx)]   += alpha1*(*v);
1893       y[10*(*idx)+1] += alpha2*(*v);
1894       y[10*(*idx)+2] += alpha3*(*v);
1895       y[10*(*idx)+3] += alpha4*(*v);
1896       y[10*(*idx)+4] += alpha5*(*v);
1897       y[10*(*idx)+5] += alpha6*(*v);
1898       y[10*(*idx)+6] += alpha7*(*v);
1899       y[10*(*idx)+7] += alpha8*(*v);
1900       y[10*(*idx)+8] += alpha9*(*v);
1901       y[10*(*idx)+9] += alpha10*(*v);
1902       idx++; v++;
1903     }
1904   }
1905   ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr);
1906   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1907   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 
1912 /*--------------------------------------------------------------------------------------------*/
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatMult_SeqMAIJ_11"
1915 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1916 {
1917   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1918   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1919   const PetscScalar *x,*v;
1920   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1921   PetscErrorCode    ierr;
1922   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1923   PetscInt          nonzerorow=0,n,i,jrow,j;
1924 
1925   PetscFunctionBegin;
1926   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1927   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1928   idx  = a->j;
1929   v    = a->a;
1930   ii   = a->i;
1931 
1932   for (i=0; i<m; i++) {
1933     jrow = ii[i];
1934     n    = ii[i+1] - jrow;
1935     sum1  = 0.0;
1936     sum2  = 0.0;
1937     sum3  = 0.0;
1938     sum4  = 0.0;
1939     sum5  = 0.0;
1940     sum6  = 0.0;
1941     sum7  = 0.0;
1942     sum8  = 0.0;
1943     sum9  = 0.0;
1944     sum10 = 0.0;
1945     sum11 = 0.0;
1946     nonzerorow += (n>0);
1947     for (j=0; j<n; j++) {
1948       sum1  += v[jrow]*x[11*idx[jrow]];
1949       sum2  += v[jrow]*x[11*idx[jrow]+1];
1950       sum3  += v[jrow]*x[11*idx[jrow]+2];
1951       sum4  += v[jrow]*x[11*idx[jrow]+3];
1952       sum5  += v[jrow]*x[11*idx[jrow]+4];
1953       sum6  += v[jrow]*x[11*idx[jrow]+5];
1954       sum7  += v[jrow]*x[11*idx[jrow]+6];
1955       sum8  += v[jrow]*x[11*idx[jrow]+7];
1956       sum9  += v[jrow]*x[11*idx[jrow]+8];
1957       sum10 += v[jrow]*x[11*idx[jrow]+9];
1958       sum11 += v[jrow]*x[11*idx[jrow]+10];
1959       jrow++;
1960      }
1961     y[11*i]   = sum1;
1962     y[11*i+1] = sum2;
1963     y[11*i+2] = sum3;
1964     y[11*i+3] = sum4;
1965     y[11*i+4] = sum5;
1966     y[11*i+5] = sum6;
1967     y[11*i+6] = sum7;
1968     y[11*i+7] = sum8;
1969     y[11*i+8] = sum9;
1970     y[11*i+9] = sum10;
1971     y[11*i+10] = sum11;
1972   }
1973 
1974   ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr);
1975   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1976   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11"
1982 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1983 {
1984   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1985   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1986   const PetscScalar *x,*v;
1987   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1988   PetscErrorCode    ierr;
1989   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1990   PetscInt          n,i,jrow,j;
1991 
1992   PetscFunctionBegin;
1993   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1994   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1995   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1996   idx  = a->j;
1997   v    = a->a;
1998   ii   = a->i;
1999 
2000   for (i=0; i<m; i++) {
2001     jrow = ii[i];
2002     n    = ii[i+1] - jrow;
2003     sum1  = 0.0;
2004     sum2  = 0.0;
2005     sum3  = 0.0;
2006     sum4  = 0.0;
2007     sum5  = 0.0;
2008     sum6  = 0.0;
2009     sum7  = 0.0;
2010     sum8  = 0.0;
2011     sum9  = 0.0;
2012     sum10 = 0.0;
2013     sum11 = 0.0;
2014     for (j=0; j<n; j++) {
2015       sum1  += v[jrow]*x[11*idx[jrow]];
2016       sum2  += v[jrow]*x[11*idx[jrow]+1];
2017       sum3  += v[jrow]*x[11*idx[jrow]+2];
2018       sum4  += v[jrow]*x[11*idx[jrow]+3];
2019       sum5  += v[jrow]*x[11*idx[jrow]+4];
2020       sum6  += v[jrow]*x[11*idx[jrow]+5];
2021       sum7  += v[jrow]*x[11*idx[jrow]+6];
2022       sum8  += v[jrow]*x[11*idx[jrow]+7];
2023       sum9  += v[jrow]*x[11*idx[jrow]+8];
2024       sum10 += v[jrow]*x[11*idx[jrow]+9];
2025       sum11 += v[jrow]*x[11*idx[jrow]+10];
2026       jrow++;
2027      }
2028     y[11*i]   += sum1;
2029     y[11*i+1] += sum2;
2030     y[11*i+2] += sum3;
2031     y[11*i+3] += sum4;
2032     y[11*i+4] += sum5;
2033     y[11*i+5] += sum6;
2034     y[11*i+6] += sum7;
2035     y[11*i+7] += sum8;
2036     y[11*i+8] += sum9;
2037     y[11*i+9] += sum10;
2038     y[11*i+10] += sum11;
2039   }
2040 
2041   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2042   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2043   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11"
2049 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2050 {
2051   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2052   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2053   const PetscScalar *x,*v;
2054   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2055   PetscErrorCode    ierr;
2056   const PetscInt    m = b->AIJ->rmap->n,*idx;
2057   PetscInt          n,i;
2058 
2059   PetscFunctionBegin;
2060   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2061   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2062   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2063 
2064   for (i=0; i<m; i++) {
2065     idx    = a->j + a->i[i] ;
2066     v      = a->a + a->i[i] ;
2067     n      = a->i[i+1] - a->i[i];
2068     alpha1 = x[11*i];
2069     alpha2 = x[11*i+1];
2070     alpha3 = x[11*i+2];
2071     alpha4 = x[11*i+3];
2072     alpha5 = x[11*i+4];
2073     alpha6 = x[11*i+5];
2074     alpha7 = x[11*i+6];
2075     alpha8 = x[11*i+7];
2076     alpha9 = x[11*i+8];
2077     alpha10 = x[11*i+9];
2078     alpha11 = x[11*i+10];
2079     while (n-->0) {
2080       y[11*(*idx)]   += alpha1*(*v);
2081       y[11*(*idx)+1] += alpha2*(*v);
2082       y[11*(*idx)+2] += alpha3*(*v);
2083       y[11*(*idx)+3] += alpha4*(*v);
2084       y[11*(*idx)+4] += alpha5*(*v);
2085       y[11*(*idx)+5] += alpha6*(*v);
2086       y[11*(*idx)+6] += alpha7*(*v);
2087       y[11*(*idx)+7] += alpha8*(*v);
2088       y[11*(*idx)+8] += alpha9*(*v);
2089       y[11*(*idx)+9] += alpha10*(*v);
2090       y[11*(*idx)+10] += alpha11*(*v);
2091       idx++; v++;
2092     }
2093   }
2094   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2095   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2096   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2097   PetscFunctionReturn(0);
2098 }
2099 
2100 #undef __FUNCT__
2101 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11"
2102 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2103 {
2104   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2105   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2106   const PetscScalar *x,*v;
2107   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2108   PetscErrorCode    ierr;
2109   const PetscInt    m = b->AIJ->rmap->n,*idx;
2110   PetscInt          n,i;
2111 
2112   PetscFunctionBegin;
2113   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2114   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2115   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2116   for (i=0; i<m; i++) {
2117     idx    = a->j + a->i[i] ;
2118     v      = a->a + a->i[i] ;
2119     n      = a->i[i+1] - a->i[i];
2120     alpha1 = x[11*i];
2121     alpha2 = x[11*i+1];
2122     alpha3 = x[11*i+2];
2123     alpha4 = x[11*i+3];
2124     alpha5 = x[11*i+4];
2125     alpha6 = x[11*i+5];
2126     alpha7 = x[11*i+6];
2127     alpha8 = x[11*i+7];
2128     alpha9 = x[11*i+8];
2129     alpha10 = x[11*i+9];
2130     alpha11 = x[11*i+10];
2131     while (n-->0) {
2132       y[11*(*idx)]   += alpha1*(*v);
2133       y[11*(*idx)+1] += alpha2*(*v);
2134       y[11*(*idx)+2] += alpha3*(*v);
2135       y[11*(*idx)+3] += alpha4*(*v);
2136       y[11*(*idx)+4] += alpha5*(*v);
2137       y[11*(*idx)+5] += alpha6*(*v);
2138       y[11*(*idx)+6] += alpha7*(*v);
2139       y[11*(*idx)+7] += alpha8*(*v);
2140       y[11*(*idx)+8] += alpha9*(*v);
2141       y[11*(*idx)+9] += alpha10*(*v);
2142       y[11*(*idx)+10] += alpha11*(*v);
2143       idx++; v++;
2144     }
2145   }
2146   ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr);
2147   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2148   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 
2153 /*--------------------------------------------------------------------------------------------*/
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatMult_SeqMAIJ_16"
2156 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2157 {
2158   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2159   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2160   const PetscScalar *x,*v;
2161   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2162   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2163   PetscErrorCode     ierr;
2164   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2165   PetscInt           nonzerorow=0,n,i,jrow,j;
2166 
2167   PetscFunctionBegin;
2168   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2169   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2170   idx  = a->j;
2171   v    = a->a;
2172   ii   = a->i;
2173 
2174   for (i=0; i<m; i++) {
2175     jrow = ii[i];
2176     n    = ii[i+1] - jrow;
2177     sum1  = 0.0;
2178     sum2  = 0.0;
2179     sum3  = 0.0;
2180     sum4  = 0.0;
2181     sum5  = 0.0;
2182     sum6  = 0.0;
2183     sum7  = 0.0;
2184     sum8  = 0.0;
2185     sum9  = 0.0;
2186     sum10 = 0.0;
2187     sum11 = 0.0;
2188     sum12 = 0.0;
2189     sum13 = 0.0;
2190     sum14 = 0.0;
2191     sum15 = 0.0;
2192     sum16 = 0.0;
2193     nonzerorow += (n>0);
2194     for (j=0; j<n; j++) {
2195       sum1  += v[jrow]*x[16*idx[jrow]];
2196       sum2  += v[jrow]*x[16*idx[jrow]+1];
2197       sum3  += v[jrow]*x[16*idx[jrow]+2];
2198       sum4  += v[jrow]*x[16*idx[jrow]+3];
2199       sum5  += v[jrow]*x[16*idx[jrow]+4];
2200       sum6  += v[jrow]*x[16*idx[jrow]+5];
2201       sum7  += v[jrow]*x[16*idx[jrow]+6];
2202       sum8  += v[jrow]*x[16*idx[jrow]+7];
2203       sum9  += v[jrow]*x[16*idx[jrow]+8];
2204       sum10 += v[jrow]*x[16*idx[jrow]+9];
2205       sum11 += v[jrow]*x[16*idx[jrow]+10];
2206       sum12 += v[jrow]*x[16*idx[jrow]+11];
2207       sum13 += v[jrow]*x[16*idx[jrow]+12];
2208       sum14 += v[jrow]*x[16*idx[jrow]+13];
2209       sum15 += v[jrow]*x[16*idx[jrow]+14];
2210       sum16 += v[jrow]*x[16*idx[jrow]+15];
2211       jrow++;
2212      }
2213     y[16*i]    = sum1;
2214     y[16*i+1]  = sum2;
2215     y[16*i+2]  = sum3;
2216     y[16*i+3]  = sum4;
2217     y[16*i+4]  = sum5;
2218     y[16*i+5]  = sum6;
2219     y[16*i+6]  = sum7;
2220     y[16*i+7]  = sum8;
2221     y[16*i+8]  = sum9;
2222     y[16*i+9]  = sum10;
2223     y[16*i+10] = sum11;
2224     y[16*i+11] = sum12;
2225     y[16*i+12] = sum13;
2226     y[16*i+13] = sum14;
2227     y[16*i+14] = sum15;
2228     y[16*i+15] = sum16;
2229   }
2230 
2231   ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr);
2232   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2233   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
2239 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2240 {
2241   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2242   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2243   const PetscScalar *x,*v;
2244   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2245   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2246   PetscErrorCode    ierr;
2247   const PetscInt    m = b->AIJ->rmap->n,*idx;
2248   PetscInt          n,i;
2249 
2250   PetscFunctionBegin;
2251   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2252   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2253   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2254 
2255   for (i=0; i<m; i++) {
2256     idx    = a->j + a->i[i] ;
2257     v      = a->a + a->i[i] ;
2258     n      = a->i[i+1] - a->i[i];
2259     alpha1  = x[16*i];
2260     alpha2  = x[16*i+1];
2261     alpha3  = x[16*i+2];
2262     alpha4  = x[16*i+3];
2263     alpha5  = x[16*i+4];
2264     alpha6  = x[16*i+5];
2265     alpha7  = x[16*i+6];
2266     alpha8  = x[16*i+7];
2267     alpha9  = x[16*i+8];
2268     alpha10 = x[16*i+9];
2269     alpha11 = x[16*i+10];
2270     alpha12 = x[16*i+11];
2271     alpha13 = x[16*i+12];
2272     alpha14 = x[16*i+13];
2273     alpha15 = x[16*i+14];
2274     alpha16 = x[16*i+15];
2275     while (n-->0) {
2276       y[16*(*idx)]    += alpha1*(*v);
2277       y[16*(*idx)+1]  += alpha2*(*v);
2278       y[16*(*idx)+2]  += alpha3*(*v);
2279       y[16*(*idx)+3]  += alpha4*(*v);
2280       y[16*(*idx)+4]  += alpha5*(*v);
2281       y[16*(*idx)+5]  += alpha6*(*v);
2282       y[16*(*idx)+6]  += alpha7*(*v);
2283       y[16*(*idx)+7]  += alpha8*(*v);
2284       y[16*(*idx)+8]  += alpha9*(*v);
2285       y[16*(*idx)+9]  += alpha10*(*v);
2286       y[16*(*idx)+10] += alpha11*(*v);
2287       y[16*(*idx)+11] += alpha12*(*v);
2288       y[16*(*idx)+12] += alpha13*(*v);
2289       y[16*(*idx)+13] += alpha14*(*v);
2290       y[16*(*idx)+14] += alpha15*(*v);
2291       y[16*(*idx)+15] += alpha16*(*v);
2292       idx++; v++;
2293     }
2294   }
2295   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2296   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2297   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
2303 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2304 {
2305   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2306   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2307   const PetscScalar *x,*v;
2308   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2309   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2310   PetscErrorCode    ierr;
2311   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2312   PetscInt          n,i,jrow,j;
2313 
2314   PetscFunctionBegin;
2315   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2316   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2317   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2318   idx  = a->j;
2319   v    = a->a;
2320   ii   = a->i;
2321 
2322   for (i=0; i<m; i++) {
2323     jrow = ii[i];
2324     n    = ii[i+1] - jrow;
2325     sum1  = 0.0;
2326     sum2  = 0.0;
2327     sum3  = 0.0;
2328     sum4  = 0.0;
2329     sum5  = 0.0;
2330     sum6  = 0.0;
2331     sum7  = 0.0;
2332     sum8  = 0.0;
2333     sum9  = 0.0;
2334     sum10 = 0.0;
2335     sum11 = 0.0;
2336     sum12 = 0.0;
2337     sum13 = 0.0;
2338     sum14 = 0.0;
2339     sum15 = 0.0;
2340     sum16 = 0.0;
2341     for (j=0; j<n; j++) {
2342       sum1  += v[jrow]*x[16*idx[jrow]];
2343       sum2  += v[jrow]*x[16*idx[jrow]+1];
2344       sum3  += v[jrow]*x[16*idx[jrow]+2];
2345       sum4  += v[jrow]*x[16*idx[jrow]+3];
2346       sum5  += v[jrow]*x[16*idx[jrow]+4];
2347       sum6  += v[jrow]*x[16*idx[jrow]+5];
2348       sum7  += v[jrow]*x[16*idx[jrow]+6];
2349       sum8  += v[jrow]*x[16*idx[jrow]+7];
2350       sum9  += v[jrow]*x[16*idx[jrow]+8];
2351       sum10 += v[jrow]*x[16*idx[jrow]+9];
2352       sum11 += v[jrow]*x[16*idx[jrow]+10];
2353       sum12 += v[jrow]*x[16*idx[jrow]+11];
2354       sum13 += v[jrow]*x[16*idx[jrow]+12];
2355       sum14 += v[jrow]*x[16*idx[jrow]+13];
2356       sum15 += v[jrow]*x[16*idx[jrow]+14];
2357       sum16 += v[jrow]*x[16*idx[jrow]+15];
2358       jrow++;
2359      }
2360     y[16*i]    += sum1;
2361     y[16*i+1]  += sum2;
2362     y[16*i+2]  += sum3;
2363     y[16*i+3]  += sum4;
2364     y[16*i+4]  += sum5;
2365     y[16*i+5]  += sum6;
2366     y[16*i+6]  += sum7;
2367     y[16*i+7]  += sum8;
2368     y[16*i+8]  += sum9;
2369     y[16*i+9]  += sum10;
2370     y[16*i+10] += sum11;
2371     y[16*i+11] += sum12;
2372     y[16*i+12] += sum13;
2373     y[16*i+13] += sum14;
2374     y[16*i+14] += sum15;
2375     y[16*i+15] += sum16;
2376   }
2377 
2378   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2379   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2380   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2386 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2387 {
2388   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2389   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2390   const PetscScalar *x,*v;
2391   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2392   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2393   PetscErrorCode    ierr;
2394   const PetscInt    m = b->AIJ->rmap->n,*idx;
2395   PetscInt          n,i;
2396 
2397   PetscFunctionBegin;
2398   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2399   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2400   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2401   for (i=0; i<m; i++) {
2402     idx    = a->j + a->i[i] ;
2403     v      = a->a + a->i[i] ;
2404     n      = a->i[i+1] - a->i[i];
2405     alpha1 = x[16*i];
2406     alpha2 = x[16*i+1];
2407     alpha3 = x[16*i+2];
2408     alpha4 = x[16*i+3];
2409     alpha5 = x[16*i+4];
2410     alpha6 = x[16*i+5];
2411     alpha7 = x[16*i+6];
2412     alpha8 = x[16*i+7];
2413     alpha9  = x[16*i+8];
2414     alpha10 = x[16*i+9];
2415     alpha11 = x[16*i+10];
2416     alpha12 = x[16*i+11];
2417     alpha13 = x[16*i+12];
2418     alpha14 = x[16*i+13];
2419     alpha15 = x[16*i+14];
2420     alpha16 = x[16*i+15];
2421     while (n-->0) {
2422       y[16*(*idx)]   += alpha1*(*v);
2423       y[16*(*idx)+1] += alpha2*(*v);
2424       y[16*(*idx)+2] += alpha3*(*v);
2425       y[16*(*idx)+3] += alpha4*(*v);
2426       y[16*(*idx)+4] += alpha5*(*v);
2427       y[16*(*idx)+5] += alpha6*(*v);
2428       y[16*(*idx)+6] += alpha7*(*v);
2429       y[16*(*idx)+7] += alpha8*(*v);
2430       y[16*(*idx)+8]  += alpha9*(*v);
2431       y[16*(*idx)+9]  += alpha10*(*v);
2432       y[16*(*idx)+10] += alpha11*(*v);
2433       y[16*(*idx)+11] += alpha12*(*v);
2434       y[16*(*idx)+12] += alpha13*(*v);
2435       y[16*(*idx)+13] += alpha14*(*v);
2436       y[16*(*idx)+14] += alpha15*(*v);
2437       y[16*(*idx)+15] += alpha16*(*v);
2438       idx++; v++;
2439     }
2440   }
2441   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
2442   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2443   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 /*--------------------------------------------------------------------------------------------*/
2448 #undef __FUNCT__
2449 #define __FUNCT__ "MatMult_SeqMAIJ_18"
2450 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2451 {
2452   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2453   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2454   const PetscScalar *x,*v;
2455   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2456   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2457   PetscErrorCode    ierr;
2458   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2459   PetscInt          nonzerorow=0,n,i,jrow,j;
2460 
2461   PetscFunctionBegin;
2462   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2463   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2464   idx  = a->j;
2465   v    = a->a;
2466   ii   = a->i;
2467 
2468   for (i=0; i<m; i++) {
2469     jrow = ii[i];
2470     n    = ii[i+1] - jrow;
2471     sum1  = 0.0;
2472     sum2  = 0.0;
2473     sum3  = 0.0;
2474     sum4  = 0.0;
2475     sum5  = 0.0;
2476     sum6  = 0.0;
2477     sum7  = 0.0;
2478     sum8  = 0.0;
2479     sum9  = 0.0;
2480     sum10 = 0.0;
2481     sum11 = 0.0;
2482     sum12 = 0.0;
2483     sum13 = 0.0;
2484     sum14 = 0.0;
2485     sum15 = 0.0;
2486     sum16 = 0.0;
2487     sum17 = 0.0;
2488     sum18 = 0.0;
2489     nonzerorow += (n>0);
2490     for (j=0; j<n; j++) {
2491       sum1  += v[jrow]*x[18*idx[jrow]];
2492       sum2  += v[jrow]*x[18*idx[jrow]+1];
2493       sum3  += v[jrow]*x[18*idx[jrow]+2];
2494       sum4  += v[jrow]*x[18*idx[jrow]+3];
2495       sum5  += v[jrow]*x[18*idx[jrow]+4];
2496       sum6  += v[jrow]*x[18*idx[jrow]+5];
2497       sum7  += v[jrow]*x[18*idx[jrow]+6];
2498       sum8  += v[jrow]*x[18*idx[jrow]+7];
2499       sum9  += v[jrow]*x[18*idx[jrow]+8];
2500       sum10 += v[jrow]*x[18*idx[jrow]+9];
2501       sum11 += v[jrow]*x[18*idx[jrow]+10];
2502       sum12 += v[jrow]*x[18*idx[jrow]+11];
2503       sum13 += v[jrow]*x[18*idx[jrow]+12];
2504       sum14 += v[jrow]*x[18*idx[jrow]+13];
2505       sum15 += v[jrow]*x[18*idx[jrow]+14];
2506       sum16 += v[jrow]*x[18*idx[jrow]+15];
2507       sum17 += v[jrow]*x[18*idx[jrow]+16];
2508       sum18 += v[jrow]*x[18*idx[jrow]+17];
2509       jrow++;
2510      }
2511     y[18*i]    = sum1;
2512     y[18*i+1]  = sum2;
2513     y[18*i+2]  = sum3;
2514     y[18*i+3]  = sum4;
2515     y[18*i+4]  = sum5;
2516     y[18*i+5]  = sum6;
2517     y[18*i+6]  = sum7;
2518     y[18*i+7]  = sum8;
2519     y[18*i+8]  = sum9;
2520     y[18*i+9]  = sum10;
2521     y[18*i+10] = sum11;
2522     y[18*i+11] = sum12;
2523     y[18*i+12] = sum13;
2524     y[18*i+13] = sum14;
2525     y[18*i+14] = sum15;
2526     y[18*i+15] = sum16;
2527     y[18*i+16] = sum17;
2528     y[18*i+17] = sum18;
2529   }
2530 
2531   ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr);
2532   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2533   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18"
2539 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2540 {
2541   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2542   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2543   const PetscScalar *x,*v;
2544   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2545   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2546   PetscErrorCode    ierr;
2547   const PetscInt    m = b->AIJ->rmap->n,*idx;
2548   PetscInt          n,i;
2549 
2550   PetscFunctionBegin;
2551   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2552   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2553   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2554 
2555   for (i=0; i<m; i++) {
2556     idx    = a->j + a->i[i] ;
2557     v      = a->a + a->i[i] ;
2558     n      = a->i[i+1] - a->i[i];
2559     alpha1  = x[18*i];
2560     alpha2  = x[18*i+1];
2561     alpha3  = x[18*i+2];
2562     alpha4  = x[18*i+3];
2563     alpha5  = x[18*i+4];
2564     alpha6  = x[18*i+5];
2565     alpha7  = x[18*i+6];
2566     alpha8  = x[18*i+7];
2567     alpha9  = x[18*i+8];
2568     alpha10 = x[18*i+9];
2569     alpha11 = x[18*i+10];
2570     alpha12 = x[18*i+11];
2571     alpha13 = x[18*i+12];
2572     alpha14 = x[18*i+13];
2573     alpha15 = x[18*i+14];
2574     alpha16 = x[18*i+15];
2575     alpha17 = x[18*i+16];
2576     alpha18 = x[18*i+17];
2577     while (n-->0) {
2578       y[18*(*idx)]    += alpha1*(*v);
2579       y[18*(*idx)+1]  += alpha2*(*v);
2580       y[18*(*idx)+2]  += alpha3*(*v);
2581       y[18*(*idx)+3]  += alpha4*(*v);
2582       y[18*(*idx)+4]  += alpha5*(*v);
2583       y[18*(*idx)+5]  += alpha6*(*v);
2584       y[18*(*idx)+6]  += alpha7*(*v);
2585       y[18*(*idx)+7]  += alpha8*(*v);
2586       y[18*(*idx)+8]  += alpha9*(*v);
2587       y[18*(*idx)+9]  += alpha10*(*v);
2588       y[18*(*idx)+10] += alpha11*(*v);
2589       y[18*(*idx)+11] += alpha12*(*v);
2590       y[18*(*idx)+12] += alpha13*(*v);
2591       y[18*(*idx)+13] += alpha14*(*v);
2592       y[18*(*idx)+14] += alpha15*(*v);
2593       y[18*(*idx)+15] += alpha16*(*v);
2594       y[18*(*idx)+16] += alpha17*(*v);
2595       y[18*(*idx)+17] += alpha18*(*v);
2596       idx++; v++;
2597     }
2598   }
2599   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2600   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2601   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18"
2607 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2608 {
2609   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2610   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2611   const PetscScalar *x,*v;
2612   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2613   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2614   PetscErrorCode    ierr;
2615   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2616   PetscInt          n,i,jrow,j;
2617 
2618   PetscFunctionBegin;
2619   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2620   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2621   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2622   idx  = a->j;
2623   v    = a->a;
2624   ii   = a->i;
2625 
2626   for (i=0; i<m; i++) {
2627     jrow = ii[i];
2628     n    = ii[i+1] - jrow;
2629     sum1  = 0.0;
2630     sum2  = 0.0;
2631     sum3  = 0.0;
2632     sum4  = 0.0;
2633     sum5  = 0.0;
2634     sum6  = 0.0;
2635     sum7  = 0.0;
2636     sum8  = 0.0;
2637     sum9  = 0.0;
2638     sum10 = 0.0;
2639     sum11 = 0.0;
2640     sum12 = 0.0;
2641     sum13 = 0.0;
2642     sum14 = 0.0;
2643     sum15 = 0.0;
2644     sum16 = 0.0;
2645     sum17 = 0.0;
2646     sum18 = 0.0;
2647     for (j=0; j<n; j++) {
2648       sum1  += v[jrow]*x[18*idx[jrow]];
2649       sum2  += v[jrow]*x[18*idx[jrow]+1];
2650       sum3  += v[jrow]*x[18*idx[jrow]+2];
2651       sum4  += v[jrow]*x[18*idx[jrow]+3];
2652       sum5  += v[jrow]*x[18*idx[jrow]+4];
2653       sum6  += v[jrow]*x[18*idx[jrow]+5];
2654       sum7  += v[jrow]*x[18*idx[jrow]+6];
2655       sum8  += v[jrow]*x[18*idx[jrow]+7];
2656       sum9  += v[jrow]*x[18*idx[jrow]+8];
2657       sum10 += v[jrow]*x[18*idx[jrow]+9];
2658       sum11 += v[jrow]*x[18*idx[jrow]+10];
2659       sum12 += v[jrow]*x[18*idx[jrow]+11];
2660       sum13 += v[jrow]*x[18*idx[jrow]+12];
2661       sum14 += v[jrow]*x[18*idx[jrow]+13];
2662       sum15 += v[jrow]*x[18*idx[jrow]+14];
2663       sum16 += v[jrow]*x[18*idx[jrow]+15];
2664       sum17 += v[jrow]*x[18*idx[jrow]+16];
2665       sum18 += v[jrow]*x[18*idx[jrow]+17];
2666       jrow++;
2667      }
2668     y[18*i]    += sum1;
2669     y[18*i+1]  += sum2;
2670     y[18*i+2]  += sum3;
2671     y[18*i+3]  += sum4;
2672     y[18*i+4]  += sum5;
2673     y[18*i+5]  += sum6;
2674     y[18*i+6]  += sum7;
2675     y[18*i+7]  += sum8;
2676     y[18*i+8]  += sum9;
2677     y[18*i+9]  += sum10;
2678     y[18*i+10] += sum11;
2679     y[18*i+11] += sum12;
2680     y[18*i+12] += sum13;
2681     y[18*i+13] += sum14;
2682     y[18*i+14] += sum15;
2683     y[18*i+15] += sum16;
2684     y[18*i+16] += sum17;
2685     y[18*i+17] += sum18;
2686   }
2687 
2688   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2689   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2690   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 #undef __FUNCT__
2695 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18"
2696 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2697 {
2698   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2699   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2700   const PetscScalar *x,*v;
2701   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2702   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2703   PetscErrorCode    ierr;
2704   const PetscInt    m = b->AIJ->rmap->n,*idx;
2705   PetscInt          n,i;
2706 
2707   PetscFunctionBegin;
2708   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2709   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2710   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2711   for (i=0; i<m; i++) {
2712     idx    = a->j + a->i[i] ;
2713     v      = a->a + a->i[i] ;
2714     n      = a->i[i+1] - a->i[i];
2715     alpha1 = x[18*i];
2716     alpha2 = x[18*i+1];
2717     alpha3 = x[18*i+2];
2718     alpha4 = x[18*i+3];
2719     alpha5 = x[18*i+4];
2720     alpha6 = x[18*i+5];
2721     alpha7 = x[18*i+6];
2722     alpha8 = x[18*i+7];
2723     alpha9  = x[18*i+8];
2724     alpha10 = x[18*i+9];
2725     alpha11 = x[18*i+10];
2726     alpha12 = x[18*i+11];
2727     alpha13 = x[18*i+12];
2728     alpha14 = x[18*i+13];
2729     alpha15 = x[18*i+14];
2730     alpha16 = x[18*i+15];
2731     alpha17 = x[18*i+16];
2732     alpha18 = x[18*i+17];
2733     while (n-->0) {
2734       y[18*(*idx)]   += alpha1*(*v);
2735       y[18*(*idx)+1] += alpha2*(*v);
2736       y[18*(*idx)+2] += alpha3*(*v);
2737       y[18*(*idx)+3] += alpha4*(*v);
2738       y[18*(*idx)+4] += alpha5*(*v);
2739       y[18*(*idx)+5] += alpha6*(*v);
2740       y[18*(*idx)+6] += alpha7*(*v);
2741       y[18*(*idx)+7] += alpha8*(*v);
2742       y[18*(*idx)+8]  += alpha9*(*v);
2743       y[18*(*idx)+9]  += alpha10*(*v);
2744       y[18*(*idx)+10] += alpha11*(*v);
2745       y[18*(*idx)+11] += alpha12*(*v);
2746       y[18*(*idx)+12] += alpha13*(*v);
2747       y[18*(*idx)+13] += alpha14*(*v);
2748       y[18*(*idx)+14] += alpha15*(*v);
2749       y[18*(*idx)+15] += alpha16*(*v);
2750       y[18*(*idx)+16] += alpha17*(*v);
2751       y[18*(*idx)+17] += alpha18*(*v);
2752       idx++; v++;
2753     }
2754   }
2755   ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr);
2756   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2757   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2758   PetscFunctionReturn(0);
2759 }
2760 
2761 #undef __FUNCT__
2762 #define __FUNCT__ "MatMult_SeqMAIJ_N"
2763 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2764 {
2765   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2766   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2767   const PetscScalar *x,*v;
2768   PetscScalar       *y,*sums;
2769   PetscErrorCode    ierr;
2770   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2771   PetscInt          n,i,jrow,j,dof = b->dof,k;
2772 
2773   PetscFunctionBegin;
2774   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2775   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2776   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2777   idx  = a->j;
2778   v    = a->a;
2779   ii   = a->i;
2780 
2781   for (i=0; i<m; i++) {
2782     jrow = ii[i];
2783     n    = ii[i+1] - jrow;
2784     sums = y + dof*i;
2785     for (j=0; j<n; j++) {
2786       for (k=0; k<dof; k++) {
2787         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2788       }
2789       jrow++;
2790     }
2791   }
2792 
2793   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2794   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2795   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 #undef __FUNCT__
2800 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N"
2801 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2802 {
2803   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2804   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2805   const PetscScalar *x,*v;
2806   PetscScalar       *y,*sums;
2807   PetscErrorCode    ierr;
2808   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2809   PetscInt          n,i,jrow,j,dof = b->dof,k;
2810 
2811   PetscFunctionBegin;
2812   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2813   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2814   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2815   idx  = a->j;
2816   v    = a->a;
2817   ii   = a->i;
2818 
2819   for (i=0; i<m; i++) {
2820     jrow = ii[i];
2821     n    = ii[i+1] - jrow;
2822     sums = y + dof*i;
2823     for (j=0; j<n; j++) {
2824       for (k=0; k<dof; k++) {
2825         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2826       }
2827       jrow++;
2828     }
2829   }
2830 
2831   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2832   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2833   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 #undef __FUNCT__
2838 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N"
2839 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2840 {
2841   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2842   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2843   const PetscScalar *x,*v,*alpha;
2844   PetscScalar       *y;
2845   PetscErrorCode    ierr;
2846   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2847   PetscInt          n,i,k;
2848 
2849   PetscFunctionBegin;
2850   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2851   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
2852   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
2853   for (i=0; i<m; i++) {
2854     idx    = a->j + a->i[i] ;
2855     v      = a->a + a->i[i] ;
2856     n      = a->i[i+1] - a->i[i];
2857     alpha  = x + dof*i;
2858     while (n-->0) {
2859       for (k=0; k<dof; k++) {
2860         y[dof*(*idx)+k] += alpha[k]*(*v);
2861       }
2862       idx++; v++;
2863     }
2864   }
2865   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2866   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2867   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N"
2873 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2874 {
2875   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2876   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2877   const PetscScalar *x,*v,*alpha;
2878   PetscScalar       *y;
2879   PetscErrorCode    ierr;
2880   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2881   PetscInt          n,i,k;
2882 
2883   PetscFunctionBegin;
2884   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2885   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
2886   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2887   for (i=0; i<m; i++) {
2888     idx    = a->j + a->i[i] ;
2889     v      = a->a + a->i[i] ;
2890     n      = a->i[i+1] - a->i[i];
2891     alpha  = x + dof*i;
2892     while (n-->0) {
2893       for (k=0; k<dof; k++) {
2894         y[dof*(*idx)+k] += alpha[k]*(*v);
2895       }
2896       idx++; v++;
2897     }
2898   }
2899   ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr);
2900   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
2901   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2902   PetscFunctionReturn(0);
2903 }
2904 
2905 /*===================================================================================*/
2906 #undef __FUNCT__
2907 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2908 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2909 {
2910   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2911   PetscErrorCode ierr;
2912 
2913   PetscFunctionBegin;
2914   /* start the scatter */
2915   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2916   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2917   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2918   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2919   PetscFunctionReturn(0);
2920 }
2921 
2922 #undef __FUNCT__
2923 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2924 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2925 {
2926   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2927   PetscErrorCode ierr;
2928 
2929   PetscFunctionBegin;
2930   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2931   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2932   ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2933   ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2934   PetscFunctionReturn(0);
2935 }
2936 
2937 #undef __FUNCT__
2938 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2939 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2940 {
2941   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2942   PetscErrorCode ierr;
2943 
2944   PetscFunctionBegin;
2945   /* start the scatter */
2946   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2947   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2948   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2949   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2955 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2956 {
2957   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2958   PetscErrorCode ierr;
2959 
2960   PetscFunctionBegin;
2961   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2962   ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2963   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2964   ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2965   PetscFunctionReturn(0);
2966 }
2967 
2968 /* ----------------------------------------------------------------*/
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2971 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2972 {
2973   /* This routine requires testing -- but it's getting better. */
2974   PetscErrorCode     ierr;
2975   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2976   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2977   Mat                P=pp->AIJ;
2978   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2979   PetscInt           *pti,*ptj,*ptJ;
2980   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2981   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2982   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2983   MatScalar          *ca;
2984   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2985 
2986   PetscFunctionBegin;
2987   /* Start timer */
2988   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2989 
2990   /* Get ij structure of P^T */
2991   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2992 
2993   cn = pn*ppdof;
2994   /* Allocate ci array, arrays for fill computation and */
2995   /* free space for accumulating nonzero column info */
2996   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2997   ci[0] = 0;
2998 
2999   /* Work arrays for rows of P^T*A */
3000   ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr);
3001   ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr);
3002   ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr);
3003 
3004   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3005   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3006   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3007   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3008   current_space = free_space;
3009 
3010   /* Determine symbolic info for each row of C: */
3011   for (i=0;i<pn;i++) {
3012     ptnzi  = pti[i+1] - pti[i];
3013     ptJ    = ptj + pti[i];
3014     for (dof=0;dof<ppdof;dof++) {
3015       ptanzi = 0;
3016       /* Determine symbolic row of PtA: */
3017       for (j=0;j<ptnzi;j++) {
3018         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3019         arow = ptJ[j]*ppdof + dof;
3020         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3021         anzj = ai[arow+1] - ai[arow];
3022         ajj  = aj + ai[arow];
3023         for (k=0;k<anzj;k++) {
3024           if (!ptadenserow[ajj[k]]) {
3025             ptadenserow[ajj[k]]    = -1;
3026             ptasparserow[ptanzi++] = ajj[k];
3027           }
3028         }
3029       }
3030       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3031       ptaj = ptasparserow;
3032       cnzi   = 0;
3033       for (j=0;j<ptanzi;j++) {
3034         /* Get offset within block of P */
3035         pshift = *ptaj%ppdof;
3036         /* Get block row of P */
3037         prow = (*ptaj++)/ppdof; /* integer division */
3038         /* P has same number of nonzeros per row as the compressed form */
3039         pnzj = pi[prow+1] - pi[prow];
3040         pjj  = pj + pi[prow];
3041         for (k=0;k<pnzj;k++) {
3042           /* Locations in C are shifted by the offset within the block */
3043           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3044           if (!denserow[pjj[k]*ppdof+pshift]) {
3045             denserow[pjj[k]*ppdof+pshift] = -1;
3046             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3047           }
3048         }
3049       }
3050 
3051       /* sort sparserow */
3052       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
3053 
3054       /* If free space is not available, make more free space */
3055       /* Double the amount of total space in the list */
3056       if (current_space->local_remaining<cnzi) {
3057         ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
3058       }
3059 
3060       /* Copy data into free space, and zero out denserows */
3061       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
3062       current_space->array           += cnzi;
3063       current_space->local_used      += cnzi;
3064       current_space->local_remaining -= cnzi;
3065 
3066       for (j=0;j<ptanzi;j++) {
3067         ptadenserow[ptasparserow[j]] = 0;
3068       }
3069       for (j=0;j<cnzi;j++) {
3070         denserow[sparserow[j]] = 0;
3071       }
3072       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3073       /*        For now, we will recompute what is needed. */
3074       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3075     }
3076   }
3077   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3078   /* Allocate space for cj, initialize cj, and */
3079   /* destroy list of free space and other temporary array(s) */
3080   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3081   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
3082   ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr);
3083 
3084   /* Allocate space for ca */
3085   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
3086   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3087 
3088   /* put together the new matrix */
3089   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
3090 
3091   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3092   /* Since these are PETSc arrays, change flags to free them as necessary. */
3093   c          = (Mat_SeqAIJ *)((*C)->data);
3094   c->free_a  = PETSC_TRUE;
3095   c->free_ij = PETSC_TRUE;
3096   c->nonew   = 0;
3097 
3098   /* Clean up. */
3099   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3100 
3101   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 #undef __FUNCT__
3106 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
3107 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3108 {
3109   /* This routine requires testing -- first draft only */
3110   PetscErrorCode  ierr;
3111   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3112   Mat             P=pp->AIJ;
3113   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
3114   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
3115   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3116   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3117   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3118   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3119   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3120   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3121   MatScalar       *ca=c->a,*caj,*apa;
3122 
3123   PetscFunctionBegin;
3124   /* Allocate temporary array for storage of one row of A*P */
3125   ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr);
3126   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
3127   ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr);
3128   ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr);
3129 
3130   /* Clear old values in C */
3131   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
3132 
3133   for (i=0;i<am;i++) {
3134     /* Form sparse row of A*P */
3135     anzi  = ai[i+1] - ai[i];
3136     apnzj = 0;
3137     for (j=0;j<anzi;j++) {
3138       /* Get offset within block of P */
3139       pshift = *aj%ppdof;
3140       /* Get block row of P */
3141       prow   = *aj++/ppdof; /* integer division */
3142       pnzj = pi[prow+1] - pi[prow];
3143       pjj  = pj + pi[prow];
3144       paj  = pa + pi[prow];
3145       for (k=0;k<pnzj;k++) {
3146         poffset = pjj[k]*ppdof+pshift;
3147         if (!apjdense[poffset]) {
3148           apjdense[poffset] = -1;
3149           apj[apnzj++]      = poffset;
3150         }
3151         apa[poffset] += (*aa)*paj[k];
3152       }
3153       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
3154       aa++;
3155     }
3156 
3157     /* Sort the j index array for quick sparse axpy. */
3158     /* Note: a array does not need sorting as it is in dense storage locations. */
3159     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
3160 
3161     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3162     prow    = i/ppdof; /* integer division */
3163     pshift  = i%ppdof;
3164     poffset = pi[prow];
3165     pnzi = pi[prow+1] - poffset;
3166     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3167     pJ   = pj+poffset;
3168     pA   = pa+poffset;
3169     for (j=0;j<pnzi;j++) {
3170       crow   = (*pJ)*ppdof+pshift;
3171       cjj    = cj + ci[crow];
3172       caj    = ca + ci[crow];
3173       pJ++;
3174       /* Perform sparse axpy operation.  Note cjj includes apj. */
3175       for (k=0,nextap=0;nextap<apnzj;k++) {
3176         if (cjj[k]==apj[nextap]) {
3177           caj[k] += (*pA)*apa[apj[nextap++]];
3178         }
3179       }
3180       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
3181       pA++;
3182     }
3183 
3184     /* Zero the current row info for A*P */
3185     for (j=0;j<apnzj;j++) {
3186       apa[apj[j]]      = 0.;
3187       apjdense[apj[j]] = 0;
3188     }
3189   }
3190 
3191   /* Assemble the final matrix and clean up */
3192   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3193   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3194   ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr);
3195   PetscFunctionReturn(0);
3196 }
3197 
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ"
3200 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3201 {
3202   PetscErrorCode    ierr;
3203 
3204   PetscFunctionBegin;
3205   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3206   ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr);
3207   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr);
3208   PetscFunctionReturn(0);
3209 }
3210 
3211 #undef __FUNCT__
3212 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ"
3213 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3214 {
3215   PetscFunctionBegin;
3216   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3217   PetscFunctionReturn(0);
3218 }
3219 
3220 EXTERN_C_BEGIN
3221 #undef __FUNCT__
3222 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
3223 PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3224 {
3225   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3226   Mat               a = b->AIJ,B;
3227   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
3228   PetscErrorCode    ierr;
3229   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3230   PetscInt          *cols;
3231   PetscScalar       *vals;
3232 
3233   PetscFunctionBegin;
3234   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
3235   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
3236   for (i=0; i<m; i++) {
3237     nmax = PetscMax(nmax,aij->ilen[i]);
3238     for (j=0; j<dof; j++) {
3239       ilen[dof*i+j] = aij->ilen[i];
3240     }
3241   }
3242   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
3243   ierr = PetscFree(ilen);CHKERRQ(ierr);
3244   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
3245   ii   = 0;
3246   for (i=0; i<m; i++) {
3247     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3248     for (j=0; j<dof; j++) {
3249       for (k=0; k<ncols; k++) {
3250         icols[k] = dof*cols[k]+j;
3251       }
3252       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3253       ii++;
3254     }
3255     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3256   }
3257   ierr = PetscFree(icols);CHKERRQ(ierr);
3258   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3259   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3260 
3261   if (reuse == MAT_REUSE_MATRIX) {
3262     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3263   } else {
3264     *newmat = B;
3265   }
3266   PetscFunctionReturn(0);
3267 }
3268 EXTERN_C_END
3269 
3270 #include "../src/mat/impls/aij/mpi/mpiaij.h"
3271 
3272 EXTERN_C_BEGIN
3273 #undef __FUNCT__
3274 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
3275 PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3276 {
3277   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3278   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3279   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3280   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3281   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3282   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3283   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3284   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3285   PetscInt          rstart,cstart,*garray,ii,k;
3286   PetscErrorCode    ierr;
3287   PetscScalar       *vals,*ovals;
3288 
3289   PetscFunctionBegin;
3290   ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr);
3291   for (i=0; i<A->rmap->n/dof; i++) {
3292     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3293     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3294     for (j=0; j<dof; j++) {
3295       dnz[dof*i+j] = AIJ->ilen[i];
3296       onz[dof*i+j] = OAIJ->ilen[i];
3297     }
3298   }
3299   ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr);
3300   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
3301 
3302   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
3303   rstart = dof*maij->A->rmap->rstart;
3304   cstart = dof*maij->A->cmap->rstart;
3305   garray = mpiaij->garray;
3306 
3307   ii = rstart;
3308   for (i=0; i<A->rmap->n/dof; i++) {
3309     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3310     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3311     for (j=0; j<dof; j++) {
3312       for (k=0; k<ncols; k++) {
3313         icols[k] = cstart + dof*cols[k]+j;
3314       }
3315       for (k=0; k<oncols; k++) {
3316         oicols[k] = dof*garray[ocols[k]]+j;
3317       }
3318       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
3319       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
3320       ii++;
3321     }
3322     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
3323     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
3324   }
3325   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
3326 
3327   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329 
3330   if (reuse == MAT_REUSE_MATRIX) {
3331     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3332     ((PetscObject)A)->refct = 1;
3333     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3334     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3335   } else {
3336     *newmat = B;
3337   }
3338   PetscFunctionReturn(0);
3339 }
3340 EXTERN_C_END
3341 
3342 
3343 /* ---------------------------------------------------------------------------------- */
3344 #undef __FUNCT__
3345 #define __FUNCT__ "MatCreateMAIJ"
3346 /*@C
3347   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3348   operations for multicomponent problems.  It interpolates each component the same
3349   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3350   and MATMPIAIJ for distributed matrices.
3351 
3352   Collective
3353 
3354   Input Parameters:
3355 + A - the AIJ matrix describing the action on blocks
3356 - dof - the block size (number of components per node)
3357 
3358   Output Parameter:
3359 . maij - the new MAIJ matrix
3360 
3361   Operations provided:
3362 + MatMult
3363 . MatMultTranspose
3364 . MatMultAdd
3365 . MatMultTransposeAdd
3366 - MatView
3367 
3368   Level: advanced
3369 
3370 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3371 @*/
3372 PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3373 {
3374   PetscErrorCode ierr;
3375   PetscMPIInt    size;
3376   PetscInt       n;
3377   Mat            B;
3378 
3379   PetscFunctionBegin;
3380   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3381 
3382   if (dof == 1) {
3383     *maij = A;
3384   } else {
3385     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3386     ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr);
3387     ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr);
3388     ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr);
3389     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3390     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3391     B->assembled    = PETSC_TRUE;
3392 
3393     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3394     if (size == 1) {
3395       Mat_SeqMAIJ    *b;
3396 
3397       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
3398       B->ops->destroy = MatDestroy_SeqMAIJ;
3399       B->ops->view    = MatView_SeqMAIJ;
3400       b      = (Mat_SeqMAIJ*)B->data;
3401       b->dof = dof;
3402       b->AIJ = A;
3403       if (dof == 2) {
3404         B->ops->mult             = MatMult_SeqMAIJ_2;
3405         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3406         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3407         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3408       } else if (dof == 3) {
3409         B->ops->mult             = MatMult_SeqMAIJ_3;
3410         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3411         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3412         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3413       } else if (dof == 4) {
3414         B->ops->mult             = MatMult_SeqMAIJ_4;
3415         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3416         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3417         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3418       } else if (dof == 5) {
3419         B->ops->mult             = MatMult_SeqMAIJ_5;
3420         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3421         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3422         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3423       } else if (dof == 6) {
3424         B->ops->mult             = MatMult_SeqMAIJ_6;
3425         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3426         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3427         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3428       } else if (dof == 7) {
3429         B->ops->mult             = MatMult_SeqMAIJ_7;
3430         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3431         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3432         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3433       } else if (dof == 8) {
3434         B->ops->mult             = MatMult_SeqMAIJ_8;
3435         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3436         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3437         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3438       } else if (dof == 9) {
3439         B->ops->mult             = MatMult_SeqMAIJ_9;
3440         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3441         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3442         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3443       } else if (dof == 10) {
3444         B->ops->mult             = MatMult_SeqMAIJ_10;
3445         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3446         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3447         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3448       } else if (dof == 11) {
3449         B->ops->mult             = MatMult_SeqMAIJ_11;
3450         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3451         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3452         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3453       } else if (dof == 16) {
3454         B->ops->mult             = MatMult_SeqMAIJ_16;
3455         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3456         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3457         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3458       } else if (dof == 18) {
3459         B->ops->mult             = MatMult_SeqMAIJ_18;
3460         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3461         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3462         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3463       } else {
3464         B->ops->mult             = MatMult_SeqMAIJ_N;
3465         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3466         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3467         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3468       }
3469       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3470       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3471       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
3472     } else {
3473       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3474       Mat_MPIMAIJ *b;
3475       IS          from,to;
3476       Vec         gvec;
3477 
3478       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
3479       B->ops->destroy = MatDestroy_MPIMAIJ;
3480       B->ops->view    = MatView_MPIMAIJ;
3481       b      = (Mat_MPIMAIJ*)B->data;
3482       b->dof = dof;
3483       b->A   = A;
3484       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
3485       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
3486 
3487       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
3488       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
3489       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
3490 
3491       /* create two temporary Index sets for build scatter gather */
3492       ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
3493       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
3494 
3495       /* create temporary global vector to generate scatter context */
3496       ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
3497       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
3498 
3499       /* generate the scatter context */
3500       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
3501 
3502       ierr = ISDestroy(from);CHKERRQ(ierr);
3503       ierr = ISDestroy(to);CHKERRQ(ierr);
3504       ierr = VecDestroy(gvec);CHKERRQ(ierr);
3505 
3506       B->ops->mult             = MatMult_MPIMAIJ_dof;
3507       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3508       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3509       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3510       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3511       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3512       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
3513     }
3514     *maij = B;
3515     ierr = MatView_Private(B);CHKERRQ(ierr);
3516   }
3517   PetscFunctionReturn(0);
3518 }
3519