xref: /petsc/src/mat/impls/maij/maij.c (revision 593c1905dad567ae273cd337623ecaef31b3e817)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the MAIJ  matrix storage format.
5   This format is used for restriction and interpolation operations for
6   multicomponent problems. It interpolates each component the same way
7   independently.
8 
9      We provide:
10          MatMult()
11          MatMultTranspose()
12          MatMultTransposeAdd()
13          MatMultAdd()
14           and
15          MatCreateMAIJ(Mat,dof,Mat*)
16 
17      This single directory handles both the sequential and parallel codes
18 */
19 
20 #include "src/mat/impls/maij/maij.h"
21 #include "src/mat/utils/freespace.h"
22 #include "private/vecimpl.h"
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatMAIJGetAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27 {
28   PetscErrorCode ierr;
29   PetscTruth     ismpimaij,isseqmaij;
30 
31   PetscFunctionBegin;
32   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
33   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
34   if (ismpimaij) {
35     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
36 
37     *B = b->A;
38   } else if (isseqmaij) {
39     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
40 
41     *B = b->AIJ;
42   } else {
43     *B = A;
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMAIJRedimension"
50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51 {
52   PetscErrorCode ierr;
53   Mat            Aij;
54 
55   PetscFunctionBegin;
56   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
57   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatDestroy_SeqMAIJ"
63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64 {
65   PetscErrorCode ierr;
66   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
67 
68   PetscFunctionBegin;
69   if (b->AIJ) {
70     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
71   }
72   ierr = PetscFree(b);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatView_SeqMAIJ"
78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79 {
80   PetscErrorCode ierr;
81   Mat            B;
82 
83   PetscFunctionBegin;
84   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85   ierr = MatView(B,viewer);CHKERRQ(ierr);
86   ierr = MatDestroy(B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatView_MPIMAIJ"
92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93 {
94   PetscErrorCode ierr;
95   Mat            B;
96 
97   PetscFunctionBegin;
98   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99   ierr = MatView(B,viewer);CHKERRQ(ierr);
100   ierr = MatDestroy(B);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatDestroy_MPIMAIJ"
106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107 {
108   PetscErrorCode ierr;
109   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
110 
111   PetscFunctionBegin;
112   if (b->AIJ) {
113     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
114   }
115   if (b->OAIJ) {
116     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
117   }
118   if (b->A) {
119     ierr = MatDestroy(b->A);CHKERRQ(ierr);
120   }
121   if (b->ctx) {
122     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
123   }
124   if (b->w) {
125     ierr = VecDestroy(b->w);CHKERRQ(ierr);
126   }
127   ierr = PetscFree(b);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 /*MC
132   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
133   multicomponent problems, interpolating or restricting each component the same way independently.
134   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
135 
136   Operations provided:
137 . MatMult
138 . MatMultTranspose
139 . MatMultAdd
140 . MatMultTransposeAdd
141 
142   Level: advanced
143 
144 .seealso: MatCreateSeqDense
145 M*/
146 
147 EXTERN_C_BEGIN
148 #undef __FUNCT__
149 #define __FUNCT__ "MatCreate_MAIJ"
150 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
151 {
152   PetscErrorCode ierr;
153   Mat_MPIMAIJ    *b;
154 
155   PetscFunctionBegin;
156   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
157   A->data  = (void*)b;
158   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
159   A->factor           = 0;
160   A->mapping          = 0;
161 
162   b->AIJ  = 0;
163   b->dof  = 0;
164   b->OAIJ = 0;
165   b->ctx  = 0;
166   b->w    = 0;
167   ierr = PetscObjectChangeTypeName((PetscObject)A,MATMAIJ);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 EXTERN_C_END
171 
172 /* --------------------------------------------------------------------------------------*/
173 #undef __FUNCT__
174 #define __FUNCT__ "MatMult_SeqMAIJ_2"
175 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
176 {
177   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
178   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
179   PetscScalar    *x,*y,*v,sum1, sum2;
180   PetscErrorCode ierr;
181   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
182   PetscInt       n,i,jrow,j;
183 
184   PetscFunctionBegin;
185   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
186   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
187   idx  = a->j;
188   v    = a->a;
189   ii   = a->i;
190 
191   for (i=0; i<m; i++) {
192     jrow = ii[i];
193     n    = ii[i+1] - jrow;
194     sum1  = 0.0;
195     sum2  = 0.0;
196     for (j=0; j<n; j++) {
197       sum1 += v[jrow]*x[2*idx[jrow]];
198       sum2 += v[jrow]*x[2*idx[jrow]+1];
199       jrow++;
200      }
201     y[2*i]   = sum1;
202     y[2*i+1] = sum2;
203   }
204 
205   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
206   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
207   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
213 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
214 {
215   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
216   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
217   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
218   PetscErrorCode ierr;
219   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
220 
221   PetscFunctionBegin;
222   ierr = VecSet(yy,zero);CHKERRQ(ierr);
223   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
224   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
225 
226   for (i=0; i<m; i++) {
227     idx    = a->j + a->i[i] ;
228     v      = a->a + a->i[i] ;
229     n      = a->i[i+1] - a->i[i];
230     alpha1 = x[2*i];
231     alpha2 = x[2*i+1];
232     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
233   }
234   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
235   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
236   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
242 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
243 {
244   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
245   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
246   PetscScalar    *x,*y,*v,sum1, sum2;
247   PetscErrorCode ierr;
248   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
249   PetscInt       n,i,jrow,j;
250 
251   PetscFunctionBegin;
252   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
253   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
254   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
255   idx  = a->j;
256   v    = a->a;
257   ii   = a->i;
258 
259   for (i=0; i<m; i++) {
260     jrow = ii[i];
261     n    = ii[i+1] - jrow;
262     sum1  = 0.0;
263     sum2  = 0.0;
264     for (j=0; j<n; j++) {
265       sum1 += v[jrow]*x[2*idx[jrow]];
266       sum2 += v[jrow]*x[2*idx[jrow]+1];
267       jrow++;
268      }
269     y[2*i]   += sum1;
270     y[2*i+1] += sum2;
271   }
272 
273   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
274   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
275   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 #undef __FUNCT__
279 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
280 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
281 {
282   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
283   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
284   PetscScalar    *x,*y,*v,alpha1,alpha2;
285   PetscErrorCode ierr;
286   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
287 
288   PetscFunctionBegin;
289   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
290   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
291   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
292 
293   for (i=0; i<m; i++) {
294     idx   = a->j + a->i[i] ;
295     v     = a->a + a->i[i] ;
296     n     = a->i[i+1] - a->i[i];
297     alpha1 = x[2*i];
298     alpha2 = x[2*i+1];
299     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
300   }
301   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
302   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
303   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 /* --------------------------------------------------------------------------------------*/
307 #undef __FUNCT__
308 #define __FUNCT__ "MatMult_SeqMAIJ_3"
309 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
310 {
311   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
312   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
313   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
314   PetscErrorCode ierr;
315   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
316   PetscInt       n,i,jrow,j;
317 
318   PetscFunctionBegin;
319   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
320   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
321   idx  = a->j;
322   v    = a->a;
323   ii   = a->i;
324 
325   for (i=0; i<m; i++) {
326     jrow = ii[i];
327     n    = ii[i+1] - jrow;
328     sum1  = 0.0;
329     sum2  = 0.0;
330     sum3  = 0.0;
331     for (j=0; j<n; j++) {
332       sum1 += v[jrow]*x[3*idx[jrow]];
333       sum2 += v[jrow]*x[3*idx[jrow]+1];
334       sum3 += v[jrow]*x[3*idx[jrow]+2];
335       jrow++;
336      }
337     y[3*i]   = sum1;
338     y[3*i+1] = sum2;
339     y[3*i+2] = sum3;
340   }
341 
342   ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr);
343   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
350 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
351 {
352   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
353   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
354   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
355   PetscErrorCode ierr;
356   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
357 
358   PetscFunctionBegin;
359   ierr = VecSet(yy,zero);CHKERRQ(ierr);
360   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
361   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
362 
363   for (i=0; i<m; i++) {
364     idx    = a->j + a->i[i];
365     v      = a->a + a->i[i];
366     n      = a->i[i+1] - a->i[i];
367     alpha1 = x[3*i];
368     alpha2 = x[3*i+1];
369     alpha3 = x[3*i+2];
370     while (n-->0) {
371       y[3*(*idx)]   += alpha1*(*v);
372       y[3*(*idx)+1] += alpha2*(*v);
373       y[3*(*idx)+2] += alpha3*(*v);
374       idx++; v++;
375     }
376   }
377   ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr);
378   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
379   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
380   PetscFunctionReturn(0);
381 }
382 
383 #undef __FUNCT__
384 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
385 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
386 {
387   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
388   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
389   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
390   PetscErrorCode ierr;
391   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
392   PetscInt       n,i,jrow,j;
393 
394   PetscFunctionBegin;
395   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
396   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
397   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
398   idx  = a->j;
399   v    = a->a;
400   ii   = a->i;
401 
402   for (i=0; i<m; i++) {
403     jrow = ii[i];
404     n    = ii[i+1] - jrow;
405     sum1  = 0.0;
406     sum2  = 0.0;
407     sum3  = 0.0;
408     for (j=0; j<n; j++) {
409       sum1 += v[jrow]*x[3*idx[jrow]];
410       sum2 += v[jrow]*x[3*idx[jrow]+1];
411       sum3 += v[jrow]*x[3*idx[jrow]+2];
412       jrow++;
413      }
414     y[3*i]   += sum1;
415     y[3*i+1] += sum2;
416     y[3*i+2] += sum3;
417   }
418 
419   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
420   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
421   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 #undef __FUNCT__
425 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
426 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
427 {
428   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
430   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
431   PetscErrorCode ierr;
432   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
433 
434   PetscFunctionBegin;
435   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
436   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
437   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
438   for (i=0; i<m; i++) {
439     idx    = a->j + a->i[i] ;
440     v      = a->a + a->i[i] ;
441     n      = a->i[i+1] - a->i[i];
442     alpha1 = x[3*i];
443     alpha2 = x[3*i+1];
444     alpha3 = x[3*i+2];
445     while (n-->0) {
446       y[3*(*idx)]   += alpha1*(*v);
447       y[3*(*idx)+1] += alpha2*(*v);
448       y[3*(*idx)+2] += alpha3*(*v);
449       idx++; v++;
450     }
451   }
452   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
453   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
454   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 /* ------------------------------------------------------------------------------*/
459 #undef __FUNCT__
460 #define __FUNCT__ "MatMult_SeqMAIJ_4"
461 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
462 {
463   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
464   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
465   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
466   PetscErrorCode ierr;
467   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
468   PetscInt       n,i,jrow,j;
469 
470   PetscFunctionBegin;
471   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
472   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
473   idx  = a->j;
474   v    = a->a;
475   ii   = a->i;
476 
477   for (i=0; i<m; i++) {
478     jrow = ii[i];
479     n    = ii[i+1] - jrow;
480     sum1  = 0.0;
481     sum2  = 0.0;
482     sum3  = 0.0;
483     sum4  = 0.0;
484     for (j=0; j<n; j++) {
485       sum1 += v[jrow]*x[4*idx[jrow]];
486       sum2 += v[jrow]*x[4*idx[jrow]+1];
487       sum3 += v[jrow]*x[4*idx[jrow]+2];
488       sum4 += v[jrow]*x[4*idx[jrow]+3];
489       jrow++;
490      }
491     y[4*i]   = sum1;
492     y[4*i+1] = sum2;
493     y[4*i+2] = sum3;
494     y[4*i+3] = sum4;
495   }
496 
497   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
498   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
499   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
505 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
506 {
507   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
508   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
509   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
510   PetscErrorCode ierr;
511   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
512 
513   PetscFunctionBegin;
514   ierr = VecSet(yy,zero);CHKERRQ(ierr);
515   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
516   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
517   for (i=0; i<m; i++) {
518     idx    = a->j + a->i[i] ;
519     v      = a->a + a->i[i] ;
520     n      = a->i[i+1] - a->i[i];
521     alpha1 = x[4*i];
522     alpha2 = x[4*i+1];
523     alpha3 = x[4*i+2];
524     alpha4 = x[4*i+3];
525     while (n-->0) {
526       y[4*(*idx)]   += alpha1*(*v);
527       y[4*(*idx)+1] += alpha2*(*v);
528       y[4*(*idx)+2] += alpha3*(*v);
529       y[4*(*idx)+3] += alpha4*(*v);
530       idx++; v++;
531     }
532   }
533   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
534   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
535   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
541 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
542 {
543   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
544   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
545   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
546   PetscErrorCode ierr;
547   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
548   PetscInt       n,i,jrow,j;
549 
550   PetscFunctionBegin;
551   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
552   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
553   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
554   idx  = a->j;
555   v    = a->a;
556   ii   = a->i;
557 
558   for (i=0; i<m; i++) {
559     jrow = ii[i];
560     n    = ii[i+1] - jrow;
561     sum1  = 0.0;
562     sum2  = 0.0;
563     sum3  = 0.0;
564     sum4  = 0.0;
565     for (j=0; j<n; j++) {
566       sum1 += v[jrow]*x[4*idx[jrow]];
567       sum2 += v[jrow]*x[4*idx[jrow]+1];
568       sum3 += v[jrow]*x[4*idx[jrow]+2];
569       sum4 += v[jrow]*x[4*idx[jrow]+3];
570       jrow++;
571      }
572     y[4*i]   += sum1;
573     y[4*i+1] += sum2;
574     y[4*i+2] += sum3;
575     y[4*i+3] += sum4;
576   }
577 
578   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
579   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
580   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
583 #undef __FUNCT__
584 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
585 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
586 {
587   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
588   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
589   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
590   PetscErrorCode ierr;
591   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
592 
593   PetscFunctionBegin;
594   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
595   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
596   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
597 
598   for (i=0; i<m; i++) {
599     idx    = a->j + a->i[i] ;
600     v      = a->a + a->i[i] ;
601     n      = a->i[i+1] - a->i[i];
602     alpha1 = x[4*i];
603     alpha2 = x[4*i+1];
604     alpha3 = x[4*i+2];
605     alpha4 = x[4*i+3];
606     while (n-->0) {
607       y[4*(*idx)]   += alpha1*(*v);
608       y[4*(*idx)+1] += alpha2*(*v);
609       y[4*(*idx)+2] += alpha3*(*v);
610       y[4*(*idx)+3] += alpha4*(*v);
611       idx++; v++;
612     }
613   }
614   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
615   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
616   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 /* ------------------------------------------------------------------------------*/
620 
621 #undef __FUNCT__
622 #define __FUNCT__ "MatMult_SeqMAIJ_5"
623 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
624 {
625   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
626   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
627   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
628   PetscErrorCode ierr;
629   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
630   PetscInt       n,i,jrow,j;
631 
632   PetscFunctionBegin;
633   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
634   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
635   idx  = a->j;
636   v    = a->a;
637   ii   = a->i;
638 
639   for (i=0; i<m; i++) {
640     jrow = ii[i];
641     n    = ii[i+1] - jrow;
642     sum1  = 0.0;
643     sum2  = 0.0;
644     sum3  = 0.0;
645     sum4  = 0.0;
646     sum5  = 0.0;
647     for (j=0; j<n; j++) {
648       sum1 += v[jrow]*x[5*idx[jrow]];
649       sum2 += v[jrow]*x[5*idx[jrow]+1];
650       sum3 += v[jrow]*x[5*idx[jrow]+2];
651       sum4 += v[jrow]*x[5*idx[jrow]+3];
652       sum5 += v[jrow]*x[5*idx[jrow]+4];
653       jrow++;
654      }
655     y[5*i]   = sum1;
656     y[5*i+1] = sum2;
657     y[5*i+2] = sum3;
658     y[5*i+3] = sum4;
659     y[5*i+4] = sum5;
660   }
661 
662   ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr);
663   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
664   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
670 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
671 {
672   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
673   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
674   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
675   PetscErrorCode ierr;
676   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
677 
678   PetscFunctionBegin;
679   ierr = VecSet(yy,zero);CHKERRQ(ierr);
680   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
681   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
682 
683   for (i=0; i<m; i++) {
684     idx    = a->j + a->i[i] ;
685     v      = a->a + a->i[i] ;
686     n      = a->i[i+1] - a->i[i];
687     alpha1 = x[5*i];
688     alpha2 = x[5*i+1];
689     alpha3 = x[5*i+2];
690     alpha4 = x[5*i+3];
691     alpha5 = x[5*i+4];
692     while (n-->0) {
693       y[5*(*idx)]   += alpha1*(*v);
694       y[5*(*idx)+1] += alpha2*(*v);
695       y[5*(*idx)+2] += alpha3*(*v);
696       y[5*(*idx)+3] += alpha4*(*v);
697       y[5*(*idx)+4] += alpha5*(*v);
698       idx++; v++;
699     }
700   }
701   ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr);
702   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
703   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
709 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
710 {
711   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
712   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
713   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
714   PetscErrorCode ierr;
715   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
716   PetscInt       n,i,jrow,j;
717 
718   PetscFunctionBegin;
719   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
720   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
721   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
722   idx  = a->j;
723   v    = a->a;
724   ii   = a->i;
725 
726   for (i=0; i<m; i++) {
727     jrow = ii[i];
728     n    = ii[i+1] - jrow;
729     sum1  = 0.0;
730     sum2  = 0.0;
731     sum3  = 0.0;
732     sum4  = 0.0;
733     sum5  = 0.0;
734     for (j=0; j<n; j++) {
735       sum1 += v[jrow]*x[5*idx[jrow]];
736       sum2 += v[jrow]*x[5*idx[jrow]+1];
737       sum3 += v[jrow]*x[5*idx[jrow]+2];
738       sum4 += v[jrow]*x[5*idx[jrow]+3];
739       sum5 += v[jrow]*x[5*idx[jrow]+4];
740       jrow++;
741      }
742     y[5*i]   += sum1;
743     y[5*i+1] += sum2;
744     y[5*i+2] += sum3;
745     y[5*i+3] += sum4;
746     y[5*i+4] += sum5;
747   }
748 
749   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
750   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
751   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
752   PetscFunctionReturn(0);
753 }
754 
755 #undef __FUNCT__
756 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
757 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
758 {
759   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
760   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
761   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
762   PetscErrorCode ierr;
763   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
764 
765   PetscFunctionBegin;
766   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
767   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
768   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
769 
770   for (i=0; i<m; i++) {
771     idx    = a->j + a->i[i] ;
772     v      = a->a + a->i[i] ;
773     n      = a->i[i+1] - a->i[i];
774     alpha1 = x[5*i];
775     alpha2 = x[5*i+1];
776     alpha3 = x[5*i+2];
777     alpha4 = x[5*i+3];
778     alpha5 = x[5*i+4];
779     while (n-->0) {
780       y[5*(*idx)]   += alpha1*(*v);
781       y[5*(*idx)+1] += alpha2*(*v);
782       y[5*(*idx)+2] += alpha3*(*v);
783       y[5*(*idx)+3] += alpha4*(*v);
784       y[5*(*idx)+4] += alpha5*(*v);
785       idx++; v++;
786     }
787   }
788   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
789   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
790   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 
794 /* ------------------------------------------------------------------------------*/
795 #undef __FUNCT__
796 #define __FUNCT__ "MatMult_SeqMAIJ_6"
797 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
798 {
799   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
800   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
801   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
802   PetscErrorCode ierr;
803   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
804   PetscInt       n,i,jrow,j;
805 
806   PetscFunctionBegin;
807   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
808   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
809   idx  = a->j;
810   v    = a->a;
811   ii   = a->i;
812 
813   for (i=0; i<m; i++) {
814     jrow = ii[i];
815     n    = ii[i+1] - jrow;
816     sum1  = 0.0;
817     sum2  = 0.0;
818     sum3  = 0.0;
819     sum4  = 0.0;
820     sum5  = 0.0;
821     sum6  = 0.0;
822     for (j=0; j<n; j++) {
823       sum1 += v[jrow]*x[6*idx[jrow]];
824       sum2 += v[jrow]*x[6*idx[jrow]+1];
825       sum3 += v[jrow]*x[6*idx[jrow]+2];
826       sum4 += v[jrow]*x[6*idx[jrow]+3];
827       sum5 += v[jrow]*x[6*idx[jrow]+4];
828       sum6 += v[jrow]*x[6*idx[jrow]+5];
829       jrow++;
830      }
831     y[6*i]   = sum1;
832     y[6*i+1] = sum2;
833     y[6*i+2] = sum3;
834     y[6*i+3] = sum4;
835     y[6*i+4] = sum5;
836     y[6*i+5] = sum6;
837   }
838 
839   ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr);
840   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
841   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
842   PetscFunctionReturn(0);
843 }
844 
845 #undef __FUNCT__
846 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
847 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
848 {
849   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
850   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
851   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
852   PetscErrorCode ierr;
853   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
854 
855   PetscFunctionBegin;
856   ierr = VecSet(yy,zero);CHKERRQ(ierr);
857   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
858   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
859 
860   for (i=0; i<m; i++) {
861     idx    = a->j + a->i[i] ;
862     v      = a->a + a->i[i] ;
863     n      = a->i[i+1] - a->i[i];
864     alpha1 = x[6*i];
865     alpha2 = x[6*i+1];
866     alpha3 = x[6*i+2];
867     alpha4 = x[6*i+3];
868     alpha5 = x[6*i+4];
869     alpha6 = x[6*i+5];
870     while (n-->0) {
871       y[6*(*idx)]   += alpha1*(*v);
872       y[6*(*idx)+1] += alpha2*(*v);
873       y[6*(*idx)+2] += alpha3*(*v);
874       y[6*(*idx)+3] += alpha4*(*v);
875       y[6*(*idx)+4] += alpha5*(*v);
876       y[6*(*idx)+5] += alpha6*(*v);
877       idx++; v++;
878     }
879   }
880   ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr);
881   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
882   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
888 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
889 {
890   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
891   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
892   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
893   PetscErrorCode ierr;
894   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
895   PetscInt       n,i,jrow,j;
896 
897   PetscFunctionBegin;
898   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
899   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
900   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
901   idx  = a->j;
902   v    = a->a;
903   ii   = a->i;
904 
905   for (i=0; i<m; i++) {
906     jrow = ii[i];
907     n    = ii[i+1] - jrow;
908     sum1  = 0.0;
909     sum2  = 0.0;
910     sum3  = 0.0;
911     sum4  = 0.0;
912     sum5  = 0.0;
913     sum6  = 0.0;
914     for (j=0; j<n; j++) {
915       sum1 += v[jrow]*x[6*idx[jrow]];
916       sum2 += v[jrow]*x[6*idx[jrow]+1];
917       sum3 += v[jrow]*x[6*idx[jrow]+2];
918       sum4 += v[jrow]*x[6*idx[jrow]+3];
919       sum5 += v[jrow]*x[6*idx[jrow]+4];
920       sum6 += v[jrow]*x[6*idx[jrow]+5];
921       jrow++;
922      }
923     y[6*i]   += sum1;
924     y[6*i+1] += sum2;
925     y[6*i+2] += sum3;
926     y[6*i+3] += sum4;
927     y[6*i+4] += sum5;
928     y[6*i+5] += sum6;
929   }
930 
931   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
932   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
933   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
934   PetscFunctionReturn(0);
935 }
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
939 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
940 {
941   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
942   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
943   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
944   PetscErrorCode ierr;
945   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
946 
947   PetscFunctionBegin;
948   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
949   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
950   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
951 
952   for (i=0; i<m; i++) {
953     idx    = a->j + a->i[i] ;
954     v      = a->a + a->i[i] ;
955     n      = a->i[i+1] - a->i[i];
956     alpha1 = x[6*i];
957     alpha2 = x[6*i+1];
958     alpha3 = x[6*i+2];
959     alpha4 = x[6*i+3];
960     alpha5 = x[6*i+4];
961     alpha6 = x[6*i+5];
962     while (n-->0) {
963       y[6*(*idx)]   += alpha1*(*v);
964       y[6*(*idx)+1] += alpha2*(*v);
965       y[6*(*idx)+2] += alpha3*(*v);
966       y[6*(*idx)+3] += alpha4*(*v);
967       y[6*(*idx)+4] += alpha5*(*v);
968       y[6*(*idx)+5] += alpha6*(*v);
969       idx++; v++;
970     }
971   }
972   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
973   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
974   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 /* ------------------------------------------------------------------------------*/
979 #undef __FUNCT__
980 #define __FUNCT__ "MatMult_SeqMAIJ_7"
981 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
982 {
983   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
984   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
985   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
986   PetscErrorCode ierr;
987   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
988   PetscInt       n,i,jrow,j;
989 
990   PetscFunctionBegin;
991   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
992   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
993   idx  = a->j;
994   v    = a->a;
995   ii   = a->i;
996 
997   for (i=0; i<m; i++) {
998     jrow = ii[i];
999     n    = ii[i+1] - jrow;
1000     sum1  = 0.0;
1001     sum2  = 0.0;
1002     sum3  = 0.0;
1003     sum4  = 0.0;
1004     sum5  = 0.0;
1005     sum6  = 0.0;
1006     sum7  = 0.0;
1007     for (j=0; j<n; j++) {
1008       sum1 += v[jrow]*x[7*idx[jrow]];
1009       sum2 += v[jrow]*x[7*idx[jrow]+1];
1010       sum3 += v[jrow]*x[7*idx[jrow]+2];
1011       sum4 += v[jrow]*x[7*idx[jrow]+3];
1012       sum5 += v[jrow]*x[7*idx[jrow]+4];
1013       sum6 += v[jrow]*x[7*idx[jrow]+5];
1014       sum7 += v[jrow]*x[7*idx[jrow]+6];
1015       jrow++;
1016      }
1017     y[7*i]   = sum1;
1018     y[7*i+1] = sum2;
1019     y[7*i+2] = sum3;
1020     y[7*i+3] = sum4;
1021     y[7*i+4] = sum5;
1022     y[7*i+5] = sum6;
1023     y[7*i+6] = sum7;
1024   }
1025 
1026   ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr);
1027   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1028   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1034 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1035 {
1036   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1037   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1038   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1039   PetscErrorCode ierr;
1040   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1041 
1042   PetscFunctionBegin;
1043   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1044   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1045   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1046 
1047   for (i=0; i<m; i++) {
1048     idx    = a->j + a->i[i] ;
1049     v      = a->a + a->i[i] ;
1050     n      = a->i[i+1] - a->i[i];
1051     alpha1 = x[7*i];
1052     alpha2 = x[7*i+1];
1053     alpha3 = x[7*i+2];
1054     alpha4 = x[7*i+3];
1055     alpha5 = x[7*i+4];
1056     alpha6 = x[7*i+5];
1057     alpha7 = x[7*i+6];
1058     while (n-->0) {
1059       y[7*(*idx)]   += alpha1*(*v);
1060       y[7*(*idx)+1] += alpha2*(*v);
1061       y[7*(*idx)+2] += alpha3*(*v);
1062       y[7*(*idx)+3] += alpha4*(*v);
1063       y[7*(*idx)+4] += alpha5*(*v);
1064       y[7*(*idx)+5] += alpha6*(*v);
1065       y[7*(*idx)+6] += alpha7*(*v);
1066       idx++; v++;
1067     }
1068   }
1069   ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr);
1070   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1071   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1077 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1078 {
1079   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1080   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1081   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1082   PetscErrorCode ierr;
1083   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1084   PetscInt       n,i,jrow,j;
1085 
1086   PetscFunctionBegin;
1087   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1088   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1089   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1090   idx  = a->j;
1091   v    = a->a;
1092   ii   = a->i;
1093 
1094   for (i=0; i<m; i++) {
1095     jrow = ii[i];
1096     n    = ii[i+1] - jrow;
1097     sum1  = 0.0;
1098     sum2  = 0.0;
1099     sum3  = 0.0;
1100     sum4  = 0.0;
1101     sum5  = 0.0;
1102     sum6  = 0.0;
1103     sum7  = 0.0;
1104     for (j=0; j<n; j++) {
1105       sum1 += v[jrow]*x[7*idx[jrow]];
1106       sum2 += v[jrow]*x[7*idx[jrow]+1];
1107       sum3 += v[jrow]*x[7*idx[jrow]+2];
1108       sum4 += v[jrow]*x[7*idx[jrow]+3];
1109       sum5 += v[jrow]*x[7*idx[jrow]+4];
1110       sum6 += v[jrow]*x[7*idx[jrow]+5];
1111       sum7 += v[jrow]*x[7*idx[jrow]+6];
1112       jrow++;
1113      }
1114     y[7*i]   += sum1;
1115     y[7*i+1] += sum2;
1116     y[7*i+2] += sum3;
1117     y[7*i+3] += sum4;
1118     y[7*i+4] += sum5;
1119     y[7*i+5] += sum6;
1120     y[7*i+6] += sum7;
1121   }
1122 
1123   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1124   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1125   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1131 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1134   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1135   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1136   PetscErrorCode ierr;
1137   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1138 
1139   PetscFunctionBegin;
1140   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1141   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1142   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1143   for (i=0; i<m; i++) {
1144     idx    = a->j + a->i[i] ;
1145     v      = a->a + a->i[i] ;
1146     n      = a->i[i+1] - a->i[i];
1147     alpha1 = x[7*i];
1148     alpha2 = x[7*i+1];
1149     alpha3 = x[7*i+2];
1150     alpha4 = x[7*i+3];
1151     alpha5 = x[7*i+4];
1152     alpha6 = x[7*i+5];
1153     alpha7 = x[7*i+6];
1154     while (n-->0) {
1155       y[7*(*idx)]   += alpha1*(*v);
1156       y[7*(*idx)+1] += alpha2*(*v);
1157       y[7*(*idx)+2] += alpha3*(*v);
1158       y[7*(*idx)+3] += alpha4*(*v);
1159       y[7*(*idx)+4] += alpha5*(*v);
1160       y[7*(*idx)+5] += alpha6*(*v);
1161       y[7*(*idx)+6] += alpha7*(*v);
1162       idx++; v++;
1163     }
1164   }
1165   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1166   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1167   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1173 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1174 {
1175   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1176   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1177   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1178   PetscErrorCode ierr;
1179   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1180   PetscInt       n,i,jrow,j;
1181 
1182   PetscFunctionBegin;
1183   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1184   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1185   idx  = a->j;
1186   v    = a->a;
1187   ii   = a->i;
1188 
1189   for (i=0; i<m; i++) {
1190     jrow = ii[i];
1191     n    = ii[i+1] - jrow;
1192     sum1  = 0.0;
1193     sum2  = 0.0;
1194     sum3  = 0.0;
1195     sum4  = 0.0;
1196     sum5  = 0.0;
1197     sum6  = 0.0;
1198     sum7  = 0.0;
1199     sum8  = 0.0;
1200     for (j=0; j<n; j++) {
1201       sum1 += v[jrow]*x[8*idx[jrow]];
1202       sum2 += v[jrow]*x[8*idx[jrow]+1];
1203       sum3 += v[jrow]*x[8*idx[jrow]+2];
1204       sum4 += v[jrow]*x[8*idx[jrow]+3];
1205       sum5 += v[jrow]*x[8*idx[jrow]+4];
1206       sum6 += v[jrow]*x[8*idx[jrow]+5];
1207       sum7 += v[jrow]*x[8*idx[jrow]+6];
1208       sum8 += v[jrow]*x[8*idx[jrow]+7];
1209       jrow++;
1210      }
1211     y[8*i]   = sum1;
1212     y[8*i+1] = sum2;
1213     y[8*i+2] = sum3;
1214     y[8*i+3] = sum4;
1215     y[8*i+4] = sum5;
1216     y[8*i+5] = sum6;
1217     y[8*i+6] = sum7;
1218     y[8*i+7] = sum8;
1219   }
1220 
1221   ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr);
1222   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1223   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1229 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1230 {
1231   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1232   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1233   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1234   PetscErrorCode ierr;
1235   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1236 
1237   PetscFunctionBegin;
1238   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1240   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1241 
1242   for (i=0; i<m; i++) {
1243     idx    = a->j + a->i[i] ;
1244     v      = a->a + a->i[i] ;
1245     n      = a->i[i+1] - a->i[i];
1246     alpha1 = x[8*i];
1247     alpha2 = x[8*i+1];
1248     alpha3 = x[8*i+2];
1249     alpha4 = x[8*i+3];
1250     alpha5 = x[8*i+4];
1251     alpha6 = x[8*i+5];
1252     alpha7 = x[8*i+6];
1253     alpha8 = x[8*i+7];
1254     while (n-->0) {
1255       y[8*(*idx)]   += alpha1*(*v);
1256       y[8*(*idx)+1] += alpha2*(*v);
1257       y[8*(*idx)+2] += alpha3*(*v);
1258       y[8*(*idx)+3] += alpha4*(*v);
1259       y[8*(*idx)+4] += alpha5*(*v);
1260       y[8*(*idx)+5] += alpha6*(*v);
1261       y[8*(*idx)+6] += alpha7*(*v);
1262       y[8*(*idx)+7] += alpha8*(*v);
1263       idx++; v++;
1264     }
1265   }
1266   ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr);
1267   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1268   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1274 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1275 {
1276   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1277   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1278   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1279   PetscErrorCode ierr;
1280   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1281   PetscInt       n,i,jrow,j;
1282 
1283   PetscFunctionBegin;
1284   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1285   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1286   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1287   idx  = a->j;
1288   v    = a->a;
1289   ii   = a->i;
1290 
1291   for (i=0; i<m; i++) {
1292     jrow = ii[i];
1293     n    = ii[i+1] - jrow;
1294     sum1  = 0.0;
1295     sum2  = 0.0;
1296     sum3  = 0.0;
1297     sum4  = 0.0;
1298     sum5  = 0.0;
1299     sum6  = 0.0;
1300     sum7  = 0.0;
1301     sum8  = 0.0;
1302     for (j=0; j<n; j++) {
1303       sum1 += v[jrow]*x[8*idx[jrow]];
1304       sum2 += v[jrow]*x[8*idx[jrow]+1];
1305       sum3 += v[jrow]*x[8*idx[jrow]+2];
1306       sum4 += v[jrow]*x[8*idx[jrow]+3];
1307       sum5 += v[jrow]*x[8*idx[jrow]+4];
1308       sum6 += v[jrow]*x[8*idx[jrow]+5];
1309       sum7 += v[jrow]*x[8*idx[jrow]+6];
1310       sum8 += v[jrow]*x[8*idx[jrow]+7];
1311       jrow++;
1312      }
1313     y[8*i]   += sum1;
1314     y[8*i+1] += sum2;
1315     y[8*i+2] += sum3;
1316     y[8*i+3] += sum4;
1317     y[8*i+4] += sum5;
1318     y[8*i+5] += sum6;
1319     y[8*i+6] += sum7;
1320     y[8*i+7] += sum8;
1321   }
1322 
1323   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1324   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1325   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1331 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1332 {
1333   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1334   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1335   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1336   PetscErrorCode ierr;
1337   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1338 
1339   PetscFunctionBegin;
1340   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1341   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1342   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1343   for (i=0; i<m; i++) {
1344     idx    = a->j + a->i[i] ;
1345     v      = a->a + a->i[i] ;
1346     n      = a->i[i+1] - a->i[i];
1347     alpha1 = x[8*i];
1348     alpha2 = x[8*i+1];
1349     alpha3 = x[8*i+2];
1350     alpha4 = x[8*i+3];
1351     alpha5 = x[8*i+4];
1352     alpha6 = x[8*i+5];
1353     alpha7 = x[8*i+6];
1354     alpha8 = x[8*i+7];
1355     while (n-->0) {
1356       y[8*(*idx)]   += alpha1*(*v);
1357       y[8*(*idx)+1] += alpha2*(*v);
1358       y[8*(*idx)+2] += alpha3*(*v);
1359       y[8*(*idx)+3] += alpha4*(*v);
1360       y[8*(*idx)+4] += alpha5*(*v);
1361       y[8*(*idx)+5] += alpha6*(*v);
1362       y[8*(*idx)+6] += alpha7*(*v);
1363       y[8*(*idx)+7] += alpha8*(*v);
1364       idx++; v++;
1365     }
1366   }
1367   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1368   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1369   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 /* ------------------------------------------------------------------------------*/
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1376 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1377 {
1378   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1379   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1380   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1381   PetscErrorCode ierr;
1382   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1383   PetscInt       n,i,jrow,j;
1384 
1385   PetscFunctionBegin;
1386   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1387   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1388   idx  = a->j;
1389   v    = a->a;
1390   ii   = a->i;
1391 
1392   for (i=0; i<m; i++) {
1393     jrow = ii[i];
1394     n    = ii[i+1] - jrow;
1395     sum1  = 0.0;
1396     sum2  = 0.0;
1397     sum3  = 0.0;
1398     sum4  = 0.0;
1399     sum5  = 0.0;
1400     sum6  = 0.0;
1401     sum7  = 0.0;
1402     sum8  = 0.0;
1403     sum9  = 0.0;
1404     for (j=0; j<n; j++) {
1405       sum1 += v[jrow]*x[9*idx[jrow]];
1406       sum2 += v[jrow]*x[9*idx[jrow]+1];
1407       sum3 += v[jrow]*x[9*idx[jrow]+2];
1408       sum4 += v[jrow]*x[9*idx[jrow]+3];
1409       sum5 += v[jrow]*x[9*idx[jrow]+4];
1410       sum6 += v[jrow]*x[9*idx[jrow]+5];
1411       sum7 += v[jrow]*x[9*idx[jrow]+6];
1412       sum8 += v[jrow]*x[9*idx[jrow]+7];
1413       sum9 += v[jrow]*x[9*idx[jrow]+8];
1414       jrow++;
1415      }
1416     y[9*i]   = sum1;
1417     y[9*i+1] = sum2;
1418     y[9*i+2] = sum3;
1419     y[9*i+3] = sum4;
1420     y[9*i+4] = sum5;
1421     y[9*i+5] = sum6;
1422     y[9*i+6] = sum7;
1423     y[9*i+7] = sum8;
1424     y[9*i+8] = sum9;
1425   }
1426 
1427   ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr);
1428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1429   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 /* ------------------------------------------------------------------------------*/
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1437 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1438 {
1439   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1440   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1441   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1442   PetscErrorCode ierr;
1443   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1444 
1445   PetscFunctionBegin;
1446   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1447   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1448   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1449 
1450   for (i=0; i<m; i++) {
1451     idx    = a->j + a->i[i] ;
1452     v      = a->a + a->i[i] ;
1453     n      = a->i[i+1] - a->i[i];
1454     alpha1 = x[9*i];
1455     alpha2 = x[9*i+1];
1456     alpha3 = x[9*i+2];
1457     alpha4 = x[9*i+3];
1458     alpha5 = x[9*i+4];
1459     alpha6 = x[9*i+5];
1460     alpha7 = x[9*i+6];
1461     alpha8 = x[9*i+7];
1462     alpha9 = x[9*i+8];
1463     while (n-->0) {
1464       y[9*(*idx)]   += alpha1*(*v);
1465       y[9*(*idx)+1] += alpha2*(*v);
1466       y[9*(*idx)+2] += alpha3*(*v);
1467       y[9*(*idx)+3] += alpha4*(*v);
1468       y[9*(*idx)+4] += alpha5*(*v);
1469       y[9*(*idx)+5] += alpha6*(*v);
1470       y[9*(*idx)+6] += alpha7*(*v);
1471       y[9*(*idx)+7] += alpha8*(*v);
1472       y[9*(*idx)+8] += alpha9*(*v);
1473       idx++; v++;
1474     }
1475   }
1476   ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr);
1477   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1478   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1484 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1485 {
1486   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1488   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1489   PetscErrorCode ierr;
1490   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1491   PetscInt       n,i,jrow,j;
1492 
1493   PetscFunctionBegin;
1494   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1495   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1496   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1497   idx  = a->j;
1498   v    = a->a;
1499   ii   = a->i;
1500 
1501   for (i=0; i<m; i++) {
1502     jrow = ii[i];
1503     n    = ii[i+1] - jrow;
1504     sum1  = 0.0;
1505     sum2  = 0.0;
1506     sum3  = 0.0;
1507     sum4  = 0.0;
1508     sum5  = 0.0;
1509     sum6  = 0.0;
1510     sum7  = 0.0;
1511     sum8  = 0.0;
1512     sum9  = 0.0;
1513     for (j=0; j<n; j++) {
1514       sum1 += v[jrow]*x[9*idx[jrow]];
1515       sum2 += v[jrow]*x[9*idx[jrow]+1];
1516       sum3 += v[jrow]*x[9*idx[jrow]+2];
1517       sum4 += v[jrow]*x[9*idx[jrow]+3];
1518       sum5 += v[jrow]*x[9*idx[jrow]+4];
1519       sum6 += v[jrow]*x[9*idx[jrow]+5];
1520       sum7 += v[jrow]*x[9*idx[jrow]+6];
1521       sum8 += v[jrow]*x[9*idx[jrow]+7];
1522       sum9 += v[jrow]*x[9*idx[jrow]+8];
1523       jrow++;
1524      }
1525     y[9*i]   += sum1;
1526     y[9*i+1] += sum2;
1527     y[9*i+2] += sum3;
1528     y[9*i+3] += sum4;
1529     y[9*i+4] += sum5;
1530     y[9*i+5] += sum6;
1531     y[9*i+6] += sum7;
1532     y[9*i+7] += sum8;
1533     y[9*i+8] += sum9;
1534   }
1535 
1536   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1537   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1538   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1544 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1545 {
1546   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1547   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1548   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1549   PetscErrorCode ierr;
1550   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1551 
1552   PetscFunctionBegin;
1553   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1554   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1555   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1556   for (i=0; i<m; i++) {
1557     idx    = a->j + a->i[i] ;
1558     v      = a->a + a->i[i] ;
1559     n      = a->i[i+1] - a->i[i];
1560     alpha1 = x[9*i];
1561     alpha2 = x[9*i+1];
1562     alpha3 = x[9*i+2];
1563     alpha4 = x[9*i+3];
1564     alpha5 = x[9*i+4];
1565     alpha6 = x[9*i+5];
1566     alpha7 = x[9*i+6];
1567     alpha8 = x[9*i+7];
1568     alpha9 = x[9*i+8];
1569     while (n-->0) {
1570       y[9*(*idx)]   += alpha1*(*v);
1571       y[9*(*idx)+1] += alpha2*(*v);
1572       y[9*(*idx)+2] += alpha3*(*v);
1573       y[9*(*idx)+3] += alpha4*(*v);
1574       y[9*(*idx)+4] += alpha5*(*v);
1575       y[9*(*idx)+5] += alpha6*(*v);
1576       y[9*(*idx)+6] += alpha7*(*v);
1577       y[9*(*idx)+7] += alpha8*(*v);
1578       y[9*(*idx)+8] += alpha9*(*v);
1579       idx++; v++;
1580     }
1581   }
1582   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1583   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1584   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 /*--------------------------------------------------------------------------------------------*/
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1590 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1591 {
1592   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1593   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1594   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1595   PetscErrorCode ierr;
1596   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1597   PetscInt       n,i,jrow,j;
1598 
1599   PetscFunctionBegin;
1600   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1601   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1602   idx  = a->j;
1603   v    = a->a;
1604   ii   = a->i;
1605 
1606   for (i=0; i<m; i++) {
1607     jrow = ii[i];
1608     n    = ii[i+1] - jrow;
1609     sum1  = 0.0;
1610     sum2  = 0.0;
1611     sum3  = 0.0;
1612     sum4  = 0.0;
1613     sum5  = 0.0;
1614     sum6  = 0.0;
1615     sum7  = 0.0;
1616     sum8  = 0.0;
1617     sum9  = 0.0;
1618     sum10 = 0.0;
1619     for (j=0; j<n; j++) {
1620       sum1  += v[jrow]*x[10*idx[jrow]];
1621       sum2  += v[jrow]*x[10*idx[jrow]+1];
1622       sum3  += v[jrow]*x[10*idx[jrow]+2];
1623       sum4  += v[jrow]*x[10*idx[jrow]+3];
1624       sum5  += v[jrow]*x[10*idx[jrow]+4];
1625       sum6  += v[jrow]*x[10*idx[jrow]+5];
1626       sum7  += v[jrow]*x[10*idx[jrow]+6];
1627       sum8  += v[jrow]*x[10*idx[jrow]+7];
1628       sum9  += v[jrow]*x[10*idx[jrow]+8];
1629       sum10 += v[jrow]*x[10*idx[jrow]+9];
1630       jrow++;
1631      }
1632     y[10*i]   = sum1;
1633     y[10*i+1] = sum2;
1634     y[10*i+2] = sum3;
1635     y[10*i+3] = sum4;
1636     y[10*i+4] = sum5;
1637     y[10*i+5] = sum6;
1638     y[10*i+6] = sum7;
1639     y[10*i+7] = sum8;
1640     y[10*i+8] = sum9;
1641     y[10*i+9] = sum10;
1642   }
1643 
1644   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1645   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1646   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1652 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1653 {
1654   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1655   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1656   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1657   PetscErrorCode ierr;
1658   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1659   PetscInt       n,i,jrow,j;
1660 
1661   PetscFunctionBegin;
1662   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1663   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1664   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1665   idx  = a->j;
1666   v    = a->a;
1667   ii   = a->i;
1668 
1669   for (i=0; i<m; i++) {
1670     jrow = ii[i];
1671     n    = ii[i+1] - jrow;
1672     sum1  = 0.0;
1673     sum2  = 0.0;
1674     sum3  = 0.0;
1675     sum4  = 0.0;
1676     sum5  = 0.0;
1677     sum6  = 0.0;
1678     sum7  = 0.0;
1679     sum8  = 0.0;
1680     sum9  = 0.0;
1681     sum10 = 0.0;
1682     for (j=0; j<n; j++) {
1683       sum1  += v[jrow]*x[10*idx[jrow]];
1684       sum2  += v[jrow]*x[10*idx[jrow]+1];
1685       sum3  += v[jrow]*x[10*idx[jrow]+2];
1686       sum4  += v[jrow]*x[10*idx[jrow]+3];
1687       sum5  += v[jrow]*x[10*idx[jrow]+4];
1688       sum6  += v[jrow]*x[10*idx[jrow]+5];
1689       sum7  += v[jrow]*x[10*idx[jrow]+6];
1690       sum8  += v[jrow]*x[10*idx[jrow]+7];
1691       sum9  += v[jrow]*x[10*idx[jrow]+8];
1692       sum10 += v[jrow]*x[10*idx[jrow]+9];
1693       jrow++;
1694      }
1695     y[10*i]   += sum1;
1696     y[10*i+1] += sum2;
1697     y[10*i+2] += sum3;
1698     y[10*i+3] += sum4;
1699     y[10*i+4] += sum5;
1700     y[10*i+5] += sum6;
1701     y[10*i+6] += sum7;
1702     y[10*i+7] += sum8;
1703     y[10*i+8] += sum9;
1704     y[10*i+9] += sum10;
1705   }
1706 
1707   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1708   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1709   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1715 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1716 {
1717   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1718   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1719   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1720   PetscErrorCode ierr;
1721   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1722 
1723   PetscFunctionBegin;
1724   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1725   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1726   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1727 
1728   for (i=0; i<m; i++) {
1729     idx    = a->j + a->i[i] ;
1730     v      = a->a + a->i[i] ;
1731     n      = a->i[i+1] - a->i[i];
1732     alpha1 = x[10*i];
1733     alpha2 = x[10*i+1];
1734     alpha3 = x[10*i+2];
1735     alpha4 = x[10*i+3];
1736     alpha5 = x[10*i+4];
1737     alpha6 = x[10*i+5];
1738     alpha7 = x[10*i+6];
1739     alpha8 = x[10*i+7];
1740     alpha9 = x[10*i+8];
1741     alpha10 = x[10*i+9];
1742     while (n-->0) {
1743       y[10*(*idx)]   += alpha1*(*v);
1744       y[10*(*idx)+1] += alpha2*(*v);
1745       y[10*(*idx)+2] += alpha3*(*v);
1746       y[10*(*idx)+3] += alpha4*(*v);
1747       y[10*(*idx)+4] += alpha5*(*v);
1748       y[10*(*idx)+5] += alpha6*(*v);
1749       y[10*(*idx)+6] += alpha7*(*v);
1750       y[10*(*idx)+7] += alpha8*(*v);
1751       y[10*(*idx)+8] += alpha9*(*v);
1752       y[10*(*idx)+9] += alpha10*(*v);
1753       idx++; v++;
1754     }
1755   }
1756   ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr);
1757   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1758   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1764 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1765 {
1766   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1767   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1768   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1769   PetscErrorCode ierr;
1770   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1771 
1772   PetscFunctionBegin;
1773   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1774   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1775   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1776   for (i=0; i<m; i++) {
1777     idx    = a->j + a->i[i] ;
1778     v      = a->a + a->i[i] ;
1779     n      = a->i[i+1] - a->i[i];
1780     alpha1 = x[10*i];
1781     alpha2 = x[10*i+1];
1782     alpha3 = x[10*i+2];
1783     alpha4 = x[10*i+3];
1784     alpha5 = x[10*i+4];
1785     alpha6 = x[10*i+5];
1786     alpha7 = x[10*i+6];
1787     alpha8 = x[10*i+7];
1788     alpha9 = x[10*i+8];
1789     alpha10 = x[10*i+9];
1790     while (n-->0) {
1791       y[10*(*idx)]   += alpha1*(*v);
1792       y[10*(*idx)+1] += alpha2*(*v);
1793       y[10*(*idx)+2] += alpha3*(*v);
1794       y[10*(*idx)+3] += alpha4*(*v);
1795       y[10*(*idx)+4] += alpha5*(*v);
1796       y[10*(*idx)+5] += alpha6*(*v);
1797       y[10*(*idx)+6] += alpha7*(*v);
1798       y[10*(*idx)+7] += alpha8*(*v);
1799       y[10*(*idx)+8] += alpha9*(*v);
1800       y[10*(*idx)+9] += alpha10*(*v);
1801       idx++; v++;
1802     }
1803   }
1804   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1805   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1806   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 
1811 /*--------------------------------------------------------------------------------------------*/
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1814 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1815 {
1816   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1817   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1818   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1819   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1820   PetscErrorCode ierr;
1821   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1822   PetscInt       n,i,jrow,j;
1823 
1824   PetscFunctionBegin;
1825   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1826   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1827   idx  = a->j;
1828   v    = a->a;
1829   ii   = a->i;
1830 
1831   for (i=0; i<m; i++) {
1832     jrow = ii[i];
1833     n    = ii[i+1] - jrow;
1834     sum1  = 0.0;
1835     sum2  = 0.0;
1836     sum3  = 0.0;
1837     sum4  = 0.0;
1838     sum5  = 0.0;
1839     sum6  = 0.0;
1840     sum7  = 0.0;
1841     sum8  = 0.0;
1842     sum9  = 0.0;
1843     sum10 = 0.0;
1844     sum11 = 0.0;
1845     sum12 = 0.0;
1846     sum13 = 0.0;
1847     sum14 = 0.0;
1848     sum15 = 0.0;
1849     sum16 = 0.0;
1850     for (j=0; j<n; j++) {
1851       sum1  += v[jrow]*x[16*idx[jrow]];
1852       sum2  += v[jrow]*x[16*idx[jrow]+1];
1853       sum3  += v[jrow]*x[16*idx[jrow]+2];
1854       sum4  += v[jrow]*x[16*idx[jrow]+3];
1855       sum5  += v[jrow]*x[16*idx[jrow]+4];
1856       sum6  += v[jrow]*x[16*idx[jrow]+5];
1857       sum7  += v[jrow]*x[16*idx[jrow]+6];
1858       sum8  += v[jrow]*x[16*idx[jrow]+7];
1859       sum9  += v[jrow]*x[16*idx[jrow]+8];
1860       sum10 += v[jrow]*x[16*idx[jrow]+9];
1861       sum11 += v[jrow]*x[16*idx[jrow]+10];
1862       sum12 += v[jrow]*x[16*idx[jrow]+11];
1863       sum13 += v[jrow]*x[16*idx[jrow]+12];
1864       sum14 += v[jrow]*x[16*idx[jrow]+13];
1865       sum15 += v[jrow]*x[16*idx[jrow]+14];
1866       sum16 += v[jrow]*x[16*idx[jrow]+15];
1867       jrow++;
1868      }
1869     y[16*i]    = sum1;
1870     y[16*i+1]  = sum2;
1871     y[16*i+2]  = sum3;
1872     y[16*i+3]  = sum4;
1873     y[16*i+4]  = sum5;
1874     y[16*i+5]  = sum6;
1875     y[16*i+6]  = sum7;
1876     y[16*i+7]  = sum8;
1877     y[16*i+8]  = sum9;
1878     y[16*i+9]  = sum10;
1879     y[16*i+10] = sum11;
1880     y[16*i+11] = sum12;
1881     y[16*i+12] = sum13;
1882     y[16*i+13] = sum14;
1883     y[16*i+14] = sum15;
1884     y[16*i+15] = sum16;
1885   }
1886 
1887   ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr);
1888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1889   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1895 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1896 {
1897   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1898   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1899   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1900   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1901   PetscErrorCode ierr;
1902   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1903 
1904   PetscFunctionBegin;
1905   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1906   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1907   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1908 
1909   for (i=0; i<m; i++) {
1910     idx    = a->j + a->i[i] ;
1911     v      = a->a + a->i[i] ;
1912     n      = a->i[i+1] - a->i[i];
1913     alpha1  = x[16*i];
1914     alpha2  = x[16*i+1];
1915     alpha3  = x[16*i+2];
1916     alpha4  = x[16*i+3];
1917     alpha5  = x[16*i+4];
1918     alpha6  = x[16*i+5];
1919     alpha7  = x[16*i+6];
1920     alpha8  = x[16*i+7];
1921     alpha9  = x[16*i+8];
1922     alpha10 = x[16*i+9];
1923     alpha11 = x[16*i+10];
1924     alpha12 = x[16*i+11];
1925     alpha13 = x[16*i+12];
1926     alpha14 = x[16*i+13];
1927     alpha15 = x[16*i+14];
1928     alpha16 = x[16*i+15];
1929     while (n-->0) {
1930       y[16*(*idx)]    += alpha1*(*v);
1931       y[16*(*idx)+1]  += alpha2*(*v);
1932       y[16*(*idx)+2]  += alpha3*(*v);
1933       y[16*(*idx)+3]  += alpha4*(*v);
1934       y[16*(*idx)+4]  += alpha5*(*v);
1935       y[16*(*idx)+5]  += alpha6*(*v);
1936       y[16*(*idx)+6]  += alpha7*(*v);
1937       y[16*(*idx)+7]  += alpha8*(*v);
1938       y[16*(*idx)+8]  += alpha9*(*v);
1939       y[16*(*idx)+9]  += alpha10*(*v);
1940       y[16*(*idx)+10] += alpha11*(*v);
1941       y[16*(*idx)+11] += alpha12*(*v);
1942       y[16*(*idx)+12] += alpha13*(*v);
1943       y[16*(*idx)+13] += alpha14*(*v);
1944       y[16*(*idx)+14] += alpha15*(*v);
1945       y[16*(*idx)+15] += alpha16*(*v);
1946       idx++; v++;
1947     }
1948   }
1949   ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr);
1950   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1951   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1957 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1958 {
1959   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1960   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1961   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1962   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1963   PetscErrorCode ierr;
1964   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1965   PetscInt       n,i,jrow,j;
1966 
1967   PetscFunctionBegin;
1968   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1969   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1970   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1971   idx  = a->j;
1972   v    = a->a;
1973   ii   = a->i;
1974 
1975   for (i=0; i<m; i++) {
1976     jrow = ii[i];
1977     n    = ii[i+1] - jrow;
1978     sum1  = 0.0;
1979     sum2  = 0.0;
1980     sum3  = 0.0;
1981     sum4  = 0.0;
1982     sum5  = 0.0;
1983     sum6  = 0.0;
1984     sum7  = 0.0;
1985     sum8  = 0.0;
1986     sum9  = 0.0;
1987     sum10 = 0.0;
1988     sum11 = 0.0;
1989     sum12 = 0.0;
1990     sum13 = 0.0;
1991     sum14 = 0.0;
1992     sum15 = 0.0;
1993     sum16 = 0.0;
1994     for (j=0; j<n; j++) {
1995       sum1  += v[jrow]*x[16*idx[jrow]];
1996       sum2  += v[jrow]*x[16*idx[jrow]+1];
1997       sum3  += v[jrow]*x[16*idx[jrow]+2];
1998       sum4  += v[jrow]*x[16*idx[jrow]+3];
1999       sum5  += v[jrow]*x[16*idx[jrow]+4];
2000       sum6  += v[jrow]*x[16*idx[jrow]+5];
2001       sum7  += v[jrow]*x[16*idx[jrow]+6];
2002       sum8  += v[jrow]*x[16*idx[jrow]+7];
2003       sum9  += v[jrow]*x[16*idx[jrow]+8];
2004       sum10 += v[jrow]*x[16*idx[jrow]+9];
2005       sum11 += v[jrow]*x[16*idx[jrow]+10];
2006       sum12 += v[jrow]*x[16*idx[jrow]+11];
2007       sum13 += v[jrow]*x[16*idx[jrow]+12];
2008       sum14 += v[jrow]*x[16*idx[jrow]+13];
2009       sum15 += v[jrow]*x[16*idx[jrow]+14];
2010       sum16 += v[jrow]*x[16*idx[jrow]+15];
2011       jrow++;
2012      }
2013     y[16*i]    += sum1;
2014     y[16*i+1]  += sum2;
2015     y[16*i+2]  += sum3;
2016     y[16*i+3]  += sum4;
2017     y[16*i+4]  += sum5;
2018     y[16*i+5]  += sum6;
2019     y[16*i+6]  += sum7;
2020     y[16*i+7]  += sum8;
2021     y[16*i+8]  += sum9;
2022     y[16*i+9]  += sum10;
2023     y[16*i+10] += sum11;
2024     y[16*i+11] += sum12;
2025     y[16*i+12] += sum13;
2026     y[16*i+13] += sum14;
2027     y[16*i+14] += sum15;
2028     y[16*i+15] += sum16;
2029   }
2030 
2031   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2032   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2033   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2039 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2040 {
2041   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2042   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2043   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2044   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2045   PetscErrorCode ierr;
2046   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
2047 
2048   PetscFunctionBegin;
2049   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2050   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2051   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2052   for (i=0; i<m; i++) {
2053     idx    = a->j + a->i[i] ;
2054     v      = a->a + a->i[i] ;
2055     n      = a->i[i+1] - a->i[i];
2056     alpha1 = x[16*i];
2057     alpha2 = x[16*i+1];
2058     alpha3 = x[16*i+2];
2059     alpha4 = x[16*i+3];
2060     alpha5 = x[16*i+4];
2061     alpha6 = x[16*i+5];
2062     alpha7 = x[16*i+6];
2063     alpha8 = x[16*i+7];
2064     alpha9  = x[16*i+8];
2065     alpha10 = x[16*i+9];
2066     alpha11 = x[16*i+10];
2067     alpha12 = x[16*i+11];
2068     alpha13 = x[16*i+12];
2069     alpha14 = x[16*i+13];
2070     alpha15 = x[16*i+14];
2071     alpha16 = x[16*i+15];
2072     while (n-->0) {
2073       y[16*(*idx)]   += alpha1*(*v);
2074       y[16*(*idx)+1] += alpha2*(*v);
2075       y[16*(*idx)+2] += alpha3*(*v);
2076       y[16*(*idx)+3] += alpha4*(*v);
2077       y[16*(*idx)+4] += alpha5*(*v);
2078       y[16*(*idx)+5] += alpha6*(*v);
2079       y[16*(*idx)+6] += alpha7*(*v);
2080       y[16*(*idx)+7] += alpha8*(*v);
2081       y[16*(*idx)+8]  += alpha9*(*v);
2082       y[16*(*idx)+9]  += alpha10*(*v);
2083       y[16*(*idx)+10] += alpha11*(*v);
2084       y[16*(*idx)+11] += alpha12*(*v);
2085       y[16*(*idx)+12] += alpha13*(*v);
2086       y[16*(*idx)+13] += alpha14*(*v);
2087       y[16*(*idx)+14] += alpha15*(*v);
2088       y[16*(*idx)+15] += alpha16*(*v);
2089       idx++; v++;
2090     }
2091   }
2092   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2093   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2094   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 /*===================================================================================*/
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2101 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2102 {
2103   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   /* start the scatter */
2108   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2109   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2110   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2111   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2117 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2118 {
2119   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2120   PetscErrorCode ierr;
2121 
2122   PetscFunctionBegin;
2123   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2124   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2125   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2126   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2132 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2133 {
2134   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2135   PetscErrorCode ierr;
2136 
2137   PetscFunctionBegin;
2138   /* start the scatter */
2139   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2140   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2141   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2142   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 #undef __FUNCT__
2147 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2148 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2149 {
2150   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2151   PetscErrorCode ierr;
2152 
2153   PetscFunctionBegin;
2154   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2155   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2156   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2157   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2163 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2164 {
2165   /* This routine requires testing -- but it's getting better. */
2166   PetscErrorCode     ierr;
2167   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2168   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2169   Mat                P=pp->AIJ;
2170   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2171   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2172   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2173   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2174   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2175   MatScalar          *ca;
2176 
2177   PetscFunctionBegin;
2178   /* Start timer */
2179   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2180 
2181   /* Get ij structure of P^T */
2182   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2183 
2184   cn = pn*ppdof;
2185   /* Allocate ci array, arrays for fill computation and */
2186   /* free space for accumulating nonzero column info */
2187   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2188   ci[0] = 0;
2189 
2190   /* Work arrays for rows of P^T*A */
2191   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
2192   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
2193   ptasparserow = ptadenserow  + an;
2194   denserow     = ptasparserow + an;
2195   sparserow    = denserow     + cn;
2196 
2197   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2198   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2199   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2200   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2201   current_space = free_space;
2202 
2203   /* Determine symbolic info for each row of C: */
2204   for (i=0;i<pn;i++) {
2205     ptnzi  = pti[i+1] - pti[i];
2206     ptJ    = ptj + pti[i];
2207     for (dof=0;dof<ppdof;dof++) {
2208       ptanzi = 0;
2209       /* Determine symbolic row of PtA: */
2210       for (j=0;j<ptnzi;j++) {
2211         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2212         arow = ptJ[j]*ppdof + dof;
2213         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2214         anzj = ai[arow+1] - ai[arow];
2215         ajj  = aj + ai[arow];
2216         for (k=0;k<anzj;k++) {
2217           if (!ptadenserow[ajj[k]]) {
2218             ptadenserow[ajj[k]]    = -1;
2219             ptasparserow[ptanzi++] = ajj[k];
2220           }
2221         }
2222       }
2223       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2224       ptaj = ptasparserow;
2225       cnzi   = 0;
2226       for (j=0;j<ptanzi;j++) {
2227         /* Get offset within block of P */
2228         pshift = *ptaj%ppdof;
2229         /* Get block row of P */
2230         prow = (*ptaj++)/ppdof; /* integer division */
2231         /* P has same number of nonzeros per row as the compressed form */
2232         pnzj = pi[prow+1] - pi[prow];
2233         pjj  = pj + pi[prow];
2234         for (k=0;k<pnzj;k++) {
2235           /* Locations in C are shifted by the offset within the block */
2236           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2237           if (!denserow[pjj[k]*ppdof+pshift]) {
2238             denserow[pjj[k]*ppdof+pshift] = -1;
2239             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2240           }
2241         }
2242       }
2243 
2244       /* sort sparserow */
2245       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2246 
2247       /* If free space is not available, make more free space */
2248       /* Double the amount of total space in the list */
2249       if (current_space->local_remaining<cnzi) {
2250         ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
2251       }
2252 
2253       /* Copy data into free space, and zero out denserows */
2254       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2255       current_space->array           += cnzi;
2256       current_space->local_used      += cnzi;
2257       current_space->local_remaining -= cnzi;
2258 
2259       for (j=0;j<ptanzi;j++) {
2260         ptadenserow[ptasparserow[j]] = 0;
2261       }
2262       for (j=0;j<cnzi;j++) {
2263         denserow[sparserow[j]] = 0;
2264       }
2265       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2266       /*        For now, we will recompute what is needed. */
2267       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2268     }
2269   }
2270   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2271   /* Allocate space for cj, initialize cj, and */
2272   /* destroy list of free space and other temporary array(s) */
2273   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
2274   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2275   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
2276 
2277   /* Allocate space for ca */
2278   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
2279   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
2280 
2281   /* put together the new matrix */
2282   ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2283 
2284   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2285   /* Since these are PETSc arrays, change flags to free them as necessary. */
2286   c          = (Mat_SeqAIJ *)((*C)->data);
2287   c->free_a  = PETSC_TRUE;
2288   c->free_ij = PETSC_TRUE;
2289   c->nonew   = 0;
2290 
2291   /* Clean up. */
2292   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2293 
2294   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 #undef __FUNCT__
2299 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
2300 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2301 {
2302   /* This routine requires testing -- first draft only */
2303   PetscErrorCode ierr;
2304   PetscInt       flops=0;
2305   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2306   Mat            P=pp->AIJ;
2307   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2308   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2309   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2310   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2311   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2312   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2313   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2314   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2315 
2316   PetscFunctionBegin;
2317   /* Allocate temporary array for storage of one row of A*P */
2318   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
2319   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
2320 
2321   apj      = (PetscInt *)(apa + cn);
2322   apjdense = apj + cn;
2323 
2324   /* Clear old values in C */
2325   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
2326 
2327   for (i=0;i<am;i++) {
2328     /* Form sparse row of A*P */
2329     anzi  = ai[i+1] - ai[i];
2330     apnzj = 0;
2331     for (j=0;j<anzi;j++) {
2332       /* Get offset within block of P */
2333       pshift = *aj%ppdof;
2334       /* Get block row of P */
2335       prow   = *aj++/ppdof; /* integer division */
2336       pnzj = pi[prow+1] - pi[prow];
2337       pjj  = pj + pi[prow];
2338       paj  = pa + pi[prow];
2339       for (k=0;k<pnzj;k++) {
2340         poffset = pjj[k]*ppdof+pshift;
2341         if (!apjdense[poffset]) {
2342           apjdense[poffset] = -1;
2343           apj[apnzj++]      = poffset;
2344         }
2345         apa[poffset] += (*aa)*paj[k];
2346       }
2347       flops += 2*pnzj;
2348       aa++;
2349     }
2350 
2351     /* Sort the j index array for quick sparse axpy. */
2352     /* Note: a array does not need sorting as it is in dense storage locations. */
2353     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
2354 
2355     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2356     prow    = i/ppdof; /* integer division */
2357     pshift  = i%ppdof;
2358     poffset = pi[prow];
2359     pnzi = pi[prow+1] - poffset;
2360     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2361     pJ   = pj+poffset;
2362     pA   = pa+poffset;
2363     for (j=0;j<pnzi;j++) {
2364       crow   = (*pJ)*ppdof+pshift;
2365       cjj    = cj + ci[crow];
2366       caj    = ca + ci[crow];
2367       pJ++;
2368       /* Perform sparse axpy operation.  Note cjj includes apj. */
2369       for (k=0,nextap=0;nextap<apnzj;k++) {
2370         if (cjj[k]==apj[nextap]) {
2371           caj[k] += (*pA)*apa[apj[nextap++]];
2372         }
2373       }
2374       flops += 2*apnzj;
2375       pA++;
2376     }
2377 
2378     /* Zero the current row info for A*P */
2379     for (j=0;j<apnzj;j++) {
2380       apa[apj[j]]      = 0.;
2381       apjdense[apj[j]] = 0;
2382     }
2383   }
2384 
2385   /* Assemble the final matrix and clean up */
2386   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   ierr = PetscFree(apa);CHKERRQ(ierr);
2389   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
2390 
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 EXTERN_C_BEGIN
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2397 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2398 {
2399   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2400   Mat               a = b->AIJ,B;
2401   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2402   PetscErrorCode    ierr;
2403   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2404   PetscInt          *cols;
2405   PetscScalar       *vals;
2406 
2407   PetscFunctionBegin;
2408   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
2409   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
2410   for (i=0; i<m; i++) {
2411     nmax = PetscMax(nmax,aij->ilen[i]);
2412     for (j=0; j<dof; j++) {
2413       ilen[dof*i+j] = aij->ilen[i];
2414     }
2415   }
2416   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
2417   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2418   ierr = PetscFree(ilen);CHKERRQ(ierr);
2419   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
2420   ii   = 0;
2421   for (i=0; i<m; i++) {
2422     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2423     for (j=0; j<dof; j++) {
2424       for (k=0; k<ncols; k++) {
2425         icols[k] = dof*cols[k]+j;
2426       }
2427       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2428       ii++;
2429     }
2430     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2431   }
2432   ierr = PetscFree(icols);CHKERRQ(ierr);
2433   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2434   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2435 
2436   if (reuse == MAT_REUSE_MATRIX) {
2437     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2438   } else {
2439     *newmat = B;
2440   }
2441   PetscFunctionReturn(0);
2442 }
2443 EXTERN_C_END
2444 
2445 #include "src/mat/impls/aij/mpi/mpiaij.h"
2446 
2447 EXTERN_C_BEGIN
2448 #undef __FUNCT__
2449 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2450 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2451 {
2452   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2453   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2454   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2455   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2456   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2457   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2458   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2459   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2460   PetscInt          rstart,cstart,*garray,ii,k;
2461   PetscErrorCode    ierr;
2462   PetscScalar       *vals,*ovals;
2463 
2464   PetscFunctionBegin;
2465   ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr);
2466   for (i=0; i<A->rmap.n/dof; i++) {
2467     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2468     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2469     for (j=0; j<dof; j++) {
2470       dnz[dof*i+j] = AIJ->ilen[i];
2471       onz[dof*i+j] = OAIJ->ilen[i];
2472     }
2473   }
2474   ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2475   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2476   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
2477 
2478   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
2479   rstart = dof*maij->A->rmap.rstart;
2480   cstart = dof*maij->A->cmap.rstart;
2481   garray = mpiaij->garray;
2482 
2483   ii = rstart;
2484   for (i=0; i<A->rmap.n/dof; i++) {
2485     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2486     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2487     for (j=0; j<dof; j++) {
2488       for (k=0; k<ncols; k++) {
2489         icols[k] = cstart + dof*cols[k]+j;
2490       }
2491       for (k=0; k<oncols; k++) {
2492         oicols[k] = dof*garray[ocols[k]]+j;
2493       }
2494       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2495       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
2496       ii++;
2497     }
2498     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2499     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2500   }
2501   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
2502 
2503   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2504   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2505 
2506   if (reuse == MAT_REUSE_MATRIX) {
2507     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2508   } else {
2509     *newmat = B;
2510   }
2511   PetscFunctionReturn(0);
2512 }
2513 EXTERN_C_END
2514 
2515 
2516 /* ---------------------------------------------------------------------------------- */
2517 /*MC
2518   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2519   operations for multicomponent problems.  It interpolates each component the same
2520   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2521   and MATMPIAIJ for distributed matrices.
2522 
2523   Operations provided:
2524 + MatMult
2525 . MatMultTranspose
2526 . MatMultAdd
2527 . MatMultTransposeAdd
2528 - MatView
2529 
2530   Level: advanced
2531 
2532 M*/
2533 #undef __FUNCT__
2534 #define __FUNCT__ "MatCreateMAIJ"
2535 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2536 {
2537   PetscErrorCode ierr;
2538   PetscMPIInt    size;
2539   PetscInt       n;
2540   Mat_MPIMAIJ    *b;
2541   Mat            B;
2542 
2543   PetscFunctionBegin;
2544   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2545 
2546   if (dof == 1) {
2547     *maij = A;
2548   } else {
2549     ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
2550     ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr);
2551     B->assembled    = PETSC_TRUE;
2552 
2553     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2554     if (size == 1) {
2555       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
2556       B->ops->destroy = MatDestroy_SeqMAIJ;
2557       B->ops->view    = MatView_SeqMAIJ;
2558       b      = (Mat_MPIMAIJ*)B->data;
2559       b->dof = dof;
2560       b->AIJ = A;
2561       if (dof == 2) {
2562         B->ops->mult             = MatMult_SeqMAIJ_2;
2563         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2564         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2565         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2566       } else if (dof == 3) {
2567         B->ops->mult             = MatMult_SeqMAIJ_3;
2568         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2569         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2570         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2571       } else if (dof == 4) {
2572         B->ops->mult             = MatMult_SeqMAIJ_4;
2573         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2574         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2575         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2576       } else if (dof == 5) {
2577         B->ops->mult             = MatMult_SeqMAIJ_5;
2578         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2579         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2580         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2581       } else if (dof == 6) {
2582         B->ops->mult             = MatMult_SeqMAIJ_6;
2583         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2584         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2585         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2586       } else if (dof == 7) {
2587         B->ops->mult             = MatMult_SeqMAIJ_7;
2588         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2589         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2590         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2591       } else if (dof == 8) {
2592         B->ops->mult             = MatMult_SeqMAIJ_8;
2593         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2594         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2595         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2596       } else if (dof == 9) {
2597         B->ops->mult             = MatMult_SeqMAIJ_9;
2598         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2599         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2600         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2601       } else if (dof == 10) {
2602         B->ops->mult             = MatMult_SeqMAIJ_10;
2603         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2604         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2605         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2606       } else if (dof == 16) {
2607         B->ops->mult             = MatMult_SeqMAIJ_16;
2608         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2609         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2610         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2611       } else {
2612         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2613       }
2614       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2615       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2616       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2617     } else {
2618       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2619       IS         from,to;
2620       Vec        gvec;
2621       PetscInt   *garray,i;
2622 
2623       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2624       B->ops->destroy = MatDestroy_MPIMAIJ;
2625       B->ops->view    = MatView_MPIMAIJ;
2626       b      = (Mat_MPIMAIJ*)B->data;
2627       b->dof = dof;
2628       b->A   = A;
2629       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
2630       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
2631 
2632       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2633       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
2634       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2635 
2636       /* create two temporary Index sets for build scatter gather */
2637       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2638       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2639       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
2640       ierr = PetscFree(garray);CHKERRQ(ierr);
2641       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2642 
2643       /* create temporary global vector to generate scatter context */
2644       ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr);
2645       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2646 
2647       /* generate the scatter context */
2648       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
2649 
2650       ierr = ISDestroy(from);CHKERRQ(ierr);
2651       ierr = ISDestroy(to);CHKERRQ(ierr);
2652       ierr = VecDestroy(gvec);CHKERRQ(ierr);
2653 
2654       B->ops->mult             = MatMult_MPIMAIJ_dof;
2655       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2656       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2657       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2658       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
2659     }
2660     *maij = B;
2661     ierr = MatView_Private(B);CHKERRQ(ierr);
2662   }
2663   PetscFunctionReturn(0);
2664 }
2665 
2666 
2667 
2668 
2669 
2670 
2671 
2672 
2673 
2674 
2675 
2676 
2677