xref: /petsc/src/mat/impls/maij/maij.c (revision 58cd72c3d0f0c5a63e84afc03ac54bb1bb84cd8d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the MAIJ  matrix storage format.
5   This format is used for restriction and interpolation operations for
6   multicomponent problems. It interpolates each component the same way
7   independently.
8 
9      We provide:
10          MatMult()
11          MatMultTranspose()
12          MatMultTransposeAdd()
13          MatMultAdd()
14           and
15          MatCreateMAIJ(Mat,dof,Mat*)
16 
17      This single directory handles both the sequential and parallel codes
18 */
19 
20 #include "src/mat/impls/maij/maij.h"
21 #include "src/mat/utils/freespace.h"
22 #include "private/vecimpl.h"
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatMAIJGetAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27 {
28   PetscErrorCode ierr;
29   PetscTruth     ismpimaij,isseqmaij;
30 
31   PetscFunctionBegin;
32   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
33   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
34   if (ismpimaij) {
35     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
36 
37     *B = b->A;
38   } else if (isseqmaij) {
39     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
40 
41     *B = b->AIJ;
42   } else {
43     *B = A;
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMAIJRedimension"
50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51 {
52   PetscErrorCode ierr;
53   Mat            Aij;
54 
55   PetscFunctionBegin;
56   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
57   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatDestroy_SeqMAIJ"
63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64 {
65   PetscErrorCode ierr;
66   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
67 
68   PetscFunctionBegin;
69   if (b->AIJ) {
70     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
71   }
72   ierr = PetscFree(b);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatView_SeqMAIJ"
78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79 {
80   PetscErrorCode ierr;
81   Mat            B;
82 
83   PetscFunctionBegin;
84   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85   ierr = MatView(B,viewer);CHKERRQ(ierr);
86   ierr = MatDestroy(B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatView_MPIMAIJ"
92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93 {
94   PetscErrorCode ierr;
95   Mat            B;
96 
97   PetscFunctionBegin;
98   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99   ierr = MatView(B,viewer);CHKERRQ(ierr);
100   ierr = MatDestroy(B);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatDestroy_MPIMAIJ"
106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107 {
108   PetscErrorCode ierr;
109   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
110 
111   PetscFunctionBegin;
112   if (b->AIJ) {
113     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
114   }
115   if (b->OAIJ) {
116     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
117   }
118   if (b->A) {
119     ierr = MatDestroy(b->A);CHKERRQ(ierr);
120   }
121   if (b->ctx) {
122     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
123   }
124   if (b->w) {
125     ierr = VecDestroy(b->w);CHKERRQ(ierr);
126   }
127   ierr = PetscFree(b);CHKERRQ(ierr);
128   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 /*MC
133   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
134   multicomponent problems, interpolating or restricting each component the same way independently.
135   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
136 
137   Operations provided:
138 . MatMult
139 . MatMultTranspose
140 . MatMultAdd
141 . MatMultTransposeAdd
142 
143   Level: advanced
144 
145 .seealso: MatCreateSeqDense
146 M*/
147 
148 EXTERN_C_BEGIN
149 #undef __FUNCT__
150 #define __FUNCT__ "MatCreate_MAIJ"
151 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
152 {
153   PetscErrorCode ierr;
154   Mat_MPIMAIJ    *b;
155   PetscMPIInt    size;
156 
157   PetscFunctionBegin;
158   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
159   A->data  = (void*)b;
160   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
161   A->factor           = 0;
162   A->mapping          = 0;
163 
164   b->AIJ  = 0;
165   b->dof  = 0;
166   b->OAIJ = 0;
167   b->ctx  = 0;
168   b->w    = 0;
169   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
170   if (size == 1){
171     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
172   } else {
173     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
174   }
175   PetscFunctionReturn(0);
176 }
177 EXTERN_C_END
178 
179 /* --------------------------------------------------------------------------------------*/
180 #undef __FUNCT__
181 #define __FUNCT__ "MatMult_SeqMAIJ_2"
182 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183 {
184   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
186   PetscScalar    *x,*y,*v,sum1, sum2;
187   PetscErrorCode ierr;
188   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
189   PetscInt       n,i,jrow,j;
190 
191   PetscFunctionBegin;
192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
193   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
194   idx  = a->j;
195   v    = a->a;
196   ii   = a->i;
197 
198   for (i=0; i<m; i++) {
199     jrow = ii[i];
200     n    = ii[i+1] - jrow;
201     sum1  = 0.0;
202     sum2  = 0.0;
203     for (j=0; j<n; j++) {
204       sum1 += v[jrow]*x[2*idx[jrow]];
205       sum2 += v[jrow]*x[2*idx[jrow]+1];
206       jrow++;
207      }
208     y[2*i]   = sum1;
209     y[2*i+1] = sum2;
210   }
211 
212   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
220 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221 {
222   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
223   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
224   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
225   PetscErrorCode ierr;
226   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
227 
228   PetscFunctionBegin;
229   ierr = VecSet(yy,zero);CHKERRQ(ierr);
230   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
231   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
232 
233   for (i=0; i<m; i++) {
234     idx    = a->j + a->i[i] ;
235     v      = a->a + a->i[i] ;
236     n      = a->i[i+1] - a->i[i];
237     alpha1 = x[2*i];
238     alpha2 = x[2*i+1];
239     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240   }
241   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
242   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
249 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250 {
251   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
252   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
253   PetscScalar    *x,*y,*v,sum1, sum2;
254   PetscErrorCode ierr;
255   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
256   PetscInt       n,i,jrow,j;
257 
258   PetscFunctionBegin;
259   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
260   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
261   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
262   idx  = a->j;
263   v    = a->a;
264   ii   = a->i;
265 
266   for (i=0; i<m; i++) {
267     jrow = ii[i];
268     n    = ii[i+1] - jrow;
269     sum1  = 0.0;
270     sum2  = 0.0;
271     for (j=0; j<n; j++) {
272       sum1 += v[jrow]*x[2*idx[jrow]];
273       sum2 += v[jrow]*x[2*idx[jrow]+1];
274       jrow++;
275      }
276     y[2*i]   += sum1;
277     y[2*i+1] += sum2;
278   }
279 
280   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
281   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
282   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 #undef __FUNCT__
286 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
287 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288 {
289   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
290   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
291   PetscScalar    *x,*y,*v,alpha1,alpha2;
292   PetscErrorCode ierr;
293   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
294 
295   PetscFunctionBegin;
296   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
297   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
298   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
299 
300   for (i=0; i<m; i++) {
301     idx   = a->j + a->i[i] ;
302     v     = a->a + a->i[i] ;
303     n     = a->i[i+1] - a->i[i];
304     alpha1 = x[2*i];
305     alpha2 = x[2*i+1];
306     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307   }
308   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
309   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
310   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 /* --------------------------------------------------------------------------------------*/
314 #undef __FUNCT__
315 #define __FUNCT__ "MatMult_SeqMAIJ_3"
316 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317 {
318   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
319   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
320   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
321   PetscErrorCode ierr;
322   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
323   PetscInt       n,i,jrow,j;
324 
325   PetscFunctionBegin;
326   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
327   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
328   idx  = a->j;
329   v    = a->a;
330   ii   = a->i;
331 
332   for (i=0; i<m; i++) {
333     jrow = ii[i];
334     n    = ii[i+1] - jrow;
335     sum1  = 0.0;
336     sum2  = 0.0;
337     sum3  = 0.0;
338     for (j=0; j<n; j++) {
339       sum1 += v[jrow]*x[3*idx[jrow]];
340       sum2 += v[jrow]*x[3*idx[jrow]+1];
341       sum3 += v[jrow]*x[3*idx[jrow]+2];
342       jrow++;
343      }
344     y[3*i]   = sum1;
345     y[3*i+1] = sum2;
346     y[3*i+2] = sum3;
347   }
348 
349   ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr);
350   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
352   PetscFunctionReturn(0);
353 }
354 
355 #undef __FUNCT__
356 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
357 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
358 {
359   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
360   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
361   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
362   PetscErrorCode ierr;
363   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
364 
365   PetscFunctionBegin;
366   ierr = VecSet(yy,zero);CHKERRQ(ierr);
367   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
368   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
369 
370   for (i=0; i<m; i++) {
371     idx    = a->j + a->i[i];
372     v      = a->a + a->i[i];
373     n      = a->i[i+1] - a->i[i];
374     alpha1 = x[3*i];
375     alpha2 = x[3*i+1];
376     alpha3 = x[3*i+2];
377     while (n-->0) {
378       y[3*(*idx)]   += alpha1*(*v);
379       y[3*(*idx)+1] += alpha2*(*v);
380       y[3*(*idx)+2] += alpha3*(*v);
381       idx++; v++;
382     }
383   }
384   ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr);
385   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
386   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
392 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
393 {
394   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
395   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
396   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
397   PetscErrorCode ierr;
398   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
399   PetscInt       n,i,jrow,j;
400 
401   PetscFunctionBegin;
402   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
403   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
404   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
405   idx  = a->j;
406   v    = a->a;
407   ii   = a->i;
408 
409   for (i=0; i<m; i++) {
410     jrow = ii[i];
411     n    = ii[i+1] - jrow;
412     sum1  = 0.0;
413     sum2  = 0.0;
414     sum3  = 0.0;
415     for (j=0; j<n; j++) {
416       sum1 += v[jrow]*x[3*idx[jrow]];
417       sum2 += v[jrow]*x[3*idx[jrow]+1];
418       sum3 += v[jrow]*x[3*idx[jrow]+2];
419       jrow++;
420      }
421     y[3*i]   += sum1;
422     y[3*i+1] += sum2;
423     y[3*i+2] += sum3;
424   }
425 
426   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
427   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
428   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 #undef __FUNCT__
432 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
433 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
434 {
435   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
436   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
437   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
438   PetscErrorCode ierr;
439   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
440 
441   PetscFunctionBegin;
442   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
443   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
444   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
445   for (i=0; i<m; i++) {
446     idx    = a->j + a->i[i] ;
447     v      = a->a + a->i[i] ;
448     n      = a->i[i+1] - a->i[i];
449     alpha1 = x[3*i];
450     alpha2 = x[3*i+1];
451     alpha3 = x[3*i+2];
452     while (n-->0) {
453       y[3*(*idx)]   += alpha1*(*v);
454       y[3*(*idx)+1] += alpha2*(*v);
455       y[3*(*idx)+2] += alpha3*(*v);
456       idx++; v++;
457     }
458   }
459   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
460   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
461   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 /* ------------------------------------------------------------------------------*/
466 #undef __FUNCT__
467 #define __FUNCT__ "MatMult_SeqMAIJ_4"
468 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
469 {
470   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
471   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
472   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
473   PetscErrorCode ierr;
474   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
475   PetscInt       n,i,jrow,j;
476 
477   PetscFunctionBegin;
478   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
479   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
480   idx  = a->j;
481   v    = a->a;
482   ii   = a->i;
483 
484   for (i=0; i<m; i++) {
485     jrow = ii[i];
486     n    = ii[i+1] - jrow;
487     sum1  = 0.0;
488     sum2  = 0.0;
489     sum3  = 0.0;
490     sum4  = 0.0;
491     for (j=0; j<n; j++) {
492       sum1 += v[jrow]*x[4*idx[jrow]];
493       sum2 += v[jrow]*x[4*idx[jrow]+1];
494       sum3 += v[jrow]*x[4*idx[jrow]+2];
495       sum4 += v[jrow]*x[4*idx[jrow]+3];
496       jrow++;
497      }
498     y[4*i]   = sum1;
499     y[4*i+1] = sum2;
500     y[4*i+2] = sum3;
501     y[4*i+3] = sum4;
502   }
503 
504   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
505   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
506   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
512 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
513 {
514   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
515   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
516   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
517   PetscErrorCode ierr;
518   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
519 
520   PetscFunctionBegin;
521   ierr = VecSet(yy,zero);CHKERRQ(ierr);
522   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
523   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
524   for (i=0; i<m; i++) {
525     idx    = a->j + a->i[i] ;
526     v      = a->a + a->i[i] ;
527     n      = a->i[i+1] - a->i[i];
528     alpha1 = x[4*i];
529     alpha2 = x[4*i+1];
530     alpha3 = x[4*i+2];
531     alpha4 = x[4*i+3];
532     while (n-->0) {
533       y[4*(*idx)]   += alpha1*(*v);
534       y[4*(*idx)+1] += alpha2*(*v);
535       y[4*(*idx)+2] += alpha3*(*v);
536       y[4*(*idx)+3] += alpha4*(*v);
537       idx++; v++;
538     }
539   }
540   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
541   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
542   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
548 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
549 {
550   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
551   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
552   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
553   PetscErrorCode ierr;
554   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
555   PetscInt       n,i,jrow,j;
556 
557   PetscFunctionBegin;
558   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
559   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
560   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
561   idx  = a->j;
562   v    = a->a;
563   ii   = a->i;
564 
565   for (i=0; i<m; i++) {
566     jrow = ii[i];
567     n    = ii[i+1] - jrow;
568     sum1  = 0.0;
569     sum2  = 0.0;
570     sum3  = 0.0;
571     sum4  = 0.0;
572     for (j=0; j<n; j++) {
573       sum1 += v[jrow]*x[4*idx[jrow]];
574       sum2 += v[jrow]*x[4*idx[jrow]+1];
575       sum3 += v[jrow]*x[4*idx[jrow]+2];
576       sum4 += v[jrow]*x[4*idx[jrow]+3];
577       jrow++;
578      }
579     y[4*i]   += sum1;
580     y[4*i+1] += sum2;
581     y[4*i+2] += sum3;
582     y[4*i+3] += sum4;
583   }
584 
585   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
586   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
587   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 #undef __FUNCT__
591 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
592 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
593 {
594   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
595   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
596   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
597   PetscErrorCode ierr;
598   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
599 
600   PetscFunctionBegin;
601   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
602   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
603   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
604 
605   for (i=0; i<m; i++) {
606     idx    = a->j + a->i[i] ;
607     v      = a->a + a->i[i] ;
608     n      = a->i[i+1] - a->i[i];
609     alpha1 = x[4*i];
610     alpha2 = x[4*i+1];
611     alpha3 = x[4*i+2];
612     alpha4 = x[4*i+3];
613     while (n-->0) {
614       y[4*(*idx)]   += alpha1*(*v);
615       y[4*(*idx)+1] += alpha2*(*v);
616       y[4*(*idx)+2] += alpha3*(*v);
617       y[4*(*idx)+3] += alpha4*(*v);
618       idx++; v++;
619     }
620   }
621   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
622   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
623   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 /* ------------------------------------------------------------------------------*/
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatMult_SeqMAIJ_5"
630 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
631 {
632   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
633   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
634   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
635   PetscErrorCode ierr;
636   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
637   PetscInt       n,i,jrow,j;
638 
639   PetscFunctionBegin;
640   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
641   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
642   idx  = a->j;
643   v    = a->a;
644   ii   = a->i;
645 
646   for (i=0; i<m; i++) {
647     jrow = ii[i];
648     n    = ii[i+1] - jrow;
649     sum1  = 0.0;
650     sum2  = 0.0;
651     sum3  = 0.0;
652     sum4  = 0.0;
653     sum5  = 0.0;
654     for (j=0; j<n; j++) {
655       sum1 += v[jrow]*x[5*idx[jrow]];
656       sum2 += v[jrow]*x[5*idx[jrow]+1];
657       sum3 += v[jrow]*x[5*idx[jrow]+2];
658       sum4 += v[jrow]*x[5*idx[jrow]+3];
659       sum5 += v[jrow]*x[5*idx[jrow]+4];
660       jrow++;
661      }
662     y[5*i]   = sum1;
663     y[5*i+1] = sum2;
664     y[5*i+2] = sum3;
665     y[5*i+3] = sum4;
666     y[5*i+4] = sum5;
667   }
668 
669   ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr);
670   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
671   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
677 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
678 {
679   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
680   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
681   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
682   PetscErrorCode ierr;
683   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
684 
685   PetscFunctionBegin;
686   ierr = VecSet(yy,zero);CHKERRQ(ierr);
687   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
688   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
689 
690   for (i=0; i<m; i++) {
691     idx    = a->j + a->i[i] ;
692     v      = a->a + a->i[i] ;
693     n      = a->i[i+1] - a->i[i];
694     alpha1 = x[5*i];
695     alpha2 = x[5*i+1];
696     alpha3 = x[5*i+2];
697     alpha4 = x[5*i+3];
698     alpha5 = x[5*i+4];
699     while (n-->0) {
700       y[5*(*idx)]   += alpha1*(*v);
701       y[5*(*idx)+1] += alpha2*(*v);
702       y[5*(*idx)+2] += alpha3*(*v);
703       y[5*(*idx)+3] += alpha4*(*v);
704       y[5*(*idx)+4] += alpha5*(*v);
705       idx++; v++;
706     }
707   }
708   ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr);
709   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
710   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
716 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
717 {
718   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
719   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
720   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
721   PetscErrorCode ierr;
722   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
723   PetscInt       n,i,jrow,j;
724 
725   PetscFunctionBegin;
726   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
727   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
728   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
729   idx  = a->j;
730   v    = a->a;
731   ii   = a->i;
732 
733   for (i=0; i<m; i++) {
734     jrow = ii[i];
735     n    = ii[i+1] - jrow;
736     sum1  = 0.0;
737     sum2  = 0.0;
738     sum3  = 0.0;
739     sum4  = 0.0;
740     sum5  = 0.0;
741     for (j=0; j<n; j++) {
742       sum1 += v[jrow]*x[5*idx[jrow]];
743       sum2 += v[jrow]*x[5*idx[jrow]+1];
744       sum3 += v[jrow]*x[5*idx[jrow]+2];
745       sum4 += v[jrow]*x[5*idx[jrow]+3];
746       sum5 += v[jrow]*x[5*idx[jrow]+4];
747       jrow++;
748      }
749     y[5*i]   += sum1;
750     y[5*i+1] += sum2;
751     y[5*i+2] += sum3;
752     y[5*i+3] += sum4;
753     y[5*i+4] += sum5;
754   }
755 
756   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
757   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
758   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
764 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
765 {
766   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
767   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
768   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
769   PetscErrorCode ierr;
770   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
771 
772   PetscFunctionBegin;
773   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
774   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
775   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
776 
777   for (i=0; i<m; i++) {
778     idx    = a->j + a->i[i] ;
779     v      = a->a + a->i[i] ;
780     n      = a->i[i+1] - a->i[i];
781     alpha1 = x[5*i];
782     alpha2 = x[5*i+1];
783     alpha3 = x[5*i+2];
784     alpha4 = x[5*i+3];
785     alpha5 = x[5*i+4];
786     while (n-->0) {
787       y[5*(*idx)]   += alpha1*(*v);
788       y[5*(*idx)+1] += alpha2*(*v);
789       y[5*(*idx)+2] += alpha3*(*v);
790       y[5*(*idx)+3] += alpha4*(*v);
791       y[5*(*idx)+4] += alpha5*(*v);
792       idx++; v++;
793     }
794   }
795   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
796   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
797   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
798   PetscFunctionReturn(0);
799 }
800 
801 /* ------------------------------------------------------------------------------*/
802 #undef __FUNCT__
803 #define __FUNCT__ "MatMult_SeqMAIJ_6"
804 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
805 {
806   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
807   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
808   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
809   PetscErrorCode ierr;
810   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
811   PetscInt       n,i,jrow,j;
812 
813   PetscFunctionBegin;
814   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
815   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
816   idx  = a->j;
817   v    = a->a;
818   ii   = a->i;
819 
820   for (i=0; i<m; i++) {
821     jrow = ii[i];
822     n    = ii[i+1] - jrow;
823     sum1  = 0.0;
824     sum2  = 0.0;
825     sum3  = 0.0;
826     sum4  = 0.0;
827     sum5  = 0.0;
828     sum6  = 0.0;
829     for (j=0; j<n; j++) {
830       sum1 += v[jrow]*x[6*idx[jrow]];
831       sum2 += v[jrow]*x[6*idx[jrow]+1];
832       sum3 += v[jrow]*x[6*idx[jrow]+2];
833       sum4 += v[jrow]*x[6*idx[jrow]+3];
834       sum5 += v[jrow]*x[6*idx[jrow]+4];
835       sum6 += v[jrow]*x[6*idx[jrow]+5];
836       jrow++;
837      }
838     y[6*i]   = sum1;
839     y[6*i+1] = sum2;
840     y[6*i+2] = sum3;
841     y[6*i+3] = sum4;
842     y[6*i+4] = sum5;
843     y[6*i+5] = sum6;
844   }
845 
846   ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr);
847   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
848   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
849   PetscFunctionReturn(0);
850 }
851 
852 #undef __FUNCT__
853 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
854 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
855 {
856   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
857   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
858   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
859   PetscErrorCode ierr;
860   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
861 
862   PetscFunctionBegin;
863   ierr = VecSet(yy,zero);CHKERRQ(ierr);
864   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
865   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
866 
867   for (i=0; i<m; i++) {
868     idx    = a->j + a->i[i] ;
869     v      = a->a + a->i[i] ;
870     n      = a->i[i+1] - a->i[i];
871     alpha1 = x[6*i];
872     alpha2 = x[6*i+1];
873     alpha3 = x[6*i+2];
874     alpha4 = x[6*i+3];
875     alpha5 = x[6*i+4];
876     alpha6 = x[6*i+5];
877     while (n-->0) {
878       y[6*(*idx)]   += alpha1*(*v);
879       y[6*(*idx)+1] += alpha2*(*v);
880       y[6*(*idx)+2] += alpha3*(*v);
881       y[6*(*idx)+3] += alpha4*(*v);
882       y[6*(*idx)+4] += alpha5*(*v);
883       y[6*(*idx)+5] += alpha6*(*v);
884       idx++; v++;
885     }
886   }
887   ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr);
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
895 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
896 {
897   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
898   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
899   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
900   PetscErrorCode ierr;
901   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
902   PetscInt       n,i,jrow,j;
903 
904   PetscFunctionBegin;
905   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
906   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
907   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
908   idx  = a->j;
909   v    = a->a;
910   ii   = a->i;
911 
912   for (i=0; i<m; i++) {
913     jrow = ii[i];
914     n    = ii[i+1] - jrow;
915     sum1  = 0.0;
916     sum2  = 0.0;
917     sum3  = 0.0;
918     sum4  = 0.0;
919     sum5  = 0.0;
920     sum6  = 0.0;
921     for (j=0; j<n; j++) {
922       sum1 += v[jrow]*x[6*idx[jrow]];
923       sum2 += v[jrow]*x[6*idx[jrow]+1];
924       sum3 += v[jrow]*x[6*idx[jrow]+2];
925       sum4 += v[jrow]*x[6*idx[jrow]+3];
926       sum5 += v[jrow]*x[6*idx[jrow]+4];
927       sum6 += v[jrow]*x[6*idx[jrow]+5];
928       jrow++;
929      }
930     y[6*i]   += sum1;
931     y[6*i+1] += sum2;
932     y[6*i+2] += sum3;
933     y[6*i+3] += sum4;
934     y[6*i+4] += sum5;
935     y[6*i+5] += sum6;
936   }
937 
938   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
939   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
940   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
941   PetscFunctionReturn(0);
942 }
943 
944 #undef __FUNCT__
945 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
946 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
947 {
948   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
949   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
950   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
951   PetscErrorCode ierr;
952   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
953 
954   PetscFunctionBegin;
955   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
956   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
957   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
958 
959   for (i=0; i<m; i++) {
960     idx    = a->j + a->i[i] ;
961     v      = a->a + a->i[i] ;
962     n      = a->i[i+1] - a->i[i];
963     alpha1 = x[6*i];
964     alpha2 = x[6*i+1];
965     alpha3 = x[6*i+2];
966     alpha4 = x[6*i+3];
967     alpha5 = x[6*i+4];
968     alpha6 = x[6*i+5];
969     while (n-->0) {
970       y[6*(*idx)]   += alpha1*(*v);
971       y[6*(*idx)+1] += alpha2*(*v);
972       y[6*(*idx)+2] += alpha3*(*v);
973       y[6*(*idx)+3] += alpha4*(*v);
974       y[6*(*idx)+4] += alpha5*(*v);
975       y[6*(*idx)+5] += alpha6*(*v);
976       idx++; v++;
977     }
978   }
979   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
980   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
981   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
982   PetscFunctionReturn(0);
983 }
984 
985 /* ------------------------------------------------------------------------------*/
986 #undef __FUNCT__
987 #define __FUNCT__ "MatMult_SeqMAIJ_7"
988 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
989 {
990   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
991   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
992   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
993   PetscErrorCode ierr;
994   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
995   PetscInt       n,i,jrow,j;
996 
997   PetscFunctionBegin;
998   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
999   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1000   idx  = a->j;
1001   v    = a->a;
1002   ii   = a->i;
1003 
1004   for (i=0; i<m; i++) {
1005     jrow = ii[i];
1006     n    = ii[i+1] - jrow;
1007     sum1  = 0.0;
1008     sum2  = 0.0;
1009     sum3  = 0.0;
1010     sum4  = 0.0;
1011     sum5  = 0.0;
1012     sum6  = 0.0;
1013     sum7  = 0.0;
1014     for (j=0; j<n; j++) {
1015       sum1 += v[jrow]*x[7*idx[jrow]];
1016       sum2 += v[jrow]*x[7*idx[jrow]+1];
1017       sum3 += v[jrow]*x[7*idx[jrow]+2];
1018       sum4 += v[jrow]*x[7*idx[jrow]+3];
1019       sum5 += v[jrow]*x[7*idx[jrow]+4];
1020       sum6 += v[jrow]*x[7*idx[jrow]+5];
1021       sum7 += v[jrow]*x[7*idx[jrow]+6];
1022       jrow++;
1023      }
1024     y[7*i]   = sum1;
1025     y[7*i+1] = sum2;
1026     y[7*i+2] = sum3;
1027     y[7*i+3] = sum4;
1028     y[7*i+4] = sum5;
1029     y[7*i+5] = sum6;
1030     y[7*i+6] = sum7;
1031   }
1032 
1033   ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr);
1034   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1035   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1041 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1042 {
1043   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1044   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1045   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1046   PetscErrorCode ierr;
1047   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1048 
1049   PetscFunctionBegin;
1050   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1051   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1052   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1053 
1054   for (i=0; i<m; i++) {
1055     idx    = a->j + a->i[i] ;
1056     v      = a->a + a->i[i] ;
1057     n      = a->i[i+1] - a->i[i];
1058     alpha1 = x[7*i];
1059     alpha2 = x[7*i+1];
1060     alpha3 = x[7*i+2];
1061     alpha4 = x[7*i+3];
1062     alpha5 = x[7*i+4];
1063     alpha6 = x[7*i+5];
1064     alpha7 = x[7*i+6];
1065     while (n-->0) {
1066       y[7*(*idx)]   += alpha1*(*v);
1067       y[7*(*idx)+1] += alpha2*(*v);
1068       y[7*(*idx)+2] += alpha3*(*v);
1069       y[7*(*idx)+3] += alpha4*(*v);
1070       y[7*(*idx)+4] += alpha5*(*v);
1071       y[7*(*idx)+5] += alpha6*(*v);
1072       y[7*(*idx)+6] += alpha7*(*v);
1073       idx++; v++;
1074     }
1075   }
1076   ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr);
1077   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1078   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1084 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1085 {
1086   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1087   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1088   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1089   PetscErrorCode ierr;
1090   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1091   PetscInt       n,i,jrow,j;
1092 
1093   PetscFunctionBegin;
1094   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1095   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1096   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1097   idx  = a->j;
1098   v    = a->a;
1099   ii   = a->i;
1100 
1101   for (i=0; i<m; i++) {
1102     jrow = ii[i];
1103     n    = ii[i+1] - jrow;
1104     sum1  = 0.0;
1105     sum2  = 0.0;
1106     sum3  = 0.0;
1107     sum4  = 0.0;
1108     sum5  = 0.0;
1109     sum6  = 0.0;
1110     sum7  = 0.0;
1111     for (j=0; j<n; j++) {
1112       sum1 += v[jrow]*x[7*idx[jrow]];
1113       sum2 += v[jrow]*x[7*idx[jrow]+1];
1114       sum3 += v[jrow]*x[7*idx[jrow]+2];
1115       sum4 += v[jrow]*x[7*idx[jrow]+3];
1116       sum5 += v[jrow]*x[7*idx[jrow]+4];
1117       sum6 += v[jrow]*x[7*idx[jrow]+5];
1118       sum7 += v[jrow]*x[7*idx[jrow]+6];
1119       jrow++;
1120      }
1121     y[7*i]   += sum1;
1122     y[7*i+1] += sum2;
1123     y[7*i+2] += sum3;
1124     y[7*i+3] += sum4;
1125     y[7*i+4] += sum5;
1126     y[7*i+5] += sum6;
1127     y[7*i+6] += sum7;
1128   }
1129 
1130   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1131   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1132   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1138 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1139 {
1140   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1141   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1142   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1143   PetscErrorCode ierr;
1144   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1145 
1146   PetscFunctionBegin;
1147   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1148   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1149   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1150   for (i=0; i<m; i++) {
1151     idx    = a->j + a->i[i] ;
1152     v      = a->a + a->i[i] ;
1153     n      = a->i[i+1] - a->i[i];
1154     alpha1 = x[7*i];
1155     alpha2 = x[7*i+1];
1156     alpha3 = x[7*i+2];
1157     alpha4 = x[7*i+3];
1158     alpha5 = x[7*i+4];
1159     alpha6 = x[7*i+5];
1160     alpha7 = x[7*i+6];
1161     while (n-->0) {
1162       y[7*(*idx)]   += alpha1*(*v);
1163       y[7*(*idx)+1] += alpha2*(*v);
1164       y[7*(*idx)+2] += alpha3*(*v);
1165       y[7*(*idx)+3] += alpha4*(*v);
1166       y[7*(*idx)+4] += alpha5*(*v);
1167       y[7*(*idx)+5] += alpha6*(*v);
1168       y[7*(*idx)+6] += alpha7*(*v);
1169       idx++; v++;
1170     }
1171   }
1172   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1173   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1174   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1180 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1181 {
1182   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1183   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1184   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1185   PetscErrorCode ierr;
1186   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1187   PetscInt       n,i,jrow,j;
1188 
1189   PetscFunctionBegin;
1190   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1191   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1192   idx  = a->j;
1193   v    = a->a;
1194   ii   = a->i;
1195 
1196   for (i=0; i<m; i++) {
1197     jrow = ii[i];
1198     n    = ii[i+1] - jrow;
1199     sum1  = 0.0;
1200     sum2  = 0.0;
1201     sum3  = 0.0;
1202     sum4  = 0.0;
1203     sum5  = 0.0;
1204     sum6  = 0.0;
1205     sum7  = 0.0;
1206     sum8  = 0.0;
1207     for (j=0; j<n; j++) {
1208       sum1 += v[jrow]*x[8*idx[jrow]];
1209       sum2 += v[jrow]*x[8*idx[jrow]+1];
1210       sum3 += v[jrow]*x[8*idx[jrow]+2];
1211       sum4 += v[jrow]*x[8*idx[jrow]+3];
1212       sum5 += v[jrow]*x[8*idx[jrow]+4];
1213       sum6 += v[jrow]*x[8*idx[jrow]+5];
1214       sum7 += v[jrow]*x[8*idx[jrow]+6];
1215       sum8 += v[jrow]*x[8*idx[jrow]+7];
1216       jrow++;
1217      }
1218     y[8*i]   = sum1;
1219     y[8*i+1] = sum2;
1220     y[8*i+2] = sum3;
1221     y[8*i+3] = sum4;
1222     y[8*i+4] = sum5;
1223     y[8*i+5] = sum6;
1224     y[8*i+6] = sum7;
1225     y[8*i+7] = sum8;
1226   }
1227 
1228   ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr);
1229   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1230   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1236 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1237 {
1238   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1239   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1240   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1241   PetscErrorCode ierr;
1242   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1243 
1244   PetscFunctionBegin;
1245   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1246   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1247   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1248 
1249   for (i=0; i<m; i++) {
1250     idx    = a->j + a->i[i] ;
1251     v      = a->a + a->i[i] ;
1252     n      = a->i[i+1] - a->i[i];
1253     alpha1 = x[8*i];
1254     alpha2 = x[8*i+1];
1255     alpha3 = x[8*i+2];
1256     alpha4 = x[8*i+3];
1257     alpha5 = x[8*i+4];
1258     alpha6 = x[8*i+5];
1259     alpha7 = x[8*i+6];
1260     alpha8 = x[8*i+7];
1261     while (n-->0) {
1262       y[8*(*idx)]   += alpha1*(*v);
1263       y[8*(*idx)+1] += alpha2*(*v);
1264       y[8*(*idx)+2] += alpha3*(*v);
1265       y[8*(*idx)+3] += alpha4*(*v);
1266       y[8*(*idx)+4] += alpha5*(*v);
1267       y[8*(*idx)+5] += alpha6*(*v);
1268       y[8*(*idx)+6] += alpha7*(*v);
1269       y[8*(*idx)+7] += alpha8*(*v);
1270       idx++; v++;
1271     }
1272   }
1273   ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr);
1274   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1275   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1281 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1282 {
1283   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1284   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1285   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1286   PetscErrorCode ierr;
1287   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1288   PetscInt       n,i,jrow,j;
1289 
1290   PetscFunctionBegin;
1291   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1292   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1293   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1294   idx  = a->j;
1295   v    = a->a;
1296   ii   = a->i;
1297 
1298   for (i=0; i<m; i++) {
1299     jrow = ii[i];
1300     n    = ii[i+1] - jrow;
1301     sum1  = 0.0;
1302     sum2  = 0.0;
1303     sum3  = 0.0;
1304     sum4  = 0.0;
1305     sum5  = 0.0;
1306     sum6  = 0.0;
1307     sum7  = 0.0;
1308     sum8  = 0.0;
1309     for (j=0; j<n; j++) {
1310       sum1 += v[jrow]*x[8*idx[jrow]];
1311       sum2 += v[jrow]*x[8*idx[jrow]+1];
1312       sum3 += v[jrow]*x[8*idx[jrow]+2];
1313       sum4 += v[jrow]*x[8*idx[jrow]+3];
1314       sum5 += v[jrow]*x[8*idx[jrow]+4];
1315       sum6 += v[jrow]*x[8*idx[jrow]+5];
1316       sum7 += v[jrow]*x[8*idx[jrow]+6];
1317       sum8 += v[jrow]*x[8*idx[jrow]+7];
1318       jrow++;
1319      }
1320     y[8*i]   += sum1;
1321     y[8*i+1] += sum2;
1322     y[8*i+2] += sum3;
1323     y[8*i+3] += sum4;
1324     y[8*i+4] += sum5;
1325     y[8*i+5] += sum6;
1326     y[8*i+6] += sum7;
1327     y[8*i+7] += sum8;
1328   }
1329 
1330   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1331   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1332   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1338 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1339 {
1340   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1341   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1342   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1343   PetscErrorCode ierr;
1344   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1345 
1346   PetscFunctionBegin;
1347   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1348   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1349   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1350   for (i=0; i<m; i++) {
1351     idx    = a->j + a->i[i] ;
1352     v      = a->a + a->i[i] ;
1353     n      = a->i[i+1] - a->i[i];
1354     alpha1 = x[8*i];
1355     alpha2 = x[8*i+1];
1356     alpha3 = x[8*i+2];
1357     alpha4 = x[8*i+3];
1358     alpha5 = x[8*i+4];
1359     alpha6 = x[8*i+5];
1360     alpha7 = x[8*i+6];
1361     alpha8 = x[8*i+7];
1362     while (n-->0) {
1363       y[8*(*idx)]   += alpha1*(*v);
1364       y[8*(*idx)+1] += alpha2*(*v);
1365       y[8*(*idx)+2] += alpha3*(*v);
1366       y[8*(*idx)+3] += alpha4*(*v);
1367       y[8*(*idx)+4] += alpha5*(*v);
1368       y[8*(*idx)+5] += alpha6*(*v);
1369       y[8*(*idx)+6] += alpha7*(*v);
1370       y[8*(*idx)+7] += alpha8*(*v);
1371       idx++; v++;
1372     }
1373   }
1374   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1375   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1376   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 /* ------------------------------------------------------------------------------*/
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1383 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1384 {
1385   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1386   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1387   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1388   PetscErrorCode ierr;
1389   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1390   PetscInt       n,i,jrow,j;
1391 
1392   PetscFunctionBegin;
1393   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1394   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1395   idx  = a->j;
1396   v    = a->a;
1397   ii   = a->i;
1398 
1399   for (i=0; i<m; i++) {
1400     jrow = ii[i];
1401     n    = ii[i+1] - jrow;
1402     sum1  = 0.0;
1403     sum2  = 0.0;
1404     sum3  = 0.0;
1405     sum4  = 0.0;
1406     sum5  = 0.0;
1407     sum6  = 0.0;
1408     sum7  = 0.0;
1409     sum8  = 0.0;
1410     sum9  = 0.0;
1411     for (j=0; j<n; j++) {
1412       sum1 += v[jrow]*x[9*idx[jrow]];
1413       sum2 += v[jrow]*x[9*idx[jrow]+1];
1414       sum3 += v[jrow]*x[9*idx[jrow]+2];
1415       sum4 += v[jrow]*x[9*idx[jrow]+3];
1416       sum5 += v[jrow]*x[9*idx[jrow]+4];
1417       sum6 += v[jrow]*x[9*idx[jrow]+5];
1418       sum7 += v[jrow]*x[9*idx[jrow]+6];
1419       sum8 += v[jrow]*x[9*idx[jrow]+7];
1420       sum9 += v[jrow]*x[9*idx[jrow]+8];
1421       jrow++;
1422      }
1423     y[9*i]   = sum1;
1424     y[9*i+1] = sum2;
1425     y[9*i+2] = sum3;
1426     y[9*i+3] = sum4;
1427     y[9*i+4] = sum5;
1428     y[9*i+5] = sum6;
1429     y[9*i+6] = sum7;
1430     y[9*i+7] = sum8;
1431     y[9*i+8] = sum9;
1432   }
1433 
1434   ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr);
1435   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1436   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 /* ------------------------------------------------------------------------------*/
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1444 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1445 {
1446   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1447   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1448   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1449   PetscErrorCode ierr;
1450   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1451 
1452   PetscFunctionBegin;
1453   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1455   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1456 
1457   for (i=0; i<m; i++) {
1458     idx    = a->j + a->i[i] ;
1459     v      = a->a + a->i[i] ;
1460     n      = a->i[i+1] - a->i[i];
1461     alpha1 = x[9*i];
1462     alpha2 = x[9*i+1];
1463     alpha3 = x[9*i+2];
1464     alpha4 = x[9*i+3];
1465     alpha5 = x[9*i+4];
1466     alpha6 = x[9*i+5];
1467     alpha7 = x[9*i+6];
1468     alpha8 = x[9*i+7];
1469     alpha9 = x[9*i+8];
1470     while (n-->0) {
1471       y[9*(*idx)]   += alpha1*(*v);
1472       y[9*(*idx)+1] += alpha2*(*v);
1473       y[9*(*idx)+2] += alpha3*(*v);
1474       y[9*(*idx)+3] += alpha4*(*v);
1475       y[9*(*idx)+4] += alpha5*(*v);
1476       y[9*(*idx)+5] += alpha6*(*v);
1477       y[9*(*idx)+6] += alpha7*(*v);
1478       y[9*(*idx)+7] += alpha8*(*v);
1479       y[9*(*idx)+8] += alpha9*(*v);
1480       idx++; v++;
1481     }
1482   }
1483   ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr);
1484   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1485   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1491 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1492 {
1493   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1494   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1495   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1496   PetscErrorCode ierr;
1497   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1498   PetscInt       n,i,jrow,j;
1499 
1500   PetscFunctionBegin;
1501   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1502   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1503   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1504   idx  = a->j;
1505   v    = a->a;
1506   ii   = a->i;
1507 
1508   for (i=0; i<m; i++) {
1509     jrow = ii[i];
1510     n    = ii[i+1] - jrow;
1511     sum1  = 0.0;
1512     sum2  = 0.0;
1513     sum3  = 0.0;
1514     sum4  = 0.0;
1515     sum5  = 0.0;
1516     sum6  = 0.0;
1517     sum7  = 0.0;
1518     sum8  = 0.0;
1519     sum9  = 0.0;
1520     for (j=0; j<n; j++) {
1521       sum1 += v[jrow]*x[9*idx[jrow]];
1522       sum2 += v[jrow]*x[9*idx[jrow]+1];
1523       sum3 += v[jrow]*x[9*idx[jrow]+2];
1524       sum4 += v[jrow]*x[9*idx[jrow]+3];
1525       sum5 += v[jrow]*x[9*idx[jrow]+4];
1526       sum6 += v[jrow]*x[9*idx[jrow]+5];
1527       sum7 += v[jrow]*x[9*idx[jrow]+6];
1528       sum8 += v[jrow]*x[9*idx[jrow]+7];
1529       sum9 += v[jrow]*x[9*idx[jrow]+8];
1530       jrow++;
1531      }
1532     y[9*i]   += sum1;
1533     y[9*i+1] += sum2;
1534     y[9*i+2] += sum3;
1535     y[9*i+3] += sum4;
1536     y[9*i+4] += sum5;
1537     y[9*i+5] += sum6;
1538     y[9*i+6] += sum7;
1539     y[9*i+7] += sum8;
1540     y[9*i+8] += sum9;
1541   }
1542 
1543   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1544   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1545   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1551 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1552 {
1553   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1554   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1555   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1556   PetscErrorCode ierr;
1557   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1558 
1559   PetscFunctionBegin;
1560   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1561   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1562   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1563   for (i=0; i<m; i++) {
1564     idx    = a->j + a->i[i] ;
1565     v      = a->a + a->i[i] ;
1566     n      = a->i[i+1] - a->i[i];
1567     alpha1 = x[9*i];
1568     alpha2 = x[9*i+1];
1569     alpha3 = x[9*i+2];
1570     alpha4 = x[9*i+3];
1571     alpha5 = x[9*i+4];
1572     alpha6 = x[9*i+5];
1573     alpha7 = x[9*i+6];
1574     alpha8 = x[9*i+7];
1575     alpha9 = x[9*i+8];
1576     while (n-->0) {
1577       y[9*(*idx)]   += alpha1*(*v);
1578       y[9*(*idx)+1] += alpha2*(*v);
1579       y[9*(*idx)+2] += alpha3*(*v);
1580       y[9*(*idx)+3] += alpha4*(*v);
1581       y[9*(*idx)+4] += alpha5*(*v);
1582       y[9*(*idx)+5] += alpha6*(*v);
1583       y[9*(*idx)+6] += alpha7*(*v);
1584       y[9*(*idx)+7] += alpha8*(*v);
1585       y[9*(*idx)+8] += alpha9*(*v);
1586       idx++; v++;
1587     }
1588   }
1589   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1590   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1591   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1592   PetscFunctionReturn(0);
1593 }
1594 /*--------------------------------------------------------------------------------------------*/
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1597 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1598 {
1599   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1600   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1601   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1602   PetscErrorCode ierr;
1603   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1604   PetscInt       n,i,jrow,j;
1605 
1606   PetscFunctionBegin;
1607   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1608   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1609   idx  = a->j;
1610   v    = a->a;
1611   ii   = a->i;
1612 
1613   for (i=0; i<m; i++) {
1614     jrow = ii[i];
1615     n    = ii[i+1] - jrow;
1616     sum1  = 0.0;
1617     sum2  = 0.0;
1618     sum3  = 0.0;
1619     sum4  = 0.0;
1620     sum5  = 0.0;
1621     sum6  = 0.0;
1622     sum7  = 0.0;
1623     sum8  = 0.0;
1624     sum9  = 0.0;
1625     sum10 = 0.0;
1626     for (j=0; j<n; j++) {
1627       sum1  += v[jrow]*x[10*idx[jrow]];
1628       sum2  += v[jrow]*x[10*idx[jrow]+1];
1629       sum3  += v[jrow]*x[10*idx[jrow]+2];
1630       sum4  += v[jrow]*x[10*idx[jrow]+3];
1631       sum5  += v[jrow]*x[10*idx[jrow]+4];
1632       sum6  += v[jrow]*x[10*idx[jrow]+5];
1633       sum7  += v[jrow]*x[10*idx[jrow]+6];
1634       sum8  += v[jrow]*x[10*idx[jrow]+7];
1635       sum9  += v[jrow]*x[10*idx[jrow]+8];
1636       sum10 += v[jrow]*x[10*idx[jrow]+9];
1637       jrow++;
1638      }
1639     y[10*i]   = sum1;
1640     y[10*i+1] = sum2;
1641     y[10*i+2] = sum3;
1642     y[10*i+3] = sum4;
1643     y[10*i+4] = sum5;
1644     y[10*i+5] = sum6;
1645     y[10*i+6] = sum7;
1646     y[10*i+7] = sum8;
1647     y[10*i+8] = sum9;
1648     y[10*i+9] = sum10;
1649   }
1650 
1651   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1652   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1653   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1659 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1660 {
1661   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1662   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1663   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1664   PetscErrorCode ierr;
1665   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1666   PetscInt       n,i,jrow,j;
1667 
1668   PetscFunctionBegin;
1669   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1670   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1671   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1672   idx  = a->j;
1673   v    = a->a;
1674   ii   = a->i;
1675 
1676   for (i=0; i<m; i++) {
1677     jrow = ii[i];
1678     n    = ii[i+1] - jrow;
1679     sum1  = 0.0;
1680     sum2  = 0.0;
1681     sum3  = 0.0;
1682     sum4  = 0.0;
1683     sum5  = 0.0;
1684     sum6  = 0.0;
1685     sum7  = 0.0;
1686     sum8  = 0.0;
1687     sum9  = 0.0;
1688     sum10 = 0.0;
1689     for (j=0; j<n; j++) {
1690       sum1  += v[jrow]*x[10*idx[jrow]];
1691       sum2  += v[jrow]*x[10*idx[jrow]+1];
1692       sum3  += v[jrow]*x[10*idx[jrow]+2];
1693       sum4  += v[jrow]*x[10*idx[jrow]+3];
1694       sum5  += v[jrow]*x[10*idx[jrow]+4];
1695       sum6  += v[jrow]*x[10*idx[jrow]+5];
1696       sum7  += v[jrow]*x[10*idx[jrow]+6];
1697       sum8  += v[jrow]*x[10*idx[jrow]+7];
1698       sum9  += v[jrow]*x[10*idx[jrow]+8];
1699       sum10 += v[jrow]*x[10*idx[jrow]+9];
1700       jrow++;
1701      }
1702     y[10*i]   += sum1;
1703     y[10*i+1] += sum2;
1704     y[10*i+2] += sum3;
1705     y[10*i+3] += sum4;
1706     y[10*i+4] += sum5;
1707     y[10*i+5] += sum6;
1708     y[10*i+6] += sum7;
1709     y[10*i+7] += sum8;
1710     y[10*i+8] += sum9;
1711     y[10*i+9] += sum10;
1712   }
1713 
1714   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1715   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1716   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1722 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1723 {
1724   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1725   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1726   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1727   PetscErrorCode ierr;
1728   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1729 
1730   PetscFunctionBegin;
1731   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1732   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1733   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1734 
1735   for (i=0; i<m; i++) {
1736     idx    = a->j + a->i[i] ;
1737     v      = a->a + a->i[i] ;
1738     n      = a->i[i+1] - a->i[i];
1739     alpha1 = x[10*i];
1740     alpha2 = x[10*i+1];
1741     alpha3 = x[10*i+2];
1742     alpha4 = x[10*i+3];
1743     alpha5 = x[10*i+4];
1744     alpha6 = x[10*i+5];
1745     alpha7 = x[10*i+6];
1746     alpha8 = x[10*i+7];
1747     alpha9 = x[10*i+8];
1748     alpha10 = x[10*i+9];
1749     while (n-->0) {
1750       y[10*(*idx)]   += alpha1*(*v);
1751       y[10*(*idx)+1] += alpha2*(*v);
1752       y[10*(*idx)+2] += alpha3*(*v);
1753       y[10*(*idx)+3] += alpha4*(*v);
1754       y[10*(*idx)+4] += alpha5*(*v);
1755       y[10*(*idx)+5] += alpha6*(*v);
1756       y[10*(*idx)+6] += alpha7*(*v);
1757       y[10*(*idx)+7] += alpha8*(*v);
1758       y[10*(*idx)+8] += alpha9*(*v);
1759       y[10*(*idx)+9] += alpha10*(*v);
1760       idx++; v++;
1761     }
1762   }
1763   ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr);
1764   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1765   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1771 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1772 {
1773   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1774   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1775   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1776   PetscErrorCode ierr;
1777   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1778 
1779   PetscFunctionBegin;
1780   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1781   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1782   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1783   for (i=0; i<m; i++) {
1784     idx    = a->j + a->i[i] ;
1785     v      = a->a + a->i[i] ;
1786     n      = a->i[i+1] - a->i[i];
1787     alpha1 = x[10*i];
1788     alpha2 = x[10*i+1];
1789     alpha3 = x[10*i+2];
1790     alpha4 = x[10*i+3];
1791     alpha5 = x[10*i+4];
1792     alpha6 = x[10*i+5];
1793     alpha7 = x[10*i+6];
1794     alpha8 = x[10*i+7];
1795     alpha9 = x[10*i+8];
1796     alpha10 = x[10*i+9];
1797     while (n-->0) {
1798       y[10*(*idx)]   += alpha1*(*v);
1799       y[10*(*idx)+1] += alpha2*(*v);
1800       y[10*(*idx)+2] += alpha3*(*v);
1801       y[10*(*idx)+3] += alpha4*(*v);
1802       y[10*(*idx)+4] += alpha5*(*v);
1803       y[10*(*idx)+5] += alpha6*(*v);
1804       y[10*(*idx)+6] += alpha7*(*v);
1805       y[10*(*idx)+7] += alpha8*(*v);
1806       y[10*(*idx)+8] += alpha9*(*v);
1807       y[10*(*idx)+9] += alpha10*(*v);
1808       idx++; v++;
1809     }
1810   }
1811   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1812   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1813   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 
1818 /*--------------------------------------------------------------------------------------------*/
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1821 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1822 {
1823   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1824   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1825   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1826   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1827   PetscErrorCode ierr;
1828   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1829   PetscInt       n,i,jrow,j;
1830 
1831   PetscFunctionBegin;
1832   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1833   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1834   idx  = a->j;
1835   v    = a->a;
1836   ii   = a->i;
1837 
1838   for (i=0; i<m; i++) {
1839     jrow = ii[i];
1840     n    = ii[i+1] - jrow;
1841     sum1  = 0.0;
1842     sum2  = 0.0;
1843     sum3  = 0.0;
1844     sum4  = 0.0;
1845     sum5  = 0.0;
1846     sum6  = 0.0;
1847     sum7  = 0.0;
1848     sum8  = 0.0;
1849     sum9  = 0.0;
1850     sum10 = 0.0;
1851     sum11 = 0.0;
1852     sum12 = 0.0;
1853     sum13 = 0.0;
1854     sum14 = 0.0;
1855     sum15 = 0.0;
1856     sum16 = 0.0;
1857     for (j=0; j<n; j++) {
1858       sum1  += v[jrow]*x[16*idx[jrow]];
1859       sum2  += v[jrow]*x[16*idx[jrow]+1];
1860       sum3  += v[jrow]*x[16*idx[jrow]+2];
1861       sum4  += v[jrow]*x[16*idx[jrow]+3];
1862       sum5  += v[jrow]*x[16*idx[jrow]+4];
1863       sum6  += v[jrow]*x[16*idx[jrow]+5];
1864       sum7  += v[jrow]*x[16*idx[jrow]+6];
1865       sum8  += v[jrow]*x[16*idx[jrow]+7];
1866       sum9  += v[jrow]*x[16*idx[jrow]+8];
1867       sum10 += v[jrow]*x[16*idx[jrow]+9];
1868       sum11 += v[jrow]*x[16*idx[jrow]+10];
1869       sum12 += v[jrow]*x[16*idx[jrow]+11];
1870       sum13 += v[jrow]*x[16*idx[jrow]+12];
1871       sum14 += v[jrow]*x[16*idx[jrow]+13];
1872       sum15 += v[jrow]*x[16*idx[jrow]+14];
1873       sum16 += v[jrow]*x[16*idx[jrow]+15];
1874       jrow++;
1875      }
1876     y[16*i]    = sum1;
1877     y[16*i+1]  = sum2;
1878     y[16*i+2]  = sum3;
1879     y[16*i+3]  = sum4;
1880     y[16*i+4]  = sum5;
1881     y[16*i+5]  = sum6;
1882     y[16*i+6]  = sum7;
1883     y[16*i+7]  = sum8;
1884     y[16*i+8]  = sum9;
1885     y[16*i+9]  = sum10;
1886     y[16*i+10] = sum11;
1887     y[16*i+11] = sum12;
1888     y[16*i+12] = sum13;
1889     y[16*i+13] = sum14;
1890     y[16*i+14] = sum15;
1891     y[16*i+15] = sum16;
1892   }
1893 
1894   ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr);
1895   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1896   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1902 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1903 {
1904   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1905   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1906   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1907   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1908   PetscErrorCode ierr;
1909   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1910 
1911   PetscFunctionBegin;
1912   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1913   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1914   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1915 
1916   for (i=0; i<m; i++) {
1917     idx    = a->j + a->i[i] ;
1918     v      = a->a + a->i[i] ;
1919     n      = a->i[i+1] - a->i[i];
1920     alpha1  = x[16*i];
1921     alpha2  = x[16*i+1];
1922     alpha3  = x[16*i+2];
1923     alpha4  = x[16*i+3];
1924     alpha5  = x[16*i+4];
1925     alpha6  = x[16*i+5];
1926     alpha7  = x[16*i+6];
1927     alpha8  = x[16*i+7];
1928     alpha9  = x[16*i+8];
1929     alpha10 = x[16*i+9];
1930     alpha11 = x[16*i+10];
1931     alpha12 = x[16*i+11];
1932     alpha13 = x[16*i+12];
1933     alpha14 = x[16*i+13];
1934     alpha15 = x[16*i+14];
1935     alpha16 = x[16*i+15];
1936     while (n-->0) {
1937       y[16*(*idx)]    += alpha1*(*v);
1938       y[16*(*idx)+1]  += alpha2*(*v);
1939       y[16*(*idx)+2]  += alpha3*(*v);
1940       y[16*(*idx)+3]  += alpha4*(*v);
1941       y[16*(*idx)+4]  += alpha5*(*v);
1942       y[16*(*idx)+5]  += alpha6*(*v);
1943       y[16*(*idx)+6]  += alpha7*(*v);
1944       y[16*(*idx)+7]  += alpha8*(*v);
1945       y[16*(*idx)+8]  += alpha9*(*v);
1946       y[16*(*idx)+9]  += alpha10*(*v);
1947       y[16*(*idx)+10] += alpha11*(*v);
1948       y[16*(*idx)+11] += alpha12*(*v);
1949       y[16*(*idx)+12] += alpha13*(*v);
1950       y[16*(*idx)+13] += alpha14*(*v);
1951       y[16*(*idx)+14] += alpha15*(*v);
1952       y[16*(*idx)+15] += alpha16*(*v);
1953       idx++; v++;
1954     }
1955   }
1956   ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr);
1957   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1958   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1964 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1965 {
1966   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1967   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1968   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1969   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1970   PetscErrorCode ierr;
1971   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1972   PetscInt       n,i,jrow,j;
1973 
1974   PetscFunctionBegin;
1975   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1976   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1977   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1978   idx  = a->j;
1979   v    = a->a;
1980   ii   = a->i;
1981 
1982   for (i=0; i<m; i++) {
1983     jrow = ii[i];
1984     n    = ii[i+1] - jrow;
1985     sum1  = 0.0;
1986     sum2  = 0.0;
1987     sum3  = 0.0;
1988     sum4  = 0.0;
1989     sum5  = 0.0;
1990     sum6  = 0.0;
1991     sum7  = 0.0;
1992     sum8  = 0.0;
1993     sum9  = 0.0;
1994     sum10 = 0.0;
1995     sum11 = 0.0;
1996     sum12 = 0.0;
1997     sum13 = 0.0;
1998     sum14 = 0.0;
1999     sum15 = 0.0;
2000     sum16 = 0.0;
2001     for (j=0; j<n; j++) {
2002       sum1  += v[jrow]*x[16*idx[jrow]];
2003       sum2  += v[jrow]*x[16*idx[jrow]+1];
2004       sum3  += v[jrow]*x[16*idx[jrow]+2];
2005       sum4  += v[jrow]*x[16*idx[jrow]+3];
2006       sum5  += v[jrow]*x[16*idx[jrow]+4];
2007       sum6  += v[jrow]*x[16*idx[jrow]+5];
2008       sum7  += v[jrow]*x[16*idx[jrow]+6];
2009       sum8  += v[jrow]*x[16*idx[jrow]+7];
2010       sum9  += v[jrow]*x[16*idx[jrow]+8];
2011       sum10 += v[jrow]*x[16*idx[jrow]+9];
2012       sum11 += v[jrow]*x[16*idx[jrow]+10];
2013       sum12 += v[jrow]*x[16*idx[jrow]+11];
2014       sum13 += v[jrow]*x[16*idx[jrow]+12];
2015       sum14 += v[jrow]*x[16*idx[jrow]+13];
2016       sum15 += v[jrow]*x[16*idx[jrow]+14];
2017       sum16 += v[jrow]*x[16*idx[jrow]+15];
2018       jrow++;
2019      }
2020     y[16*i]    += sum1;
2021     y[16*i+1]  += sum2;
2022     y[16*i+2]  += sum3;
2023     y[16*i+3]  += sum4;
2024     y[16*i+4]  += sum5;
2025     y[16*i+5]  += sum6;
2026     y[16*i+6]  += sum7;
2027     y[16*i+7]  += sum8;
2028     y[16*i+8]  += sum9;
2029     y[16*i+9]  += sum10;
2030     y[16*i+10] += sum11;
2031     y[16*i+11] += sum12;
2032     y[16*i+12] += sum13;
2033     y[16*i+13] += sum14;
2034     y[16*i+14] += sum15;
2035     y[16*i+15] += sum16;
2036   }
2037 
2038   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2039   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2040   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2046 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2047 {
2048   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2049   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2050   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2051   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2052   PetscErrorCode ierr;
2053   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
2054 
2055   PetscFunctionBegin;
2056   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2057   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2058   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2059   for (i=0; i<m; i++) {
2060     idx    = a->j + a->i[i] ;
2061     v      = a->a + a->i[i] ;
2062     n      = a->i[i+1] - a->i[i];
2063     alpha1 = x[16*i];
2064     alpha2 = x[16*i+1];
2065     alpha3 = x[16*i+2];
2066     alpha4 = x[16*i+3];
2067     alpha5 = x[16*i+4];
2068     alpha6 = x[16*i+5];
2069     alpha7 = x[16*i+6];
2070     alpha8 = x[16*i+7];
2071     alpha9  = x[16*i+8];
2072     alpha10 = x[16*i+9];
2073     alpha11 = x[16*i+10];
2074     alpha12 = x[16*i+11];
2075     alpha13 = x[16*i+12];
2076     alpha14 = x[16*i+13];
2077     alpha15 = x[16*i+14];
2078     alpha16 = x[16*i+15];
2079     while (n-->0) {
2080       y[16*(*idx)]   += alpha1*(*v);
2081       y[16*(*idx)+1] += alpha2*(*v);
2082       y[16*(*idx)+2] += alpha3*(*v);
2083       y[16*(*idx)+3] += alpha4*(*v);
2084       y[16*(*idx)+4] += alpha5*(*v);
2085       y[16*(*idx)+5] += alpha6*(*v);
2086       y[16*(*idx)+6] += alpha7*(*v);
2087       y[16*(*idx)+7] += alpha8*(*v);
2088       y[16*(*idx)+8]  += alpha9*(*v);
2089       y[16*(*idx)+9]  += alpha10*(*v);
2090       y[16*(*idx)+10] += alpha11*(*v);
2091       y[16*(*idx)+11] += alpha12*(*v);
2092       y[16*(*idx)+12] += alpha13*(*v);
2093       y[16*(*idx)+13] += alpha14*(*v);
2094       y[16*(*idx)+14] += alpha15*(*v);
2095       y[16*(*idx)+15] += alpha16*(*v);
2096       idx++; v++;
2097     }
2098   }
2099   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2100   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2101   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 /*===================================================================================*/
2106 #undef __FUNCT__
2107 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2108 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2109 {
2110   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2111   PetscErrorCode ierr;
2112 
2113   PetscFunctionBegin;
2114   /* start the scatter */
2115   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2116   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2117   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2118   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2124 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2125 {
2126   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2131   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2132   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2133   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2139 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2140 {
2141   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   /* start the scatter */
2146   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2147   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2148   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2149   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2155 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2156 {
2157   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2162   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2163   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2164   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2170 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2171 {
2172   /* This routine requires testing -- but it's getting better. */
2173   PetscErrorCode     ierr;
2174   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2175   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2176   Mat                P=pp->AIJ;
2177   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2178   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2179   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2180   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2181   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2182   MatScalar          *ca;
2183 
2184   PetscFunctionBegin;
2185   /* Start timer */
2186   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2187 
2188   /* Get ij structure of P^T */
2189   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2190 
2191   cn = pn*ppdof;
2192   /* Allocate ci array, arrays for fill computation and */
2193   /* free space for accumulating nonzero column info */
2194   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2195   ci[0] = 0;
2196 
2197   /* Work arrays for rows of P^T*A */
2198   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
2199   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
2200   ptasparserow = ptadenserow  + an;
2201   denserow     = ptasparserow + an;
2202   sparserow    = denserow     + cn;
2203 
2204   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2205   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2206   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2207   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2208   current_space = free_space;
2209 
2210   /* Determine symbolic info for each row of C: */
2211   for (i=0;i<pn;i++) {
2212     ptnzi  = pti[i+1] - pti[i];
2213     ptJ    = ptj + pti[i];
2214     for (dof=0;dof<ppdof;dof++) {
2215       ptanzi = 0;
2216       /* Determine symbolic row of PtA: */
2217       for (j=0;j<ptnzi;j++) {
2218         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2219         arow = ptJ[j]*ppdof + dof;
2220         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2221         anzj = ai[arow+1] - ai[arow];
2222         ajj  = aj + ai[arow];
2223         for (k=0;k<anzj;k++) {
2224           if (!ptadenserow[ajj[k]]) {
2225             ptadenserow[ajj[k]]    = -1;
2226             ptasparserow[ptanzi++] = ajj[k];
2227           }
2228         }
2229       }
2230       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2231       ptaj = ptasparserow;
2232       cnzi   = 0;
2233       for (j=0;j<ptanzi;j++) {
2234         /* Get offset within block of P */
2235         pshift = *ptaj%ppdof;
2236         /* Get block row of P */
2237         prow = (*ptaj++)/ppdof; /* integer division */
2238         /* P has same number of nonzeros per row as the compressed form */
2239         pnzj = pi[prow+1] - pi[prow];
2240         pjj  = pj + pi[prow];
2241         for (k=0;k<pnzj;k++) {
2242           /* Locations in C are shifted by the offset within the block */
2243           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2244           if (!denserow[pjj[k]*ppdof+pshift]) {
2245             denserow[pjj[k]*ppdof+pshift] = -1;
2246             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2247           }
2248         }
2249       }
2250 
2251       /* sort sparserow */
2252       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2253 
2254       /* If free space is not available, make more free space */
2255       /* Double the amount of total space in the list */
2256       if (current_space->local_remaining<cnzi) {
2257         ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
2258       }
2259 
2260       /* Copy data into free space, and zero out denserows */
2261       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2262       current_space->array           += cnzi;
2263       current_space->local_used      += cnzi;
2264       current_space->local_remaining -= cnzi;
2265 
2266       for (j=0;j<ptanzi;j++) {
2267         ptadenserow[ptasparserow[j]] = 0;
2268       }
2269       for (j=0;j<cnzi;j++) {
2270         denserow[sparserow[j]] = 0;
2271       }
2272       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2273       /*        For now, we will recompute what is needed. */
2274       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2275     }
2276   }
2277   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2278   /* Allocate space for cj, initialize cj, and */
2279   /* destroy list of free space and other temporary array(s) */
2280   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
2281   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2282   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
2283 
2284   /* Allocate space for ca */
2285   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
2286   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
2287 
2288   /* put together the new matrix */
2289   ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2290 
2291   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2292   /* Since these are PETSc arrays, change flags to free them as necessary. */
2293   c          = (Mat_SeqAIJ *)((*C)->data);
2294   c->free_a  = PETSC_TRUE;
2295   c->free_ij = PETSC_TRUE;
2296   c->nonew   = 0;
2297 
2298   /* Clean up. */
2299   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2300 
2301   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 #undef __FUNCT__
2306 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
2307 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2308 {
2309   /* This routine requires testing -- first draft only */
2310   PetscErrorCode ierr;
2311   PetscInt       flops=0;
2312   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2313   Mat            P=pp->AIJ;
2314   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2315   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2316   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2317   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2318   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2319   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2320   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2321   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2322 
2323   PetscFunctionBegin;
2324   /* Allocate temporary array for storage of one row of A*P */
2325   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
2326   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
2327 
2328   apj      = (PetscInt *)(apa + cn);
2329   apjdense = apj + cn;
2330 
2331   /* Clear old values in C */
2332   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
2333 
2334   for (i=0;i<am;i++) {
2335     /* Form sparse row of A*P */
2336     anzi  = ai[i+1] - ai[i];
2337     apnzj = 0;
2338     for (j=0;j<anzi;j++) {
2339       /* Get offset within block of P */
2340       pshift = *aj%ppdof;
2341       /* Get block row of P */
2342       prow   = *aj++/ppdof; /* integer division */
2343       pnzj = pi[prow+1] - pi[prow];
2344       pjj  = pj + pi[prow];
2345       paj  = pa + pi[prow];
2346       for (k=0;k<pnzj;k++) {
2347         poffset = pjj[k]*ppdof+pshift;
2348         if (!apjdense[poffset]) {
2349           apjdense[poffset] = -1;
2350           apj[apnzj++]      = poffset;
2351         }
2352         apa[poffset] += (*aa)*paj[k];
2353       }
2354       flops += 2*pnzj;
2355       aa++;
2356     }
2357 
2358     /* Sort the j index array for quick sparse axpy. */
2359     /* Note: a array does not need sorting as it is in dense storage locations. */
2360     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
2361 
2362     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2363     prow    = i/ppdof; /* integer division */
2364     pshift  = i%ppdof;
2365     poffset = pi[prow];
2366     pnzi = pi[prow+1] - poffset;
2367     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2368     pJ   = pj+poffset;
2369     pA   = pa+poffset;
2370     for (j=0;j<pnzi;j++) {
2371       crow   = (*pJ)*ppdof+pshift;
2372       cjj    = cj + ci[crow];
2373       caj    = ca + ci[crow];
2374       pJ++;
2375       /* Perform sparse axpy operation.  Note cjj includes apj. */
2376       for (k=0,nextap=0;nextap<apnzj;k++) {
2377         if (cjj[k]==apj[nextap]) {
2378           caj[k] += (*pA)*apa[apj[nextap++]];
2379         }
2380       }
2381       flops += 2*apnzj;
2382       pA++;
2383     }
2384 
2385     /* Zero the current row info for A*P */
2386     for (j=0;j<apnzj;j++) {
2387       apa[apj[j]]      = 0.;
2388       apjdense[apj[j]] = 0;
2389     }
2390   }
2391 
2392   /* Assemble the final matrix and clean up */
2393   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2395   ierr = PetscFree(apa);CHKERRQ(ierr);
2396   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
2397 
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 EXTERN_C_BEGIN
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2404 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2405 {
2406   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2407   Mat               a = b->AIJ,B;
2408   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2409   PetscErrorCode    ierr;
2410   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2411   PetscInt          *cols;
2412   PetscScalar       *vals;
2413 
2414   PetscFunctionBegin;
2415   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
2416   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
2417   for (i=0; i<m; i++) {
2418     nmax = PetscMax(nmax,aij->ilen[i]);
2419     for (j=0; j<dof; j++) {
2420       ilen[dof*i+j] = aij->ilen[i];
2421     }
2422   }
2423   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
2424   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2425   ierr = PetscFree(ilen);CHKERRQ(ierr);
2426   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
2427   ii   = 0;
2428   for (i=0; i<m; i++) {
2429     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2430     for (j=0; j<dof; j++) {
2431       for (k=0; k<ncols; k++) {
2432         icols[k] = dof*cols[k]+j;
2433       }
2434       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2435       ii++;
2436     }
2437     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2438   }
2439   ierr = PetscFree(icols);CHKERRQ(ierr);
2440   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2441   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2442 
2443   if (reuse == MAT_REUSE_MATRIX) {
2444     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2445   } else {
2446     *newmat = B;
2447   }
2448   PetscFunctionReturn(0);
2449 }
2450 EXTERN_C_END
2451 
2452 #include "src/mat/impls/aij/mpi/mpiaij.h"
2453 
2454 EXTERN_C_BEGIN
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2457 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2458 {
2459   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2460   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2461   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2462   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2463   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2464   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2465   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2466   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2467   PetscInt          rstart,cstart,*garray,ii,k;
2468   PetscErrorCode    ierr;
2469   PetscScalar       *vals,*ovals;
2470 
2471   PetscFunctionBegin;
2472   ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr);
2473   for (i=0; i<A->rmap.n/dof; i++) {
2474     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2475     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2476     for (j=0; j<dof; j++) {
2477       dnz[dof*i+j] = AIJ->ilen[i];
2478       onz[dof*i+j] = OAIJ->ilen[i];
2479     }
2480   }
2481   ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2482   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2483   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
2484 
2485   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
2486   rstart = dof*maij->A->rmap.rstart;
2487   cstart = dof*maij->A->cmap.rstart;
2488   garray = mpiaij->garray;
2489 
2490   ii = rstart;
2491   for (i=0; i<A->rmap.n/dof; i++) {
2492     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2493     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2494     for (j=0; j<dof; j++) {
2495       for (k=0; k<ncols; k++) {
2496         icols[k] = cstart + dof*cols[k]+j;
2497       }
2498       for (k=0; k<oncols; k++) {
2499         oicols[k] = dof*garray[ocols[k]]+j;
2500       }
2501       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2502       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
2503       ii++;
2504     }
2505     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2506     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2507   }
2508   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
2509 
2510   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2511   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2512 
2513   if (reuse == MAT_REUSE_MATRIX) {
2514     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2515   } else {
2516     *newmat = B;
2517   }
2518   PetscFunctionReturn(0);
2519 }
2520 EXTERN_C_END
2521 
2522 
2523 /* ---------------------------------------------------------------------------------- */
2524 /*MC
2525   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2526   operations for multicomponent problems.  It interpolates each component the same
2527   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2528   and MATMPIAIJ for distributed matrices.
2529 
2530   Operations provided:
2531 + MatMult
2532 . MatMultTranspose
2533 . MatMultAdd
2534 . MatMultTransposeAdd
2535 - MatView
2536 
2537   Level: advanced
2538 
2539 M*/
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatCreateMAIJ"
2542 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2543 {
2544   PetscErrorCode ierr;
2545   PetscMPIInt    size;
2546   PetscInt       n;
2547   Mat_MPIMAIJ    *b;
2548   Mat            B;
2549 
2550   PetscFunctionBegin;
2551   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2552 
2553   if (dof == 1) {
2554     *maij = A;
2555   } else {
2556     ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
2557     ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr);
2558     B->assembled    = PETSC_TRUE;
2559 
2560     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2561     if (size == 1) {
2562       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
2563       B->ops->destroy = MatDestroy_SeqMAIJ;
2564       B->ops->view    = MatView_SeqMAIJ;
2565       b      = (Mat_MPIMAIJ*)B->data;
2566       b->dof = dof;
2567       b->AIJ = A;
2568       if (dof == 2) {
2569         B->ops->mult             = MatMult_SeqMAIJ_2;
2570         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2571         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2572         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2573       } else if (dof == 3) {
2574         B->ops->mult             = MatMult_SeqMAIJ_3;
2575         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2576         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2577         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2578       } else if (dof == 4) {
2579         B->ops->mult             = MatMult_SeqMAIJ_4;
2580         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2581         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2582         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2583       } else if (dof == 5) {
2584         B->ops->mult             = MatMult_SeqMAIJ_5;
2585         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2586         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2587         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2588       } else if (dof == 6) {
2589         B->ops->mult             = MatMult_SeqMAIJ_6;
2590         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2591         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2592         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2593       } else if (dof == 7) {
2594         B->ops->mult             = MatMult_SeqMAIJ_7;
2595         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2596         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2597         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2598       } else if (dof == 8) {
2599         B->ops->mult             = MatMult_SeqMAIJ_8;
2600         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2601         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2602         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2603       } else if (dof == 9) {
2604         B->ops->mult             = MatMult_SeqMAIJ_9;
2605         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2606         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2607         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2608       } else if (dof == 10) {
2609         B->ops->mult             = MatMult_SeqMAIJ_10;
2610         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2611         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2612         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2613       } else if (dof == 16) {
2614         B->ops->mult             = MatMult_SeqMAIJ_16;
2615         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2616         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2617         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2618       } else {
2619         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2620       }
2621       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2622       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2623       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2624     } else {
2625       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2626       IS         from,to;
2627       Vec        gvec;
2628       PetscInt   *garray,i;
2629 
2630       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2631       B->ops->destroy = MatDestroy_MPIMAIJ;
2632       B->ops->view    = MatView_MPIMAIJ;
2633       b      = (Mat_MPIMAIJ*)B->data;
2634       b->dof = dof;
2635       b->A   = A;
2636       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
2637       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
2638 
2639       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2640       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
2641       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2642 
2643       /* create two temporary Index sets for build scatter gather */
2644       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2645       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2646       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
2647       ierr = PetscFree(garray);CHKERRQ(ierr);
2648       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2649 
2650       /* create temporary global vector to generate scatter context */
2651       ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr);
2652       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2653 
2654       /* generate the scatter context */
2655       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
2656 
2657       ierr = ISDestroy(from);CHKERRQ(ierr);
2658       ierr = ISDestroy(to);CHKERRQ(ierr);
2659       ierr = VecDestroy(gvec);CHKERRQ(ierr);
2660 
2661       B->ops->mult             = MatMult_MPIMAIJ_dof;
2662       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2663       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2664       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2665       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
2666     }
2667     *maij = B;
2668     ierr = MatView_Private(B);CHKERRQ(ierr);
2669   }
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 
2674 
2675 
2676 
2677 
2678 
2679 
2680 
2681 
2682 
2683 
2684