xref: /petsc/src/mat/impls/maij/maij.c (revision b087b6d67c111367ed6b45cf2d6309e4e2fcfd8d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the MAIJ  matrix storage format.
5   This format is used for restriction and interpolation operations for
6   multicomponent problems. It interpolates each component the same way
7   independently.
8 
9      We provide:
10          MatMult()
11          MatMultTranspose()
12          MatMultTransposeAdd()
13          MatMultAdd()
14           and
15          MatCreateMAIJ(Mat,dof,Mat*)
16 
17      This single directory handles both the sequential and parallel codes
18 */
19 
20 #include "src/mat/impls/maij/maij.h"
21 #include "src/mat/utils/freespace.h"
22 #include "private/vecimpl.h"
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatMAIJGetAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27 {
28   PetscErrorCode ierr;
29   PetscTruth     ismpimaij,isseqmaij;
30 
31   PetscFunctionBegin;
32   ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr);
33   ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr);
34   if (ismpimaij) {
35     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
36 
37     *B = b->A;
38   } else if (isseqmaij) {
39     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
40 
41     *B = b->AIJ;
42   } else {
43     *B = A;
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMAIJRedimension"
50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51 {
52   PetscErrorCode ierr;
53   Mat            Aij;
54 
55   PetscFunctionBegin;
56   ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr);
57   ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatDestroy_SeqMAIJ"
63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64 {
65   PetscErrorCode ierr;
66   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
67 
68   PetscFunctionBegin;
69   if (b->AIJ) {
70     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
71   }
72   ierr = PetscFree(b);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatView_SeqMAIJ"
78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79 {
80   PetscErrorCode ierr;
81   Mat            B;
82 
83   PetscFunctionBegin;
84   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85   ierr = MatView(B,viewer);CHKERRQ(ierr);
86   ierr = MatDestroy(B);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatView_MPIMAIJ"
92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93 {
94   PetscErrorCode ierr;
95   Mat            B;
96 
97   PetscFunctionBegin;
98   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99   ierr = MatView(B,viewer);CHKERRQ(ierr);
100   ierr = MatDestroy(B);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatDestroy_MPIMAIJ"
106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107 {
108   PetscErrorCode ierr;
109   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
110 
111   PetscFunctionBegin;
112   if (b->AIJ) {
113     ierr = MatDestroy(b->AIJ);CHKERRQ(ierr);
114   }
115   if (b->OAIJ) {
116     ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr);
117   }
118   if (b->A) {
119     ierr = MatDestroy(b->A);CHKERRQ(ierr);
120   }
121   if (b->ctx) {
122     ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr);
123   }
124   if (b->w) {
125     ierr = VecDestroy(b->w);CHKERRQ(ierr);
126   }
127   ierr = PetscFree(b);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 /*MC
132   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
133   multicomponent problems, interpolating or restricting each component the same way independently.
134   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
135 
136   Operations provided:
137 . MatMult
138 . MatMultTranspose
139 . MatMultAdd
140 . MatMultTransposeAdd
141 
142   Level: advanced
143 
144 .seealso: MatCreateSeqDense
145 M*/
146 
147 EXTERN_C_BEGIN
148 #undef __FUNCT__
149 #define __FUNCT__ "MatCreate_MAIJ"
150 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
151 {
152   PetscErrorCode ierr;
153   Mat_MPIMAIJ    *b;
154   PetscMPIInt    size;
155 
156   PetscFunctionBegin;
157   ierr     = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr);
158   A->data  = (void*)b;
159   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
160   A->factor           = 0;
161   A->mapping          = 0;
162 
163   b->AIJ  = 0;
164   b->dof  = 0;
165   b->OAIJ = 0;
166   b->ctx  = 0;
167   b->w    = 0;
168   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
169   if (size == 1){
170     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr);
171   } else {
172     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr);
173   }
174   PetscFunctionReturn(0);
175 }
176 EXTERN_C_END
177 
178 /* --------------------------------------------------------------------------------------*/
179 #undef __FUNCT__
180 #define __FUNCT__ "MatMult_SeqMAIJ_2"
181 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
182 {
183   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
184   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
185   PetscScalar    *x,*y,*v,sum1, sum2;
186   PetscErrorCode ierr;
187   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
188   PetscInt       n,i,jrow,j;
189 
190   PetscFunctionBegin;
191   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
192   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
193   idx  = a->j;
194   v    = a->a;
195   ii   = a->i;
196 
197   for (i=0; i<m; i++) {
198     jrow = ii[i];
199     n    = ii[i+1] - jrow;
200     sum1  = 0.0;
201     sum2  = 0.0;
202     for (j=0; j<n; j++) {
203       sum1 += v[jrow]*x[2*idx[jrow]];
204       sum2 += v[jrow]*x[2*idx[jrow]+1];
205       jrow++;
206      }
207     y[2*i]   = sum1;
208     y[2*i+1] = sum2;
209   }
210 
211   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
212   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
213   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
214   PetscFunctionReturn(0);
215 }
216 
217 #undef __FUNCT__
218 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2"
219 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
220 {
221   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
222   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
223   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
224   PetscErrorCode ierr;
225   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
226 
227   PetscFunctionBegin;
228   ierr = VecSet(yy,zero);CHKERRQ(ierr);
229   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
230   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
231 
232   for (i=0; i<m; i++) {
233     idx    = a->j + a->i[i] ;
234     v      = a->a + a->i[i] ;
235     n      = a->i[i+1] - a->i[i];
236     alpha1 = x[2*i];
237     alpha2 = x[2*i+1];
238     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
239   }
240   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
241   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2"
248 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
249 {
250   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
251   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
252   PetscScalar    *x,*y,*v,sum1, sum2;
253   PetscErrorCode ierr;
254   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
255   PetscInt       n,i,jrow,j;
256 
257   PetscFunctionBegin;
258   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
259   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
260   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
261   idx  = a->j;
262   v    = a->a;
263   ii   = a->i;
264 
265   for (i=0; i<m; i++) {
266     jrow = ii[i];
267     n    = ii[i+1] - jrow;
268     sum1  = 0.0;
269     sum2  = 0.0;
270     for (j=0; j<n; j++) {
271       sum1 += v[jrow]*x[2*idx[jrow]];
272       sum2 += v[jrow]*x[2*idx[jrow]+1];
273       jrow++;
274      }
275     y[2*i]   += sum1;
276     y[2*i+1] += sum2;
277   }
278 
279   ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr);
280   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
281   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 #undef __FUNCT__
285 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2"
286 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
287 {
288   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
289   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
290   PetscScalar    *x,*y,*v,alpha1,alpha2;
291   PetscErrorCode ierr;
292   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
293 
294   PetscFunctionBegin;
295   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
296   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
297   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
298 
299   for (i=0; i<m; i++) {
300     idx   = a->j + a->i[i] ;
301     v     = a->a + a->i[i] ;
302     n     = a->i[i+1] - a->i[i];
303     alpha1 = x[2*i];
304     alpha2 = x[2*i+1];
305     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
306   }
307   ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr);
308   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
309   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
310   PetscFunctionReturn(0);
311 }
312 /* --------------------------------------------------------------------------------------*/
313 #undef __FUNCT__
314 #define __FUNCT__ "MatMult_SeqMAIJ_3"
315 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316 {
317   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
318   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
319   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
320   PetscErrorCode ierr;
321   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
322   PetscInt       n,i,jrow,j;
323 
324   PetscFunctionBegin;
325   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
326   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
327   idx  = a->j;
328   v    = a->a;
329   ii   = a->i;
330 
331   for (i=0; i<m; i++) {
332     jrow = ii[i];
333     n    = ii[i+1] - jrow;
334     sum1  = 0.0;
335     sum2  = 0.0;
336     sum3  = 0.0;
337     for (j=0; j<n; j++) {
338       sum1 += v[jrow]*x[3*idx[jrow]];
339       sum2 += v[jrow]*x[3*idx[jrow]+1];
340       sum3 += v[jrow]*x[3*idx[jrow]+2];
341       jrow++;
342      }
343     y[3*i]   = sum1;
344     y[3*i+1] = sum2;
345     y[3*i+2] = sum3;
346   }
347 
348   ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr);
349   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
350   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
351   PetscFunctionReturn(0);
352 }
353 
354 #undef __FUNCT__
355 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3"
356 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
357 {
358   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
359   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
360   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
361   PetscErrorCode ierr;
362   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
363 
364   PetscFunctionBegin;
365   ierr = VecSet(yy,zero);CHKERRQ(ierr);
366   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
367   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
368 
369   for (i=0; i<m; i++) {
370     idx    = a->j + a->i[i];
371     v      = a->a + a->i[i];
372     n      = a->i[i+1] - a->i[i];
373     alpha1 = x[3*i];
374     alpha2 = x[3*i+1];
375     alpha3 = x[3*i+2];
376     while (n-->0) {
377       y[3*(*idx)]   += alpha1*(*v);
378       y[3*(*idx)+1] += alpha2*(*v);
379       y[3*(*idx)+2] += alpha3*(*v);
380       idx++; v++;
381     }
382   }
383   ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr);
384   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
385   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3"
391 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
392 {
393   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
394   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
395   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
396   PetscErrorCode ierr;
397   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
398   PetscInt       n,i,jrow,j;
399 
400   PetscFunctionBegin;
401   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
402   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
403   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
404   idx  = a->j;
405   v    = a->a;
406   ii   = a->i;
407 
408   for (i=0; i<m; i++) {
409     jrow = ii[i];
410     n    = ii[i+1] - jrow;
411     sum1  = 0.0;
412     sum2  = 0.0;
413     sum3  = 0.0;
414     for (j=0; j<n; j++) {
415       sum1 += v[jrow]*x[3*idx[jrow]];
416       sum2 += v[jrow]*x[3*idx[jrow]+1];
417       sum3 += v[jrow]*x[3*idx[jrow]+2];
418       jrow++;
419      }
420     y[3*i]   += sum1;
421     y[3*i+1] += sum2;
422     y[3*i+2] += sum3;
423   }
424 
425   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
426   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
427   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 #undef __FUNCT__
431 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3"
432 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
433 {
434   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
435   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
436   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
437   PetscErrorCode ierr;
438   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
439 
440   PetscFunctionBegin;
441   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
442   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
443   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
444   for (i=0; i<m; i++) {
445     idx    = a->j + a->i[i] ;
446     v      = a->a + a->i[i] ;
447     n      = a->i[i+1] - a->i[i];
448     alpha1 = x[3*i];
449     alpha2 = x[3*i+1];
450     alpha3 = x[3*i+2];
451     while (n-->0) {
452       y[3*(*idx)]   += alpha1*(*v);
453       y[3*(*idx)+1] += alpha2*(*v);
454       y[3*(*idx)+2] += alpha3*(*v);
455       idx++; v++;
456     }
457   }
458   ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr);
459   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
460   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 /* ------------------------------------------------------------------------------*/
465 #undef __FUNCT__
466 #define __FUNCT__ "MatMult_SeqMAIJ_4"
467 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
468 {
469   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
470   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
471   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
472   PetscErrorCode ierr;
473   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
474   PetscInt       n,i,jrow,j;
475 
476   PetscFunctionBegin;
477   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
478   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
479   idx  = a->j;
480   v    = a->a;
481   ii   = a->i;
482 
483   for (i=0; i<m; i++) {
484     jrow = ii[i];
485     n    = ii[i+1] - jrow;
486     sum1  = 0.0;
487     sum2  = 0.0;
488     sum3  = 0.0;
489     sum4  = 0.0;
490     for (j=0; j<n; j++) {
491       sum1 += v[jrow]*x[4*idx[jrow]];
492       sum2 += v[jrow]*x[4*idx[jrow]+1];
493       sum3 += v[jrow]*x[4*idx[jrow]+2];
494       sum4 += v[jrow]*x[4*idx[jrow]+3];
495       jrow++;
496      }
497     y[4*i]   = sum1;
498     y[4*i+1] = sum2;
499     y[4*i+2] = sum3;
500     y[4*i+3] = sum4;
501   }
502 
503   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
504   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
505   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4"
511 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
512 {
513   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
514   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
515   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
516   PetscErrorCode ierr;
517   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
518 
519   PetscFunctionBegin;
520   ierr = VecSet(yy,zero);CHKERRQ(ierr);
521   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
522   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
523   for (i=0; i<m; i++) {
524     idx    = a->j + a->i[i] ;
525     v      = a->a + a->i[i] ;
526     n      = a->i[i+1] - a->i[i];
527     alpha1 = x[4*i];
528     alpha2 = x[4*i+1];
529     alpha3 = x[4*i+2];
530     alpha4 = x[4*i+3];
531     while (n-->0) {
532       y[4*(*idx)]   += alpha1*(*v);
533       y[4*(*idx)+1] += alpha2*(*v);
534       y[4*(*idx)+2] += alpha3*(*v);
535       y[4*(*idx)+3] += alpha4*(*v);
536       idx++; v++;
537     }
538   }
539   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
540   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
541   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
542   PetscFunctionReturn(0);
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4"
547 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
548 {
549   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
550   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
551   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
552   PetscErrorCode ierr;
553   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
554   PetscInt       n,i,jrow,j;
555 
556   PetscFunctionBegin;
557   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
558   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
559   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
560   idx  = a->j;
561   v    = a->a;
562   ii   = a->i;
563 
564   for (i=0; i<m; i++) {
565     jrow = ii[i];
566     n    = ii[i+1] - jrow;
567     sum1  = 0.0;
568     sum2  = 0.0;
569     sum3  = 0.0;
570     sum4  = 0.0;
571     for (j=0; j<n; j++) {
572       sum1 += v[jrow]*x[4*idx[jrow]];
573       sum2 += v[jrow]*x[4*idx[jrow]+1];
574       sum3 += v[jrow]*x[4*idx[jrow]+2];
575       sum4 += v[jrow]*x[4*idx[jrow]+3];
576       jrow++;
577      }
578     y[4*i]   += sum1;
579     y[4*i+1] += sum2;
580     y[4*i+2] += sum3;
581     y[4*i+3] += sum4;
582   }
583 
584   ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr);
585   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
586   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
587   PetscFunctionReturn(0);
588 }
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4"
591 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
594   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
595   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
596   PetscErrorCode ierr;
597   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
598 
599   PetscFunctionBegin;
600   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
601   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
602   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
603 
604   for (i=0; i<m; i++) {
605     idx    = a->j + a->i[i] ;
606     v      = a->a + a->i[i] ;
607     n      = a->i[i+1] - a->i[i];
608     alpha1 = x[4*i];
609     alpha2 = x[4*i+1];
610     alpha3 = x[4*i+2];
611     alpha4 = x[4*i+3];
612     while (n-->0) {
613       y[4*(*idx)]   += alpha1*(*v);
614       y[4*(*idx)+1] += alpha2*(*v);
615       y[4*(*idx)+2] += alpha3*(*v);
616       y[4*(*idx)+3] += alpha4*(*v);
617       idx++; v++;
618     }
619   }
620   ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr);
621   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
622   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 /* ------------------------------------------------------------------------------*/
626 
627 #undef __FUNCT__
628 #define __FUNCT__ "MatMult_SeqMAIJ_5"
629 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
630 {
631   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
632   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
633   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
634   PetscErrorCode ierr;
635   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
636   PetscInt       n,i,jrow,j;
637 
638   PetscFunctionBegin;
639   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
640   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
641   idx  = a->j;
642   v    = a->a;
643   ii   = a->i;
644 
645   for (i=0; i<m; i++) {
646     jrow = ii[i];
647     n    = ii[i+1] - jrow;
648     sum1  = 0.0;
649     sum2  = 0.0;
650     sum3  = 0.0;
651     sum4  = 0.0;
652     sum5  = 0.0;
653     for (j=0; j<n; j++) {
654       sum1 += v[jrow]*x[5*idx[jrow]];
655       sum2 += v[jrow]*x[5*idx[jrow]+1];
656       sum3 += v[jrow]*x[5*idx[jrow]+2];
657       sum4 += v[jrow]*x[5*idx[jrow]+3];
658       sum5 += v[jrow]*x[5*idx[jrow]+4];
659       jrow++;
660      }
661     y[5*i]   = sum1;
662     y[5*i+1] = sum2;
663     y[5*i+2] = sum3;
664     y[5*i+3] = sum4;
665     y[5*i+4] = sum5;
666   }
667 
668   ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr);
669   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
670   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5"
676 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
677 {
678   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
679   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
680   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
681   PetscErrorCode ierr;
682   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
683 
684   PetscFunctionBegin;
685   ierr = VecSet(yy,zero);CHKERRQ(ierr);
686   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
687   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
688 
689   for (i=0; i<m; i++) {
690     idx    = a->j + a->i[i] ;
691     v      = a->a + a->i[i] ;
692     n      = a->i[i+1] - a->i[i];
693     alpha1 = x[5*i];
694     alpha2 = x[5*i+1];
695     alpha3 = x[5*i+2];
696     alpha4 = x[5*i+3];
697     alpha5 = x[5*i+4];
698     while (n-->0) {
699       y[5*(*idx)]   += alpha1*(*v);
700       y[5*(*idx)+1] += alpha2*(*v);
701       y[5*(*idx)+2] += alpha3*(*v);
702       y[5*(*idx)+3] += alpha4*(*v);
703       y[5*(*idx)+4] += alpha5*(*v);
704       idx++; v++;
705     }
706   }
707   ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr);
708   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
709   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5"
715 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
716 {
717   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
718   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
719   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
720   PetscErrorCode ierr;
721   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
722   PetscInt       n,i,jrow,j;
723 
724   PetscFunctionBegin;
725   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
726   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
727   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
728   idx  = a->j;
729   v    = a->a;
730   ii   = a->i;
731 
732   for (i=0; i<m; i++) {
733     jrow = ii[i];
734     n    = ii[i+1] - jrow;
735     sum1  = 0.0;
736     sum2  = 0.0;
737     sum3  = 0.0;
738     sum4  = 0.0;
739     sum5  = 0.0;
740     for (j=0; j<n; j++) {
741       sum1 += v[jrow]*x[5*idx[jrow]];
742       sum2 += v[jrow]*x[5*idx[jrow]+1];
743       sum3 += v[jrow]*x[5*idx[jrow]+2];
744       sum4 += v[jrow]*x[5*idx[jrow]+3];
745       sum5 += v[jrow]*x[5*idx[jrow]+4];
746       jrow++;
747      }
748     y[5*i]   += sum1;
749     y[5*i+1] += sum2;
750     y[5*i+2] += sum3;
751     y[5*i+3] += sum4;
752     y[5*i+4] += sum5;
753   }
754 
755   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
756   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
757   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
758   PetscFunctionReturn(0);
759 }
760 
761 #undef __FUNCT__
762 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5"
763 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
764 {
765   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
766   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
767   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
768   PetscErrorCode ierr;
769   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
770 
771   PetscFunctionBegin;
772   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
773   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
774   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
775 
776   for (i=0; i<m; i++) {
777     idx    = a->j + a->i[i] ;
778     v      = a->a + a->i[i] ;
779     n      = a->i[i+1] - a->i[i];
780     alpha1 = x[5*i];
781     alpha2 = x[5*i+1];
782     alpha3 = x[5*i+2];
783     alpha4 = x[5*i+3];
784     alpha5 = x[5*i+4];
785     while (n-->0) {
786       y[5*(*idx)]   += alpha1*(*v);
787       y[5*(*idx)+1] += alpha2*(*v);
788       y[5*(*idx)+2] += alpha3*(*v);
789       y[5*(*idx)+3] += alpha4*(*v);
790       y[5*(*idx)+4] += alpha5*(*v);
791       idx++; v++;
792     }
793   }
794   ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr);
795   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
796   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
797   PetscFunctionReturn(0);
798 }
799 
800 /* ------------------------------------------------------------------------------*/
801 #undef __FUNCT__
802 #define __FUNCT__ "MatMult_SeqMAIJ_6"
803 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
804 {
805   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
806   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
807   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
808   PetscErrorCode ierr;
809   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
810   PetscInt       n,i,jrow,j;
811 
812   PetscFunctionBegin;
813   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
814   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
815   idx  = a->j;
816   v    = a->a;
817   ii   = a->i;
818 
819   for (i=0; i<m; i++) {
820     jrow = ii[i];
821     n    = ii[i+1] - jrow;
822     sum1  = 0.0;
823     sum2  = 0.0;
824     sum3  = 0.0;
825     sum4  = 0.0;
826     sum5  = 0.0;
827     sum6  = 0.0;
828     for (j=0; j<n; j++) {
829       sum1 += v[jrow]*x[6*idx[jrow]];
830       sum2 += v[jrow]*x[6*idx[jrow]+1];
831       sum3 += v[jrow]*x[6*idx[jrow]+2];
832       sum4 += v[jrow]*x[6*idx[jrow]+3];
833       sum5 += v[jrow]*x[6*idx[jrow]+4];
834       sum6 += v[jrow]*x[6*idx[jrow]+5];
835       jrow++;
836      }
837     y[6*i]   = sum1;
838     y[6*i+1] = sum2;
839     y[6*i+2] = sum3;
840     y[6*i+3] = sum4;
841     y[6*i+4] = sum5;
842     y[6*i+5] = sum6;
843   }
844 
845   ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr);
846   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
847   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6"
853 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
854 {
855   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
856   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
857   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
858   PetscErrorCode ierr;
859   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
860 
861   PetscFunctionBegin;
862   ierr = VecSet(yy,zero);CHKERRQ(ierr);
863   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
864   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
865 
866   for (i=0; i<m; i++) {
867     idx    = a->j + a->i[i] ;
868     v      = a->a + a->i[i] ;
869     n      = a->i[i+1] - a->i[i];
870     alpha1 = x[6*i];
871     alpha2 = x[6*i+1];
872     alpha3 = x[6*i+2];
873     alpha4 = x[6*i+3];
874     alpha5 = x[6*i+4];
875     alpha6 = x[6*i+5];
876     while (n-->0) {
877       y[6*(*idx)]   += alpha1*(*v);
878       y[6*(*idx)+1] += alpha2*(*v);
879       y[6*(*idx)+2] += alpha3*(*v);
880       y[6*(*idx)+3] += alpha4*(*v);
881       y[6*(*idx)+4] += alpha5*(*v);
882       y[6*(*idx)+5] += alpha6*(*v);
883       idx++; v++;
884     }
885   }
886   ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr);
887   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6"
894 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
895 {
896   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
897   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
898   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
899   PetscErrorCode ierr;
900   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
901   PetscInt       n,i,jrow,j;
902 
903   PetscFunctionBegin;
904   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
905   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
906   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
907   idx  = a->j;
908   v    = a->a;
909   ii   = a->i;
910 
911   for (i=0; i<m; i++) {
912     jrow = ii[i];
913     n    = ii[i+1] - jrow;
914     sum1  = 0.0;
915     sum2  = 0.0;
916     sum3  = 0.0;
917     sum4  = 0.0;
918     sum5  = 0.0;
919     sum6  = 0.0;
920     for (j=0; j<n; j++) {
921       sum1 += v[jrow]*x[6*idx[jrow]];
922       sum2 += v[jrow]*x[6*idx[jrow]+1];
923       sum3 += v[jrow]*x[6*idx[jrow]+2];
924       sum4 += v[jrow]*x[6*idx[jrow]+3];
925       sum5 += v[jrow]*x[6*idx[jrow]+4];
926       sum6 += v[jrow]*x[6*idx[jrow]+5];
927       jrow++;
928      }
929     y[6*i]   += sum1;
930     y[6*i+1] += sum2;
931     y[6*i+2] += sum3;
932     y[6*i+3] += sum4;
933     y[6*i+4] += sum5;
934     y[6*i+5] += sum6;
935   }
936 
937   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 #undef __FUNCT__
944 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6"
945 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
946 {
947   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
948   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
949   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
950   PetscErrorCode ierr;
951   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
952 
953   PetscFunctionBegin;
954   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
955   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
956   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
957 
958   for (i=0; i<m; i++) {
959     idx    = a->j + a->i[i] ;
960     v      = a->a + a->i[i] ;
961     n      = a->i[i+1] - a->i[i];
962     alpha1 = x[6*i];
963     alpha2 = x[6*i+1];
964     alpha3 = x[6*i+2];
965     alpha4 = x[6*i+3];
966     alpha5 = x[6*i+4];
967     alpha6 = x[6*i+5];
968     while (n-->0) {
969       y[6*(*idx)]   += alpha1*(*v);
970       y[6*(*idx)+1] += alpha2*(*v);
971       y[6*(*idx)+2] += alpha3*(*v);
972       y[6*(*idx)+3] += alpha4*(*v);
973       y[6*(*idx)+4] += alpha5*(*v);
974       y[6*(*idx)+5] += alpha6*(*v);
975       idx++; v++;
976     }
977   }
978   ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr);
979   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
980   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 /* ------------------------------------------------------------------------------*/
985 #undef __FUNCT__
986 #define __FUNCT__ "MatMult_SeqMAIJ_7"
987 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
988 {
989   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
990   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
991   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
992   PetscErrorCode ierr;
993   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
994   PetscInt       n,i,jrow,j;
995 
996   PetscFunctionBegin;
997   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
998   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
999   idx  = a->j;
1000   v    = a->a;
1001   ii   = a->i;
1002 
1003   for (i=0; i<m; i++) {
1004     jrow = ii[i];
1005     n    = ii[i+1] - jrow;
1006     sum1  = 0.0;
1007     sum2  = 0.0;
1008     sum3  = 0.0;
1009     sum4  = 0.0;
1010     sum5  = 0.0;
1011     sum6  = 0.0;
1012     sum7  = 0.0;
1013     for (j=0; j<n; j++) {
1014       sum1 += v[jrow]*x[7*idx[jrow]];
1015       sum2 += v[jrow]*x[7*idx[jrow]+1];
1016       sum3 += v[jrow]*x[7*idx[jrow]+2];
1017       sum4 += v[jrow]*x[7*idx[jrow]+3];
1018       sum5 += v[jrow]*x[7*idx[jrow]+4];
1019       sum6 += v[jrow]*x[7*idx[jrow]+5];
1020       sum7 += v[jrow]*x[7*idx[jrow]+6];
1021       jrow++;
1022      }
1023     y[7*i]   = sum1;
1024     y[7*i+1] = sum2;
1025     y[7*i+2] = sum3;
1026     y[7*i+3] = sum4;
1027     y[7*i+4] = sum5;
1028     y[7*i+5] = sum6;
1029     y[7*i+6] = sum7;
1030   }
1031 
1032   ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr);
1033   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1034   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7"
1040 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1041 {
1042   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1043   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1044   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1045   PetscErrorCode ierr;
1046   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1047 
1048   PetscFunctionBegin;
1049   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1050   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1051   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1052 
1053   for (i=0; i<m; i++) {
1054     idx    = a->j + a->i[i] ;
1055     v      = a->a + a->i[i] ;
1056     n      = a->i[i+1] - a->i[i];
1057     alpha1 = x[7*i];
1058     alpha2 = x[7*i+1];
1059     alpha3 = x[7*i+2];
1060     alpha4 = x[7*i+3];
1061     alpha5 = x[7*i+4];
1062     alpha6 = x[7*i+5];
1063     alpha7 = x[7*i+6];
1064     while (n-->0) {
1065       y[7*(*idx)]   += alpha1*(*v);
1066       y[7*(*idx)+1] += alpha2*(*v);
1067       y[7*(*idx)+2] += alpha3*(*v);
1068       y[7*(*idx)+3] += alpha4*(*v);
1069       y[7*(*idx)+4] += alpha5*(*v);
1070       y[7*(*idx)+5] += alpha6*(*v);
1071       y[7*(*idx)+6] += alpha7*(*v);
1072       idx++; v++;
1073     }
1074   }
1075   ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr);
1076   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1077   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7"
1083 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1084 {
1085   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1086   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1087   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1088   PetscErrorCode ierr;
1089   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1090   PetscInt       n,i,jrow,j;
1091 
1092   PetscFunctionBegin;
1093   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1094   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1095   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1096   idx  = a->j;
1097   v    = a->a;
1098   ii   = a->i;
1099 
1100   for (i=0; i<m; i++) {
1101     jrow = ii[i];
1102     n    = ii[i+1] - jrow;
1103     sum1  = 0.0;
1104     sum2  = 0.0;
1105     sum3  = 0.0;
1106     sum4  = 0.0;
1107     sum5  = 0.0;
1108     sum6  = 0.0;
1109     sum7  = 0.0;
1110     for (j=0; j<n; j++) {
1111       sum1 += v[jrow]*x[7*idx[jrow]];
1112       sum2 += v[jrow]*x[7*idx[jrow]+1];
1113       sum3 += v[jrow]*x[7*idx[jrow]+2];
1114       sum4 += v[jrow]*x[7*idx[jrow]+3];
1115       sum5 += v[jrow]*x[7*idx[jrow]+4];
1116       sum6 += v[jrow]*x[7*idx[jrow]+5];
1117       sum7 += v[jrow]*x[7*idx[jrow]+6];
1118       jrow++;
1119      }
1120     y[7*i]   += sum1;
1121     y[7*i+1] += sum2;
1122     y[7*i+2] += sum3;
1123     y[7*i+3] += sum4;
1124     y[7*i+4] += sum5;
1125     y[7*i+5] += sum6;
1126     y[7*i+6] += sum7;
1127   }
1128 
1129   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1130   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1131   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7"
1137 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1138 {
1139   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1140   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1141   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1142   PetscErrorCode ierr;
1143   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1144 
1145   PetscFunctionBegin;
1146   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1147   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1148   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1149   for (i=0; i<m; i++) {
1150     idx    = a->j + a->i[i] ;
1151     v      = a->a + a->i[i] ;
1152     n      = a->i[i+1] - a->i[i];
1153     alpha1 = x[7*i];
1154     alpha2 = x[7*i+1];
1155     alpha3 = x[7*i+2];
1156     alpha4 = x[7*i+3];
1157     alpha5 = x[7*i+4];
1158     alpha6 = x[7*i+5];
1159     alpha7 = x[7*i+6];
1160     while (n-->0) {
1161       y[7*(*idx)]   += alpha1*(*v);
1162       y[7*(*idx)+1] += alpha2*(*v);
1163       y[7*(*idx)+2] += alpha3*(*v);
1164       y[7*(*idx)+3] += alpha4*(*v);
1165       y[7*(*idx)+4] += alpha5*(*v);
1166       y[7*(*idx)+5] += alpha6*(*v);
1167       y[7*(*idx)+6] += alpha7*(*v);
1168       idx++; v++;
1169     }
1170   }
1171   ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr);
1172   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1173   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatMult_SeqMAIJ_8"
1179 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1180 {
1181   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1182   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1183   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1184   PetscErrorCode ierr;
1185   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1186   PetscInt       n,i,jrow,j;
1187 
1188   PetscFunctionBegin;
1189   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1190   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1191   idx  = a->j;
1192   v    = a->a;
1193   ii   = a->i;
1194 
1195   for (i=0; i<m; i++) {
1196     jrow = ii[i];
1197     n    = ii[i+1] - jrow;
1198     sum1  = 0.0;
1199     sum2  = 0.0;
1200     sum3  = 0.0;
1201     sum4  = 0.0;
1202     sum5  = 0.0;
1203     sum6  = 0.0;
1204     sum7  = 0.0;
1205     sum8  = 0.0;
1206     for (j=0; j<n; j++) {
1207       sum1 += v[jrow]*x[8*idx[jrow]];
1208       sum2 += v[jrow]*x[8*idx[jrow]+1];
1209       sum3 += v[jrow]*x[8*idx[jrow]+2];
1210       sum4 += v[jrow]*x[8*idx[jrow]+3];
1211       sum5 += v[jrow]*x[8*idx[jrow]+4];
1212       sum6 += v[jrow]*x[8*idx[jrow]+5];
1213       sum7 += v[jrow]*x[8*idx[jrow]+6];
1214       sum8 += v[jrow]*x[8*idx[jrow]+7];
1215       jrow++;
1216      }
1217     y[8*i]   = sum1;
1218     y[8*i+1] = sum2;
1219     y[8*i+2] = sum3;
1220     y[8*i+3] = sum4;
1221     y[8*i+4] = sum5;
1222     y[8*i+5] = sum6;
1223     y[8*i+6] = sum7;
1224     y[8*i+7] = sum8;
1225   }
1226 
1227   ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr);
1228   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1229   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8"
1235 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1236 {
1237   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1238   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1239   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1240   PetscErrorCode ierr;
1241   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1242 
1243   PetscFunctionBegin;
1244   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1245   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1246   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1247 
1248   for (i=0; i<m; i++) {
1249     idx    = a->j + a->i[i] ;
1250     v      = a->a + a->i[i] ;
1251     n      = a->i[i+1] - a->i[i];
1252     alpha1 = x[8*i];
1253     alpha2 = x[8*i+1];
1254     alpha3 = x[8*i+2];
1255     alpha4 = x[8*i+3];
1256     alpha5 = x[8*i+4];
1257     alpha6 = x[8*i+5];
1258     alpha7 = x[8*i+6];
1259     alpha8 = x[8*i+7];
1260     while (n-->0) {
1261       y[8*(*idx)]   += alpha1*(*v);
1262       y[8*(*idx)+1] += alpha2*(*v);
1263       y[8*(*idx)+2] += alpha3*(*v);
1264       y[8*(*idx)+3] += alpha4*(*v);
1265       y[8*(*idx)+4] += alpha5*(*v);
1266       y[8*(*idx)+5] += alpha6*(*v);
1267       y[8*(*idx)+6] += alpha7*(*v);
1268       y[8*(*idx)+7] += alpha8*(*v);
1269       idx++; v++;
1270     }
1271   }
1272   ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr);
1273   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1274   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8"
1280 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1281 {
1282   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1283   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1284   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1285   PetscErrorCode ierr;
1286   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1287   PetscInt       n,i,jrow,j;
1288 
1289   PetscFunctionBegin;
1290   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1291   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1292   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1293   idx  = a->j;
1294   v    = a->a;
1295   ii   = a->i;
1296 
1297   for (i=0; i<m; i++) {
1298     jrow = ii[i];
1299     n    = ii[i+1] - jrow;
1300     sum1  = 0.0;
1301     sum2  = 0.0;
1302     sum3  = 0.0;
1303     sum4  = 0.0;
1304     sum5  = 0.0;
1305     sum6  = 0.0;
1306     sum7  = 0.0;
1307     sum8  = 0.0;
1308     for (j=0; j<n; j++) {
1309       sum1 += v[jrow]*x[8*idx[jrow]];
1310       sum2 += v[jrow]*x[8*idx[jrow]+1];
1311       sum3 += v[jrow]*x[8*idx[jrow]+2];
1312       sum4 += v[jrow]*x[8*idx[jrow]+3];
1313       sum5 += v[jrow]*x[8*idx[jrow]+4];
1314       sum6 += v[jrow]*x[8*idx[jrow]+5];
1315       sum7 += v[jrow]*x[8*idx[jrow]+6];
1316       sum8 += v[jrow]*x[8*idx[jrow]+7];
1317       jrow++;
1318      }
1319     y[8*i]   += sum1;
1320     y[8*i+1] += sum2;
1321     y[8*i+2] += sum3;
1322     y[8*i+3] += sum4;
1323     y[8*i+4] += sum5;
1324     y[8*i+5] += sum6;
1325     y[8*i+6] += sum7;
1326     y[8*i+7] += sum8;
1327   }
1328 
1329   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1330   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1331   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8"
1337 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1338 {
1339   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1340   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1341   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1342   PetscErrorCode ierr;
1343   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1344 
1345   PetscFunctionBegin;
1346   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1348   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1349   for (i=0; i<m; i++) {
1350     idx    = a->j + a->i[i] ;
1351     v      = a->a + a->i[i] ;
1352     n      = a->i[i+1] - a->i[i];
1353     alpha1 = x[8*i];
1354     alpha2 = x[8*i+1];
1355     alpha3 = x[8*i+2];
1356     alpha4 = x[8*i+3];
1357     alpha5 = x[8*i+4];
1358     alpha6 = x[8*i+5];
1359     alpha7 = x[8*i+6];
1360     alpha8 = x[8*i+7];
1361     while (n-->0) {
1362       y[8*(*idx)]   += alpha1*(*v);
1363       y[8*(*idx)+1] += alpha2*(*v);
1364       y[8*(*idx)+2] += alpha3*(*v);
1365       y[8*(*idx)+3] += alpha4*(*v);
1366       y[8*(*idx)+4] += alpha5*(*v);
1367       y[8*(*idx)+5] += alpha6*(*v);
1368       y[8*(*idx)+6] += alpha7*(*v);
1369       y[8*(*idx)+7] += alpha8*(*v);
1370       idx++; v++;
1371     }
1372   }
1373   ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr);
1374   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1375   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 /* ------------------------------------------------------------------------------*/
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatMult_SeqMAIJ_9"
1382 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1383 {
1384   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1385   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1386   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1387   PetscErrorCode ierr;
1388   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1389   PetscInt       n,i,jrow,j;
1390 
1391   PetscFunctionBegin;
1392   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1393   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1394   idx  = a->j;
1395   v    = a->a;
1396   ii   = a->i;
1397 
1398   for (i=0; i<m; i++) {
1399     jrow = ii[i];
1400     n    = ii[i+1] - jrow;
1401     sum1  = 0.0;
1402     sum2  = 0.0;
1403     sum3  = 0.0;
1404     sum4  = 0.0;
1405     sum5  = 0.0;
1406     sum6  = 0.0;
1407     sum7  = 0.0;
1408     sum8  = 0.0;
1409     sum9  = 0.0;
1410     for (j=0; j<n; j++) {
1411       sum1 += v[jrow]*x[9*idx[jrow]];
1412       sum2 += v[jrow]*x[9*idx[jrow]+1];
1413       sum3 += v[jrow]*x[9*idx[jrow]+2];
1414       sum4 += v[jrow]*x[9*idx[jrow]+3];
1415       sum5 += v[jrow]*x[9*idx[jrow]+4];
1416       sum6 += v[jrow]*x[9*idx[jrow]+5];
1417       sum7 += v[jrow]*x[9*idx[jrow]+6];
1418       sum8 += v[jrow]*x[9*idx[jrow]+7];
1419       sum9 += v[jrow]*x[9*idx[jrow]+8];
1420       jrow++;
1421      }
1422     y[9*i]   = sum1;
1423     y[9*i+1] = sum2;
1424     y[9*i+2] = sum3;
1425     y[9*i+3] = sum4;
1426     y[9*i+4] = sum5;
1427     y[9*i+5] = sum6;
1428     y[9*i+6] = sum7;
1429     y[9*i+7] = sum8;
1430     y[9*i+8] = sum9;
1431   }
1432 
1433   ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr);
1434   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1435   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 /* ------------------------------------------------------------------------------*/
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9"
1443 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1444 {
1445   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1446   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1447   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1448   PetscErrorCode ierr;
1449   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1450 
1451   PetscFunctionBegin;
1452   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1453   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1454   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1455 
1456   for (i=0; i<m; i++) {
1457     idx    = a->j + a->i[i] ;
1458     v      = a->a + a->i[i] ;
1459     n      = a->i[i+1] - a->i[i];
1460     alpha1 = x[9*i];
1461     alpha2 = x[9*i+1];
1462     alpha3 = x[9*i+2];
1463     alpha4 = x[9*i+3];
1464     alpha5 = x[9*i+4];
1465     alpha6 = x[9*i+5];
1466     alpha7 = x[9*i+6];
1467     alpha8 = x[9*i+7];
1468     alpha9 = x[9*i+8];
1469     while (n-->0) {
1470       y[9*(*idx)]   += alpha1*(*v);
1471       y[9*(*idx)+1] += alpha2*(*v);
1472       y[9*(*idx)+2] += alpha3*(*v);
1473       y[9*(*idx)+3] += alpha4*(*v);
1474       y[9*(*idx)+4] += alpha5*(*v);
1475       y[9*(*idx)+5] += alpha6*(*v);
1476       y[9*(*idx)+6] += alpha7*(*v);
1477       y[9*(*idx)+7] += alpha8*(*v);
1478       y[9*(*idx)+8] += alpha9*(*v);
1479       idx++; v++;
1480     }
1481   }
1482   ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr);
1483   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1484   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9"
1490 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1491 {
1492   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1493   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1494   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1495   PetscErrorCode ierr;
1496   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1497   PetscInt       n,i,jrow,j;
1498 
1499   PetscFunctionBegin;
1500   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1501   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1502   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1503   idx  = a->j;
1504   v    = a->a;
1505   ii   = a->i;
1506 
1507   for (i=0; i<m; i++) {
1508     jrow = ii[i];
1509     n    = ii[i+1] - jrow;
1510     sum1  = 0.0;
1511     sum2  = 0.0;
1512     sum3  = 0.0;
1513     sum4  = 0.0;
1514     sum5  = 0.0;
1515     sum6  = 0.0;
1516     sum7  = 0.0;
1517     sum8  = 0.0;
1518     sum9  = 0.0;
1519     for (j=0; j<n; j++) {
1520       sum1 += v[jrow]*x[9*idx[jrow]];
1521       sum2 += v[jrow]*x[9*idx[jrow]+1];
1522       sum3 += v[jrow]*x[9*idx[jrow]+2];
1523       sum4 += v[jrow]*x[9*idx[jrow]+3];
1524       sum5 += v[jrow]*x[9*idx[jrow]+4];
1525       sum6 += v[jrow]*x[9*idx[jrow]+5];
1526       sum7 += v[jrow]*x[9*idx[jrow]+6];
1527       sum8 += v[jrow]*x[9*idx[jrow]+7];
1528       sum9 += v[jrow]*x[9*idx[jrow]+8];
1529       jrow++;
1530      }
1531     y[9*i]   += sum1;
1532     y[9*i+1] += sum2;
1533     y[9*i+2] += sum3;
1534     y[9*i+3] += sum4;
1535     y[9*i+4] += sum5;
1536     y[9*i+5] += sum6;
1537     y[9*i+6] += sum7;
1538     y[9*i+7] += sum8;
1539     y[9*i+8] += sum9;
1540   }
1541 
1542   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1543   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1544   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9"
1550 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1551 {
1552   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1553   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1554   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1555   PetscErrorCode ierr;
1556   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1557 
1558   PetscFunctionBegin;
1559   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1560   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1561   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1562   for (i=0; i<m; i++) {
1563     idx    = a->j + a->i[i] ;
1564     v      = a->a + a->i[i] ;
1565     n      = a->i[i+1] - a->i[i];
1566     alpha1 = x[9*i];
1567     alpha2 = x[9*i+1];
1568     alpha3 = x[9*i+2];
1569     alpha4 = x[9*i+3];
1570     alpha5 = x[9*i+4];
1571     alpha6 = x[9*i+5];
1572     alpha7 = x[9*i+6];
1573     alpha8 = x[9*i+7];
1574     alpha9 = x[9*i+8];
1575     while (n-->0) {
1576       y[9*(*idx)]   += alpha1*(*v);
1577       y[9*(*idx)+1] += alpha2*(*v);
1578       y[9*(*idx)+2] += alpha3*(*v);
1579       y[9*(*idx)+3] += alpha4*(*v);
1580       y[9*(*idx)+4] += alpha5*(*v);
1581       y[9*(*idx)+5] += alpha6*(*v);
1582       y[9*(*idx)+6] += alpha7*(*v);
1583       y[9*(*idx)+7] += alpha8*(*v);
1584       y[9*(*idx)+8] += alpha9*(*v);
1585       idx++; v++;
1586     }
1587   }
1588   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
1589   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1590   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1591   PetscFunctionReturn(0);
1592 }
1593 /*--------------------------------------------------------------------------------------------*/
1594 #undef __FUNCT__
1595 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1596 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1597 {
1598   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1599   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1600   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1601   PetscErrorCode ierr;
1602   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1603   PetscInt       n,i,jrow,j;
1604 
1605   PetscFunctionBegin;
1606   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1607   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1608   idx  = a->j;
1609   v    = a->a;
1610   ii   = a->i;
1611 
1612   for (i=0; i<m; i++) {
1613     jrow = ii[i];
1614     n    = ii[i+1] - jrow;
1615     sum1  = 0.0;
1616     sum2  = 0.0;
1617     sum3  = 0.0;
1618     sum4  = 0.0;
1619     sum5  = 0.0;
1620     sum6  = 0.0;
1621     sum7  = 0.0;
1622     sum8  = 0.0;
1623     sum9  = 0.0;
1624     sum10 = 0.0;
1625     for (j=0; j<n; j++) {
1626       sum1  += v[jrow]*x[10*idx[jrow]];
1627       sum2  += v[jrow]*x[10*idx[jrow]+1];
1628       sum3  += v[jrow]*x[10*idx[jrow]+2];
1629       sum4  += v[jrow]*x[10*idx[jrow]+3];
1630       sum5  += v[jrow]*x[10*idx[jrow]+4];
1631       sum6  += v[jrow]*x[10*idx[jrow]+5];
1632       sum7  += v[jrow]*x[10*idx[jrow]+6];
1633       sum8  += v[jrow]*x[10*idx[jrow]+7];
1634       sum9  += v[jrow]*x[10*idx[jrow]+8];
1635       sum10 += v[jrow]*x[10*idx[jrow]+9];
1636       jrow++;
1637      }
1638     y[10*i]   = sum1;
1639     y[10*i+1] = sum2;
1640     y[10*i+2] = sum3;
1641     y[10*i+3] = sum4;
1642     y[10*i+4] = sum5;
1643     y[10*i+5] = sum6;
1644     y[10*i+6] = sum7;
1645     y[10*i+7] = sum8;
1646     y[10*i+8] = sum9;
1647     y[10*i+9] = sum10;
1648   }
1649 
1650   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1651   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1652   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatMult_SeqMAIJ_10"
1658 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1659 {
1660   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1661   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1662   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1663   PetscErrorCode ierr;
1664   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1665   PetscInt       n,i,jrow,j;
1666 
1667   PetscFunctionBegin;
1668   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1669   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1670   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1671   idx  = a->j;
1672   v    = a->a;
1673   ii   = a->i;
1674 
1675   for (i=0; i<m; i++) {
1676     jrow = ii[i];
1677     n    = ii[i+1] - jrow;
1678     sum1  = 0.0;
1679     sum2  = 0.0;
1680     sum3  = 0.0;
1681     sum4  = 0.0;
1682     sum5  = 0.0;
1683     sum6  = 0.0;
1684     sum7  = 0.0;
1685     sum8  = 0.0;
1686     sum9  = 0.0;
1687     sum10 = 0.0;
1688     for (j=0; j<n; j++) {
1689       sum1  += v[jrow]*x[10*idx[jrow]];
1690       sum2  += v[jrow]*x[10*idx[jrow]+1];
1691       sum3  += v[jrow]*x[10*idx[jrow]+2];
1692       sum4  += v[jrow]*x[10*idx[jrow]+3];
1693       sum5  += v[jrow]*x[10*idx[jrow]+4];
1694       sum6  += v[jrow]*x[10*idx[jrow]+5];
1695       sum7  += v[jrow]*x[10*idx[jrow]+6];
1696       sum8  += v[jrow]*x[10*idx[jrow]+7];
1697       sum9  += v[jrow]*x[10*idx[jrow]+8];
1698       sum10 += v[jrow]*x[10*idx[jrow]+9];
1699       jrow++;
1700      }
1701     y[10*i]   += sum1;
1702     y[10*i+1] += sum2;
1703     y[10*i+2] += sum3;
1704     y[10*i+3] += sum4;
1705     y[10*i+4] += sum5;
1706     y[10*i+5] += sum6;
1707     y[10*i+6] += sum7;
1708     y[10*i+7] += sum8;
1709     y[10*i+8] += sum9;
1710     y[10*i+9] += sum10;
1711   }
1712 
1713   ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr);
1714   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1715   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10"
1721 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1722 {
1723   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1724   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1725   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1726   PetscErrorCode ierr;
1727   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1728 
1729   PetscFunctionBegin;
1730   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1731   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1732   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1733 
1734   for (i=0; i<m; i++) {
1735     idx    = a->j + a->i[i] ;
1736     v      = a->a + a->i[i] ;
1737     n      = a->i[i+1] - a->i[i];
1738     alpha1 = x[10*i];
1739     alpha2 = x[10*i+1];
1740     alpha3 = x[10*i+2];
1741     alpha4 = x[10*i+3];
1742     alpha5 = x[10*i+4];
1743     alpha6 = x[10*i+5];
1744     alpha7 = x[10*i+6];
1745     alpha8 = x[10*i+7];
1746     alpha9 = x[10*i+8];
1747     alpha10 = x[10*i+9];
1748     while (n-->0) {
1749       y[10*(*idx)]   += alpha1*(*v);
1750       y[10*(*idx)+1] += alpha2*(*v);
1751       y[10*(*idx)+2] += alpha3*(*v);
1752       y[10*(*idx)+3] += alpha4*(*v);
1753       y[10*(*idx)+4] += alpha5*(*v);
1754       y[10*(*idx)+5] += alpha6*(*v);
1755       y[10*(*idx)+6] += alpha7*(*v);
1756       y[10*(*idx)+7] += alpha8*(*v);
1757       y[10*(*idx)+8] += alpha9*(*v);
1758       y[10*(*idx)+9] += alpha10*(*v);
1759       idx++; v++;
1760     }
1761   }
1762   ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr);
1763   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1764   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10"
1770 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1771 {
1772   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1773   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1774   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1775   PetscErrorCode ierr;
1776   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1777 
1778   PetscFunctionBegin;
1779   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1780   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1781   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1782   for (i=0; i<m; i++) {
1783     idx    = a->j + a->i[i] ;
1784     v      = a->a + a->i[i] ;
1785     n      = a->i[i+1] - a->i[i];
1786     alpha1 = x[10*i];
1787     alpha2 = x[10*i+1];
1788     alpha3 = x[10*i+2];
1789     alpha4 = x[10*i+3];
1790     alpha5 = x[10*i+4];
1791     alpha6 = x[10*i+5];
1792     alpha7 = x[10*i+6];
1793     alpha8 = x[10*i+7];
1794     alpha9 = x[10*i+8];
1795     alpha10 = x[10*i+9];
1796     while (n-->0) {
1797       y[10*(*idx)]   += alpha1*(*v);
1798       y[10*(*idx)+1] += alpha2*(*v);
1799       y[10*(*idx)+2] += alpha3*(*v);
1800       y[10*(*idx)+3] += alpha4*(*v);
1801       y[10*(*idx)+4] += alpha5*(*v);
1802       y[10*(*idx)+5] += alpha6*(*v);
1803       y[10*(*idx)+6] += alpha7*(*v);
1804       y[10*(*idx)+7] += alpha8*(*v);
1805       y[10*(*idx)+8] += alpha9*(*v);
1806       y[10*(*idx)+9] += alpha10*(*v);
1807       idx++; v++;
1808     }
1809   }
1810   ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr);
1811   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1812   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 
1817 /*--------------------------------------------------------------------------------------------*/
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatMult_SeqMAIJ_16"
1820 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1821 {
1822   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1823   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1824   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1825   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1826   PetscErrorCode ierr;
1827   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1828   PetscInt       n,i,jrow,j;
1829 
1830   PetscFunctionBegin;
1831   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1832   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1833   idx  = a->j;
1834   v    = a->a;
1835   ii   = a->i;
1836 
1837   for (i=0; i<m; i++) {
1838     jrow = ii[i];
1839     n    = ii[i+1] - jrow;
1840     sum1  = 0.0;
1841     sum2  = 0.0;
1842     sum3  = 0.0;
1843     sum4  = 0.0;
1844     sum5  = 0.0;
1845     sum6  = 0.0;
1846     sum7  = 0.0;
1847     sum8  = 0.0;
1848     sum9  = 0.0;
1849     sum10 = 0.0;
1850     sum11 = 0.0;
1851     sum12 = 0.0;
1852     sum13 = 0.0;
1853     sum14 = 0.0;
1854     sum15 = 0.0;
1855     sum16 = 0.0;
1856     for (j=0; j<n; j++) {
1857       sum1  += v[jrow]*x[16*idx[jrow]];
1858       sum2  += v[jrow]*x[16*idx[jrow]+1];
1859       sum3  += v[jrow]*x[16*idx[jrow]+2];
1860       sum4  += v[jrow]*x[16*idx[jrow]+3];
1861       sum5  += v[jrow]*x[16*idx[jrow]+4];
1862       sum6  += v[jrow]*x[16*idx[jrow]+5];
1863       sum7  += v[jrow]*x[16*idx[jrow]+6];
1864       sum8  += v[jrow]*x[16*idx[jrow]+7];
1865       sum9  += v[jrow]*x[16*idx[jrow]+8];
1866       sum10 += v[jrow]*x[16*idx[jrow]+9];
1867       sum11 += v[jrow]*x[16*idx[jrow]+10];
1868       sum12 += v[jrow]*x[16*idx[jrow]+11];
1869       sum13 += v[jrow]*x[16*idx[jrow]+12];
1870       sum14 += v[jrow]*x[16*idx[jrow]+13];
1871       sum15 += v[jrow]*x[16*idx[jrow]+14];
1872       sum16 += v[jrow]*x[16*idx[jrow]+15];
1873       jrow++;
1874      }
1875     y[16*i]    = sum1;
1876     y[16*i+1]  = sum2;
1877     y[16*i+2]  = sum3;
1878     y[16*i+3]  = sum4;
1879     y[16*i+4]  = sum5;
1880     y[16*i+5]  = sum6;
1881     y[16*i+6]  = sum7;
1882     y[16*i+7]  = sum8;
1883     y[16*i+8]  = sum9;
1884     y[16*i+9]  = sum10;
1885     y[16*i+10] = sum11;
1886     y[16*i+11] = sum12;
1887     y[16*i+12] = sum13;
1888     y[16*i+13] = sum14;
1889     y[16*i+14] = sum15;
1890     y[16*i+15] = sum16;
1891   }
1892 
1893   ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr);
1894   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1895   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16"
1901 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1902 {
1903   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1904   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1905   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1906   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1907   PetscErrorCode ierr;
1908   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
1909 
1910   PetscFunctionBegin;
1911   ierr = VecSet(yy,zero);CHKERRQ(ierr);
1912   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1913   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1914 
1915   for (i=0; i<m; i++) {
1916     idx    = a->j + a->i[i] ;
1917     v      = a->a + a->i[i] ;
1918     n      = a->i[i+1] - a->i[i];
1919     alpha1  = x[16*i];
1920     alpha2  = x[16*i+1];
1921     alpha3  = x[16*i+2];
1922     alpha4  = x[16*i+3];
1923     alpha5  = x[16*i+4];
1924     alpha6  = x[16*i+5];
1925     alpha7  = x[16*i+6];
1926     alpha8  = x[16*i+7];
1927     alpha9  = x[16*i+8];
1928     alpha10 = x[16*i+9];
1929     alpha11 = x[16*i+10];
1930     alpha12 = x[16*i+11];
1931     alpha13 = x[16*i+12];
1932     alpha14 = x[16*i+13];
1933     alpha15 = x[16*i+14];
1934     alpha16 = x[16*i+15];
1935     while (n-->0) {
1936       y[16*(*idx)]    += alpha1*(*v);
1937       y[16*(*idx)+1]  += alpha2*(*v);
1938       y[16*(*idx)+2]  += alpha3*(*v);
1939       y[16*(*idx)+3]  += alpha4*(*v);
1940       y[16*(*idx)+4]  += alpha5*(*v);
1941       y[16*(*idx)+5]  += alpha6*(*v);
1942       y[16*(*idx)+6]  += alpha7*(*v);
1943       y[16*(*idx)+7]  += alpha8*(*v);
1944       y[16*(*idx)+8]  += alpha9*(*v);
1945       y[16*(*idx)+9]  += alpha10*(*v);
1946       y[16*(*idx)+10] += alpha11*(*v);
1947       y[16*(*idx)+11] += alpha12*(*v);
1948       y[16*(*idx)+12] += alpha13*(*v);
1949       y[16*(*idx)+13] += alpha14*(*v);
1950       y[16*(*idx)+14] += alpha15*(*v);
1951       y[16*(*idx)+15] += alpha16*(*v);
1952       idx++; v++;
1953     }
1954   }
1955   ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr);
1956   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1957   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16"
1963 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1964 {
1965   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1966   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1967   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1968   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1969   PetscErrorCode ierr;
1970   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1971   PetscInt       n,i,jrow,j;
1972 
1973   PetscFunctionBegin;
1974   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
1975   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1976   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
1977   idx  = a->j;
1978   v    = a->a;
1979   ii   = a->i;
1980 
1981   for (i=0; i<m; i++) {
1982     jrow = ii[i];
1983     n    = ii[i+1] - jrow;
1984     sum1  = 0.0;
1985     sum2  = 0.0;
1986     sum3  = 0.0;
1987     sum4  = 0.0;
1988     sum5  = 0.0;
1989     sum6  = 0.0;
1990     sum7  = 0.0;
1991     sum8  = 0.0;
1992     sum9  = 0.0;
1993     sum10 = 0.0;
1994     sum11 = 0.0;
1995     sum12 = 0.0;
1996     sum13 = 0.0;
1997     sum14 = 0.0;
1998     sum15 = 0.0;
1999     sum16 = 0.0;
2000     for (j=0; j<n; j++) {
2001       sum1  += v[jrow]*x[16*idx[jrow]];
2002       sum2  += v[jrow]*x[16*idx[jrow]+1];
2003       sum3  += v[jrow]*x[16*idx[jrow]+2];
2004       sum4  += v[jrow]*x[16*idx[jrow]+3];
2005       sum5  += v[jrow]*x[16*idx[jrow]+4];
2006       sum6  += v[jrow]*x[16*idx[jrow]+5];
2007       sum7  += v[jrow]*x[16*idx[jrow]+6];
2008       sum8  += v[jrow]*x[16*idx[jrow]+7];
2009       sum9  += v[jrow]*x[16*idx[jrow]+8];
2010       sum10 += v[jrow]*x[16*idx[jrow]+9];
2011       sum11 += v[jrow]*x[16*idx[jrow]+10];
2012       sum12 += v[jrow]*x[16*idx[jrow]+11];
2013       sum13 += v[jrow]*x[16*idx[jrow]+12];
2014       sum14 += v[jrow]*x[16*idx[jrow]+13];
2015       sum15 += v[jrow]*x[16*idx[jrow]+14];
2016       sum16 += v[jrow]*x[16*idx[jrow]+15];
2017       jrow++;
2018      }
2019     y[16*i]    += sum1;
2020     y[16*i+1]  += sum2;
2021     y[16*i+2]  += sum3;
2022     y[16*i+3]  += sum4;
2023     y[16*i+4]  += sum5;
2024     y[16*i+5]  += sum6;
2025     y[16*i+6]  += sum7;
2026     y[16*i+7]  += sum8;
2027     y[16*i+8]  += sum9;
2028     y[16*i+9]  += sum10;
2029     y[16*i+10] += sum11;
2030     y[16*i+11] += sum12;
2031     y[16*i+12] += sum13;
2032     y[16*i+13] += sum14;
2033     y[16*i+14] += sum15;
2034     y[16*i+15] += sum16;
2035   }
2036 
2037   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2038   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2039   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 #undef __FUNCT__
2044 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16"
2045 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2046 {
2047   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2048   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2049   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2050   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2051   PetscErrorCode ierr;
2052   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;
2053 
2054   PetscFunctionBegin;
2055   if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);}
2056   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2057   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
2058   for (i=0; i<m; i++) {
2059     idx    = a->j + a->i[i] ;
2060     v      = a->a + a->i[i] ;
2061     n      = a->i[i+1] - a->i[i];
2062     alpha1 = x[16*i];
2063     alpha2 = x[16*i+1];
2064     alpha3 = x[16*i+2];
2065     alpha4 = x[16*i+3];
2066     alpha5 = x[16*i+4];
2067     alpha6 = x[16*i+5];
2068     alpha7 = x[16*i+6];
2069     alpha8 = x[16*i+7];
2070     alpha9  = x[16*i+8];
2071     alpha10 = x[16*i+9];
2072     alpha11 = x[16*i+10];
2073     alpha12 = x[16*i+11];
2074     alpha13 = x[16*i+12];
2075     alpha14 = x[16*i+13];
2076     alpha15 = x[16*i+14];
2077     alpha16 = x[16*i+15];
2078     while (n-->0) {
2079       y[16*(*idx)]   += alpha1*(*v);
2080       y[16*(*idx)+1] += alpha2*(*v);
2081       y[16*(*idx)+2] += alpha3*(*v);
2082       y[16*(*idx)+3] += alpha4*(*v);
2083       y[16*(*idx)+4] += alpha5*(*v);
2084       y[16*(*idx)+5] += alpha6*(*v);
2085       y[16*(*idx)+6] += alpha7*(*v);
2086       y[16*(*idx)+7] += alpha8*(*v);
2087       y[16*(*idx)+8]  += alpha9*(*v);
2088       y[16*(*idx)+9]  += alpha10*(*v);
2089       y[16*(*idx)+10] += alpha11*(*v);
2090       y[16*(*idx)+11] += alpha12*(*v);
2091       y[16*(*idx)+12] += alpha13*(*v);
2092       y[16*(*idx)+13] += alpha14*(*v);
2093       y[16*(*idx)+14] += alpha15*(*v);
2094       y[16*(*idx)+15] += alpha16*(*v);
2095       idx++; v++;
2096     }
2097   }
2098   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
2099   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2100   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 /*===================================================================================*/
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatMult_MPIMAIJ_dof"
2107 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2108 {
2109   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2110   PetscErrorCode ierr;
2111 
2112   PetscFunctionBegin;
2113   /* start the scatter */
2114   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2115   ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr);
2116   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2117   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr);
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof"
2123 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2124 {
2125   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2130   ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr);
2131   ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2132   ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof"
2138 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2139 {
2140   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   /* start the scatter */
2145   ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2146   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2147   ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr);
2148   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof"
2154 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2155 {
2156   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;
2157   PetscErrorCode ierr;
2158 
2159   PetscFunctionBegin;
2160   ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr);
2161   ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2162   ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr);
2163   ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr);
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
2169 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2170 {
2171   /* This routine requires testing -- but it's getting better. */
2172   PetscErrorCode     ierr;
2173   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2174   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2175   Mat                P=pp->AIJ;
2176   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2177   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2178   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2179   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2180   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2181   MatScalar          *ca;
2182 
2183   PetscFunctionBegin;
2184   /* Start timer */
2185   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2186 
2187   /* Get ij structure of P^T */
2188   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2189 
2190   cn = pn*ppdof;
2191   /* Allocate ci array, arrays for fill computation and */
2192   /* free space for accumulating nonzero column info */
2193   ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
2194   ci[0] = 0;
2195 
2196   /* Work arrays for rows of P^T*A */
2197   ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
2198   ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
2199   ptasparserow = ptadenserow  + an;
2200   denserow     = ptasparserow + an;
2201   sparserow    = denserow     + cn;
2202 
2203   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2204   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2205   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2206   ierr          = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2207   current_space = free_space;
2208 
2209   /* Determine symbolic info for each row of C: */
2210   for (i=0;i<pn;i++) {
2211     ptnzi  = pti[i+1] - pti[i];
2212     ptJ    = ptj + pti[i];
2213     for (dof=0;dof<ppdof;dof++) {
2214       ptanzi = 0;
2215       /* Determine symbolic row of PtA: */
2216       for (j=0;j<ptnzi;j++) {
2217         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2218         arow = ptJ[j]*ppdof + dof;
2219         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2220         anzj = ai[arow+1] - ai[arow];
2221         ajj  = aj + ai[arow];
2222         for (k=0;k<anzj;k++) {
2223           if (!ptadenserow[ajj[k]]) {
2224             ptadenserow[ajj[k]]    = -1;
2225             ptasparserow[ptanzi++] = ajj[k];
2226           }
2227         }
2228       }
2229       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2230       ptaj = ptasparserow;
2231       cnzi   = 0;
2232       for (j=0;j<ptanzi;j++) {
2233         /* Get offset within block of P */
2234         pshift = *ptaj%ppdof;
2235         /* Get block row of P */
2236         prow = (*ptaj++)/ppdof; /* integer division */
2237         /* P has same number of nonzeros per row as the compressed form */
2238         pnzj = pi[prow+1] - pi[prow];
2239         pjj  = pj + pi[prow];
2240         for (k=0;k<pnzj;k++) {
2241           /* Locations in C are shifted by the offset within the block */
2242           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2243           if (!denserow[pjj[k]*ppdof+pshift]) {
2244             denserow[pjj[k]*ppdof+pshift] = -1;
2245             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2246           }
2247         }
2248       }
2249 
2250       /* sort sparserow */
2251       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
2252 
2253       /* If free space is not available, make more free space */
2254       /* Double the amount of total space in the list */
2255       if (current_space->local_remaining<cnzi) {
2256         ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
2257       }
2258 
2259       /* Copy data into free space, and zero out denserows */
2260       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
2261       current_space->array           += cnzi;
2262       current_space->local_used      += cnzi;
2263       current_space->local_remaining -= cnzi;
2264 
2265       for (j=0;j<ptanzi;j++) {
2266         ptadenserow[ptasparserow[j]] = 0;
2267       }
2268       for (j=0;j<cnzi;j++) {
2269         denserow[sparserow[j]] = 0;
2270       }
2271       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2272       /*        For now, we will recompute what is needed. */
2273       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2274     }
2275   }
2276   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2277   /* Allocate space for cj, initialize cj, and */
2278   /* destroy list of free space and other temporary array(s) */
2279   ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
2280   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
2281   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
2282 
2283   /* Allocate space for ca */
2284   ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
2285   ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
2286 
2287   /* put together the new matrix */
2288   ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr);
2289 
2290   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2291   /* Since these are PETSc arrays, change flags to free them as necessary. */
2292   c          = (Mat_SeqAIJ *)((*C)->data);
2293   c->free_a  = PETSC_TRUE;
2294   c->free_ij = PETSC_TRUE;
2295   c->nonew   = 0;
2296 
2297   /* Clean up. */
2298   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
2299 
2300   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ"
2306 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2307 {
2308   /* This routine requires testing -- first draft only */
2309   PetscErrorCode ierr;
2310   PetscInt       flops=0;
2311   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2312   Mat            P=pp->AIJ;
2313   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2314   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2315   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2316   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2317   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2318   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2319   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2320   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2321 
2322   PetscFunctionBegin;
2323   /* Allocate temporary array for storage of one row of A*P */
2324   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
2325   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
2326 
2327   apj      = (PetscInt *)(apa + cn);
2328   apjdense = apj + cn;
2329 
2330   /* Clear old values in C */
2331   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
2332 
2333   for (i=0;i<am;i++) {
2334     /* Form sparse row of A*P */
2335     anzi  = ai[i+1] - ai[i];
2336     apnzj = 0;
2337     for (j=0;j<anzi;j++) {
2338       /* Get offset within block of P */
2339       pshift = *aj%ppdof;
2340       /* Get block row of P */
2341       prow   = *aj++/ppdof; /* integer division */
2342       pnzj = pi[prow+1] - pi[prow];
2343       pjj  = pj + pi[prow];
2344       paj  = pa + pi[prow];
2345       for (k=0;k<pnzj;k++) {
2346         poffset = pjj[k]*ppdof+pshift;
2347         if (!apjdense[poffset]) {
2348           apjdense[poffset] = -1;
2349           apj[apnzj++]      = poffset;
2350         }
2351         apa[poffset] += (*aa)*paj[k];
2352       }
2353       flops += 2*pnzj;
2354       aa++;
2355     }
2356 
2357     /* Sort the j index array for quick sparse axpy. */
2358     /* Note: a array does not need sorting as it is in dense storage locations. */
2359     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
2360 
2361     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2362     prow    = i/ppdof; /* integer division */
2363     pshift  = i%ppdof;
2364     poffset = pi[prow];
2365     pnzi = pi[prow+1] - poffset;
2366     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2367     pJ   = pj+poffset;
2368     pA   = pa+poffset;
2369     for (j=0;j<pnzi;j++) {
2370       crow   = (*pJ)*ppdof+pshift;
2371       cjj    = cj + ci[crow];
2372       caj    = ca + ci[crow];
2373       pJ++;
2374       /* Perform sparse axpy operation.  Note cjj includes apj. */
2375       for (k=0,nextap=0;nextap<apnzj;k++) {
2376         if (cjj[k]==apj[nextap]) {
2377           caj[k] += (*pA)*apa[apj[nextap++]];
2378         }
2379       }
2380       flops += 2*apnzj;
2381       pA++;
2382     }
2383 
2384     /* Zero the current row info for A*P */
2385     for (j=0;j<apnzj;j++) {
2386       apa[apj[j]]      = 0.;
2387       apjdense[apj[j]] = 0;
2388     }
2389   }
2390 
2391   /* Assemble the final matrix and clean up */
2392   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394   ierr = PetscFree(apa);CHKERRQ(ierr);
2395   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
2396 
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 EXTERN_C_BEGIN
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ"
2403 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2404 {
2405   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2406   Mat               a = b->AIJ,B;
2407   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2408   PetscErrorCode    ierr;
2409   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2410   PetscInt          *cols;
2411   PetscScalar       *vals;
2412 
2413   PetscFunctionBegin;
2414   ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr);
2415   ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr);
2416   for (i=0; i<m; i++) {
2417     nmax = PetscMax(nmax,aij->ilen[i]);
2418     for (j=0; j<dof; j++) {
2419       ilen[dof*i+j] = aij->ilen[i];
2420     }
2421   }
2422   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr);
2423   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2424   ierr = PetscFree(ilen);CHKERRQ(ierr);
2425   ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr);
2426   ii   = 0;
2427   for (i=0; i<m; i++) {
2428     ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2429     for (j=0; j<dof; j++) {
2430       for (k=0; k<ncols; k++) {
2431         icols[k] = dof*cols[k]+j;
2432       }
2433       ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2434       ii++;
2435     }
2436     ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2437   }
2438   ierr = PetscFree(icols);CHKERRQ(ierr);
2439   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2440   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2441 
2442   if (reuse == MAT_REUSE_MATRIX) {
2443     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2444   } else {
2445     *newmat = B;
2446   }
2447   PetscFunctionReturn(0);
2448 }
2449 EXTERN_C_END
2450 
2451 #include "src/mat/impls/aij/mpi/mpiaij.h"
2452 
2453 EXTERN_C_BEGIN
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ"
2456 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2457 {
2458   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2459   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2460   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2461   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2462   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2463   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2464   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2465   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2466   PetscInt          rstart,cstart,*garray,ii,k;
2467   PetscErrorCode    ierr;
2468   PetscScalar       *vals,*ovals;
2469 
2470   PetscFunctionBegin;
2471   ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr);
2472   for (i=0; i<A->rmap.n/dof; i++) {
2473     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2474     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2475     for (j=0; j<dof; j++) {
2476       dnz[dof*i+j] = AIJ->ilen[i];
2477       onz[dof*i+j] = OAIJ->ilen[i];
2478     }
2479   }
2480   ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr);
2481   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2482   ierr = PetscFree2(dnz,onz);CHKERRQ(ierr);
2483 
2484   ierr   = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr);
2485   rstart = dof*maij->A->rmap.rstart;
2486   cstart = dof*maij->A->cmap.rstart;
2487   garray = mpiaij->garray;
2488 
2489   ii = rstart;
2490   for (i=0; i<A->rmap.n/dof; i++) {
2491     ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2492     ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2493     for (j=0; j<dof; j++) {
2494       for (k=0; k<ncols; k++) {
2495         icols[k] = cstart + dof*cols[k]+j;
2496       }
2497       for (k=0; k<oncols; k++) {
2498         oicols[k] = dof*garray[ocols[k]]+j;
2499       }
2500       ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr);
2501       ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr);
2502       ii++;
2503     }
2504     ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr);
2505     ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr);
2506   }
2507   ierr = PetscFree2(icols,oicols);CHKERRQ(ierr);
2508 
2509   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2510   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2511 
2512   if (reuse == MAT_REUSE_MATRIX) {
2513     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2514   } else {
2515     *newmat = B;
2516   }
2517   PetscFunctionReturn(0);
2518 }
2519 EXTERN_C_END
2520 
2521 
2522 /* ---------------------------------------------------------------------------------- */
2523 /*MC
2524   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2525   operations for multicomponent problems.  It interpolates each component the same
2526   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2527   and MATMPIAIJ for distributed matrices.
2528 
2529   Operations provided:
2530 + MatMult
2531 . MatMultTranspose
2532 . MatMultAdd
2533 . MatMultTransposeAdd
2534 - MatView
2535 
2536   Level: advanced
2537 
2538 M*/
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatCreateMAIJ"
2541 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2542 {
2543   PetscErrorCode ierr;
2544   PetscMPIInt    size;
2545   PetscInt       n;
2546   Mat_MPIMAIJ    *b;
2547   Mat            B;
2548 
2549   PetscFunctionBegin;
2550   ierr   = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
2551 
2552   if (dof == 1) {
2553     *maij = A;
2554   } else {
2555     ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
2556     ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr);
2557     B->assembled    = PETSC_TRUE;
2558 
2559     ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2560     if (size == 1) {
2561       ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr);
2562       B->ops->destroy = MatDestroy_SeqMAIJ;
2563       B->ops->view    = MatView_SeqMAIJ;
2564       b      = (Mat_MPIMAIJ*)B->data;
2565       b->dof = dof;
2566       b->AIJ = A;
2567       if (dof == 2) {
2568         B->ops->mult             = MatMult_SeqMAIJ_2;
2569         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2570         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2571         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2572       } else if (dof == 3) {
2573         B->ops->mult             = MatMult_SeqMAIJ_3;
2574         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2575         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2576         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2577       } else if (dof == 4) {
2578         B->ops->mult             = MatMult_SeqMAIJ_4;
2579         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2580         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2581         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2582       } else if (dof == 5) {
2583         B->ops->mult             = MatMult_SeqMAIJ_5;
2584         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2585         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2586         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2587       } else if (dof == 6) {
2588         B->ops->mult             = MatMult_SeqMAIJ_6;
2589         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2590         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2591         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2592       } else if (dof == 7) {
2593         B->ops->mult             = MatMult_SeqMAIJ_7;
2594         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2595         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2596         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2597       } else if (dof == 8) {
2598         B->ops->mult             = MatMult_SeqMAIJ_8;
2599         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2600         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2601         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2602       } else if (dof == 9) {
2603         B->ops->mult             = MatMult_SeqMAIJ_9;
2604         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2605         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2606         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2607       } else if (dof == 10) {
2608         B->ops->mult             = MatMult_SeqMAIJ_10;
2609         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2610         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2611         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2612       } else if (dof == 16) {
2613         B->ops->mult             = MatMult_SeqMAIJ_16;
2614         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2615         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2616         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2617       } else {
2618         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2619       }
2620       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2621       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2622       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr);
2623     } else {
2624       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2625       IS         from,to;
2626       Vec        gvec;
2627       PetscInt   *garray,i;
2628 
2629       ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr);
2630       B->ops->destroy = MatDestroy_MPIMAIJ;
2631       B->ops->view    = MatView_MPIMAIJ;
2632       b      = (Mat_MPIMAIJ*)B->data;
2633       b->dof = dof;
2634       b->A   = A;
2635       ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr);
2636       ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr);
2637 
2638       ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
2639       ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr);
2640       ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr);
2641 
2642       /* create two temporary Index sets for build scatter gather */
2643       ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2644       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2645       ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr);
2646       ierr = PetscFree(garray);CHKERRQ(ierr);
2647       ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr);
2648 
2649       /* create temporary global vector to generate scatter context */
2650       ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr);
2651       ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr);
2652 
2653       /* generate the scatter context */
2654       ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr);
2655 
2656       ierr = ISDestroy(from);CHKERRQ(ierr);
2657       ierr = ISDestroy(to);CHKERRQ(ierr);
2658       ierr = VecDestroy(gvec);CHKERRQ(ierr);
2659 
2660       B->ops->mult             = MatMult_MPIMAIJ_dof;
2661       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2662       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2663       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2664       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr);
2665     }
2666     *maij = B;
2667     ierr = MatView_Private(B);CHKERRQ(ierr);
2668   }
2669   PetscFunctionReturn(0);
2670 }
2671 
2672 
2673 
2674 
2675 
2676 
2677 
2678 
2679 
2680 
2681 
2682 
2683